i965: Use GTT maps when available to upload vertex arrays and system VBOs.
author: Eric Anholt <eric@anholt.net>
Mon, 6 Apr 2009 16:38:16 +0000 (09:38 -0700)
committer: Eric Anholt <eric@anholt.net>
Mon, 6 Apr 2009 17:58:52 +0000 (10:58 -0700)
This speeds up OA on my GM45 by 21% (more than the original CPU cost of
the upload path).  We might still be able to squeeze a few more percent out
by avoiding repeatedly mapping/unmapping buffers as we upload elements into
them.

src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/intel/intel_screen.c
src/mesa/drivers/dri/intel/intel_screen.h

index 02998d595715613a8d4b48efd0a807bd91f79b45..b91b20bec6f09908e78dc365e2429ea99ffc855f 100644 (file)
@@ -277,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw,
                         struct brw_vertex_element *element,
                         GLuint dst_stride)
 {
+   struct intel_context *intel = &brw->intel;
    GLuint size = element->count * dst_stride;
 
    get_space(brw, size, &element->bo, &element->offset);
@@ -289,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw,
    }
 
    if (dst_stride == element->glarray->StrideB) {
-      dri_bo_subdata(element->bo,
-                    element->offset,
-                    size,
-                    element->glarray->Ptr);
+      if (intel->intelScreen->kernel_exec_fencing) {
+        drm_intel_gem_bo_map_gtt(element->bo);
+        memcpy((char *)element->bo->virtual + element->offset,
+               element->glarray->Ptr, size);
+        drm_intel_gem_bo_unmap_gtt(element->bo);
+      } else {
+        dri_bo_subdata(element->bo,
+                       element->offset,
+                       size,
+                       element->glarray->Ptr);
+      }
    } else {
-      void *data;
       char *dest;
       const unsigned char *src = element->glarray->Ptr;
       int i;
 
-      data = _mesa_malloc(dst_stride * element->count);
-      dest = data;
-      for (i = 0; i < element->count; i++) {
-        memcpy(dest, src, dst_stride);
-        src += element->glarray->StrideB;
-        dest += dst_stride;
-      }
+      if (intel->intelScreen->kernel_exec_fencing) {
+        drm_intel_gem_bo_map_gtt(element->bo);
+        dest = element->bo->virtual;
+        dest += element->offset;
 
-      dri_bo_subdata(element->bo,
-                    element->offset,
-                    size,
-                    data);
-      _mesa_free(data);
+        for (i = 0; i < element->count; i++) {
+           memcpy(dest, src, dst_stride);
+           src += element->glarray->StrideB;
+           dest += dst_stride;
+        }
+
+        drm_intel_gem_bo_unmap_gtt(element->bo);
+      } else {
+        void *data;
+
+        data = _mesa_malloc(dst_stride * element->count);
+        dest = data;
+        for (i = 0; i < element->count; i++) {
+           memcpy(dest, src, dst_stride);
+           src += element->glarray->StrideB;
+           dest += dst_stride;
+        }
+
+        dri_bo_subdata(element->bo,
+                       element->offset,
+                       size,
+                       data);
+
+        _mesa_free(data);
+      }
    }
 }
 
@@ -563,7 +587,13 @@ static void brw_prepare_indices(struct brw_context *brw)
 
       /* Straight upload
        */
-      dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+      if (intel->intelScreen->kernel_exec_fencing) {
+        drm_intel_gem_bo_map_gtt(bo);
+        memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+        drm_intel_gem_bo_unmap_gtt(bo);
+      } else {
+        dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+      }
    } else {
       offset = (GLuint) (unsigned long) index_buffer->ptr;
 
index 752195aa35c485152ca23591a667388cceb1d945..65e62947ef6abe1eaf8f93348b30a670e9c639c2 100644 (file)
@@ -563,6 +563,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
    GLboolean gem_supported;
    struct drm_i915_getparam gp;
    __DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
+   int num_fences;
 
    intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
@@ -613,6 +614,11 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
                                &intelScreen->sarea->last_dispatch);
    }
 
+   if (intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences))
+      intelScreen->kernel_exec_fencing = !!num_fences;
+   else
+      intelScreen->kernel_exec_fencing = GL_FALSE;
+
    return GL_TRUE;
 }
 
index e1036de4db8aecc8367a250a685428c887774b4d..a9b9e109a6aeec05838e0900471dd248c466f192 100644 (file)
@@ -79,6 +79,7 @@ typedef struct
    GLboolean no_vbo;
    int ttm;
    dri_bufmgr *bufmgr;
+   GLboolean kernel_exec_fencing;
 
    /**
    * Configuration cache with default values for all contexts