i965: Avoid re-uploading the index buffer when we don't need to.
author: Eric Anholt <eric@anholt.net>
Tue, 11 Aug 2009 21:48:03 +0000 (14:48 -0700)
committer: Eric Anholt <eric@anholt.net>
Wed, 12 Aug 2009 19:43:42 +0000 (12:43 -0700)
No performance difference proven at 95% confidence with my GLSL demo (n=10).

src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c

index 00d5980dd08eb0277b77b1d46dd13c297461e553..847c44ed83ac3cf1867fb211c4ae9a4810975b2b 100644 (file)
@@ -143,6 +143,7 @@ struct brw_context;
 #define BRW_NEW_DEPTH_BUFFER           0x20000
 #define BRW_NEW_NR_WM_SURFACES         0x40000
 #define BRW_NEW_NR_VS_SURFACES         0x80000
+#define BRW_NEW_INDEX_BUFFER           0x100000
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -505,8 +506,15 @@ struct brw_context
        */
       const struct _mesa_index_buffer *ib;
 
+      /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
       dri_bo *bo;
       unsigned int offset;
+      unsigned int size;
+      /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+       * avoid re-uploading the IB packet over and over if we're actually
+       * referencing the same index buffer.
+       */
+      unsigned int start_vertex_offset;
    } ib;
 
    /* Active vertex program: 
index 8c94c904c1f11d6b16c56ae59aef13b3263ba746..682094ff139aaa1025c99fbf63a3ceaceb71133f 100644 (file)
@@ -141,6 +141,8 @@ static void brw_emit_prim(struct brw_context *brw,
 
    prim_packet.verts_per_instance = trim(prim->mode, prim->count);
    prim_packet.start_vert_location = prim->start;
+   if (prim->indexed)
+      prim_packet.start_vert_location += brw->ib.start_vertex_offset;
    prim_packet.instance_count = 1;
    prim_packet.start_instance_location = 0;
    prim_packet.base_vert_location = 0;
index 4bdb37349b79621361da5082b82907e5479d4d93..ab6b62812f1efbe2febbae799c2d6426607da4f2 100644 (file)
@@ -612,17 +612,20 @@ static void brw_prepare_indices(struct brw_context *brw)
    dri_bo *bo = NULL;
    struct gl_buffer_object *bufferobj;
    GLuint offset;
+   GLuint ib_type_size;
 
    if (index_buffer == NULL)
       return;
 
-   ib_size = get_size(index_buffer->type) * index_buffer->count;
+   ib_type_size = get_size(index_buffer->type);
+   ib_size = ib_type_size * index_buffer->count;
    bufferobj = index_buffer->obj;;
 
    /* Turn into a proper VBO:
     */
    if (!bufferobj->Name) {
-     
+      brw->ib.start_vertex_offset = 0;
+
       /* Get new bufferobj, offset:
        */
       get_space(brw, ib_size, &bo, &offset);
@@ -638,6 +641,7 @@ static void brw_prepare_indices(struct brw_context *brw)
       }
    } else {
       offset = (GLuint) (unsigned long) index_buffer->ptr;
+      brw->ib.start_vertex_offset = 0;
 
       /* If the index buffer isn't aligned to its element size, we have to
        * rebase it into a temporary.
@@ -658,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw)
          bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
                                      INTEL_READ);
          dri_bo_reference(bo);
+
+         /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
+          * the index buffer state when we're just moving the start index
+          * of our drawing.
+          */
+         brw->ib.start_vertex_offset = offset / ib_type_size;
+         offset = 0;
+         ib_size = bo->size;
        }
    }
 
-   dri_bo_unreference(brw->ib.bo);
-   brw->ib.bo = bo;
-   brw->ib.offset = offset;
+   if (brw->ib.bo != bo ||
+       brw->ib.offset != offset ||
+       brw->ib.size != ib_size)
+   {
+      drm_intel_bo_unreference(brw->ib.bo);
+      brw->ib.bo = bo;
+      brw->ib.offset = offset;
+      brw->ib.size = ib_size;
+
+      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+   } else {
+      drm_intel_bo_unreference(bo);
+   }
 
    brw_add_validated_bo(brw, brw->ib.bo);
 }
 
-static void brw_emit_indices(struct brw_context *brw)
+const struct brw_tracked_state brw_indices = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_INDICES,
+      .cache = 0,
+   },
+   .prepare = brw_prepare_indices,
+};
+
+static void brw_emit_index_buffer(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   GLuint ib_size;
 
    if (index_buffer == NULL)
       return;
 
-   ib_size = get_size(index_buffer->type) * index_buffer->count - 1;
-
    /* Emit the indexbuffer packet:
     */
    {
       struct brw_indexbuffer ib;
 
       memset(&ib, 0, sizeof(ib));
-   
+
       ib.header.bits.opcode = CMD_INDEX_BUFFER;
       ib.header.bits.length = sizeof(ib)/4 - 2;
       ib.header.bits.index_format = get_index_type(index_buffer->type);
       ib.header.bits.cut_index_enable = 0;
-   
 
       BEGIN_BATCH(4, IGNORE_CLIPRECTS);
       OUT_BATCH( ib.header.dword );
@@ -699,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw)
                brw->ib.offset);
       OUT_RELOC(brw->ib.bo,
                I915_GEM_DOMAIN_VERTEX, 0,
-               brw->ib.offset + ib_size);
+               brw->ib.offset + brw->ib.size);
       OUT_BATCH( 0 );
       ADVANCE_BATCH();
    }
 }
 
-const struct brw_tracked_state brw_indices = {
+const struct brw_tracked_state brw_index_buffer = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_BATCH | BRW_NEW_INDICES,
+      .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
       .cache = 0,
    },
-   .prepare = brw_prepare_indices,
-   .emit = brw_emit_indices,
+   .emit = brw_emit_index_buffer,
 };
index bf9f6cae55e5af2836dffb3aff2db26ba2b375b1..78572356a3dcdba85128aeaed777ea480b54ab40 100644 (file)
@@ -92,6 +92,7 @@ const struct brw_tracked_state brw_clear_batch_cache;
 const struct brw_tracked_state brw_drawing_rect;
 const struct brw_tracked_state brw_indices;
 const struct brw_tracked_state brw_vertices;
+const struct brw_tracked_state brw_index_buffer;
 
 /**
  * Use same key for WM and VS surfaces.
index 38d9dd8991eeb9374ae701c834b7ae13dc382d67..95d42d2dcc5bc47f701c017cb826c0df44c0b5d5 100644 (file)
@@ -94,6 +94,7 @@ const struct brw_tracked_state *atoms[] =
 
    &brw_drawing_rect,
    &brw_indices,
+   &brw_index_buffer,
    &brw_vertices,
 
    &brw_constant_buffer
@@ -208,6 +209,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_PSP),
    DEFINE_BIT(BRW_NEW_FENCE),
    DEFINE_BIT(BRW_NEW_INDICES),
+   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
    DEFINE_BIT(BRW_NEW_BATCH),
    DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),