i965: Split the gen6 GS binding table to a separate table.
author Eric Anholt <eric@anholt.net>
Wed, 15 Feb 2012 22:15:14 +0000 (14:15 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 21 Feb 2012 19:54:14 +0000 (11:54 -0800)
Improves VS state change microbenchmark performance by 7.08729% +/-
1.22289% (n=10) on gen7, because we don't upload the 64 dwords of
unused binding table any more.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen6_sol.c

index 44a01e69ba59d807363449eb6d232a92ebc6afbd..9c89617e66da3f77d69fdd9a71dac6b2215cb088 100644 (file)
@@ -484,11 +484,6 @@ struct brw_vs_ouput_sizes {
  *    |   . |     .                   |
  *    |   : |     :                   |
  *    |  24 | Texture 15              |
- *    +-----|-------------------------+
- *    |  25 | SOL Binding 0           |
- *    |   . |     .                   |
- *    |   : |     :                   |
- *    |  88 | SOL Binding 63          |
  *    +-------------------------------+
  *
  * Our VS binding tables are programmed as follows:
@@ -502,6 +497,15 @@ struct brw_vs_ouput_sizes {
  *    |  16 | Texture 15              |
  *    +-------------------------------+
  *
+ * Our (gen6) GS binding tables are programmed as follows:
+ *
+ *    +-----+-------------------------+
+ *    |  0  | SOL Binding 0           |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  63 | SOL Binding 63          |
+ *    +-----+-------------------------+
+ *
  * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
  * the identity function or things will break.  We do want to keep draw buffers
  * first so we can use headerless render target writes for RT 0.
@@ -509,15 +513,17 @@ struct brw_vs_ouput_sizes {
 #define SURF_INDEX_DRAW(d)           (d)
 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
 #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
-#define SURF_INDEX_SOL_BINDING(t)    (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
 
 /** Maximum size of the binding table. */
-#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define BRW_MAX_SURFACES             SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT)
 
 #define SURF_INDEX_VERT_CONST_BUFFER (0)
 #define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
 #define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
 
+#define SURF_INDEX_SOL_BINDING(t)    ((t))
+#define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+
 enum brw_cache_id {
    BRW_BLEND_STATE,
    BRW_DEPTH_STENCIL_STATE,
@@ -868,6 +874,9 @@ struct brw_context
       /** Offset in the program cache to the CLIP program pre-gen6 */
       uint32_t prog_offset;
       uint32_t state_offset;
+
+      uint32_t bind_bo_offset; /* offset of the GS binding table */
+      uint32_t surf_offset[BRW_MAX_GS_SURFACES]; /* one per SOL binding */
    } gs;
 
    struct {
index 7bc7e1c10254471a18c16c5268ff0b584c686e65..c86755de6593cc2044ab285804f7246617f490e8 100644 (file)
@@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
    OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
-   OUT_BATCH(brw->bind.bo_offset); /* gs */
+   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
    OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
    ADVANCE_BATCH();
 }
index 59a2bb325017ce32b22741eefb06271ea54d8281..a58b4b3c0b8373bd0083fe0ddda826d714b1ddf6 100644 (file)
@@ -90,6 +90,7 @@ extern const struct brw_tracked_state gen6_clip_vp;
 extern const struct brw_tracked_state gen6_color_calc_state;
 extern const struct brw_tracked_state gen6_depth_stencil_state;
 extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_gs_binding_table;
 extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
 extern const struct brw_tracked_state gen6_sampler_state;
 extern const struct brw_tracked_state gen6_scissor_state;
index 28e4d26209e068244166e9599c337cb64c1a8123..3f5c03d8f901058d8674a6e98edf26f43c81fb94 100644 (file)
@@ -148,6 +148,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_texture_surfaces,
    &gen6_sol_surface,
    &brw_vs_binding_table,
+   &gen6_gs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
index 41923b7f527f7962e9dd5f7020dea46d947dd171..fbd8e71631f465c5b5a6641e55b3f7f045b1904c 100644 (file)
@@ -30,6 +30,7 @@
 #include "brw_context.h"
 #include "intel_batchbuffer.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 
 static void
 gen6_update_sol_surfaces(struct brw_context *brw)
@@ -54,11 +55,11 @@ gen6_update_sol_surfaces(struct brw_context *brw)
             xfb_obj->Offset[buffer] / 4 +
             linked_xfb_info->Outputs[i].DstOffset;
          brw_update_sol_surface(
-            brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index],
+            brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index],
             linked_xfb_info->Outputs[i].NumComponents,
             linked_xfb_info->BufferStride[buffer], buffer_offset);
       } else {
-         brw->bind.surf_offset[surf_index] = 0;
+         brw->gs.surf_offset[surf_index] = 0;
       }
    }
 
@@ -75,6 +76,59 @@ const struct brw_tracked_state gen6_sol_surface = {
    .emit = gen6_update_sol_surfaces,
 };
 
+/**
+ * Constructs the binding table for the gen6 GS stage, which maps SOL binding
+ * indices to the surface state offsets stored in brw->gs.surf_offset.
+ */
+static void
+brw_gs_upload_binding_table(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const struct gl_shader_program *shaderprog =
+      ctx->Shader.CurrentVertexProgram;
+   /* Currently we only ever upload surfaces for SOL.  Guard against a NULL
+    * shaderprog (no GLSL vertex shader bound): then there is nothing to bind.
+    */
+   bool has_surfaces =
+      shaderprog && shaderprog->LinkedTransformFeedback.NumOutputs != 0;
+   uint32_t *bind;
+
+   /* Skip making a binding table if there are no SOL surfaces; reset the
+    * offset to 0 and flag the change only if a table was previously set.
+    */
+   if (!has_surfaces) {
+      if (brw->gs.bind_bo_offset != 0) {
+        brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+        brw->gs.bind_bo_offset = 0;
+      }
+      return;
+   }
+
+   /* Might want to calculate nr_surfaces first, to avoid taking up so much
+    * space for the binding table.
+    */
+   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                         sizeof(uint32_t) * BRW_MAX_GS_SURFACES,
+                         32, &brw->gs.bind_bo_offset);
+
+   /* BRW_NEW_SURFACES */
+   memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t));
+
+   brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+}
+
+const struct brw_tracked_state gen6_gs_binding_table = {
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_BATCH |
+             BRW_NEW_VERTEX_PROGRAM | /* emit reads CurrentVertexProgram */
+             BRW_NEW_SURFACES), /* emit copies brw->gs.surf_offset */
+      .cache = 0
+   },
+   .emit = brw_gs_upload_binding_table,
+};
+
 static void
 gen6_update_sol_indices(struct brw_context *brw)
 {