i965: Split the VS binding table to a separate table.
author Eric Anholt <eric@anholt.net>
Wed, 15 Feb 2012 21:33:07 +0000 (13:33 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 21 Feb 2012 19:54:12 +0000 (11:54 -0800)
This is a step toward making the samplers/binding tables reflect
sampler uniform mappings instead of embedding those in the programs.
No significant performance difference on the microbenchmark (n=10).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_misc_state.c
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_vs_surface_state.c
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/gen7_vs_state.c

index 98f68e7c3817b3a6154aadb11bcfda4038b41be4..44a01e69ba59d807363449eb6d232a92ebc6afbd 100644 (file)
@@ -409,6 +409,8 @@ struct brw_vs_prog_data {
    bool uses_new_param_layout;
    bool uses_vertexid;
    bool userclip;
+
+   int num_surfaces;
 };
 
 
@@ -468,7 +470,7 @@ struct brw_vs_ouput_sizes {
  * (VS, HS, DS, GS, PS), we currently share a single binding table for all of
  * them.  This is purely for convenience.
  *
- * Currently our binding tables are (arbitrarily) programmed as follows:
+ * Currently our SOL/WM binding tables are (arbitrarily) programmed as follows:
  *
  *    +-------------------------------+
  *    |   0 | Draw buffer 0           | .
@@ -476,18 +478,28 @@ struct brw_vs_ouput_sizes {
  *    |   : |     :                   |   > Only relevant to the WM.
  *    |   7 | Draw buffer 7           |  /
  *    |-----|-------------------------| `
- *    |   8 | VS Pull Constant Buffer |
- *    |   9 | WM Pull Constant Buffer |
+ *    |   8 | WM Pull Constant Buffer |
  *    |-----|-------------------------|
- *    |  10 | Texture 0               |
+ *    |   9 | Texture 0               |
  *    |   . |     .                   |
  *    |   : |     :                   |
- *    |  25 | Texture 15              |
+ *    |  24 | Texture 15              |
  *    +-----|-------------------------+
- *    |  26 | SOL Binding 0           |
+ *    |  25 | SOL Binding 0           |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  88 | SOL Binding 63          |
+ *    +-------------------------------+
+ *
+ * Our VS binding tables are programmed as follows:
+ *
+ *    +-----+-------------------------+
+ *    |   0 | VS Pull Constant Buffer |
+ *    +-----+-------------------------+
+ *    |   1 | Texture 0               |
  *    |   . |     .                   |
  *    |   : |     :                   |
- *    |  89 | SOL Binding 63          |
+ *    |  16 | Texture 15              |
  *    +-------------------------------+
  *
  * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
@@ -495,7 +507,6 @@ struct brw_vs_ouput_sizes {
  * first so we can use headerless render target writes for RT 0.
  */
 #define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
 #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
 #define SURF_INDEX_SOL_BINDING(t)    (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
@@ -503,6 +514,10 @@ struct brw_vs_ouput_sizes {
 /** Maximum size of the binding table. */
 #define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
 
+#define SURF_INDEX_VERT_CONST_BUFFER (0)
+#define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
+#define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
+
 enum brw_cache_id {
    BRW_BLEND_STATE,
    BRW_DEPTH_STENCIL_STATE,
@@ -841,6 +856,9 @@ struct brw_context
       */
       uint8_t *ra_reg_to_grf;
       /** @} */
+
+      uint32_t bind_bo_offset;
+      uint32_t surf_offset[BRW_MAX_VS_SURFACES];
    } vs;
 
    struct {
index 0343ae19073ccb0cf1f55eb09516289947ec93fc..7bc7e1c10254471a18c16c5268ff0b584c686e65 100644 (file)
@@ -77,7 +77,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
-   OUT_BATCH(brw->bind.bo_offset);
+   OUT_BATCH(brw->vs.bind_bo_offset);
    OUT_BATCH(0); /* gs */
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
@@ -115,7 +115,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
-   OUT_BATCH(brw->bind.bo_offset); /* vs */
+   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
    OUT_BATCH(brw->bind.bo_offset); /* gs */
    OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
    ADVANCE_BATCH();
index 2dd566538ee9167c6e103e04f2c055cdbfbe0c7a..59a2bb325017ce32b22741eefb06271ea54d8281 100644 (file)
@@ -71,6 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog;
 extern const struct brw_tracked_state brw_renderbuffer_surfaces;
 extern const struct brw_tracked_state brw_texture_surfaces;
 extern const struct brw_tracked_state brw_binding_table;
+extern const struct brw_tracked_state brw_vs_binding_table;
 extern const struct brw_tracked_state brw_wm_unit;
 
 extern const struct brw_tracked_state brw_psp_urb_cbs;
index ea506950c46f44bcf8042800afbd048d547e738f..28e4d26209e068244166e9599c337cb64c1a8123 100644 (file)
@@ -70,6 +70,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_wm_pull_constants,
    &brw_renderbuffer_surfaces,
    &brw_texture_surfaces,
+   &brw_vs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
@@ -146,6 +147,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &gen6_renderbuffer_surfaces,
    &brw_texture_surfaces,
    &gen6_sol_surface,
+   &brw_vs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
@@ -214,6 +216,7 @@ const struct brw_tracked_state *gen7_atoms[] =
    &brw_wm_pull_constants,
    &gen6_renderbuffer_surfaces,
    &brw_texture_surfaces,
+   &brw_vs_binding_table,
    &brw_binding_table,
 
    &gen7_samplers,
index f9eed61d92cc6a0b5f2d9fd054d537caf49e8e88..9df7b11f5ad51e06b856ef1319aa470ba45b97e9 100644 (file)
@@ -465,7 +465,7 @@ vec4_visitor::generate_tex(vec4_instruction *inst,
              dst,
              inst->base_mrf,
              src,
-             SURF_INDEX_TEXTURE(inst->sampler),
+             SURF_INDEX_VS_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
index ca205cdf79adf025ea0a075e7339d3d71f47642b..bd703c7389ae4fb9aeaddb4f8f1d0c62f5fec181 100644 (file)
@@ -247,6 +247,11 @@ do_vs_prog(struct brw_context *brw,
       brw_old_vs_emit(&c);
    }
 
+   if (c.prog_data.nr_pull_params)
+      c.prog_data.num_surfaces = 1;
+   if (c.vp->program.Base.SamplersUsed)
+      c.prog_data.num_surfaces = BRW_MAX_VS_SURFACES;
+
    /* Scratch space is used for register spilling */
    if (c.last_scratch) {
       c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
index 2f7b211d5eccfc72ae2da1d3654793b7b7be9692..b29e414a54eea6dbebdfec2215b7658ec2a3f567 100644 (file)
@@ -65,7 +65,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
       if (brw->vs.const_bo) {
         drm_intel_bo_unreference(brw->vs.const_bo);
         brw->vs.const_bo = NULL;
-        brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
+        brw->vs.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
         brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
       }
       return;
@@ -97,7 +97,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
    const int surf = SURF_INDEX_VERT_CONST_BUFFER;
    intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
                                       params->NumParameters,
-                                      &brw->bind.surf_offset[surf]);
+                                      &brw->vs.surf_offset[surf]);
 
    brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
 }
@@ -110,3 +110,50 @@ const struct brw_tracked_state brw_vs_pull_constants = {
    },
    .emit = brw_upload_vs_pull_constants,
 };
+
+/**
+ * Constructs the binding table for the VS surface state, which maps VS
+ * surface indices to the surface state offsets in brw->vs.surf_offset[].
+ */
+static void
+brw_vs_upload_binding_table(struct brw_context *brw)
+{
+   uint32_t *bind;
+   int i;
+
+   /* CACHE_NEW_VS_PROG: With no textures or pull constants there is nothing
+    * to bind; point at offset 0 and flag a change only if it was nonzero.
+    */
+   if (brw->vs.prog_data->num_surfaces == 0) {
+      if (brw->vs.bind_bo_offset != 0) {
+         brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
+         brw->vs.bind_bo_offset = 0;
+      }
+      return;
+   }
+
+   /* Only BRW_MAX_VS_SURFACES entries are ever written below, so size the
+    * allocation to the VS table rather than the larger SOL/WM table.
+    */
+   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                         sizeof(uint32_t) * BRW_MAX_VS_SURFACES,
+                         32, &brw->vs.bind_bo_offset);
+
+   /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
+   for (i = 0; i < BRW_MAX_VS_SURFACES; i++) {
+      bind[i] = brw->vs.surf_offset[i];
+   }
+
+   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
+}
+
+/* State atom for the VS binding table: .dirty lists the flags it is keyed
+ * on, .emit is the upload function defined above. */
+const struct brw_tracked_state brw_vs_binding_table = {
+   .dirty = {
+      .cache = CACHE_NEW_VS_PROG,
+      .brw = BRW_NEW_SURFACES | BRW_NEW_VS_CONSTBUF | BRW_NEW_BATCH,
+      .mesa = 0,
+   },
+   .emit = brw_vs_upload_binding_table,
+};
index 97ae489ea96c41ea0d612242f0a6c8d27b797df4..a975b2d1c55e2c362992367475fb7b909fc4ef88 100644 (file)
@@ -1097,6 +1097,10 @@ brw_update_texture_surfaces(struct brw_context *brw)
       } else {
          brw->bind.surf_offset[surf] = 0;
       }
+
+      /* For now, just mirror the texture setup to the VS slots. */
+      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] =
+        brw->bind.surf_offset[surf];
    }
 
    brw->state.dirty.brw |= BRW_NEW_SURFACES;
@@ -1128,12 +1132,11 @@ brw_upload_binding_table(struct brw_context *brw)
                          sizeof(uint32_t) * BRW_MAX_SURFACES,
                          32, &brw->bind.bo_offset);
 
-   /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
+   /* BRW_NEW_SURFACES */
    for (i = 0; i < BRW_MAX_SURFACES; i++) {
       bind[i] = brw->bind.surf_offset[i];
    }
 
-   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
    brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
 }
 
@@ -1141,7 +1144,6 @@ const struct brw_tracked_state brw_binding_table = {
    .dirty = {
       .mesa = 0,
       .brw = (BRW_NEW_BATCH |
-             BRW_NEW_VS_CONSTBUF |
              BRW_NEW_SURFACES),
       .cache = 0
    },
index a3d652cb6f7e4b43f504f64a0f646a8c25164393..73822e3350c5eaef4e6c7b72f7a930c83e57180f 100644 (file)
@@ -37,9 +37,10 @@ upload_vs_state(struct brw_context *brw)
 
    gen7_emit_vs_workaround_flush(intel);
 
+   /* BRW_NEW_VS_BINDING_TABLE */
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
-   OUT_BATCH(brw->bind.bo_offset);
+   OUT_BATCH(brw->vs.bind_bo_offset);
    ADVANCE_BATCH();
 
    /* CACHE_NEW_SAMPLER */