i965/vs: Rework binding table size calculation.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 15 Aug 2013 03:25:40 +0000 (20:25 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Mon, 19 Aug 2013 20:17:00 +0000 (13:17 -0700)
Unlike the FS, the VS backend already computed the binding table size.
However, it did so poorly: after compilation, it looked to see if any
pull constants/textures/UBOs were in use, and set num_surfaces to the
maximum surface index for that category.  If the VS only used a single
texture or UBO, this overcounted by quite a bit.

The shader time surface was also noted at state upload time (during
drawing), not at compile time, which is inefficient.  I believe it also
had an off-by-one error: it set num_surfaces to the shader time surface
index itself rather than to that index plus one.

This patch computes it accurately, while also simplifying the code.

It also renames num_surfaces to binding_table_size, since num_surfaces
wasn't actually the number of surfaces used.  For example, a VS that
used one UBO and no other surfaces would have set num_surfaces to
SURF_INDEX_VS_UBO(1) == 18, rather than 1.  A bit of a misnomer there.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_vs_surface_state.c

index 9178485cb5f227b643d92b164b1bd0420ad777be..dae3219a09a5f689e92c19bca1108ded643a3511 100644 (file)
@@ -493,7 +493,7 @@ struct brw_vec4_prog_data {
     */
    GLuint urb_entry_size;
 
-   int num_surfaces;
+   unsigned binding_table_size;
 
    /* These pointers must appear last.  See brw_vec4_prog_data_compare(). */
    const float **param;
index 833bef17d0e2ceecde709e295cc9029f4804dd5e..111b1050983c67ee29e57c18a9f8df9920c61b0e 100644 (file)
@@ -596,6 +596,8 @@ private:
    void generate_unpack_flags(vec4_instruction *inst,
                               struct brw_reg dst);
 
+   void mark_surface_used(unsigned surf_index);
+
    struct brw_context *brw;
    struct gl_context *ctx;
 
index 5417c82dbb2c25f23a4f8fc7221e773ee78bbdad..ce9bcd08d387deb07ca6b100c181d867a0732c74 100644 (file)
@@ -151,6 +151,15 @@ vec4_generator::~vec4_generator()
 {
 }
 
+void
+vec4_generator::mark_surface_used(unsigned surf_index)
+{
+   assert(surf_index < BRW_MAX_VS_SURFACES);
+
+   prog_data->binding_table_size = MAX2(prog_data->binding_table_size,
+                                        surf_index + 1);
+}
+
 void
 vec4_generator::generate_math1_gen4(vec4_instruction *inst,
                                     struct brw_reg dst,
@@ -384,6 +393,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
              inst->header_present,
              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
              return_format);
+
+   mark_surface_used(SURF_INDEX_VS_TEXTURE(inst->sampler));
 }
 
 void
@@ -614,6 +625,8 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                           2, /* mlen */
                            true, /* header_present */
                           1 /* rlen */);
+
+   mark_surface_used(surf_index);
 }
 
 void
@@ -637,6 +650,8 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                            false, /* no header */
                            BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                            0);
+
+   mark_surface_used(surf_index.dw1.ud);
 }
 
 /**
@@ -869,6 +884,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
 
    case SHADER_OPCODE_SHADER_TIME_ADD:
       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
+      mark_surface_used(SURF_INDEX_VS_SHADER_TIME);
       break;
 
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
index 9e18ef0119aee4cbcdcc50f64338c2c47165df47..dcd14a319dff538df806625e29a0390ccfae9bd5 100644 (file)
@@ -306,16 +306,6 @@ do_vs_prog(struct brw_context *brw,
       return false;
    }
 
-   if (prog_data.base.nr_pull_params)
-      prog_data.base.num_surfaces = 1;
-   if (c.vp->program.Base.SamplersUsed)
-      prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT);
-   if (prog &&
-       prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) {
-      prog_data.base.num_surfaces =
-        SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks);
-   }
-
    /* Scratch space is used for register spilling */
    if (c.base.last_scratch) {
       perf_debug("Vertex shader triggered register spilling.  "
index eaeff95676986d9d4fb7cbc3f60ccd7d6b6e304d..2c2d713b98c1e427536b65844cfca2b99e8ab9a4 100644 (file)
@@ -140,16 +140,12 @@ brw_vs_upload_binding_table(struct brw_context *brw)
 
    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
       gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
-
-      assert(brw->vs.prog_data->base.num_surfaces
-             <= SURF_INDEX_VS_SHADER_TIME);
-      brw->vs.prog_data->base.num_surfaces = SURF_INDEX_VS_SHADER_TIME;
    }
 
    /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or
     * pull constants.
     */
-   if (brw->vs.prog_data->base.num_surfaces == 0) {
+   if (brw->vs.prog_data->base.binding_table_size == 0) {
       if (brw->vs.bind_bo_offset != 0) {
         brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
         brw->vs.bind_bo_offset = 0;