i965: Tell the unit states how many binding table entries we have.
author Eric Anholt <eric@anholt.net>
Sat, 2 Nov 2013 00:43:43 +0000 (17:43 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 5 Nov 2013 23:39:45 +0000 (15:39 -0800)
Before the series with 3c9dc2d31b80fc73bffa1f40a91443a53229c8e2 to
dynamically assign our binding table indices, we didn't really track our
binding table count per shader, so we never filled in these fields.

Affects cairo-gl trace runtime by -2.47953% +/- 1.07281% (n=20)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_vs_state.c
src/mesa/drivers/dri/i965/brw_wm_state.c
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_gs_state.c
src/mesa/drivers/dri/i965/gen7_vs_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c

index cdffac3b385759aaab98d66bbb315c253c42a1ef..216b3dd77afc415c8503420a1da075872bb20fc8 100644 (file)
@@ -79,7 +79,8 @@ brw_upload_vs_unit(struct brw_context *brw)
    */
    vs->thread1.single_program_flow = (brw->gen == 5);
 
-   vs->thread1.binding_table_entry_count = 0;
+   vs->thread1.binding_table_entry_count =
+      brw->vs.prog_data->base.base.binding_table.size_bytes / 4;
 
    if (brw->vs.prog_data->base.total_scratch != 0) {
       vs->thread2.scratch_space_base_pointer =
index 9aa32c08ec98dd6c0ca1abc8e1920e2a2332268b..406dbbe333870309cb2843ec389dee06e2362a28 100644 (file)
@@ -120,7 +120,8 @@ brw_upload_wm_unit(struct brw_context *brw)
    else
       wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
 
-   wm->thread1.binding_table_entry_count = 0;
+   wm->thread1.binding_table_entry_count =
+      brw->wm.prog_data->base.binding_table.size_bytes / 4;
 
    if (brw->wm.prog_data->total_scratch != 0) {
       wm->thread2.scratch_space_base_pointer =
index 569ec8cdc891ef9084c6b34c6eb283ae805af53c..80129cdf1687926133d83ac759cab6cc3161c02d 100644 (file)
@@ -165,7 +165,9 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
    OUT_BATCH(stage_state->prog_offset);
    OUT_BATCH(floating_point_mode |
-            ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
+            ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+             ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
+              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 
    if (brw->vs.prog_data->base.total_scratch) {
       OUT_RELOC(stage_state->scratch_bo,
index 42d8789ed67868871ddc42256d3ab9acf9bd6f81..577324651146c0efaba7fb98dec39677a442dffe 100644 (file)
@@ -146,6 +146,11 @@ upload_wm_state(struct brw_context *brw)
    /* CACHE_NEW_SAMPLER */
    dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
            GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+   /* CACHE_NEW_WM_PROG */
+   dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
+           GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
    dw4 |= (brw->wm.prog_data->first_curbe_grf <<
           GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
    dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
index 2602200eb683c62f4c7cfddf585556e272b6f7d8..584f2db8f8e430f32409a65486ec6263b345e663 100644 (file)
@@ -85,7 +85,9 @@ upload_gs_state(struct brw_context *brw)
       OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
       OUT_BATCH(stage_state->prog_offset);
       OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
-                 GEN6_GS_SAMPLER_COUNT_SHIFT));
+                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
+                ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 
       if (brw->gs.prog_data->base.total_scratch) {
          OUT_RELOC(stage_state->scratch_bo,
index 4fd19139e8ba98ae02dd7c790f069043d93784f3..1e76eb1ee2eb268582f9807a5df6bb8b685a7580 100644 (file)
@@ -100,7 +100,9 @@ upload_vs_state(struct brw_context *brw)
    OUT_BATCH(stage_state->prog_offset);
    OUT_BATCH(floating_point_mode |
             ((ALIGN(stage_state->sampler_count, 4)/4) <<
-              GEN6_VS_SAMPLER_COUNT_SHIFT));
+              GEN6_VS_SAMPLER_COUNT_SHIFT) |
+             ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
+              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
 
    if (brw->vs.prog_data->base.total_scratch) {
       OUT_RELOC(stage_state->scratch_bo,
index 58a6438e79e755dba957dba5e8b8b93704f00d55..531b1a4c9cd5f72fb87e1ae116f989944191af53 100644 (file)
@@ -160,6 +160,10 @@ upload_ps_state(struct brw_context *brw)
    dw2 |=
       (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
 
+   /* CACHE_NEW_WM_PROG */
+   dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
+           GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
    /* Use ALT floating point mode for ARB fragment programs, because they
     * require 0^0 == 1.  Even though _CurrentFragmentProgram is used for
     * rendering, CurrentFragmentProgram is used for this check to