i965: Move push constant state packets to push constant update time.
author Eric Anholt <eric@anholt.net>
Thu, 1 May 2014 16:59:22 +0000 (09:59 -0700)
committer Eric Anholt <eric@anholt.net>
Sat, 3 May 2014 00:01:40 +0000 (17:01 -0700)
-0.553779% +/- 0.423394% effect on cairo-perf-trace runtime on glamor
(n=612)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/gen6_vs_state.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_gs_state.c
src/mesa/drivers/dri/i965/gen7_vs_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c
src/mesa/drivers/dri/i965/gen8_gs_state.c
src/mesa/drivers/dri/i965/gen8_ps_state.c
src/mesa/drivers/dri/i965/gen8_vs_state.c

index 25f55c47d9eb3609316f6654d225c255c0bad0a8..9764645daab47274b7d89ee936fce8cc395b3bfb 100644 (file)
@@ -98,13 +98,22 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
 
    gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data,
                                    stage_state, AUB_TRACE_VS_CONSTANTS);
+
+   if (brw->gen >= 7) {
+      if (brw->gen == 7 && !brw->is_haswell)
+         gen7_emit_vs_workaround_flush(brw);
+
+      gen7_upload_constant_state(brw, stage_state, true /* active */,
+                                 _3DSTATE_CONSTANT_VS);
+   }
 }
 
 const struct brw_tracked_state gen6_vs_push_constants = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_BATCH |
-               BRW_NEW_VERTEX_PROGRAM),
+                BRW_NEW_VERTEX_PROGRAM |
+                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
       .cache = CACHE_NEW_VS_PROG,
    },
    .emit = gen6_upload_vs_push_constants,
index 3f5dd29487f5215ed8b28ab3ab95d0a081ef142a..402d9c38635b743d38a6e3494f7a11771c323a30 100644 (file)
@@ -81,13 +81,19 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
 
       brw->wm.base.push_const_size = ALIGN(prog_data->base.nr_params, 8) / 8;
    }
+
+   if (brw->gen >= 7) {
+      gen7_upload_constant_state(brw, &brw->wm.base, true,
+                                 _3DSTATE_CONSTANT_PS);
+   }
 }
 
 const struct brw_tracked_state gen6_wm_push_constants = {
    .dirty = {
       .mesa  = _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_BATCH |
-               BRW_NEW_FRAGMENT_PROGRAM),
+                BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
       .cache = CACHE_NEW_WM_PROG,
    },
    .emit = gen6_upload_wm_push_constants,
index 599997d8ad8b6b1c51df543d92cace803e6a2883..30dfa6b87c6e17dbae8e447ac582b6f623e7ac55 100644 (file)
 static void
 gen7_upload_gs_push_constants(struct brw_context *brw)
 {
+   const struct brw_stage_state *stage_state = &brw->gs.base;
    /* BRW_NEW_GEOMETRY_PROGRAM */
-   const struct brw_geometry_program *vp =
+   const struct brw_geometry_program *gp =
       (struct brw_geometry_program *) brw->geometry_program;
-   if (!vp)
-      return;
 
-   /* CACHE_NEW_GS_PROG */
-   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
-   struct brw_stage_state *stage_state = &brw->gs.base;
+   if (gp) {
+      /* CACHE_NEW_GS_PROG */
+      const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
+      struct brw_stage_state *stage_state = &brw->gs.base;
+
+      gen6_upload_vec4_push_constants(brw, &gp->program.Base, prog_data,
+                                      stage_state, AUB_TRACE_VS_CONSTANTS);
+   }
 
-   gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data,
-                                   stage_state, AUB_TRACE_VS_CONSTANTS);
+   gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS);
 }
 
 const struct brw_tracked_state gen7_gs_push_constants = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_BATCH |
-               BRW_NEW_GEOMETRY_PROGRAM),
+                BRW_NEW_GEOMETRY_PROGRAM |
+                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
       .cache = CACHE_NEW_GS_PROG,
    },
    .emit = gen7_upload_gs_push_constants,
@@ -66,8 +70,6 @@ upload_gs_state(struct brw_context *brw)
    /* CACHE_NEW_GS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
 
-   gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
-
    /**
     * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
     * Geometry > Geometry Shader > State:
@@ -186,11 +188,10 @@ upload_gs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_gs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CONTEXT |
                 BRW_NEW_GEOMETRY_PROGRAM |
-                BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+                BRW_NEW_BATCH),
       .cache = CACHE_NEW_GS_PROG
    },
    .emit = upload_gs_state,
index a0303104c17e199717668e8f52f7433ab7060d61..4d9915059cce82fc441f011209c3e65aec07a37d 100644 (file)
@@ -75,9 +75,6 @@ upload_vs_state(struct brw_context *brw)
    if (!brw->is_haswell)
       gen7_emit_vs_workaround_flush(brw);
 
-   gen7_upload_constant_state(brw, stage_state, true /* active */,
-                              _3DSTATE_CONSTANT_VS);
-
    /* Use ALT floating point mode for ARB vertex programs, because they
     * require 0^0 == 1.
     */
@@ -114,11 +111,10 @@ upload_vs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_vs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CONTEXT |
                BRW_NEW_VERTEX_PROGRAM |
-               BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+               BRW_NEW_BATCH),
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_vs_state,
index 972ee0098ec4a9b9114de4931ecf42d045a43237..078284101b48263e29d2510cc442067eaf1010d3 100644 (file)
@@ -143,9 +143,6 @@ upload_ps_state(struct brw_context *brw)
    const int max_threads_shift = brw->is_haswell ?
       HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
 
-   /* CACHE_NEW_WM_PROG */
-   gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
-
    dw2 = dw4 = dw5 = 0;
 
    dw2 |=
@@ -267,13 +264,11 @@ upload_ps_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_ps_state = {
    .dirty = {
-      .mesa  = (_NEW_PROGRAM_CONSTANTS |
-               _NEW_COLOR |
+      .mesa  = (_NEW_COLOR |
                 _NEW_BUFFERS |
                 _NEW_MULTISAMPLE),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-               BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+                BRW_NEW_BATCH),
       .cache = (CACHE_NEW_WM_PROG)
    },
    .emit = upload_ps_state,
index ef25115db9254b6d14a7a6280550dda4c42c79dc..a0f933c091832c3a724c7b4ae85a69ed657f37e9 100644 (file)
@@ -36,8 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw)
    /* CACHE_NEW_GS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
 
-   gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
-
    if (active) {
       int urb_entry_write_offset = 1;
       uint32_t urb_entry_output_length =
@@ -123,11 +121,10 @@ gen8_upload_gs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_gs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CONTEXT |
                 BRW_NEW_GEOMETRY_PROGRAM |
-                BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+                BRW_NEW_BATCH),
       .cache = CACHE_NEW_GS_PROG
    },
    .emit = gen8_upload_gs_state,
index f0362a276d0dff795ecc21a71e7ece630f6ccbb9..8b8d5ea77474d3b3a688456e4c4d1dc6c1bb6615 100644 (file)
@@ -136,9 +136,6 @@ upload_ps_state(struct brw_context *brw)
    struct gl_context *ctx = &brw->ctx;
    uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
 
-   /* CACHE_NEW_WM_PROG */
-   gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
-
    /* Initialize the execution mask with VMask.  Otherwise, derivatives are
     * incorrect for subspans where some of the pixels are unlit.  We believe
     * the bit just didn't take effect in previous generations.
@@ -243,10 +240,9 @@ upload_ps_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_ps_state = {
    .dirty = {
-      .mesa  = _NEW_PROGRAM_CONSTANTS | _NEW_MULTISAMPLE,
+      .mesa  = _NEW_MULTISAMPLE,
       .brw   = BRW_NEW_FRAGMENT_PROGRAM |
-               BRW_NEW_BATCH |
-               BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+               BRW_NEW_BATCH,
       .cache = CACHE_NEW_WM_PROG
    },
    .emit = upload_ps_state,
index 9ac681fb4199753a0e6a83d95c8a520e7dbc177b..638e216a469406428ae5c635e98ab9d472891788 100644 (file)
@@ -39,9 +39,6 @@ upload_vs_state(struct brw_context *brw)
    /* CACHE_NEW_VS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
 
-   gen7_upload_constant_state(brw, stage_state, true /* active */,
-                              _3DSTATE_CONSTANT_VS);
-
    /* Use ALT floating point mode for ARB vertex programs, because they
     * require 0^0 == 1.
     */
@@ -84,11 +81,10 @@ upload_vs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_vs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = BRW_NEW_CONTEXT |
                BRW_NEW_VERTEX_PROGRAM |
-               BRW_NEW_BATCH |
-               BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+               BRW_NEW_BATCH,
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_vs_state,