i965: Compute required barycentric interp modes once at FS compile time.
author: Eric Anholt <eric@anholt.net>
Wed, 15 Feb 2012 05:48:43 +0000 (21:48 -0800)
committer: Eric Anholt <eric@anholt.net>
Tue, 21 Feb 2012 19:54:06 +0000 (11:54 -0800)
Improves VS state change microbenchmark performance 1.78817% +/-
0.556878% (n=25).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/gen6_wm_state.c
src/mesa/drivers/dri/i965/gen7_wm_state.c

index 503585c70c8f9b1f70d463004c4dd018cc309bb0..a5a98b257a9dfe776843f75a9a930e54daf6b782 100644 (file)
@@ -271,6 +271,12 @@ struct brw_wm_prog_data {
    int dispatch_width;
    uint32_t prog_offset_16;
 
+   /**
+    * Mask of which interpolation modes are required by the fragment shader.
+    * Used in hardware setup on gen6+.
+    */
+   uint32_t barycentric_interp_modes;
+
    /* Pointer to tracked values (only valid once
     * _mesa_load_state_parameters has been called at runtime).
     */
@@ -1049,11 +1055,6 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 /* brw_vs.c */
 gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
 
-/* brw_wm.c */
-unsigned
-brw_compute_barycentric_interp_modes(bool shade_model_flat,
-                                     const struct gl_fragment_program *fprog);
-
 /* brw_wm_surface_state.c */
 void brw_init_surface_formats(struct brw_context *brw);
 void
index 7dee20b7f926ca7b5f7b0b7da97cfb60053b6137..e59ab622c6c16618c6c224950e785496c30e5e66 100644 (file)
@@ -128,7 +128,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
  * Return a bitfield where bit n is set if barycentric interpolation mode n
  * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
  */
-unsigned
+static unsigned
 brw_compute_barycentric_interp_modes(bool shade_model_flat,
                                      const struct gl_fragment_program *fprog)
 {
@@ -174,9 +174,7 @@ brw_wm_payload_setup(struct brw_context *brw,
    struct intel_context *intel = &brw->intel;
    bool uses_depth = (c->fp->program.Base.InputsRead &
                      (1 << FRAG_ATTRIB_WPOS)) != 0;
-   unsigned barycentric_interp_modes =
-      brw_compute_barycentric_interp_modes(c->key.flat_shade,
-                                           &c->fp->program);
+   unsigned barycentric_interp_modes = c->prog_data.barycentric_interp_modes;
    int i;
 
    if (intel->gen >= 6) {
@@ -278,6 +276,9 @@ bool do_wm_prog(struct brw_context *brw,
 
    brw_init_compile(brw, &c->func, c);
 
+   c->prog_data.barycentric_interp_modes =
+      brw_compute_barycentric_interp_modes(c->key.flat_shade, &fp->program);
+
    if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
       if (!brw_wm_fs_emit(brw, c, prog))
         return false;
index 205e6480e078a7b42331a2e01f702a41c7810ea5..fd1eca450493ab280805004c39fb7464f614122d 100644 (file)
@@ -99,9 +99,6 @@ upload_wm_state(struct brw_context *brw)
       brw_fragment_program_const(brw->fragment_program);
    uint32_t dw2, dw4, dw5, dw6;
 
-   /* _NEW_LIGHT */
-   bool flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
-
     /* CACHE_NEW_WM_PROG */
    if (brw->wm.prog_data->nr_params == 0) {
       /* Disable the push constant buffers. */
@@ -173,7 +170,8 @@ upload_wm_state(struct brw_context *brw)
       dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
    if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
       dw5 |= GEN6_WM_COMPUTED_DEPTH;
-   dw6 |= brw_compute_barycentric_interp_modes(flat_shade, &fp->program) <<
+   /* CACHE_NEW_WM_PROG */
+   dw6 |= brw->wm.prog_data->barycentric_interp_modes <<
       GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
    /* _NEW_COLOR */
@@ -210,7 +208,6 @@ upload_wm_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_wm_state = {
    .dirty = {
       .mesa  = (_NEW_LINE |
-                _NEW_LIGHT |
                _NEW_COLOR |
                _NEW_BUFFERS |
                _NEW_PROGRAM_CONSTANTS |
index 870590fbe7e4375676ba89df19eeed60737ddd1b..8037966dd6d9cc0876da3b906e6242da9f22fab2 100644 (file)
@@ -41,9 +41,6 @@ upload_wm_state(struct brw_context *brw)
    bool writes_depth = false;
    uint32_t dw1;
 
-   /* _NEW_LIGHT */
-   bool flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
-
    dw1 = 0;
    dw1 |= GEN7_WM_STATISTICS_ENABLE;
    dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
@@ -64,7 +61,8 @@ upload_wm_state(struct brw_context *brw)
       writes_depth = true;
       dw1 |= GEN7_WM_PSCDEPTH_ON;
    }
-   dw1 |= brw_compute_barycentric_interp_modes(flat_shade, &fp->program) <<
+   /* CACHE_NEW_WM_PROG */
+   dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
       GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
    /* _NEW_COLOR */
@@ -86,11 +84,11 @@ upload_wm_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_wm_state = {
    .dirty = {
-      .mesa  = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON |
+      .mesa  = (_NEW_LINE | _NEW_POLYGON |
                _NEW_COLOR | _NEW_BUFFERS),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
                BRW_NEW_BATCH),
-      .cache = 0,
+      .cache = CACHE_NEW_WM_PROG,
    },
    .emit = upload_wm_state,
 };