i965: Remove fixed-function texture projection avoidance optimization.
authorKenneth Graunke <kenneth@whitecape.org>
Wed, 13 Mar 2013 04:09:19 +0000 (21:09 -0700)
committerKenneth Graunke <kenneth@whitecape.org>
Thu, 4 Apr 2013 22:38:19 +0000 (15:38 -0700)
This optimization attempts to avoid extra attribute interpolation
instructions for texture coordinates where the W-component is 1.0.

Unfortunately, it requires a lot of complexity: the brw_wm_input_sizes
state atom (all the brw_vs_constval.c code) needs to run on each draw.
It computes the input_size_masks array, then uses that to compute
proj_attrib_mask.  Differences in proj_attrib_mask can cause
state-dependent fragment shader recompiles.  We also often fail to guess
proj_attrib_mask for the fragment shader precompile, causing us to
needlessly compile it twice.

Furthermore, this optimization only applies to fixed-function programs;
it does not help modern GLSL-based programs at all.  Generally, older
fixed-function programs run fine on modern hardware anyway.

The optimization has existed in some form since the initial commit.  When
we rewrote the fragment shader backend, we dropped it for a while.  Eric
readded it in commit eb30820f268608cf451da32de69723036dddbc62 as part of
an attempt to cure a ~1% performance regression caused by converting the
fixed-function fragment shader generation code from Mesa IR to GLSL IR.
However, no performance data was included in the commit message, so it's
unclear whether or not it was successful.

Time has passed, so I decided to re-measure this.  Surprisingly,
Eric's OpenArena timedemo actually runs /faster/ after removing this and
the brw_wm_input_sizes atom.  On Ivybridge at 1024x768, I measured a
1.39532% +/- 0.91833% increase in FPS (n = 55).  On Ironlake, there was
no statistically significant difference (n = 37).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_fs.cpp

index 42c6ab4673ac49b33a21c4e622095ef3001f6798..1dc08bfec5085ec9102254ba024f011f7605ab21 100644 (file)
@@ -1039,31 +1039,24 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
                * attribute, as well as making brw_vs_constval.c
                * handle varyings other than gl_TexCoord.
                */
-              if (location >= VARYING_SLOT_TEX0 &&
-                  location <= VARYING_SLOT_TEX7 &&
-                  k == 3 && !(c->key.proj_attrib_mask
-                               & BITFIELD64_BIT(location))) {
-                 emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
-              } else {
-                 struct brw_reg interp = interp_reg(location, k);
-                  emit_linterp(attr, fs_reg(interp), interpolation_mode,
-                               ir->centroid);
-                  if (brw->needs_unlit_centroid_workaround && ir->centroid) {
-                     /* Get the pixel/sample mask into f0 so that we know
-                      * which pixels are lit.  Then, for each channel that is
-                      * unlit, replace the centroid data with non-centroid
-                      * data.
-                      */
-                     emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS, attr);
-                     fs_inst *inst = emit_linterp(attr, fs_reg(interp),
-                                                  interpolation_mode, false);
-                     inst->predicate = BRW_PREDICATE_NORMAL;
-                     inst->predicate_inverse = true;
-                  }
-                 if (intel->gen < 6) {
-                    emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
-                 }
-              }
+               struct brw_reg interp = interp_reg(location, k);
+               emit_linterp(attr, fs_reg(interp), interpolation_mode,
+                            ir->centroid);
+               if (brw->needs_unlit_centroid_workaround && ir->centroid) {
+                  /* Get the pixel/sample mask into f0 so that we know
+                   * which pixels are lit.  Then, for each channel that is
+                   * unlit, replace the centroid data with non-centroid
+                   * data.
+                   */
+                  emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS, attr);
+                  fs_inst *inst = emit_linterp(attr, fs_reg(interp),
+                                               interpolation_mode, false);
+                  inst->predicate = BRW_PREDICATE_NORMAL;
+                  inst->predicate_inverse = true;
+               }
+               if (intel->gen < 6) {
+                  emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
+               }
               attr.reg_offset++;
            }