From 7183568869a0ce856b2f3d4cd9e1d7bd63ff9092 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 12 Mar 2013 21:09:19 -0700 Subject: [PATCH] i965: Remove fixed-function texture projection avoidance optimization. This optimization attempts to avoid extra attribute interpolation instructions for texture coordinates where the W-component is 1.0. Unfortunately, it requires a lot of complexity: the brw_wm_input_sizes state atom (all the brw_vs_constval.c code) needs to run on each draw. It computes the input_size_masks array, then uses that to compute proj_attrib_mask. Differences in proj_attrib_mask can cause state-dependent fragment shader recompiles. We also often fail to guess proj_attrib_mask for the fragment shader precompile, causing us to needlessly compile it twice. Furthermore, this optimization only applies to fixed-function programs; it does not help modern GLSL-based programs at all. Generally, older fixed-function programs run fine on modern hardware anyway. The optimization has existed in some form since the initial commit. When we rewrote the fragment shader backend, we dropped it for a while. Eric readded it in commit eb30820f268608cf451da32de69723036dddbc62 as part of an attempt to cure a ~1% performance regression caused by converting the fixed-function fragment shader generation code from Mesa IR to GLSL IR. However, no performance data was included in the commit message, so it's unclear whether or not it was successful. Time has passed, so I decided to re-measure this. Surprisingly, Eric's OpenArena timedemo actually runs /faster/ after removing this and the brw_wm_input_sizes atom. On Ivybridge at 1024x768, I measured a 1.39532% +/- 0.91833% increase in FPS (n = 55). On Ironlake, there was no statistically significant difference (n = 37). Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_fs.cpp | 43 ++++++++++++---------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 42c6ab4673a..1dc08bfec50 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1039,31 +1039,24 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) * attribute, as well as making brw_vs_constval.c * handle varyings other than gl_TexCoord. */ - if (location >= VARYING_SLOT_TEX0 && - location <= VARYING_SLOT_TEX7 && - k == 3 && !(c->key.proj_attrib_mask - & BITFIELD64_BIT(location))) { - emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); - } else { - struct brw_reg interp = interp_reg(location, k); - emit_linterp(attr, fs_reg(interp), interpolation_mode, - ir->centroid); - if (brw->needs_unlit_centroid_workaround && ir->centroid) { - /* Get the pixel/sample mask into f0 so that we know - * which pixels are lit. Then, for each channel that is - * unlit, replace the centroid data with non-centroid - * data. - */ - emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS, attr); - fs_inst *inst = emit_linterp(attr, fs_reg(interp), - interpolation_mode, false); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - } - if (intel->gen < 6) { - emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); - } - } + struct brw_reg interp = interp_reg(location, k); + emit_linterp(attr, fs_reg(interp), interpolation_mode, + ir->centroid); + if (brw->needs_unlit_centroid_workaround && ir->centroid) { + /* Get the pixel/sample mask into f0 so that we know + * which pixels are lit. Then, for each channel that is + * unlit, replace the centroid data with non-centroid + * data. + */ + emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS, attr); + fs_inst *inst = emit_linterp(attr, fs_reg(interp), + interpolation_mode, false); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; + } + if (intel->gen < 6) { + emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); + } attr.reg_offset++; } -- 2.30.2