From c0bdf37c9100c4e473f53defccab4e2ae6b7a7b1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 23 Mar 2020 14:25:24 -0700 Subject: [PATCH] nir/algebraic: Change the default cursor location when replacing a unary op MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If the expression tree that is being replaced has a unary operation at its root, set the cursor (location where new instructions are inserted) at the source instruction instead. This doesn't do much now because there are very few patterns that have a unary operation as the root. Almost all of the patterns that do have a unary operation as the root have inot. All of the shaders that are affected by this commit have expression trees with an inot at the root. This change prevents some significant, spurious caused by the next commit. There is further explanation in the large comment added in the code. I also considered a couple other options that may still be worth exploring. 1. Add some mark-up to the search pattern to denote where new instructions should be added. I considered using "@" to denote the cursor location. For example, (('fneg', ('fadd@', a, b)), ...) 2. To prevent other kinds of unintended code motion, add the ability to name expressions in the search pattern so that they can be reused in the replacement. For example, (('bcsel', ('ige', ('find_lsb=b', a), 0), ('find_lsb', a), -1), b), An alternative would be to add some kind of CSE at the time of inserting the replacements. Create a new instruction, then check to see if it already exists. That option might be better overall. Over the years I know Matt has heard me complain, "I added a pattern that just deleted an instruction, but it added a bunch of spills!" This was always in large, complex shaders that are very hard to analyze. I always blamed these cases on the scheduler being dumb. I am now very suspicious that unintended code motion was the real problem. All Gen4+ Intel platforms had similar results. (Tiger Lake shown) total instructions in shared programs: 17611405 -> 17611333 (<.01%) instructions in affected programs: 18613 -> 18541 (-0.39%) helped: 41 HURT: 13 helped stats (abs) min: 1 max: 18 x̄: 4.46 x̃: 4 helped stats (rel) min: 0.27% max: 5.68% x̄: 1.29% x̃: 1.34% HURT stats (abs) min: 1 max: 20 x̄: 8.54 x̃: 7 HURT stats (rel) min: 0.30% max: 4.20% x̄: 2.15% x̃: 2.38% 95% mean confidence interval for instructions value: -3.29 0.63 95% mean confidence interval for instructions %-change: -0.95% 0.02% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 338366118 -> 338365223 (<.01%) cycles in affected programs: 257889 -> 256994 (-0.35%) helped: 42 HURT: 15 helped stats (abs) min: 2 max: 120 x̄: 39.38 x̃: 34 helped stats (rel) min: 0.04% max: 2.55% x̄: 0.86% x̃: 0.76% HURT stats (abs) min: 6 max: 204 x̄: 50.60 x̃: 34 HURT stats (rel) min: 0.11% max: 4.75% x̄: 1.12% x̃: 0.56% 95% mean confidence interval for cycles value: -30.39 -1.02 95% mean confidence interval for cycles %-change: -0.66% -0.02% Cycles are helped. Reviewed-by: Matt Turner Part-of: --- src/compiler/nir/nir_search.c | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 7abb023faf5..5435bbb78e6 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -736,7 +736,41 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr, fprintf(stderr, " ssa_%d\n", instr->dest.dest.ssa.index); #endif - build->cursor = nir_before_instr(&instr->instr); + /* If the instruction at the root of the expression tree being replaced is + * a unary operation, insert the replacement instructions at the location + * of the source of the unary operation. Otherwise, insert the replacement + * instructions at the location of the expression tree root. + * + * For the unary operation case, this is done to prevent some spurious code + * motion that can dramatically extend live ranges. Imagine an expression + * like -(A+B) where the addtion and the negation are separated by flow + * control and thousands of instructions. If this expression is replaced + * with -A+-B, inserting the new instructions at the site of the negation + * could extend the live range of A and B dramtically. This could increase + * register pressure and cause spilling. + * + * It may well be that moving instructions around is a good thing, but + * keeping algebraic optimizations and code motion optimizations separate + * seems safest. + */ + nir_alu_instr *const src_instr = nir_src_as_alu_instr(instr->src[0].src); + if (src_instr != NULL && + (instr->op == nir_op_fneg || instr->op == nir_op_fabs || + instr->op == nir_op_ineg || instr->op == nir_op_iabs || + instr->op == nir_op_inot)) { + /* Insert new instructions *after*. Otherwise a hypothetical + * replacement fneg(X) -> fabs(X) would insert the fabs() instruction + * before X! This can also occur for things like fneg(X.wzyx) -> X.wzyx + * in vector mode. A move instruction to handle the swizzle will get + * inserted before X. + * + * This manifested in a single OpenGL ES 2.0 CTS vertex shader test on + * older Intel GPU that use vector-mode vertex processing. + */ + build->cursor = nir_after_instr(&src_instr->instr); + } else { + build->cursor = nir_before_instr(&instr->instr); + } state.states = states; -- 2.30.2