From 8591adea38b82f973948cd120ffd9f93eb3bd71d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Sat, 20 Jun 2020 13:48:19 -0700 Subject: [PATCH] nir/algebraic: Don't distribute absolute-value into dot-products MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Dot product is multiplication followed by addition, and absolute value does not distribute into addition. Only vec4 platforms are affected by this change as scalar-only platforms never have any of the fdot_replicated instructions. In the shader-db results, below, shaders in MANY different applications are affected. Trine, Doom3, Enemy Territory: Quake Wars, Counter Strike: Global Offensive, Mad Max, Metro Last Light, and on and on... I'm really shocked that there were no test regressions! All Haswell and earlier platforms had similar results. (Haswell shown) total instructions in shared programs: 16219743 -> 16219820 (<.01%) instructions in affected programs: 12171 -> 12248 (0.63%) helped: 1 HURT: 78 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.78% max: 0.78% x̄: 0.78% x̃: 0.78% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.35% max: 2.38% x̄: 0.91% x̃: 1.06% 95% mean confidence interval for instructions value: 0.92 1.03 95% mean confidence interval for instructions %-change: 0.78% 1.00% Instructions are HURT. total cycles in shared programs: 538481383 -> 538491045 (<.01%) cycles in affected programs: 470796 -> 480458 (2.05%) helped: 149 HURT: 142 helped stats (abs) min: 1 max: 1338 x̄: 71.13 x̃: 4 helped stats (rel) min: 0.06% max: 40.99% x̄: 2.76% x̃: 0.67% HURT stats (abs) min: 1 max: 2092 x̄: 142.68 x̃: 12 HURT stats (rel) min: 0.07% max: 55.38% x̄: 5.07% x̃: 1.07% 95% mean confidence interval for cycles value: -5.28 71.69 95% mean confidence interval for cycles %-change: -0.07% 2.19% Inconclusive result (value mean confidence interval includes 0). 
Reviewed-by: Alyssa Rosenzweig Fixes: 62795475e8f ("nir/algebraic: Distribute source modifiers into instructions") Closes: #3129 Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 575024c04bf..4b5fc5ecc9a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2041,6 +2041,9 @@ distribute_src_mods = [ (('fdot_replicated4', ('fneg', a), ('fneg', b)), ('fdot_replicated4', a, b)), (('fneg', ('fneg', a)), a), + (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)), + (('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))), + (('fneg', ('ffma(is_used_once)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))), (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c)), (('fneg', ('fadd(is_used_once)', a, b)), ('fadd', ('fneg', a), ('fneg', b))), @@ -2050,21 +2053,18 @@ distribute_src_mods = [ (('fneg', ('fmin(is_used_once)', a, b)), ('fmax', ('fneg', a), ('fneg', b))), (('fneg', ('fmax(is_used_once)', a, b)), ('fmin', ('fneg', a), ('fneg', b))), + (('fneg', ('fdot_replicated2(is_used_once)', a, b)), ('fdot_replicated2', ('fneg', a), b)), + (('fneg', ('fdot_replicated3(is_used_once)', a, b)), ('fdot_replicated3', ('fneg', a), b)), + (('fneg', ('fdot_replicated4(is_used_once)', a, b)), ('fdot_replicated4', ('fneg', a), b)), + # fdph works mostly like fdot, but to get the correct result, the negation # must be applied to the second source. 
(('fneg', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', a, ('fneg', b))), - (('fabs', ('fdph_replicated(is_used_once)', a, b)), ('fdph_replicated', ('fabs', a), ('fabs', b))), (('fneg', ('fsign(is_used_once)', a)), ('fsign', ('fneg', a))), (('fabs', ('fsign(is_used_once)', a)), ('fsign', ('fabs', a))), ] -for op in ['fmul', 'fdot_replicated2', 'fdot_replicated3', 'fdot_replicated4']: - distribute_src_mods.extend([ - (('fneg', (op + '(is_used_once)', a, b)), (op, ('fneg', a), b)), - (('fabs', (op + '(is_used_once)', a, b)), (op, ('fabs', a), ('fabs', b))), - ]) - print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()) print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma", before_ffma_optimizations).render()) -- 2.30.2