From 32d259713b7044a25241ce377f68f71ead703e94 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 27 Mar 2018 22:57:07 -0700 Subject: [PATCH] nir/algebraic: Commute 1-fsat(a) to fsat(1-a) for all non-fmul instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The goal is to avoid having an extra MOV instruction to perform the saturate. Doing the subtraction first allows the saturate to be applied to the ADD instruction making the MOV unnecessary. Values generated in different block and values from non-ALU instructions (e.g., texture instructions) almost always need the extra MOV. Multiply instructions are restricted because doing this rearrangement can interfere with the generation of flrp and ffma instructions. v2: Now that the final method has been selected, squash three commits into one. All Intel platforms has similar results. (Ice Lake shown) total instructions in shared programs: 17223214 -> 17219386 (-0.02%) instructions in affected programs: 1524376 -> 1520548 (-0.25%) helped: 2686 HURT: 26 helped stats (abs) min: 1 max: 32 x̄: 1.44 x̃: 1 helped stats (rel) min: 0.03% max: 16.67% x̄: 0.54% x̃: 0.37% HURT stats (abs) min: 1 max: 2 x̄: 1.69 x̃: 2 HURT stats (rel) min: 0.33% max: 1.67% x̄: 0.54% x̃: 0.35% 95% mean confidence interval for instructions value: -1.46 -1.36 95% mean confidence interval for instructions %-change: -0.56% -0.50% Instructions are helped. total cycles in shared programs: 360811571 -> 360791896 (<.01%) cycles in affected programs: 103650214 -> 103630539 (-0.02%) helped: 1557 HURT: 675 helped stats (abs) min: 1 max: 1773 x̄: 41.44 x̃: 16 helped stats (rel) min: <.01% max: 26.77% x̄: 1.37% x̃: 0.64% HURT stats (abs) min: 1 max: 1513 x̄: 66.44 x̃: 14 HURT stats (rel) min: <.01% max: 46.16% x̄: 2.00% x̃: 0.49% 95% mean confidence interval for cycles value: -14.82 -2.81 95% mean confidence interval for cycles %-change: -0.50% -0.20% Cycles are helped. LOST: 2 GAINED: 0 Reviewed-by: Matt Turner [v1] Reviewed-by: Thomas Helland --- src/compiler/nir/nir_opt_algebraic.py | 13 ++++++++----- src/compiler/nir/nir_search_helpers.h | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 79058c06b97..1c7b3597c1f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -152,11 +152,7 @@ optimizations = [ (('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'), (('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ) )), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'), # These are the same as the previous three rules, but it depends on - # 1-fsat(x) <=> fsat(1-x): - # - # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially - # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1 - # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0 + # 1-fsat(x) <=> fsat(1-x). See below. (('~fadd@32', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp32'), (('~fadd@64', ('fmul', a, ('fsat', ('fadd', 1.0, ('fneg', c )))), ('fmul', b, ('fsat', c))), ('flrp', a, b, ('fsat', c)), '!options->lower_flrp64'), @@ -177,6 +173,11 @@ optimizations = [ (('fdot3', ('vec3', a, 0.0, 0.0), b), ('fmul', a, b)), (('fdot3', ('vec3', a, b, 0.0), c), ('fdot2', ('vec2', a, b), c)), + # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially + # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1 + # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0 + (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), + # (a * #b + #c) << #d # ((a * #b) << #d) + (#c << #d) # (a * (#b << #d)) + (#c << #d) @@ -1179,6 +1180,8 @@ late_optimizations = [ (('ior', a, a), a), (('iand', a, a), a), + (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), + (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'), (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 9e03c610ece..631c65a8642 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -173,6 +173,22 @@ is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, return !nir_src_is_const(instr->src[src].src); } +static inline bool +is_not_fmul(nir_alu_instr *instr, unsigned src, + UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +{ + nir_alu_instr *src_alu = + nir_src_as_alu_instr(instr->src[src].src); + + if (src_alu == NULL) + return true; + + if (src_alu->op == nir_op_fneg) + return is_not_fmul(src_alu, 0, 0, NULL); + + return src_alu->op != nir_op_fmul; +} + static inline bool is_used_once(nir_alu_instr *instr) { -- 2.30.2