From 7c64cbf49db1573cd611ee82db3d6c8654d308a7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 21 Mar 2019 14:52:58 -0700 Subject: [PATCH] nir/algebraic: Recognize (a < 0 || 0 < b) as min(a, -b) < 0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Similar to commit 97e6c1b9 and f5cf74d8ba8c. First apply 0 < b => -b < 0 to get (a < 0 || -b < 0), then apply some pre-existing rules to get min(a, -b) < 0. v2: Substantially update the comment explaining the use of is_used_once and the duplication of patterns. Suggested by Caio. Also, while flt and fge are not commutative, ior and iand are. Half of the original patterns were redundant, so delete them. As alternate justification for deleting them, fmin(a, -b) < 0 <=> 0 < fmax(-a, b). Proof left as an exercise for the reader. All Gen7+ platforms had similar results. (Ice Lake shown) total instructions in shared programs: 16333789 -> 16333713 (<.01%) instructions in affected programs: 11424 -> 11348 (-0.67%) helped: 32 HURT: 0 helped stats (abs) min: 1 max: 7 x̄: 2.38 x̃: 2 helped stats (rel) min: 0.20% max: 1.67% x̄: 0.76% x̃: 0.69% 95% mean confidence interval for instructions value: -3.03 -1.72 95% mean confidence interval for instructions %-change: -0.89% -0.62% Instructions are helped. total cycles in shared programs: 367598295 -> 367596791 (<.01%) cycles in affected programs: 141414 -> 139910 (-1.06%) helped: 23 HURT: 6 helped stats (abs) min: 3 max: 386 x̄: 72.52 x̃: 20 helped stats (rel) min: 0.15% max: 4.86% x̄: 1.01% x̃: 0.76% HURT stats (abs) min: 4 max: 88 x̄: 27.33 x̃: 12 HURT stats (rel) min: 0.22% max: 3.95% x̄: 1.08% x̃: 0.59% 95% mean confidence interval for cycles value: -93.51 -10.21 95% mean confidence interval for cycles %-change: -1.10% -0.05% Cycles are helped. total instructions in shared programs: 10830836 -> 10830779 (<.01%) instructions in affected programs: 6895 -> 6838 (-0.83%) helped: 12 HURT: 0 helped stats (abs) min: 1 max: 14 x̄: 4.75 x̃: 1 helped stats (rel) min: 0.14% max: 1.61% x̄: 0.65% x̃: 0.33% 95% mean confidence interval for instructions value: -8.46 -1.04 95% mean confidence interval for instructions %-change: -1.03% -0.27% Instructions are helped. total cycles in shared programs: 154028477 -> 154032740 (<.01%) cycles in affected programs: 178433 -> 182696 (2.39%) helped: 3 HURT: 9 helped stats (abs) min: 3 max: 20 x̄: 11.00 x̃: 10 helped stats (rel) min: 0.07% max: 0.20% x̄: 0.12% x̃: 0.09% HURT stats (abs) min: 27 max: 1415 x̄: 477.33 x̃: 262 HURT stats (rel) min: 0.22% max: 6.45% x̄: 2.49% x̃: 1.76% 95% mean confidence interval for cycles value: 28.68 681.82 95% mean confidence interval for cycles %-change: 0.37% 3.30% Cycles are HURT. Iron Lake total instructions in shared programs: 8137966 -> 8137992 (<.01%) instructions in affected programs: 3281 -> 3307 (0.79%) helped: 0 HURT: 6 HURT stats (abs) min: 3 max: 7 x̄: 4.33 x̃: 3 HURT stats (rel) min: 0.63% max: 1.01% x̄: 0.76% x̃: 0.64% 95% mean confidence interval for instructions value: 2.17 6.50 95% mean confidence interval for instructions %-change: 0.56% 0.96% Instructions are HURT. total cycles in shared programs: 188539386 -> 188540038 (<.01%) cycles in affected programs: 103826 -> 104478 (0.63%) helped: 0 HURT: 7 HURT stats (abs) min: 16 max: 218 x̄: 93.14 x̃: 80 HURT stats (rel) min: 0.14% max: 0.95% x̄: 0.53% x̃: 0.46% 95% mean confidence interval for cycles value: 10.26 176.02 95% mean confidence interval for cycles %-change: 0.24% 0.81% Cycles are HURT. GM45 total instructions in shared programs: 5008876 -> 5008889 (<.01%) instructions in affected programs: 1645 -> 1658 (0.79%) helped: 0 HURT: 3 HURT stats (abs) min: 3 max: 7 x̄: 4.33 x̃: 3 HURT stats (rel) min: 0.63% max: 1.00% x̄: 0.76% x̃: 0.63% total cycles in shared programs: 128968950 -> 128969426 (<.01%) cycles in affected programs: 64854 -> 65330 (0.73%) helped: 0 HURT: 4 HURT stats (abs) min: 18 max: 218 x̄: 119.00 x̃: 120 HURT stats (rel) min: 0.14% max: 0.95% x̄: 0.60% x̃: 0.66% 95% mean confidence interval for cycles value: -62.92 300.92 95% mean confidence interval for cycles %-change: -0.05% 1.26% Inconclusive result (value mean confidence interval includes 0). Reviewed-by: Caio Marcelo de Oliveira Filho --- src/compiler/nir/nir_opt_algebraic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 2b9a0e3dc6c..b26adc750f9 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -502,6 +502,20 @@ optimizations = [ (('iand', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umax', b, c))), (('iand', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umin', a, b), c)), + # These derive from the previous patterns with the application of b < 0 <=> + # 0 < -b. The transformation should be applied if either comparison is + # used once as this ensures that the number of comparisons will not + # increase. The sources to the ior and iand are not symmetric, so the + # rules have to be duplicated to get this behavior. + (('~ior', ('flt(is_used_once)', 0.0, 'a@32'), ('flt', 'b@32', 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))), + (('~ior', ('flt', 0.0, 'a@32'), ('flt(is_used_once)', 'b@32', 0.0)), ('flt', 0.0, ('fmax', a, ('fneg', b)))), + (('~ior', ('fge(is_used_once)', 0.0, 'a@32'), ('fge', 'b@32', 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))), + (('~ior', ('fge', 0.0, 'a@32'), ('fge(is_used_once)', 'b@32', 0.0)), ('fge', 0.0, ('fmin', a, ('fneg', b)))), + (('~iand', ('flt(is_used_once)', 0.0, 'a@32'), ('flt', 'b@32', 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))), + (('~iand', ('flt', 0.0, 'a@32'), ('flt(is_used_once)', 'b@32', 0.0)), ('flt', 0.0, ('fmin', a, ('fneg', b)))), + (('~iand', ('fge(is_used_once)', 0.0, 'a@32'), ('fge', 'b@32', 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))), + (('~iand', ('fge', 0.0, 'a@32'), ('fge(is_used_once)', 'b@32', 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))), + # Common pattern like 'if (i == 0 || i == 1 || ...)' (('ior', ('ieq', a, 0), ('ieq', a, 1)), ('uge', 1, a)), (('ior', ('uge', 1, a), ('ieq', a, 2)), ('uge', 2, a)), -- 2.30.2