From 1d36af93389e0cdaa36e8b972f328566487bd7d5 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 6 Mar 2020 16:23:29 -0800 Subject: [PATCH] nir/algebraic: Generalize some and-of-shift-right patterns [v2] MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Generalizes some of the patterns from 76289fbfa84a and 905ff8619824. In particular, some of the soft-fp64 code generates (a & 0x7fffffff) << 1 when constant 0.0 is compared (flt or feq). v2: Reduce the set of added patterns to those that actually help something. This reduces the size of the state transition tables by about 29k. Suggested by Jason. Remove the existing patterns that this commit subsumes. Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS: Tiger Lake total instructions in shared programs: 903171 -> 903108 (<.01%) instructions in affected programs: 635903 -> 635840 (<.01%) helped: 25 HURT: 11 helped stats (abs) min: 1 max: 16 x̄: 5.04 x̃: 3 helped stats (rel) min: <.01% max: 0.15% x̄: 0.04% x̃: 0.03% HURT stats (abs) min: 2 max: 14 x̄: 5.73 x̃: 5 HURT stats (rel) min: <.01% max: 0.11% x̄: 0.04% x̃: 0.02% 95% mean confidence interval for instructions value: -3.91 0.41 95% mean confidence interval for instructions %-change: -0.03% <.01% Inconclusive result (value mean confidence interval includes 0). total cycles in shared programs: 7059527 -> 7059681 (<.01%) cycles in affected programs: 5249401 -> 5249555 (<.01%) helped: 41 HURT: 9 helped stats (abs) min: 2 max: 76 x̄: 11.90 x̃: 10 helped stats (rel) min: <.01% max: 11.86% x̄: 0.99% x̃: 0.01% HURT stats (abs) min: 2 max: 380 x̄: 71.33 x̃: 12 HURT stats (rel) min: <.01% max: 0.22% x̄: 0.04% x̃: 0.01% 95% mean confidence interval for cycles value: -14.93 21.09 95% mean confidence interval for cycles %-change: -1.40% -0.20% Inconclusive result (value mean confidence interval includes 0). Ice Lake total instructions in shared programs: 895506 -> 895384 (-0.01%) instructions in affected programs: 658800 -> 658678 (-0.02%) helped: 37 HURT: 0 helped stats (abs) min: 2 max: 8 x̄: 3.30 x̃: 2 helped stats (rel) min: <.01% max: 0.03% x̄: 0.02% x̃: 0.02% 95% mean confidence interval for instructions value: -4.00 -2.59 95% mean confidence interval for instructions %-change: -0.02% -0.02% Instructions are helped. total cycles in shared programs: 7092748 -> 7092224 (<.01%) cycles in affected programs: 5272008 -> 5271484 (<.01%) helped: 36 HURT: 14 helped stats (abs) min: 2 max: 440 x̄: 21.67 x̃: 8 helped stats (rel) min: <.01% max: 11.86% x̄: 1.12% x̃: 0.02% HURT stats (abs) min: 2 max: 122 x̄: 18.29 x̃: 6 HURT stats (rel) min: <.01% max: 0.07% x̄: 0.01% x̃: <.01% 95% mean confidence interval for cycles value: -29.24 8.28 95% mean confidence interval for cycles %-change: -1.40% -0.21% Inconclusive result (value mean confidence interval includes 0). Regular shader-db results: All Haswell+ platforms had similar results. (Tiger Lake shown) total instructions in shared programs: 17611489 -> 17611408 (<.01%) instructions in affected programs: 21188 -> 21107 (-0.38%) helped: 23 HURT: 1 helped stats (abs) min: 1 max: 16 x̄: 3.78 x̃: 3 helped stats (rel) min: 0.03% max: 5.82% x̄: 1.13% x̃: 0.85% HURT stats (abs) min: 6 max: 6 x̄: 6.00 x̃: 6 HURT stats (rel) min: 0.60% max: 0.60% x̄: 0.60% x̃: 0.60% 95% mean confidence interval for instructions value: -5.27 -1.48 95% mean confidence interval for instructions %-change: -1.70% -0.42% Instructions are helped. total cycles in shared programs: 338418502 -> 338366148 (-0.02%) cycles in affected programs: 2289052 -> 2236698 (-2.29%) helped: 18 HURT: 3 helped stats (abs) min: 4 max: 18000 x̄: 2909.67 x̃: 38 helped stats (rel) min: 0.09% max: 4.07% x̄: 0.96% x̃: 0.43% HURT stats (abs) min: 2 max: 14 x̄: 6.67 x̃: 4 HURT stats (rel) min: 0.22% max: 1.13% x̄: 0.66% x̃: 0.64% 95% mean confidence interval for cycles value: -5204.00 217.91 95% mean confidence interval for cycles %-change: -1.31% -0.14% Inconclusive result (value mean confidence interval includes 0). Ivy Bridge total instructions in shared programs: 11875617 -> 11875615 (<.01%) instructions in affected programs: 1339 -> 1337 (-0.15%) helped: 2 HURT: 0 No changes on any earlier Intel platforms. Reviewed-by: Jason Ekstrand Reviewed-by: Matt Turner [v1] Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 52b48fffdfc..3d2f7db5bd2 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -271,6 +271,25 @@ for log2 in range(1, 7): # powers of two from 2 to 64 (('iand@32', ('iadd@32', a, b_is_multiple), mask), ('iadd', ('iand', a, mask), b)), ]) +# To save space in the state tables, reduce to the set that is known to help. +# Previously, this was range(1, 32). In addition, a couple rules inside the +# loop are commented out. Revisit someday, probably after mesa/#2635 has some +# resolution. +for i in [1, 2, 16, 24]: + lo_mask = 0xffffffff >> i + hi_mask = (0xffffffff << i) & 0xffffffff + + optimizations.extend([ + # This pattern seems to only help in the soft-fp64 code. + (('ishl@32', ('iand', 'a@32', lo_mask), i), ('ishl', a, i)), +# (('ushr@32', ('iand', 'a@32', hi_mask), i), ('ushr', a, i)), +# (('ishr@32', ('iand', 'a@32', hi_mask), i), ('ishr', a, i)), + + (('iand', ('ishl', 'a@32', i), hi_mask), ('ishl', a, i)), + (('iand', ('ushr', 'a@32', i), lo_mask), ('ushr', a, i)), +# (('iand', ('ishr', 'a@32', i), lo_mask), ('ushr', a, i)), # Yes, ushr is correct + ]) + optimizations.extend([ # This is common for address calculations. Reassociating may enable the # 'a<lower_rotate'), (('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'), (('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'), -- 2.30.2