From d6d63aec18624fe4cbc2e9b06d95f858500257df Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Wed, 30 Oct 2019 17:41:41 -0700
Subject: [PATCH] nir/algebraic: optimize ior(ine(a, 0), ine(b, 0)) to ine(ior(a, b), 0)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Like 70f9e2589e6b. Also scrub the unnecessary size qualifier in both
replacement patterns.

This occurs in a handful of places in the soft-fp64 code, and that is
the primary reason for the change.

Perhaps the patterns that generate umin should be conditioned on
something, but I'm not sure what. lower_bitops might cover the cases
that matter, but it seems ugly.

Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS:

Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 936505 -> 933388 (-0.33%)
instructions in affected programs: 925719 -> 922602 (-0.34%)
helped: 154
HURT: 1
helped stats (abs) min: 1 max: 211 x̄: 35.45 x̃: 16
helped stats (rel) min: 0.34% max: 9.30% x̄: 2.28% x̃: 0.96%
HURT stats (abs) min: 2342 max: 2342 x̄: 2342.00 x̃: 2342
HURT stats (rel) min: 2.28% max: 2.28% x̄: 2.28% x̃: 2.28%
95% mean confidence interval for instructions value: -51.21 10.99
95% mean confidence interval for instructions %-change: -2.61% -1.89%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 7323502 -> 7306184 (-0.24%)
cycles in affected programs: 7220376 -> 7203058 (-0.24%)
helped: 126
HURT: 1
helped stats (abs) min: 2 max: 946 x̄: 159.10 x̃: 95
helped stats (rel) min: 0.01% max: 9.62% x̄: 0.80% x̃: 0.37%
HURT stats (abs) min: 2728 max: 2728 x̄: 2728.00 x̃: 2728
HURT stats (rel) min: 0.37% max: 0.37% x̄: 0.37% x̃: 0.37%
95% mean confidence interval for cycles value: -192.07 -80.66
95% mean confidence interval for cycles %-change: -1.07% -0.51%
Cycles are helped.
total spills in shared programs: 635 -> 817 (28.66%)
spills in affected programs: 635 -> 817 (28.66%)
helped: 0
HURT: 3

total fills in shared programs: 2065 -> 2438 (18.06%)
fills in affected programs: 2019 -> 2392 (18.47%)
helped: 0
HURT: 2

Regular shader-db results:

All Haswell+ platforms had similar results. (Tiger Lake shown)
total instructions in shared programs: 17611506 -> 17611489 (<.01%)
instructions in affected programs: 33442 -> 33425 (-0.05%)
helped: 32
HURT: 6
helped stats (abs) min: 1 max: 6 x̄: 1.69 x̃: 1
helped stats (rel) min: 0.08% max: 1.90% x̄: 0.27% x̃: 0.11%
HURT stats (abs) min: 1 max: 15 x̄: 6.17 x̃: 5
HURT stats (rel) min: 0.09% max: 1.50% x̄: 0.65% x̃: 0.55%
95% mean confidence interval for instructions value: -1.70 0.80
95% mean confidence interval for instructions %-change: -0.30% 0.05%
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 338419218 -> 338418502 (<.01%)
cycles in affected programs: 385795 -> 385079 (-0.19%)
helped: 42
HURT: 3
helped stats (abs) min: 2 max: 192 x̄: 24.57 x̃: 16
helped stats (rel) min: 0.04% max: 2.09% x̄: 0.33% x̃: 0.22%
HURT stats (abs) min: 64 max: 164 x̄: 105.33 x̃: 88
HURT stats (rel) min: 0.77% max: 1.58% x̄: 1.09% x̃: 0.93%
95% mean confidence interval for cycles value: -29.76 -2.06
95% mean confidence interval for cycles %-change: -0.40% -0.07%
Cycles are helped.

Ivy Bridge and Sandy Bridge had similar results. (Ivy Bridge shown)
total instructions in shared programs: 11875620 -> 11875617 (<.01%)
instructions in affected programs: 421 -> 418 (-0.71%)
helped: 2
HURT: 0

total cycles in shared programs: 178245336 -> 178245326 (<.01%)
cycles in affected programs: 3425 -> 3415 (-0.29%)
helped: 2
HURT: 0

No changes on Gen4 or Gen5.
Reviewed-by: Matt Turner Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index d65c8089211..52b48fffdfc 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -607,7 +607,8 @@ optimizations.extend([ (('ine', ('ineg', ('b2i32', 'a@1')), ('ineg', ('b2i32', 'b@1'))), ('ine', a, b)), (('b2i32', ('ine', 'a@1', 'b@1')), ('b2i32', ('ixor', a, b))), - (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', 'a@32', 'b@32'), 0), '!options->lower_bitops'), + (('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', a, b), 0), '!options->lower_bitops'), + (('ior', ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('ior', a, b), 0), '!options->lower_bitops'), # These patterns can result when (a < b || a < c) => (a < min(b, c)) # transformations occur before constant propagation and loop-unrolling. @@ -1705,6 +1706,9 @@ late_optimizations = [ (('ior', a, a), a), (('iand', a, a), a), + (('iand', ('ine(is_used_once)', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('umin', a, b), 0)), + (('ior', ('ieq(is_used_once)', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('umin', a, b), 0)), + (('~fadd', ('fneg(is_used_once)', ('fsat(is_used_once)', 'a(is_not_fmul)')), 1.0), ('fsat', ('fadd', 1.0, ('fneg', a)))), (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), -- 2.30.2