From 5116646a7636ebc99714d1a0cc41cd402a915220 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 5 Mar 2019 12:46:11 -0800 Subject: [PATCH] nir/algebraic: Recognize open-coded fsat with modifiers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This change also enables a later change (nir/algebraic: Replace 1-fsat(a) with fsat(1-a)) to affect more shaders. Almost all of the affected shaders are in Bioshock Infinite, and all of those shaders require GLSL 4.10. All Intel platforms had similar results. (Ice Lake shown) total instructions in shared programs: 17228584 -> 17228376 (<.01%) instructions in affected programs: 31438 -> 31230 (-0.66%) helped: 105 HURT: 0 helped stats (abs) min: 1 max: 5 x̄: 1.98 x̃: 1 helped stats (rel) min: 0.08% max: 1.53% x̄: 0.73% x̃: 0.70% 95% mean confidence interval for instructions value: -2.20 -1.76 95% mean confidence interval for instructions %-change: -0.80% -0.67% Instructions are helped. total cycles in shared programs: 360936431 -> 360935690 (<.01%) cycles in affected programs: 420100 -> 419359 (-0.18%) helped: 71 HURT: 21 helped stats (abs) min: 1 max: 160 x̄: 19.28 x̃: 10 helped stats (rel) min: <.01% max: 9.78% x̄: 0.95% x̃: 0.48% HURT stats (abs) min: 1 max: 198 x̄: 29.90 x̃: 10 HURT stats (rel) min: 0.05% max: 8.36% x̄: 1.24% x̃: 0.90% 95% mean confidence interval for cycles value: -16.77 0.66 95% mean confidence interval for cycles %-change: -0.85% -0.06% Inconclusive result (value mean confidence interval includes 0). 
Reviewed-by: Matt Turner Reviewed-by: Thomas Helland --- src/compiler/nir/nir_opt_algebraic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index c488b2c4fb9..ccf57fd4b86 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -105,6 +105,7 @@ optimizations = [ (('iadd', a, ('iadd', ('ineg', a), b)), b), (('~fadd', ('fneg', a), ('fadd', a, b)), b), (('~fadd', a, ('fadd', ('fneg', a), b)), b), + (('fadd', ('fsat', a), ('fsat', ('fneg', a))), ('fsat', ('fabs', a))), (('~fmul', a, 0.0), 0.0), (('imul', a, 0), 0), (('umul_unorm_4x8', a, 0), 0), @@ -371,6 +372,8 @@ optimizations = [ (('imax', a, ('ineg', a)), ('iabs', a)), (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), + (('~fmin', ('fmax', a, -1.0), 0.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_negate && !options->lower_fsat'), + (('~fmax', ('fmin', a, 0.0), -1.0), ('fneg', ('fsat', ('fneg', a))), '!options->lower_negate && !options->lower_fsat'), (('fsat', ('fsign', a)), ('b2f', ('flt', 0.0, a))), (('fsat', ('b2f', a)), ('b2f', a)), (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), @@ -646,6 +649,8 @@ optimizations = [ (('ilt', ('f2u', a), b), ('ilt', ('f2i', a), b)), (('ilt', b, ('f2u', a)), ('ilt', b, ('f2i', a))), + (('~fmin', ('fabs', a), 1.0), ('fsat', ('fabs', a)), '!options->lower_fsat'), + # Packing and then unpacking does nothing (('unpack_64_2x32_split_x', ('pack_64_2x32_split', a, b)), a), (('unpack_64_2x32_split_y', ('pack_64_2x32_split', a, b)), b), -- 2.30.2