From 03fb13f646732d234d3698acfa089ee0216cbe8f Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Fri, 5 Jan 2018 13:20:46 -0800
Subject: [PATCH] nir: Rearrange logic op-compounded integer compares
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Skylake and Broadwell had similar results. Skylake shown.
total instructions in shared programs: 14521769 -> 14521753 (<.01%)
instructions in affected programs: 8782 -> 8766 (-0.18%)
helped: 16
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.12% max: 0.40% x̄: 0.20% x̃: 0.18%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -0.23% -0.16%
Instructions are helped.

total cycles in shared programs: 533000376 -> 533000205 (<.01%)
cycles in affected programs: 447035 -> 446864 (-0.04%)
helped: 9
HURT: 9
helped stats (abs) min: 2 max: 40 x̄: 35.78 x̃: 40
helped stats (rel) min: 0.02% max: 0.18% x̄: 0.10% x̃: 0.09%
HURT stats (abs) min: 1 max: 52 x̄: 16.78 x̃: 10
HURT stats (rel) min: <.01% max: 1.11% x̄: 0.29% x̃: 0.12%
95% mean confidence interval for cycles value: -25.07 6.07
95% mean confidence interval for cycles %-change: -0.08% 0.27%
Inconclusive result (value mean confidence interval includes 0).

No changes on GM45, Iron Lake, Sandy Bridge, Ivy Bridge, or Haswell.

Signed-off-by: Ian Romanick
Reviewed-by: Samuel Iglesias Gonsálvez
Reviewed-by: Elie Tournier
---
 src/compiler/nir/nir_opt_algebraic.py | 35 +++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 9dadb67bd2e..3cc910a8a60 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -218,6 +218,23 @@ optimizations = [
    (('~iand', ('fge', a, '#b'), ('fge', a, '#c')), ('fge', a, ('fmax', b, c))),
    (('~iand', ('fge', '#a', c), ('fge', '#b', c)), ('fge', ('fmin', a, b), c)),
 
+   (('ior', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imax', b, c))),
+   (('ior', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imin', a, b), c)),
+   (('ior', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imin', b, c))),
+   (('ior', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imax', a, b), c)),
+   (('ior', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umax', b, c))),
+   (('ior', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umin', a, b), c)),
+   (('ior', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umin', b, c))),
+   (('ior', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umax', a, b), c)),
+   (('iand', ('ilt(is_used_once)', a, b), ('ilt', a, c)), ('ilt', a, ('imin', b, c))),
+   (('iand', ('ilt(is_used_once)', a, c), ('ilt', b, c)), ('ilt', ('imax', a, b), c)),
+   (('iand', ('ige(is_used_once)', a, b), ('ige', a, c)), ('ige', a, ('imax', b, c))),
+   (('iand', ('ige(is_used_once)', a, c), ('ige', b, c)), ('ige', ('imin', a, b), c)),
+   (('iand', ('ult(is_used_once)', a, b), ('ult', a, c)), ('ult', a, ('umin', b, c))),
+   (('iand', ('ult(is_used_once)', a, c), ('ult', b, c)), ('ult', ('umax', a, b), c)),
+   (('iand', ('uge(is_used_once)', a, b), ('uge', a, c)), ('uge', a, ('umax', b, c))),
+   (('iand', ('uge(is_used_once)', a, c), ('uge', b, c)), ('uge', ('umin', a, b), c)),
+
    # These patterns can result when (a < b || a < c) => (a < min(b, c))
    # transformations occur before constant propagation and loop-unrolling.
    (('~flt', a, ('fmax', b, a)), ('flt', a, b)),
@@ -227,6 +244,24 @@ optimizations = [
    (('~flt', a, ('fmin', b, a)), False),
    (('~flt', ('fmax', a, b), a), False),
 
+   (('ilt', a, ('imax', b, a)), ('ilt', a, b)),
+   (('ilt', ('imin', a, b), a), ('ilt', b, a)),
+   (('ige', a, ('imin', b, a)), True),
+   (('ige', ('imax', a, b), a), True),
+   (('ult', a, ('umax', b, a)), ('ult', a, b)),
+   (('ult', ('umin', a, b), a), ('ult', b, a)),
+   (('uge', a, ('umin', b, a)), True),
+   (('uge', ('umax', a, b), a), True),
+
+   (('ilt', '#a', ('imin', '#b', c)), ('iand', ('ilt', a, b), ('ilt', a, c))),
+   (('ilt', ('imax', '#a', b), '#c'), ('iand', ('ilt', a, c), ('ilt', b, c))),
+   (('ige', '#a', ('imax', '#b', c)), ('iand', ('ige', a, b), ('ige', a, c))),
+   (('ige', ('imin', '#a', b), '#c'), ('iand', ('ige', a, c), ('ige', b, c))),
+   (('ult', '#a', ('umin', '#b', c)), ('iand', ('ult', a, b), ('ult', a, c))),
+   (('ult', ('umax', '#a', b), '#c'), ('iand', ('ult', a, c), ('ult', b, c))),
+   (('uge', '#a', ('umax', '#b', c)), ('iand', ('uge', a, b), ('uge', a, c))),
+   (('uge', ('umin', '#a', b), '#c'), ('iand', ('uge', a, c), ('uge', b, c))),
+
    (('fabs', ('slt', a, b)), ('slt', a, b)),
    (('fabs', ('sge', a, b)), ('sge', a, b)),
    (('fabs', ('seq', a, b)), ('seq', a, b)),
-- 
2.30.2