From: Erik Faye-Lund
Date: Fri, 10 Jan 2020 21:59:54 +0000 (+0100)
Subject: nir: add iabs-lowering code
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5e841e8b4fd689c50e5ff870ccd64788c6181c9e;p=mesa.git

nir: add iabs-lowering code

Microsoft's DXIL is based on LLVM, which doesn't have an integer ABS
opcode, but instead needs it lowered to NEG + MAX. We need to do this
with an option, to prevent an already existing optimization rule from
undoing this.

Reviewed-by: Eric Anholt
Reviewed-by: Jason Ekstrand
Part-of:
---

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 005f7625a60..aa7ff44985e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3042,6 +3042,9 @@ typedef struct nir_shader_compiler_options {
    /** enables rules to lower fsign to fsub and flt */
    bool lower_fsign;
 
+   /** enables rules to lower iabs to ineg+imax */
+   bool lower_iabs;
+
    /* lower fdph to fdot4 */
    bool lower_fdph;
 
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 65cff5c04eb..4a2efa8252f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -543,7 +543,7 @@ optimizations.extend([
    (('fmax', a, ('fabs', a)), ('fabs', a)),
    (('imax', a, ('iabs', a)), ('iabs', a)),
    (('fmax', a, ('fneg', a)), ('fabs', a)),
-   (('imax', a, ('ineg', a)), ('iabs', a)),
+   (('imax', a, ('ineg', a)), ('iabs', a), '!options->lower_iabs'),
    (('~fmax', ('fabs', a), 0.0), ('fabs', a)),
    (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
    # fmax(fmin(a, 1.0), 0.0) is inexact because it returns 1.0 on NaN, while
@@ -1902,6 +1902,7 @@ late_optimizations = [
    (('iadd', 'a', ('ineg', 'b')), ('isub', 'a', 'b'), '!options->lower_sub'),
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
+   (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
 
    # These are duplicated from the main optimizations table. The late
    # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create