From: Ian Romanick Date: Wed, 19 Sep 2018 08:17:31 +0000 (-0700) Subject: nir/algebraic: Add lowering for 64-bit usub_sat X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1bdfc6d7cb4c897ae6fe826d7f778574c8ca7551;p=mesa.git nir/algebraic: Add lowering for 64-bit usub_sat v2: Rebase on 272e927d0e9 ("nir/spirv: initial handling of OpenCL.std extension opcodes") v3: Add a new lower_usub_sat64 flag that only applies to the 64-bit version of the nir_op_usub_sat instruction. v4: Also enable the lowering when nir_lower_iadd64 is set. Reviewed-by: Caio Marcelo de Oliveira Filho [v3] Part-of: --- diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d07e688b5a8..eafd6fad855 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2851,8 +2851,25 @@ typedef struct nir_shader_compiler_options { */ bool lower_hadd64; + /** + * Set if nir_op_uadd_sat and nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_usub_sat64 + */ bool lower_add_sat; + /** + * Set if only 64-bit nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * \sa ::lower_add_sat + */ + bool lower_usub_sat64; + /** * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's * for IO purposes and would prefer loads/stores be vectorized. 
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 97c4777a534..6289551939a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1010,6 +1010,7 @@ optimizations.extend([ (('uadd_sat', a, b), ('bcsel', ('ult', ('iadd', a, b), a), -1, ('iadd', a, b)), 'options->lower_add_sat'), (('usub_sat', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_add_sat'), + (('usub_sat@64', a, b), ('bcsel', ('ult', a, b), 0, ('isub', a, b)), 'options->lower_usub_sat64 || (options->lower_int64_options & nir_lower_iadd64) != 0'), # Alternative lowering that doesn't rely on bfi. (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),