From: Eduardo Lima Mitev Date: Sun, 12 May 2019 22:09:38 +0000 (+0200) Subject: nir_algebraic: Add basic optimizations for umul_low and imadsh_mix16 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3addd7c;p=mesa.git nir_algebraic: Add basic optimizations for umul_low and imadsh_mix16 For umul_low (al * bl), zero is returned if the low 16-bit word of either source is zero. For imadsh_mix16 ((ah * bl) << 16 + c), c is returned if either 'ah' or 'bl' is zero. A couple of nir_search_helpers are added: is_upper_half_zero() returns true if the highest word of every component of an integer NIR ALU src is zero. is_lower_half_zero() returns true if the lowest word of every component of an integer NIR ALU src is zero. Reviewed-by: Eric Anholt --- diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index b1c9b071464..e4cdce4865b 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1105,6 +1105,15 @@ for op in ['fddx', 'fddx_fine', 'fddx_coarse', ((op, 'a'), 0.0, 'info->stage == MESA_SHADER_COMPUTE && info->cs.derivative_group == DERIVATIVE_GROUP_NONE') ] +# Some optimizations for ir3-specific instructions. +optimizations += [ + # 'al * bl': If either 'al' or 'bl' is zero, return zero. + (('umul_low', '#a(is_lower_half_zero)', 'b'), (0)), + # '(ah * bl) << 16 + c': If either 'ah' or 'bl' is zero, return 'c'. + (('imadsh_mix16', '#a@32(is_upper_half_zero)', 'b@32', 'c@32'), ('c')), + (('imadsh_mix16', 'a@32', '#b@32(is_lower_half_zero)', 'c@32'), ('c')), +] + # This section contains "late" optimizations that should be run before # creating ffmas and calling regular optimizations for the final time. 
# Optimizations should go here if they help code generation and conflict diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 631c65a8642..acd53a4960d 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -242,4 +242,50 @@ is_used_by_non_fsat(nir_alu_instr *instr) return false; } +/** + * Returns true if a NIR ALU src represents a constant integer + * of either 32 or 64 bits, and the higher word (bit-size / 2) + * of all its components is zero. + */ +static inline bool +is_upper_half_zero(nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle) +{ + if (nir_src_as_const_value(instr->src[src].src) == NULL) + return false; + + for (unsigned i = 0; i < num_components; i++) { + unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2; + uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size; + if ((nir_src_comp_as_uint(instr->src[src].src, + swizzle[i]) & high_bits) != 0) { + return false; + } + } + + return true; +} + +/** + * Returns true if a NIR ALU src represents a constant integer + * of either 32 or 64 bits, and the lower word (bit-size / 2) + * of all its components is zero. + */ +static inline bool +is_lower_half_zero(nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle) +{ + if (nir_src_as_const_value(instr->src[src].src) == NULL) + return false; + + for (unsigned i = 0; i < num_components; i++) { + uint32_t low_bits = + (1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1; + if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0) + return false; + } + + return true; +} + #endif /* _NIR_SEARCH_ */