From 5f26c21e62466783cd031c45a39544c992cee202 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 23 Jul 2020 14:24:15 -0700
Subject: [PATCH] nir: Expand opt_undef to handle undef channels in a store intrinsic.

Instead of only handling all-undef stores, shrink the writemask to just the
defined channels. Cleans up a bunch of writemasks on prog_to_nir output in
particular.

Reviewed-by: Rhys Perry
Part-of:
---
 src/compiler/nir/nir_opt_undef.c | 48 ++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c
index e03e5b88efc..d12354e0245 100644
--- a/src/compiler/nir/nir_opt_undef.c
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -96,9 +96,36 @@ opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
    return true;
 }

+static uint32_t
+nir_get_undef_mask(nir_ssa_def *def)
+{
+   nir_instr *instr = def->parent_instr;
+
+   if (instr->type == nir_instr_type_ssa_undef)
+      return BITSET_MASK(def->num_components);
+
+   if (instr->type != nir_instr_type_alu)
+      return 0;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   unsigned undef = 0;
+
+   if (nir_op_is_vec(alu->op)) {
+      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+         if (alu->src[i].src.is_ssa &&
+             alu->src[i].src.ssa->parent_instr->type ==
+             nir_instr_type_ssa_undef) {
+            undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i;
+         }
+      }
+   }
+
+   return undef;
+}
+
 /**
- * Remove any store intrinsics whose value is undefined (the existing
- * value is a fine representation of "undefined").
+ * Remove any store intrinsic writemask channels whose value is undefined (the
+ * existing value is a fine representation of "undefined").
  */
 static bool
 opt_undef_store(nir_intrinsic_instr *intrin)
@@ -120,11 +147,22 @@ opt_undef_store(nir_intrinsic_instr *intrin)
       return false;
    }

-   if (!intrin->src[arg_index].is_ssa ||
-       intrin->src[arg_index].ssa->parent_instr->type != nir_instr_type_ssa_undef)
+   if (!intrin->src[arg_index].is_ssa)
+      return false;
+
+   nir_ssa_def *def = intrin->src[arg_index].ssa;
+
+   unsigned write_mask = nir_intrinsic_write_mask(intrin);
+   unsigned undef_mask = nir_get_undef_mask(def);
+
+   if (!(write_mask & undef_mask))
       return false;

-   nir_instr_remove(&intrin->instr);
+   write_mask &= ~undef_mask;
+   if (!write_mask)
+      nir_instr_remove(&intrin->instr);
+   else
+      nir_intrinsic_set_write_mask(intrin, write_mask);

    return true;
 }
-- 
2.30.2