nir: Expand opt_undef to handle undef channels in a store intrinsic.
author Eric Anholt <eric@anholt.net>
Thu, 23 Jul 2020 21:24:15 +0000 (14:24 -0700)
committer Marge Bot <eric+marge@anholt.net>
Thu, 20 Aug 2020 16:44:08 +0000 (16:44 +0000)
Instead of only handling all-undef stores, shrink the writemask to just
the defined channels.  Cleans up a bunch of writemasks on prog_to_nir
output in particular.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6054>

src/compiler/nir/nir_opt_undef.c

index e03e5b88efc6bb2629fddcd63c2b799f05d58291..d12354e024567d863ddc93d837cf6276d4f81bd1 100644 (file)
@@ -96,9 +96,36 @@ opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
    return true;
 }
 
+static uint32_t
+nir_get_undef_mask(nir_ssa_def *def)
+{
+   nir_instr *instr = def->parent_instr;
+   /* Build a bitmask of the channels of def that are known to be undefined; 0 means none could be proven. */
+   if (instr->type == nir_instr_type_ssa_undef)
+      return BITSET_MASK(def->num_components);
+   /* Any producer that is neither an ssa_undef nor an ALU instruction reports no undefined channels. */
+   if (instr->type != nir_instr_type_alu)
+      return 0;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   unsigned undef = 0;
+   /* For vec ops, flag bit i (shifted by the source index) for each source i produced directly by an ssa_undef. */
+   if (nir_op_is_vec(alu->op)) {
+      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+         if (alu->src[i].src.is_ssa &&
+             alu->src[i].src.ssa->parent_instr->type ==
+             nir_instr_type_ssa_undef) {
+            undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i;
+         }
+      }
+   }
+
+   return undef;
+}
+
 /**
- * Remove any store intrinsics whose value is undefined (the existing
- * value is a fine representation of "undefined").
+ * Remove any store intrinsic writemask channels whose value is undefined (the
+ * existing value is a fine representation of "undefined").
  */
 static bool
 opt_undef_store(nir_intrinsic_instr *intrin)
@@ -120,11 +147,22 @@ opt_undef_store(nir_intrinsic_instr *intrin)
       return false;
    }
 
-   if (!intrin->src[arg_index].is_ssa ||
-       intrin->src[arg_index].ssa->parent_instr->type != nir_instr_type_ssa_undef)
+   if (!intrin->src[arg_index].is_ssa)
+      return false;
+
+   nir_ssa_def *def = intrin->src[arg_index].ssa;
+
+   unsigned write_mask = nir_intrinsic_write_mask(intrin);
+   unsigned undef_mask = nir_get_undef_mask(def);
+
+   if (!(write_mask & undef_mask))
       return false;
 
-   nir_instr_remove(&intrin->instr);
+   write_mask &= ~undef_mask;
+   if (!write_mask)
+      nir_instr_remove(&intrin->instr);
+   else
+      nir_intrinsic_set_write_mask(intrin, write_mask);
 
    return true;
 }