nir/copy_prop: Respect the source's number of components
authorJason Ekstrand <jason.ekstrand@intel.com>
Fri, 3 Mar 2017 01:10:24 +0000 (17:10 -0800)
committerJason Ekstrand <jason.ekstrand@intel.com>
Tue, 14 Mar 2017 14:36:20 +0000 (07:36 -0700)
In the near future we are going to require that the num_components in a
src dereference match the num_components of the SSA value being
dereferenced.  To do that, we need copy_prop to not remove our MOVs from
a larger SSA value into an instruction that uses fewer channels.

Because we suddenly have to know how many components each source has,
this makes the pass a bit more complicated.  Fortunately, copy
propagation is the only pass that cares about the number of components
are read by any given source so it's fairly contained.

Shader-db results on Sky Lake:

   total instructions in shared programs: 13318947 -> 13320265 (0.01%)
   instructions in affected programs: 260633 -> 261951 (0.51%)
   helped: 324
   HURT: 1027

Looking through the hurt programs, about a dozen are hurt by 3
instructions and the rest are all hurt by 2 instructions.  From a
spot-check of the shaders, the story is always the same:  They get a
vec4 from somewhere (frequently an input) and use the first two or three
components as a texture coordinate.  Because of the vector component
mismatch, we have a mov or, more likely, a vecN sitting between the
texture instruction and the input.  This means that the back-end inserts
a bunch of MOVs and split_virtual_grfs() goes to town.  Because the
texture coordinate is also used by some other calculation, register
coalesce can't combine them back together and we end up with an extra 2
MOV instructions in our shader.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
src/compiler/nir/nir_opt_copy_propagate.c

index c26e07fda712a4326506d02c0c53c5b189bfbe18..c4001fa73f5335d2ff5cf4bb51a3d9b95ba1a8c8 100644 (file)
@@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr)
 }
 
 static bool
-copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if,
+              unsigned num_components)
 {
    if (!src->is_ssa) {
       if (src->reg.indirect)
-         return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
+         return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1);
       return false;
    }
 
@@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
    if (!is_swizzleless_move(alu_instr))
       return false;
 
-   /* Don't let copy propagation land us with a phi that has more
-    * components in its source than it has in its destination.  That badly
-    * messes up out-of-ssa.
-    */
-   if (parent_instr && parent_instr->type == nir_instr_type_phi) {
-      nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
-      assert(phi->dest.is_ssa);
-      if (phi->dest.ssa.num_components !=
-          alu_instr->src[0].src.ssa->num_components)
-         return false;
-   }
+   if (alu_instr->src[0].src.ssa->num_components != num_components)
+      return false;
 
    if (parent_instr) {
       nir_instr_rewrite_src(parent_instr, src,
@@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
    if (!src->src.is_ssa) {
       if (src->src.reg.indirect)
          return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
-                              NULL);
+                              NULL, 1);
       return false;
    }
 
@@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
    return true;
 }
 
-typedef struct {
-   nir_instr *parent_instr;
-   bool progress;
-} copy_prop_state;
+static bool
+copy_prop_dest(nir_dest *dest, nir_instr *instr)
+{
+   if (!dest->is_ssa && dest->reg.indirect)
+      return copy_prop_src(dest->reg.indirect, instr, NULL, 1);
+
+   return false;
+}
 
 static bool
-copy_prop_src_cb(nir_src *src, void *_state)
+copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var)
 {
-   copy_prop_state *state = (copy_prop_state *) _state;
-   while (copy_prop_src(src, state->parent_instr, NULL))
-      state->progress = true;
+   if (!deref_var)
+      return false;
 
-   return true;
+   bool progress = false;
+   for (nir_deref *deref = deref_var->deref.child;
+        deref; deref = deref->child) {
+      if (deref->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(deref);
+      if (arr->deref_array_type != nir_deref_array_type_indirect)
+         continue;
+
+      while (copy_prop_src(&arr->indirect, instr, NULL, 1))
+         progress = true;
+   }
+   return progress;
 }
 
 static bool
 copy_prop_instr(nir_instr *instr)
 {
-   if (instr->type == nir_instr_type_alu) {
+   bool progress = false;
+   switch (instr->type) {
+   case nir_instr_type_alu: {
       nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
-      bool progress = false;
 
       for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++)
          while (copy_prop_alu_src(alu_instr, i))
             progress = true;
 
-      if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect)
-         while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL))
+      while (copy_prop_dest(&alu_instr->dest.dest, instr))
+         progress = true;
+
+      return progress;
+   }
+
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      for (unsigned i = 0; i < tex->num_srcs; i++) {
+         unsigned num_components = nir_tex_instr_src_size(tex, i);
+         while (copy_prop_src(&tex->src[i].src, instr, NULL, num_components))
             progress = true;
+      }
+
+      if (copy_prop_deref_var(instr, tex->texture))
+         progress = true;
+      if (copy_prop_deref_var(instr, tex->sampler))
+         progress = true;
+
+      while (copy_prop_dest(&tex->dest, instr))
+         progress = true;
+
+      return progress;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      for (unsigned i = 0;
+           i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
+         unsigned num_components =
+            nir_intrinsic_infos[intrin->intrinsic].src_components[i];
+         if (!num_components)
+            num_components = intrin->num_components;
+
+         while (copy_prop_src(&intrin->src[i], instr, NULL, num_components))
+            progress = true;
+      }
+
+      for (unsigned i = 0;
+           i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
+         if (copy_prop_deref_var(instr, intrin->variables[i]))
+            progress = true;
+      }
+
+      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
+         while (copy_prop_dest(&intrin->dest, instr))
+            progress = true;
+      }
 
       return progress;
    }
 
-   copy_prop_state state;
-   state.parent_instr = instr;
-   state.progress = false;
-   nir_foreach_src(instr, copy_prop_src_cb, &state);
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+      unsigned num_components = phi->dest.ssa.num_components;
+      nir_foreach_phi_src(src, phi) {
+         while (copy_prop_src(&src->src, instr, NULL, num_components))
+            progress = true;
+      }
+
+      return progress;
+   }
 
-   return state.progress;
+   default:
+      return false;
+   }
 }
 
 static bool
 copy_prop_if(nir_if *if_stmt)
 {
-   return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
+   return copy_prop_src(&if_stmt->condition, NULL, if_stmt, 1);
 }
 
 static bool