nir: Walk blocks in source code order in lower_vars_to_ssa.
authorMatt Turner <mattst88@gmail.com>
Thu, 25 Aug 2016 02:25:58 +0000 (19:25 -0700)
committerMatt Turner <mattst88@gmail.com>
Thu, 25 Aug 2016 20:45:39 +0000 (13:45 -0700)
Prior to this commit rename_variables_block() is recursively called,
performing a depth-first traversal of the control flow graph. The
function uses a non-trivial amount of stack space for local variables,
which puts us in danger of smashing the stack, given a sufficiently deep
dominance tree.

XCOM: Enemy Within contains a shader with such a dominance tree (1574
nir_blocks in total, depth of at least 143).

Jason tells me that he believes that any walk over the nir_blocks that
respects dominance is sufficient (a DFS might have been necessary prior
to the introduction of nir_phi_builder).

In fact, the introduction of nir_phi_builder made the problem worse:
rename_variables_block(), walks to the bottom of the dominance tree
before calling nir_phi_builder_value_get_block_def() which walks back to
the top of the dominance tree...

In any case, this patch ensures we avoid that problem as well.

Cc: mesa-stable@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97225
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
src/compiler/nir/nir_lower_vars_to_ssa.c
src/compiler/nir/nir_phi_builder.h

index 317647bf9e4f63ec3c749e8ebdb85b80b003a4c6..25dc70c4618bb054d187bd8603de8af7d64af636 100644 (file)
@@ -471,7 +471,7 @@ lower_copies_to_load_store(struct deref_node *node,
    return true;
 }
 
-/* Performs variable renaming by doing a DFS of the dominance tree
+/* Performs variable renaming
  *
  * This algorithm is very similar to the one outlined in "Efficiently
  * Computing Static Single Assignment Form and the Control Dependence
@@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node,
  * SSA def on the stack per block.
  */
 static bool
-rename_variables_block(nir_block *block, struct lower_variables_state *state)
+rename_variables(struct lower_variables_state *state)
 {
    nir_builder b;
    nir_builder_init(&b, state->impl);
 
-   nir_foreach_instr_safe(instr, block) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-      switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var: {
-         struct deref_node *node =
-            get_deref_node(intrin->variables[0], state);
-
-         if (node == NULL) {
-            /* If we hit this path then we are referencing an invalid
-             * value.  Most likely, we unrolled something and are
-             * reading past the end of some array.  In any case, this
-             * should result in an undefined value.
-             */
-            nir_ssa_undef_instr *undef =
-               nir_ssa_undef_instr_create(state->shader,
-                                          intrin->num_components,
-                                          intrin->dest.ssa.bit_size);
-
-            nir_instr_insert_before(&intrin->instr, &undef->instr);
-            nir_instr_remove(&intrin->instr);
-
-            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&undef->def));
+   nir_foreach_block(block, state->impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
             continue;
-         }
 
-         if (!node->lower_to_ssa)
-            continue;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
+
+            if (node == NULL) {
+               /* If we hit this path then we are referencing an invalid
+                * value.  Most likely, we unrolled something and are
+                * reading past the end of some array.  In any case, this
+                * should result in an undefined value.
+                */
+               nir_ssa_undef_instr *undef =
+                  nir_ssa_undef_instr_create(state->shader,
+                                             intrin->num_components,
+                                             intrin->dest.ssa.bit_size);
+
+               nir_instr_insert_before(&intrin->instr, &undef->instr);
+               nir_instr_remove(&intrin->instr);
+
+               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                        nir_src_for_ssa(&undef->def));
+               continue;
+            }
 
-         nir_alu_instr *mov = nir_alu_instr_create(state->shader,
-                                                   nir_op_imov);
-         mov->src[0].src = nir_src_for_ssa(
-            nir_phi_builder_value_get_block_def(node->pb_value, block));
-         for (unsigned i = intrin->num_components; i < 4; i++)
-            mov->src[0].swizzle[i] = 0;
+            if (!node->lower_to_ssa)
+               continue;
 
-         assert(intrin->dest.is_ssa);
+            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
+                                                      nir_op_imov);
+            mov->src[0].src = nir_src_for_ssa(
+               nir_phi_builder_value_get_block_def(node->pb_value, block));
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
 
-         mov->dest.write_mask = (1 << intrin->num_components) - 1;
-         nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                           intrin->num_components,
-                           intrin->dest.ssa.bit_size, NULL);
+            assert(intrin->dest.is_ssa);
 
-         nir_instr_insert_before(&intrin->instr, &mov->instr);
-         nir_instr_remove(&intrin->instr);
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components,
+                              intrin->dest.ssa.bit_size, NULL);
 
-         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                  nir_src_for_ssa(&mov->dest.dest.ssa));
-         break;
-      }
-
-      case nir_intrinsic_store_var: {
-         struct deref_node *node =
-            get_deref_node(intrin->variables[0], state);
-
-         if (node == NULL) {
-            /* Probably an out-of-bounds array store.  That should be a
-             * no-op. */
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
             nir_instr_remove(&intrin->instr);
-            continue;
-         }
 
-         if (!node->lower_to_ssa)
-            continue;
-
-         assert(intrin->num_components ==
-                glsl_get_vector_elements(node->type));
-
-         assert(intrin->src[0].is_ssa);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
+            break;
+         }
 
-         nir_ssa_def *new_def;
-         b.cursor = nir_before_instr(&intrin->instr);
+         case nir_intrinsic_store_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
 
-         unsigned wrmask = nir_intrinsic_write_mask(intrin);
-         if (wrmask == (1 << intrin->num_components) - 1) {
-            /* Whole variable store - just copy the source.  Note that
-             * intrin->num_components and intrin->src[0].ssa->num_components
-             * may differ.
-             */
-            unsigned swiz[4];
-            for (unsigned i = 0; i < 4; i++)
-               swiz[i] = i < intrin->num_components ? i : 0;
+            if (node == NULL) {
+               /* Probably an out-of-bounds array store.  That should be a
+                * no-op. */
+               nir_instr_remove(&intrin->instr);
+               continue;
+            }
 
-            new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
-                                  intrin->num_components, false);
-         } else {
-            nir_ssa_def *old_def =
-               nir_phi_builder_value_get_block_def(node->pb_value, block);
-            /* For writemasked store_var intrinsics, we combine the newly
-             * written values with the existing contents of unwritten
-             * channels, creating a new SSA value for the whole vector.
-             */
-            nir_ssa_def *srcs[4];
-            for (unsigned i = 0; i < intrin->num_components; i++) {
-               if (wrmask & (1 << i)) {
-                  srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
-               } else {
-                  srcs[i] = nir_channel(&b, old_def, i);
+            if (!node->lower_to_ssa)
+               continue;
+
+            assert(intrin->num_components ==
+                   glsl_get_vector_elements(node->type));
+
+            assert(intrin->src[0].is_ssa);
+
+            nir_ssa_def *new_def;
+            b.cursor = nir_before_instr(&intrin->instr);
+
+            unsigned wrmask = nir_intrinsic_write_mask(intrin);
+            if (wrmask == (1 << intrin->num_components) - 1) {
+               /* Whole variable store - just copy the source.  Note that
+                * intrin->num_components and intrin->src[0].ssa->num_components
+                * may differ.
+                */
+               unsigned swiz[4];
+               for (unsigned i = 0; i < 4; i++)
+                  swiz[i] = i < intrin->num_components ? i : 0;
+
+               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+                                     intrin->num_components, false);
+            } else {
+               nir_ssa_def *old_def =
+                  nir_phi_builder_value_get_block_def(node->pb_value, block);
+               /* For writemasked store_var intrinsics, we combine the newly
+                * written values with the existing contents of unwritten
+                * channels, creating a new SSA value for the whole vector.
+                */
+               nir_ssa_def *srcs[4];
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (wrmask & (1 << i)) {
+                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+                  } else {
+                     srcs[i] = nir_channel(&b, old_def, i);
+                  }
                }
+               new_def = nir_vec(&b, srcs, intrin->num_components);
             }
-            new_def = nir_vec(&b, srcs, intrin->num_components);
-         }
 
-         assert(new_def->num_components == intrin->num_components);
+            assert(new_def->num_components == intrin->num_components);
 
-         nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
-         nir_instr_remove(&intrin->instr);
-         break;
-      }
+            nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
+            nir_instr_remove(&intrin->instr);
+            break;
+         }
 
-      default:
-         break;
+         default:
+            break;
+         }
       }
    }
 
-   for (unsigned i = 0; i < block->num_dom_children; ++i)
-      rename_variables_block(block->dom_children[i], state);
-
    return true;
 }
 
@@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
       }
    }
 
-   rename_variables_block(nir_start_block(impl), &state);
+   rename_variables(&state);
 
    nir_phi_builder_finish(state.phi_builder);
 
index edc530268c2c8c0a81d204330e521295e6c8db10..a4dc18a2b22df27826db9db04ba73796e64a6526 100644 (file)
@@ -44,7 +44,8 @@
  *         var.pb_val = nir_phi_builder_add_value(pb, var.defs)
  *
  *     // Visit each block.  This needs to visit dominators first;
- *     // nir_for_each_block() will be ok.
+ *     // nir_foreach_block() will be ok.
+ *
  *     foreach block:
  *         foreach instruction:
  *             foreach use of variable var: