i965/vec4: split VEC4_OPCODE_FROM_DOUBLE into one opcode per destination's type

[mesa.git] / src / compiler / nir / nir_from_ssa.c
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c

index 27e94f823b032e977168c295f9e7e71ccdafc620..1aa35509b11c9f7067b3c7f98d6eccb1d8809495 100644 (file)
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -32,7 +32,7 @@
  /*
   * This file implements an out-of-SSA pass as described in "Revisiting
   * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et. al.
+ * Boissinot et al.
   */
  
  struct from_ssa_state {
@@ -41,6 +41,7 @@ struct from_ssa_state {
     bool phi_webs_only;
     struct hash_table *merge_node_table;
     nir_instr *instr;
+   bool progress;
  };
  
  /* Returns true if a dominates b */
@@ -63,16 +64,16 @@ ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
  
  /* The following data structure, which I have named merge_set is a way of
   * representing a set registers of non-interfering registers.  This is
- * based on the concept of a "dominence forest" presented in "Fast Copy
- * Coalescing and Live-Range Identification" by Budimlic et. al. but the
+ * based on the concept of a "dominance forest" presented in "Fast Copy
+ * Coalescing and Live-Range Identification" by Budimlic et al. but the
   * implementation concept is taken from  "Revisiting Out-of-SSA Translation
- * for Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
   *
   * Each SSA definition is associated with a merge_node and the association
   * is represented by a combination of a hash table and the "def" parameter
   * in the merge_node structure.  The merge_set stores a linked list of
- * merge_nodes in dominence order of the ssa definitions.  (Since the
- * liveness analysis pass indexes the SSA values in dominence order for us,
+ * merge_nodes in dominance order of the ssa definitions.  (Since the
+ * liveness analysis pass indexes the SSA values in dominance order for us,
   * this is an easy thing to keep up.)  It is assumed that no pair of the
   * nodes in a given set interfere.  Merging two sets or checking for
   * interference can be done in a single linear-time merge-sort walk of the
@@ -177,7 +178,7 @@ merge_merge_sets(merge_set *a, merge_set *b)
   *
   * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
   * Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et. al.
+ * Boissinot et al.
   */
  static bool
  merge_sets_interfere(merge_set *a, merge_set *b)
@@ -503,6 +504,7 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
        nir_instr *parent_instr = def->parent_instr;
        nir_instr_remove(parent_instr);
        ralloc_steal(state->dead_ctx, parent_instr);
+      state->progress = true;
        return true;
     }
  
@@ -514,14 +516,14 @@ rewrite_ssa_def(nir_ssa_def *def, void *void_state)
     nir_dest *dest = exec_node_data(nir_dest, def, ssa);
  
     nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
-
+   state->progress = true;
     return true;
  }
  
  /* Resolves ssa definitions to registers.  While we're at it, we also
   * remove phi nodes.
   */
-static bool
+static void
  resolve_registers_block(nir_block *block, struct from_ssa_state *state)
  {
     nir_foreach_instr_safe(instr, block) {
@@ -531,11 +533,10 @@ resolve_registers_block(nir_block *block, struct from_ssa_state *state)
        if (instr->type == nir_instr_type_phi) {
           nir_instr_remove(instr);
           ralloc_steal(state->dead_ctx, instr);
+         state->progress = true;
        }
     }
     state->instr = NULL;
-
-   return true;
  }
  
  static void
@@ -561,7 +562,7 @@ emit_copy(nir_builder *b, nir_src src, nir_src dest_src)
  /* Resolves a single parallel copy operation into a sequence of movs
   *
   * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
- * Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ * Correctness, Code Quality, and Efficiency" by Boissinot et al.
   * However, I never got the algorithm to work as written, so this version
   * is slightly modified.
   *
@@ -756,7 +757,7 @@ resolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state)
     return true;
  }
  
-static void
+static bool
  nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
  {
     struct from_ssa_state state;
@@ -766,6 +767,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
     state.phi_webs_only = phi_webs_only;
     state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                      _mesa_key_pointer_equal);
+   state.progress = false;
  
     nir_foreach_block(block, impl) {
        add_parallel_copy_to_end_of_block(block, state.dead_ctx);
@@ -804,15 +806,20 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
     /* Clean up dead instructions and the hash tables */
     _mesa_hash_table_destroy(state.merge_node_table, NULL);
     ralloc_free(state.dead_ctx);
+   return state.progress;
  }
  
-void
+bool
  nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
  {
+   bool progress = false;
+
     nir_foreach_function(function, shader) {
        if (function->impl)
-         nir_convert_from_ssa_impl(function->impl, phi_webs_only);
+         progress |= nir_convert_from_ssa_impl(function->impl, phi_webs_only);
     }
+
+   return progress;
  }