/*
* This file implements an out-of-SSA pass as described in "Revisiting
* Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et. al.
+ * Boissinot et al.
*/
struct from_ssa_state {
bool phi_webs_only;
struct hash_table *merge_node_table;
nir_instr *instr;
+ bool progress;
};
/* Returns true if a dominates b */
/* The following data structure, which I have named merge_set is a way of
* representing a set registers of non-interfering registers. This is
- * based on the concept of a "dominence forest" presented in "Fast Copy
- * Coalescing and Live-Range Identification" by Budimlic et. al. but the
+ * based on the concept of a "dominance forest" presented in "Fast Copy
+ * Coalescing and Live-Range Identification" by Budimlic et al. but the
* implementation concept is taken from "Revisiting Out-of-SSA Translation
- * for Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
*
* Each SSA definition is associated with a merge_node and the association
* is represented by a combination of a hash table and the "def" parameter
* in the merge_node structure. The merge_set stores a linked list of
- * merge_nodes in dominence order of the ssa definitions. (Since the
- * liveness analysis pass indexes the SSA values in dominence order for us,
+ * merge_nodes in dominance order of the ssa definitions. (Since the
+ * liveness analysis pass indexes the SSA values in dominance order for us,
* this is an easy thing to keep up.) It is assumed that no pair of the
* nodes in a given set interfere. Merging two sets or checking for
* interference can be done in a single linear-time merge-sort walk of the
*
* This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
* Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et. al.
+ * Boissinot et al.
*/
static bool
merge_sets_interfere(merge_set *a, merge_set *b)
nir_instr *parent_instr = def->parent_instr;
nir_instr_remove(parent_instr);
ralloc_steal(state->dead_ctx, parent_instr);
+ state->progress = true;
return true;
}
nir_dest *dest = exec_node_data(nir_dest, def, ssa);
nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
-
+ state->progress = true;
return true;
}
/* Resolves ssa definitions to registers. While we're at it, we also
* remove phi nodes.
*/
-static bool
+static void
resolve_registers_block(nir_block *block, struct from_ssa_state *state)
{
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_phi) {
nir_instr_remove(instr);
ralloc_steal(state->dead_ctx, instr);
+ state->progress = true;
}
}
state->instr = NULL;
-
- return true;
}
static void
/* Resolves a single parallel copy operation into a sequence of movs
*
* This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
- * Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ * Correctness, Code Quality, and Efficiency" by Boissinot et al.
* However, I never got the algorithm to work as written, so this version
* is slightly modified.
*
nir_register *reg = nir_local_reg_create(state->builder.impl);
reg->name = "copy_temp";
reg->num_array_elems = 0;
- if (values[b].is_ssa)
+ if (values[b].is_ssa) {
reg->num_components = values[b].ssa->num_components;
- else
+ reg->bit_size = values[b].ssa->bit_size;
+ } else {
reg->num_components = values[b].reg.reg->num_components;
+ reg->bit_size = values[b].reg.reg->bit_size;
+ }
values[num_vals].is_ssa = false;
values[num_vals].reg.reg = reg;
return true;
}
-static void
+static bool
nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
{
struct from_ssa_state state;
nir_builder_init(&state.builder, impl);
state.dead_ctx = ralloc_context(NULL);
state.phi_webs_only = phi_webs_only;
- state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ state.merge_node_table = _mesa_pointer_hash_table_create(NULL);
+ state.progress = false;
nir_foreach_block(block, impl) {
add_parallel_copy_to_end_of_block(block, state.dead_ctx);
/* Clean up dead instructions and the hash tables */
_mesa_hash_table_destroy(state.merge_node_table, NULL);
ralloc_free(state.dead_ctx);
+ return state.progress;
}
-void
+bool
nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
{
+ bool progress = false;
+
nir_foreach_function(function, shader) {
if (function->impl)
- nir_convert_from_ssa_impl(function->impl, phi_webs_only);
+ progress |= nir_convert_from_ssa_impl(function->impl, phi_webs_only);
}
+
+ return progress;
}
if (block != def->parent_instr->block) {
/* Try to go up the single-successor tree */
bool all_single_successors = true;
- struct set_entry *entry;
set_foreach(block->predecessors, entry) {
nir_block *pred = (nir_block *)entry->key;
if (pred->successors[0] && pred->successors[1]) {
nir_foreach_phi_src(src, phi) {
assert(src->src.is_ssa);
+ /* We don't want derefs ending up in phi sources */
+ assert(!nir_src_as_deref(src->src));
place_phi_read(shader, reg, src->src.ssa, src->pred);
}
mov->dest.dest = nir_dest_for_reg(reg);
mov->dest.write_mask = (1 << reg->num_components) - 1;
nir_instr_insert(nir_after_instr(&load->instr), &mov->instr);
+ } else if (instr->type == nir_instr_type_deref) {
+ /* Derefs should always be SSA values, don't rewrite them. */
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ nir_foreach_use_safe(use, &deref->dest.ssa)
+ assert(use->parent_instr->block == block);
+ assert(list_empty(&deref->dest.ssa.if_uses));
} else {
nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state);
}