*/
#include "nir.h"
+#include "nir_builder.h"
#include "nir_vla.h"
/*
* This file implements an out-of-SSA pass as described in "Revisiting
* Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et. al.
+ * Boissinot et al.
*/
struct from_ssa_state {
- void *mem_ctx;
+ nir_builder builder;
void *dead_ctx;
bool phi_webs_only;
struct hash_table *merge_node_table;
nir_instr *instr;
- nir_function_impl *impl;
+ bool progress;
};
/* Returns true if a dominates b */
- /* The following data structure, which I have named merge_set is a way of
- * representing a set registers of non-interfering registers. This is
+ /* The following data structure, which I have named merge_set, is a way of
+ * representing a set of non-interfering registers. This is
- * based on the concept of a "dominence forest" presented in "Fast Copy
- * Coalescing and Live-Range Identification" by Budimlic et. al. but the
+ * based on the concept of a "dominance forest" presented in "Fast Copy
+ * Coalescing and Live-Range Identification" by Budimlic et al. but the
* implementation concept is taken from "Revisiting Out-of-SSA Translation
- * for Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
*
* Each SSA definition is associated with a merge_node and the association
* is represented by a combination of a hash table and the "def" parameter
* in the merge_node structure. The merge_set stores a linked list of
- * merge_node's in dominence order of the ssa definitions. (Since the
- * liveness analysis pass indexes the SSA values in dominence order for us,
+ * merge_nodes in dominance order of the ssa definitions. (Since the
+ * liveness analysis pass indexes the SSA values in dominance order for us,
* this is an easy thing to keep up.) It is assumed that no pair of the
* nodes in a given set interfere. Merging two sets or checking for
* interference can be done in a single linear-time merge-sort walk of the
* two lists.
*
* This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
* Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et. al.
+ * Boissinot et al.
*/
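/* Pictured as a sketch (this is an illustration of the walk only, not the
 * actual implementation below; dominates() and interferes() stand in for
 * the real dominance and liveness-based interference helpers in this file),
 * checking two sets a and b for interference looks like:
 *
 *    node *dom[n];      // dominance stack, n = total nodes in a and b
 *    int dom_idx = -1;
 *
 *    while (a or b has unvisited nodes) {
 *       current = unvisited node with the smallest dominance-order index;
 *
 *       // Values that don't dominate current can't dominate, and therefore
 *       // can't interfere with, current or anything later in the walk.
 *       while (dom_idx >= 0 && !dominates(dom[dom_idx], current))
 *          dom_idx--;
 *
 *       // Everything still on the stack is mutually non-interfering, so
 *       // only the nearest dominating ancestor needs to be checked.
 *       if (dom_idx >= 0 && interferes(dom[dom_idx], current))
 *          return true;
 *
 *       dom[++dom_idx] = current;
 *    }
 *    return false;
 */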
static bool
merge_sets_interfere(merge_set *a, merge_set *b)
}
static bool
-add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
+add_parallel_copy_to_end_of_block(nir_block *block, void *dead_ctx)
{
- struct from_ssa_state *state = void_state;
bool need_end_copy = false;
if (block->successors[0]) {
* (if there is one).
*/
nir_parallel_copy_instr *pcopy =
- nir_parallel_copy_instr_create(state->dead_ctx);
+ nir_parallel_copy_instr_create(dead_ctx);
nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr);
}
* time because of potential back-edges in the CFG.
*/
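/* As a sketch of the isolation this performs (made-up value names, not
 * exact NIR syntax), a phi such as
 *
 *    block b2:
 *    x = phi(b0: a, b1: c)
 *
 * becomes
 *
 *    block b0:  ...  s0 = pcopy(a)    // parallel copy at the end of b0
 *    block b1:  ...  s1 = pcopy(c)    // parallel copy at the end of b1
 *    block b2:
 *    x  = phi(b0: s0, b1: s1)
 *    x' = pcopy(x)                    // parallel copy right after the phis
 *    ... former uses of x now read x' ...
 *
 * so every phi source and destination is also the destination or source of
 * a parallel copy entry, and the later coalescing steps only have to reason
 * about copies.
 */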
static bool
-isolate_phi_nodes_block(nir_block *block, void *void_state)
+isolate_phi_nodes_block(nir_block *block, void *dead_ctx)
{
- struct from_ssa_state *state = void_state;
-
nir_instr *last_phi_instr = NULL;
- nir_foreach_instr(block, instr) {
+ nir_foreach_instr(instr, block) {
/* Phi nodes only ever come at the start of a block */
if (instr->type != nir_instr_type_phi)
break;
last_phi_instr = instr;
}
- /* If we don't have any phi's, then there's nothing for us to do. */
+ /* If we don't have any phis, then there's nothing for us to do. */
if (last_phi_instr == NULL)
return true;
* start of this block but after the phi nodes.
*/
nir_parallel_copy_instr *block_pcopy =
- nir_parallel_copy_instr_create(state->dead_ctx);
+ nir_parallel_copy_instr_create(dead_ctx);
nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);
- nir_foreach_instr(block, instr) {
+ nir_foreach_instr(instr, block) {
/* Phi nodes only ever come at the start of a block */
if (instr->type != nir_instr_type_phi)
break;
nir_phi_instr *phi = nir_instr_as_phi(instr);
assert(phi->dest.is_ssa);
- nir_foreach_phi_src(phi, src) {
+ nir_foreach_phi_src(src, phi) {
nir_parallel_copy_instr *pcopy =
get_parallel_copy_at_end_of_block(src->pred);
assert(pcopy);
- nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
+ nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
nir_parallel_copy_entry);
nir_ssa_dest_init(&pcopy->instr, &entry->dest,
- phi->dest.ssa.num_components, src->src.ssa->name);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size, src->src.ssa->name);
exec_list_push_tail(&pcopy->entries, &entry->node);
assert(src->src.is_ssa);
nir_src_for_ssa(&entry->dest.ssa));
}
- nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
+ nir_parallel_copy_entry *entry = rzalloc(dead_ctx,
nir_parallel_copy_entry);
nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components, phi->dest.ssa.bit_size,
+ phi->dest.ssa.name);
exec_list_push_tail(&block_pcopy->entries, &entry->node);
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
}
static bool
-coalesce_phi_nodes_block(nir_block *block, void *void_state)
+coalesce_phi_nodes_block(nir_block *block, struct from_ssa_state *state)
{
- struct from_ssa_state *state = void_state;
-
- nir_foreach_instr(block, instr) {
+ nir_foreach_instr(instr, block) {
/* Phi nodes only ever come at the start of a block */
if (instr->type != nir_instr_type_phi)
break;
assert(phi->dest.is_ssa);
merge_node *dest_node = get_merge_node(&phi->dest.ssa, state);
- nir_foreach_phi_src(phi, src) {
+ nir_foreach_phi_src(src, phi) {
assert(src->src.is_ssa);
merge_node *src_node = get_merge_node(src->src.ssa, state);
if (src_node->set != dest_node->set)
aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
struct from_ssa_state *state)
{
- nir_foreach_parallel_copy_entry(pcopy, entry) {
+ nir_foreach_parallel_copy_entry(entry, pcopy) {
if (!entry->src.is_ssa)
continue;
}
static bool
-aggressive_coalesce_block(nir_block *block, void *void_state)
+aggressive_coalesce_block(nir_block *block, struct from_ssa_state *state)
{
- struct from_ssa_state *state = void_state;
-
nir_parallel_copy_instr *start_pcopy = NULL;
- nir_foreach_instr(block, instr) {
+ nir_foreach_instr(instr, block) {
/* Phi nodes only ever come at the start of a block */
if (instr->type != nir_instr_type_phi) {
if (instr->type != nir_instr_type_parallel_copy)
return true;
}
+static nir_register *
+create_reg_for_ssa_def(nir_ssa_def *def, nir_function_impl *impl)
+{
+ nir_register *reg = nir_local_reg_create(impl);
+
+ reg->name = def->name;
+ reg->num_components = def->num_components;
+ reg->bit_size = def->bit_size;
+ reg->num_array_elems = 0;
+
+ return reg;
+}
+
static bool
rewrite_ssa_def(nir_ssa_def *def, void *void_state)
{
* the things in the merge set should be the same so it doesn't
* matter which node's definition we use.
*/
- if (node->set->reg == NULL) {
- node->set->reg = nir_local_reg_create(state->impl);
- node->set->reg->name = def->name;
- node->set->reg->num_components = def->num_components;
- node->set->reg->num_array_elems = 0;
- }
+ if (node->set->reg == NULL)
+ node->set->reg = create_reg_for_ssa_def(def, state->builder.impl);
reg = node->set->reg;
} else {
if (def->parent_instr->type == nir_instr_type_load_const)
return true;
- reg = nir_local_reg_create(state->impl);
- reg->name = def->name;
- reg->num_components = def->num_components;
- reg->num_array_elems = 0;
+ reg = create_reg_for_ssa_def(def, state->builder.impl);
}
nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg));
- assert(list_empty(&def->uses) && list_empty(&def->if_uses));
+ assert(list_is_empty(&def->uses) && list_is_empty(&def->if_uses));
if (def->parent_instr->type == nir_instr_type_ssa_undef) {
/* If it's an ssa_undef instruction, remove it since we know we just got
nir_instr *parent_instr = def->parent_instr;
nir_instr_remove(parent_instr);
ralloc_steal(state->dead_ctx, parent_instr);
+ state->progress = true;
return true;
}
nir_dest *dest = exec_node_data(nir_dest, def, ssa);
nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg));
-
+ state->progress = true;
return true;
}
/* Resolves ssa definitions to registers. While we're at it, we also
* remove phi nodes.
*/
-static bool
-resolve_registers_block(nir_block *block, void *void_state)
+static void
+resolve_registers_block(nir_block *block, struct from_ssa_state *state)
{
- struct from_ssa_state *state = void_state;
-
- nir_foreach_instr_safe(block, instr) {
+ nir_foreach_instr_safe(instr, block) {
state->instr = instr;
nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
if (instr->type == nir_instr_type_phi) {
nir_instr_remove(instr);
ralloc_steal(state->dead_ctx, instr);
+ state->progress = true;
}
}
state->instr = NULL;
-
- return true;
}
static void
-emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
- void *mem_ctx)
+emit_copy(nir_builder *b, nir_src src, nir_src dest_src)
{
assert(!dest_src.is_ssa &&
dest_src.reg.indirect == NULL &&
else
assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
- nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+ nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
nir_src_copy(&mov->src[0].src, &src, mov);
mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
- nir_instr_insert_before(&pcopy->instr, &mov->instr);
+ nir_builder_instr_insert(b, &mov->instr);
}
-/* Resolves a single parallel copy operation into a sequence of mov's
+/* Resolves a single parallel copy operation into a sequence of movs
*
* This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
- * Correctness, Code Quality, and Efficiency" by Boissinot et. al..
+ * Correctness, Code Quality, and Efficiency" by Boissinot et al.
* However, I never got the algorithm to work as written, so this version
* is slightly modified.
*
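* As a concrete example (register names are made up): the cyclic parallel
* copy { r0 <- r1, r1 <- r0 } cannot be emitted as two sequential movs, so
* the algorithm breaks the cycle with a fresh temporary:
*
*    tmp = r1;
*    r1  = r0;
*    r0  = tmp;
*
* An acyclic copy such as { r0 <- r1, r1 <- r2 } only needs its movs emitted
* in dependency order: r0 = r1 first, then r1 = r2.
*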
struct from_ssa_state *state)
{
unsigned num_copies = 0;
- nir_foreach_parallel_copy_entry(pcopy, entry) {
+ nir_foreach_parallel_copy_entry(entry, pcopy) {
/* Sources may be SSA */
if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
continue;
NIR_VLA(int, to_do, num_copies * 2);
int to_do_idx = -1;
+ state->builder.cursor = nir_before_instr(&pcopy->instr);
+
/* Now we set everything up:
* - All values get assigned a temporary index
* - Current locations are set from sources
- * - Predicessors are recorded from sources and destinations
+ * - Predecessors are recorded from sources and destinations
*/
int num_vals = 0;
- nir_foreach_parallel_copy_entry(pcopy, entry) {
+ nir_foreach_parallel_copy_entry(entry, pcopy) {
/* Sources may be SSA */
if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
continue;
while (ready_idx >= 0) {
int b = ready[ready_idx--];
int a = pred[b];
- emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);
-
- /* If any other copies want a they can find it at b */
- loc[a] = b;
+ emit_copy(&state->builder, values[loc[a]], values[b]);
/* b has been filled, mark it as not needing to be copied */
pred[b] = -1;
- /* If a needs to be filled, it's ready for copying now */
- if (pred[a] != -1)
+ /* If a needs to be filled... */
+ if (pred[a] != -1) {
+ /* If any other copies want a, they can find it at b */
+ loc[a] = b;
+
+ /* It's ready for copying now */
ready[++ready_idx] = a;
+ }
}
int b = to_do[to_do_idx--];
if (pred[b] == -1)
* backend can coalesce the (possibly multiple) temporaries.
*/
assert(num_vals < num_copies * 2);
- nir_register *reg = nir_local_reg_create(state->impl);
+ nir_register *reg = nir_local_reg_create(state->builder.impl);
reg->name = "copy_temp";
reg->num_array_elems = 0;
- if (values[b].is_ssa)
+ if (values[b].is_ssa) {
reg->num_components = values[b].ssa->num_components;
- else
+ reg->bit_size = values[b].ssa->bit_size;
+ } else {
reg->num_components = values[b].reg.reg->num_components;
+ reg->bit_size = values[b].reg.reg->bit_size;
+ }
values[num_vals].is_ssa = false;
values[num_vals].reg.reg = reg;
- emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
+ emit_copy(&state->builder, values[b], values[num_vals]);
loc[b] = num_vals;
ready[++ready_idx] = b;
num_vals++;
* the end (or right before the final jump if it exists).
*/
static bool
-resolve_parallel_copies_block(nir_block *block, void *void_state)
+resolve_parallel_copies_block(nir_block *block, struct from_ssa_state *state)
{
- struct from_ssa_state *state = void_state;
-
/* At this point, we have removed all of the phi nodes. If a parallel
* copy existed right after the phi nodes in this block, it is now the
* first instruction.
return true;
}
-static void
+static bool
nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
{
struct from_ssa_state state;
- state.mem_ctx = ralloc_parent(impl);
+ nir_builder_init(&state.builder, impl);
state.dead_ctx = ralloc_context(NULL);
- state.impl = impl;
state.phi_webs_only = phi_webs_only;
- state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ state.merge_node_table = _mesa_pointer_hash_table_create(NULL);
+ state.progress = false;
+
+ nir_foreach_block(block, impl) {
+ add_parallel_copy_to_end_of_block(block, state.dead_ctx);
+ }
- nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
- nir_foreach_block(impl, isolate_phi_nodes_block, &state);
+ nir_foreach_block(block, impl) {
+ isolate_phi_nodes_block(block, state.dead_ctx);
+ }
/* Mark metadata as dirty before we ask for liveness analysis */
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_require(impl, nir_metadata_live_ssa_defs |
nir_metadata_dominance);
- nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
- nir_foreach_block(impl, aggressive_coalesce_block, &state);
+ nir_foreach_block(block, impl) {
+ coalesce_phi_nodes_block(block, &state);
+ }
- nir_foreach_block(impl, resolve_registers_block, &state);
+ nir_foreach_block(block, impl) {
+ aggressive_coalesce_block(block, &state);
+ }
- nir_foreach_block(impl, resolve_parallel_copies_block, &state);
+ nir_foreach_block(block, impl) {
+ resolve_registers_block(block, &state);
+ }
+
+ nir_foreach_block(block, impl) {
+ resolve_parallel_copies_block(block, &state);
+ }
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
/* Clean up dead instructions and the hash tables */
_mesa_hash_table_destroy(state.merge_node_table, NULL);
ralloc_free(state.dead_ctx);
+ return state.progress;
}
-void
+bool
nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
{
- nir_foreach_function(shader, function) {
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
if (function->impl)
- nir_convert_from_ssa_impl(function->impl, phi_webs_only);
+ progress |= nir_convert_from_ssa_impl(function->impl, phi_webs_only);
+ }
+
+ return progress;
+}
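+
+/* From a (hypothetical) caller's point of view, the two modes look like
+ * this: a backend that does its own register allocation on NIR registers
+ * converts the whole shader,
+ *
+ *    nir_convert_from_ssa(shader, false);
+ *
+ * while a backend that consumes SSA values directly and only needs the phi
+ * nodes gone passes phi_webs_only = true, so that only values involved in
+ * phi webs are lowered to registers:
+ *
+ *    nir_convert_from_ssa(shader, true);
+ */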
+
+
+static void
+place_phi_read(nir_shader *shader, nir_register *reg,
+ nir_ssa_def *def, nir_block *block, unsigned depth)
+{
+ if (block != def->parent_instr->block) {
+ /* Try to go up the single-successor tree */
+ bool all_single_successors = true;
+ set_foreach(block->predecessors, entry) {
+ nir_block *pred = (nir_block *)entry->key;
+ if (pred->successors[0] && pred->successors[1]) {
+ all_single_successors = false;
+ break;
+ }
+ }
+
+ if (all_single_successors && depth < 32) {
+ /* All predecessors of this block have exactly one successor and it
+ * is this block so they must eventually lead here without
+ * intersecting each other. Place the reads in the predecessors
+ * instead of this block.
+ *
+ * We only let this function recurse 32 times because it can recurse
+ * indefinitely in the presence of infinite loops. Because we're
+ * crawling a single-successor chain, it doesn't matter where we
+ * place it so it's ok to stop at an arbitrary distance.
+ *
+ * TODO: One day, we could detect back edges and avoid the recursion
+ * that way.
+ */
+ set_foreach(block->predecessors, entry) {
+ place_phi_read(shader, reg, def, (nir_block *)entry->key,
+ depth + 1);
+ }
+ return;
+ }
+ }
+
+ nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
+ mov->src[0].src = nir_src_for_ssa(def);
+ mov->dest.dest = nir_dest_for_reg(reg);
+ mov->dest.write_mask = (1 << reg->num_components) - 1;
+ nir_instr_insert(nir_after_block_before_jump(block), &mov->instr);
+}
+
+/** Lower all of the phi nodes in a block to movs to and from a register
+ *
+ * This provides a very quick-and-dirty out-of-SSA pass that you can run on a
+ * single block to convert all of its phis to a register and some movs.
+ * The code that is generated, while not optimal for actual codegen in a
+ * back-end, is easy to generate, correct, and will turn into the same set of
+ * phis after you call regs_to_ssa and do some copy propagation.
+ *
+ * The one intelligent thing this pass does is that it places the moves from
+ * the phi sources as high up the predecessor tree as possible instead of in
+ * the exact predecessor. This means that, in particular, it will crawl into
+ * the deepest nesting of any if-ladders. In order to ensure that doing so is
+ * safe, it stops as soon as one of the predecessors has multiple successors.
+ */
+bool
+nir_lower_phis_to_regs_block(nir_block *block)
+{
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_shader *shader = impl->function->shader;
+
+ bool progress = false;
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_phi)
+ break;
+
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ assert(phi->dest.is_ssa);
+
+ nir_register *reg = create_reg_for_ssa_def(&phi->dest.ssa, impl);
+
+ nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
+ mov->src[0].src = nir_src_for_reg(reg);
+ mov->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+ phi->dest.ssa.num_components, phi->dest.ssa.bit_size,
+ phi->dest.ssa.name);
+ nir_instr_insert(nir_after_instr(&phi->instr), &mov->instr);
+
+ nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+ nir_src_for_ssa(&mov->dest.dest.ssa));
+
+ nir_foreach_phi_src(src, phi) {
+ assert(src->src.is_ssa);
+ place_phi_read(shader, reg, src->src.ssa, src->pred, 0);
+ }
+
+ nir_instr_remove(&phi->instr);
+
+ progress = true;
+ }
+
+ return progress;
+}
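+
+/* A hypothetical use (the names here are illustrative, not from any real
+ * caller): an optimization that is about to stitch blocks together and
+ * cannot leave phis behind might do
+ *
+ *    nir_lower_phis_to_regs_block(merge_block);
+ *    ... rewrite the CFG ...
+ *
+ * and then rely on the register-based into-SSA pass plus copy propagation
+ * to turn the register and its movs back into the equivalent phis.
+ */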
+
+struct ssa_def_to_reg_state {
+ nir_function_impl *impl;
+ bool progress;
+};
+
+static bool
+dest_replace_ssa_with_reg(nir_dest *dest, void *void_state)
+{
+ struct ssa_def_to_reg_state *state = void_state;
+
+ if (!dest->is_ssa)
+ return true;
+
+ nir_register *reg = create_reg_for_ssa_def(&dest->ssa, state->impl);
+
+ nir_ssa_def_rewrite_uses(&dest->ssa, nir_src_for_reg(reg));
+
+ nir_instr *instr = dest->ssa.parent_instr;
+ *dest = nir_dest_for_reg(reg);
+ dest->reg.parent_instr = instr;
+ list_addtail(&dest->reg.def_link, &reg->defs);
+
+ state->progress = true;
+
+ return true;
+}
+
+static bool
+ssa_def_is_local_to_block(nir_ssa_def *def, UNUSED void *state)
+{
+ nir_block *block = def->parent_instr->block;
+ nir_foreach_use(use_src, def) {
+ if (use_src->parent_instr->block != block ||
+ use_src->parent_instr->type == nir_instr_type_phi) {
+ return false;
+ }
+ }
+
+ if (!list_is_empty(&def->if_uses))
+ return false;
+
+ return true;
+}
+
+/** Lower all of the SSA defs in a block to registers
+ *
+ * This performs the very simple operation of blindly replacing all of the SSA
+ * defs in the given block with registers. If not used carefully, this may
+ * result in phi nodes with register sources which is technically invalid.
+ * Fortunately, the register-based into-SSA pass handles them anyway.
+ */
+bool
+nir_lower_ssa_defs_to_regs_block(nir_block *block)
+{
+ nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+ nir_shader *shader = impl->function->shader;
+
+ struct ssa_def_to_reg_state state = {
+ .impl = impl,
+ .progress = false,
+ };
+
+ nir_foreach_instr(instr, block) {
+ if (instr->type == nir_instr_type_ssa_undef) {
+ /* Undefs are just a read of something never written. */
+ nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(instr);
+ nir_register *reg = create_reg_for_ssa_def(&undef->def, state.impl);
+ nir_ssa_def_rewrite_uses(&undef->def, nir_src_for_reg(reg));
+ } else if (instr->type == nir_instr_type_load_const) {
+ /* Constant loads are SSA-only, so we need to insert a move */
+ nir_load_const_instr *load = nir_instr_as_load_const(instr);
+ nir_register *reg = create_reg_for_ssa_def(&load->def, state.impl);
+ nir_ssa_def_rewrite_uses(&load->def, nir_src_for_reg(reg));
+
+ nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
+ mov->src[0].src = nir_src_for_ssa(&load->def);
+ mov->dest.dest = nir_dest_for_reg(reg);
+ mov->dest.write_mask = (1 << reg->num_components) - 1;
+ nir_instr_insert(nir_after_instr(&load->instr), &mov->instr);
+ } else if (nir_foreach_ssa_def(instr, ssa_def_is_local_to_block, NULL)) {
+ /* If the SSA def produced by this instruction is only in the block
+ * in which it is defined and is not used by ifs or phis, then we
+ * don't have a reason to convert it to a register.
+ */
+ } else {
+ nir_foreach_dest(instr, dest_replace_ssa_with_reg, &state);
+ }
}
+
+ return state.progress;
}
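+
+/* As with nir_lower_phis_to_regs_block() above, a hypothetical caller would
+ * run this on a block it is about to rearrange:
+ *
+ *    nir_lower_ssa_defs_to_regs_block(block);
+ *
+ * and let the register-based into-SSA pass rebuild SSA form afterwards,
+ * including for any phis whose sources temporarily became registers.
+ */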