#include "gpir.h"
#include "lima_context.h"
+gpir_reg *gpir_create_reg(gpir_compiler *comp)
+{
+ gpir_reg *reg = ralloc(comp, gpir_reg);
+ reg->index = comp->cur_reg++;
+ list_addtail(&reg->list, &comp->reg_list);
+ return reg;
+}
+
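+/* Return the gpir_reg backing a NIR register, creating it on first use. */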
+static gpir_reg *reg_for_nir_reg(gpir_compiler *comp, nir_register *nir_reg)
+{
+ unsigned index = nir_reg->index;
+ gpir_reg *reg = comp->reg_for_reg[index];
+ if (reg)
+ return reg;
+ reg = gpir_create_reg(comp);
+ comp->reg_for_reg[index] = reg;
+ return reg;
+}
-static inline void *gpir_node_create_ssa(gpir_block *block, gpir_op op, nir_ssa_def *ssa)
+static inline gpir_node *gpir_node_create_ssa(gpir_block *block, gpir_op op, nir_ssa_def *ssa)
{
int index = ssa->index;
gpir_node *node = gpir_node_create(block, op);
- block->comp->var_nodes[index] = node;
+ block->comp->node_for_ssa[index] = node;
snprintf(node->name, sizeof(node->name), "ssa%d", index);
list_addtail(&node->list, &block->node_list);
+
+ /* If any uses are outside the current block, we'll need to create a store
+ * instruction for them.
+ */
+ bool needs_register = false;
+ nir_foreach_use(use, ssa) {
+ if (use->parent_instr->block != ssa->parent_instr->block) {
+ needs_register = true;
+ break;
+ }
+ }
+
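+ /* Uses as an if condition also need a register, unless the if immediately
+  * follows the defining block.
+  */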
+ if (!needs_register) {
+ nir_foreach_if_use(use, ssa) {
+ if (nir_cf_node_prev(&use->parent_if->cf_node) !=
+ &ssa->parent_instr->block->cf_node) {
+ needs_register = true;
+ break;
+ }
+ }
+ }
+
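+ /* Store the value into a fresh register so that later blocks can reload
+  * it via gpir_node_find().
+  */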
+ if (needs_register) {
+ gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg);
+ store->child = node;
+ store->reg = gpir_create_reg(block->comp);
+ gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);
+ list_addtail(&store->node.list, &block->node_list);
+ block->comp->reg_for_ssa[ssa->index] = store->reg;
+ }
+
return node;
}
-static inline void *gpir_node_create_reg(gpir_block *block, gpir_op op, nir_reg_dest *reg)
+static inline void *gpir_node_create_reg(gpir_block *block, gpir_op op, nir_reg_dest *nir_reg)
{
- int index = reg->reg->index;
+ int index = nir_reg->reg->index;
gpir_node *node = gpir_node_create(block, op);
+ block->comp->node_for_reg[index] = node;
gpir_store_node *store = gpir_node_create(block, gpir_op_store_reg);
snprintf(node->name, sizeof(node->name), "reg%d", index);
store->child = node;
+ store->reg = reg_for_nir_reg(block->comp, nir_reg->reg);
gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);
- list_for_each_entry(gpir_reg, reg, &block->comp->reg_list, list) {
- if (reg->index == index) {
- store->reg = reg;
- list_addtail(&store->reg_link, &reg->defs_list);
- break;
- }
- }
-
list_addtail(&node->list, &block->node_list);
list_addtail(&store->node.list, &block->node_list);
return node;
static gpir_node *gpir_node_find(gpir_block *block, gpir_node *succ, nir_src *src,
int channel)
{
+ gpir_reg *reg = NULL;
gpir_node *pred = NULL;
-
if (src->is_ssa) {
if (src->ssa->num_components > 1) {
for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++) {
if (block->comp->vector_ssa[i].ssa == src->ssa->index) {
- pred = block->comp->vector_ssa[i].nodes[channel];
- break;
+ return block->comp->vector_ssa[i].nodes[channel];
}
}
- } else
- pred = block->comp->var_nodes[src->ssa->index];
-
- assert(pred);
- }
- else {
- pred = gpir_node_create(block, gpir_op_load_reg);
- list_addtail(&pred->list, &succ->list);
-
- gpir_load_node *load = gpir_node_to_load(pred);
- list_for_each_entry(gpir_reg, reg, &block->comp->reg_list, list) {
- if (reg->index == src->reg.reg->index) {
- load->reg = reg;
- list_addtail(&load->reg_link, &reg->uses_list);
- break;
- }
+ } else {
+ gpir_node *pred = block->comp->node_for_ssa[src->ssa->index];
+ if (pred->block == block)
+ return pred;
+ reg = block->comp->reg_for_ssa[src->ssa->index];
}
+ } else {
+ pred = block->comp->node_for_reg[src->reg.reg->index];
+ if (pred && pred->block == block && pred != succ)
+ return pred;
+ reg = reg_for_nir_reg(block->comp, src->reg.reg);
}
+ assert(reg);
+ pred = gpir_node_create(block, gpir_op_load_reg);
+ gpir_load_node *load = gpir_node_to_load(pred);
+ load->reg = reg;
+ list_addtail(&pred->list, &succ->list);
+
return pred;
}
return true;
}
-gpir_reg *gpir_create_reg(gpir_compiler *comp)
-{
- gpir_reg *reg = ralloc(comp, gpir_reg);
- reg->index = comp->cur_reg++;
- list_addtail(&reg->list, &comp->reg_list);
- list_inithead(®->defs_list);
- list_inithead(®->uses_list);
- return reg;
-}
-
static gpir_compiler *gpir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
gpir_compiler *comp = rzalloc(prog, gpir_compiler);
list_inithead(&comp->block_list);
list_inithead(&comp->reg_list);
- for (int i = 0; i < num_reg; i++)
- gpir_create_reg(comp);
-
for (int i = 0; i < GPIR_VECTOR_SSA_NUM; i++)
comp->vector_ssa[i].ssa = -1;
- comp->var_nodes = rzalloc_array(comp, gpir_node *, num_ssa);
+ comp->node_for_ssa = rzalloc_array(comp, gpir_node *, num_ssa);
+ comp->node_for_reg = rzalloc_array(comp, gpir_node *, num_reg);
+ comp->reg_for_ssa = rzalloc_array(comp, gpir_reg *, num_ssa);
+ comp->reg_for_reg = rzalloc_array(comp, gpir_reg *, num_reg);
comp->prog = prog;
return comp;
}
*/
#include "gpir.h"
+#include "util/u_dynarray.h"
-/* Register allocation
- *
- * TODO: This needs to be rewritten when we support multiple basic blocks. We
- * need to do proper liveness analysis, combined with either linear scan,
- * graph coloring, or SSA-based allocation. We should also support spilling to
- * temporaries.
- *
- * For now, this only assigns fake registers to values, used to build the fake
- * dependencies that the scheduler relies on. In the future we should also be
- * assigning actual physreg numbers to load_reg/store_reg nodes.
- */
+/* Per-register information */
+
+struct reg_info {
+ BITSET_WORD *conflicts;
+ struct util_dynarray conflict_list;
+
+ /* Number of conflicts that must be allocated to physical registers.
+ */
+ unsigned phys_conflicts;
+
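+ /* Number of conflicts with value nodes, which may live in either value or
+  * physical registers.
+  */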
+ unsigned node_conflicts;
+
+ /* Total number of conflicts (phys_conflicts + node_conflicts). */
+ unsigned total_conflicts;
+
+ int assigned_color;
+
+ bool visited;
+};
-static void regalloc_block(gpir_block *block)
+struct regalloc_ctx {
+ unsigned bitset_words, num_nodes_and_regs;
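+ /* One entry per cross-block register followed by one per node: indices
+  * [0, cur_reg) are registers, [cur_reg, cur_reg + cur_index) are nodes.
+  */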
+ struct reg_info *registers;
+
+ /* Reusable scratch liveness array */
+ BITSET_WORD *live;
+
+ unsigned *worklist;
+ unsigned worklist_start, worklist_end;
+
+ unsigned *stack;
+ unsigned stack_size;
+
+ gpir_compiler *comp;
+ void *mem_ctx;
+};
+
+/* Liveness analysis */
+
+static void propagate_liveness_instr(gpir_node *node, BITSET_WORD *live,
+ gpir_compiler *comp)
{
- /* build each node sequence index in the block node list */
- int index = 0;
- list_for_each_entry(gpir_node, node, &block->node_list, list) {
- node->vreg.index = index++;
+ /* KILL */
+ if (node->type == gpir_node_type_store) {
+ if (node->op == gpir_op_store_reg) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ BITSET_CLEAR(live, store->reg->index);
+ }
}
- /* find the last successor of each node by the sequence index */
- list_for_each_entry(gpir_node, node, &block->node_list, list) {
- node->vreg.last = NULL;
- gpir_node_foreach_succ(node, dep) {
- gpir_node *succ = dep->succ;
- if (!node->vreg.last || node->vreg.last->vreg.index < succ->vreg.index)
- node->vreg.last = succ;
+ /* GEN */
+ if (node->type == gpir_node_type_load) {
+ if (node->op == gpir_op_load_reg) {
+ gpir_load_node *load = gpir_node_to_load(node);
+ BITSET_SET(live, load->reg->index);
}
}
+}
+
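+/* Merge the successors' live_in into live_out, then walk the block backwards
+ * to compute live_in. Returns true if live_in changed.
+ */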
+static bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx)
+{
+ for (unsigned i = 0; i < 2; i++) {
+ if (block->successors[i]) {
+ for (unsigned j = 0; j < ctx->bitset_words; j++)
+ block->live_out[j] |= block->successors[i]->live_in[j];
+ }
+ }
+
+ memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD));
+
+ list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
+ propagate_liveness_instr(node, ctx->live, block->comp);
+ }
- /* do linear scan regalloc */
- int reg_search_start = 0;
- gpir_node *active[GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM] = {0};
+ bool changed = false;
+ for (unsigned i = 0; i < ctx->bitset_words; i++) {
+ changed |= (block->live_in[i] != ctx->live[i]);
+ block->live_in[i] = ctx->live[i];
+ }
+ return changed;
+}
+
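+/* Record every register written by this block in def_out. */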
+static void calc_def_block(gpir_block *block)
+{
list_for_each_entry(gpir_node, node, &block->node_list, list) {
- /* if some reg is expired */
- gpir_node_foreach_pred(node, dep) {
- gpir_node *pred = dep->pred;
- if (pred->vreg.last == node)
- active[pred->value_reg] = NULL;
- }
-
- /* no need to alloc value reg for root node */
- if (gpir_node_is_root(node)) {
- node->value_reg = -1;
- continue;
- }
-
- /* find a free reg for this node */
- int i;
- for (i = 0; i < GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM; i++) {
- /* round robin reg select to reduce false dep when schedule */
- int reg = (reg_search_start + i) % (GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM);
- if (!active[reg]) {
- active[reg] = node;
- node->value_reg = reg;
- reg_search_start++;
+ if (node->op == gpir_op_store_reg) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ BITSET_SET(block->def_out, store->reg->index);
+ }
+ }
+}
+
+static void calc_liveness(struct regalloc_ctx *ctx)
+{
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ list_for_each_entry_rev(gpir_block, block, &ctx->comp->block_list, list) {
+ changed |= propagate_liveness_block(block, ctx);
+ }
+ }
+
+ list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
+ calc_def_block(block);
+ }
+
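+ /* Forward-propagate definitions: after this, a block's def_out contains
+  * every register written in the block itself or in any block that can
+  * reach it.
+  */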
+ changed = true;
+ while (changed) {
+ changed = false;
+ list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
+ for (unsigned i = 0; i < 2; i++) {
+ gpir_block *succ = block->successors[i];
+ if (!succ)
+ continue;
+
+ for (unsigned j = 0; j < ctx->bitset_words; j++) {
+ BITSET_WORD new = block->def_out[j] & ~succ->def_out[j];
+ changed |= (new != 0);
+ succ->def_out[j] |= block->def_out[j];
+ }
+ }
+ }
+ }
+}
+
+/* Interference calculation */
+
+static void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)
+{
+ if (i == j)
+ return;
+
+ struct reg_info *a = &ctx->registers[i];
+ struct reg_info *b = &ctx->registers[j];
+
+ if (BITSET_TEST(a->conflicts, j))
+ return;
+
+ BITSET_SET(a->conflicts, j);
+ BITSET_SET(b->conflicts, i);
+
+ a->total_conflicts++;
+ b->total_conflicts++;
+ if (j < ctx->comp->cur_reg)
+ a->phys_conflicts++;
+ else
+ a->node_conflicts++;
+
+ if (i < ctx->comp->cur_reg)
+ b->phys_conflicts++;
+ else
+ b->node_conflicts++;
+
+ util_dynarray_append(&a->conflict_list, unsigned, j);
+ util_dynarray_append(&b->conflict_list, unsigned, i);
+}
+
+/* Make the register or node "i" interfere with all the other live registers
+ * and nodes.
+ */
+static void add_all_interferences(struct regalloc_ctx *ctx,
+ unsigned i,
+ BITSET_WORD *live_nodes,
+ BITSET_WORD *live_regs)
+{
+ int live_node;
+ BITSET_WORD tmp;
+ BITSET_FOREACH_SET(live_node, tmp, live_nodes, ctx->comp->cur_index) {
+ add_interference(ctx, i,
+ live_node + ctx->comp->cur_reg);
+ }
+
+ int live_reg;
+ BITSET_FOREACH_SET(live_reg, tmp, ctx->live, ctx->comp->cur_index) {
+ add_interference(ctx, i, live_reg);
+ }
+
+}
+
+static void print_liveness(struct regalloc_ctx *ctx,
+ BITSET_WORD *live_reg, BITSET_WORD *live_val)
+{
+ if (!(lima_debug & LIMA_DEBUG_GP))
+ return;
+
+ int live_idx;
+ BITSET_WORD tmp;
+ BITSET_FOREACH_SET(live_idx, tmp, live_reg, ctx->comp->cur_reg) {
+ printf("reg%d ", live_idx);
+ }
+ BITSET_FOREACH_SET(live_idx, tmp, live_val, ctx->comp->cur_index) {
+ printf("%d ", live_idx);
+ }
+ printf("\n");
+}
+
+static void calc_interference(struct regalloc_ctx *ctx)
+{
+ BITSET_WORD *live_nodes =
+ rzalloc_array(ctx->mem_ctx, BITSET_WORD, ctx->comp->cur_index);
+
+ list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
+ /* Initialize liveness at the end of the block, but exclude values that
+ * definitely aren't defined by the end. This helps out with
+ * partially-defined registers, like:
+ *
+ * if (condition) {
+ * foo = ...;
+ * }
+ * if (condition) {
+ * ... = foo;
+ * }
+ *
+ * If we naively propagated liveness backwards, foo would be live from
+ * the beginning of the program, but if we're not inside a loop then
+ * its value is undefined before the first if and we don't have to
+ * consider it live. Mask out registers like foo here.
+ */
+ for (unsigned i = 0; i < ctx->bitset_words; i++) {
+ ctx->live[i] = block->live_out[i] & block->def_out[i];
+ }
+
+ list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
+ gpir_debug("processing node %d\n", node->index);
+ print_liveness(ctx, ctx->live, live_nodes);
+ if (node->type != gpir_node_type_store &&
+ node->type != gpir_node_type_branch) {
+ add_all_interferences(ctx, node->index + ctx->comp->cur_reg,
+ live_nodes, ctx->live);
+
+ /* KILL */
+ BITSET_CLEAR(live_nodes, node->index);
+ } else if (node->op == gpir_op_store_reg) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ add_all_interferences(ctx, store->reg->index,
+ live_nodes, ctx->live);
+
+ /* KILL */
+ BITSET_CLEAR(ctx->live, store->reg->index);
+ }
+
+ /* GEN */
+ if (node->type == gpir_node_type_store) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ BITSET_SET(live_nodes, store->child->index);
+ } else if (node->type == gpir_node_type_alu) {
+ gpir_alu_node *alu = gpir_node_to_alu(node);
+ for (int i = 0; i < alu->num_child; i++)
+ BITSET_SET(live_nodes, alu->children[i]->index);
+ } else if (node->type == gpir_node_type_branch) {
+ gpir_branch_node *branch = gpir_node_to_branch(node);
+ BITSET_SET(live_nodes, branch->cond->index);
+ } else if (node->op == gpir_op_load_reg) {
+ gpir_load_node *load = gpir_node_to_load(node);
+ BITSET_SET(ctx->live, load->reg->index);
+ }
+ }
+ }
+}
+
+/* Register allocation */
+
+static bool can_simplify(struct regalloc_ctx *ctx, unsigned i)
+{
+ struct reg_info *info = &ctx->registers[i];
+ if (i < ctx->comp->cur_reg) {
+ /* Physical regs. */
+ return info->phys_conflicts + info->node_conflicts < GPIR_PHYSICAL_REG_NUM;
+ } else {
+ /* Nodes: if we manage to allocate all of its conflicting physical
+ * registers, they will take up at most GPIR_PHYSICAL_REG_NUM colors, so
+ * we can ignore any more than that.
+ */
+ return MIN2(info->phys_conflicts, GPIR_PHYSICAL_REG_NUM) +
+ info->node_conflicts < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;
+ }
+}
+
+static void push_stack(struct regalloc_ctx *ctx, unsigned i)
+{
+ ctx->stack[ctx->stack_size++] = i;
+ if (i < ctx->comp->cur_reg)
+ gpir_debug("pushing reg%u\n", i);
+ else
+ gpir_debug("pushing %d\n", i - ctx->comp->cur_reg);
+
+ struct reg_info *info = &ctx->registers[i];
+ assert(info->visited);
+
+ util_dynarray_foreach(&info->conflict_list, unsigned, conflict) {
+ struct reg_info *conflict_info = &ctx->registers[*conflict];
+ if (i < ctx->comp->cur_reg) {
+ assert(conflict_info->phys_conflicts > 0);
+ conflict_info->phys_conflicts--;
+ } else {
+ assert(conflict_info->node_conflicts > 0);
+ conflict_info->node_conflicts--;
+ }
+ if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) {
+ ctx->worklist[ctx->worklist_end++] = *conflict;
+ ctx->registers[*conflict].visited = true;
+ }
+ }
+}
+
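+/* Graph coloring with optimistic simplification: repeatedly push trivially
+ * colorable entries onto a stack, optimistically pushing the one with the
+ * fewest remaining conflicts when stuck, then pop the stack and assign each
+ * entry a free color.
+ */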
+static bool do_regalloc(struct regalloc_ctx *ctx)
+{
+ ctx->worklist_start = 0;
+ ctx->worklist_end = 0;
+ ctx->stack_size = 0;
+
+ /* Step 1: find the initially simplifiable registers */
+ for (int i = 0; i < ctx->comp->cur_reg + ctx->comp->cur_index; i++) {
+ if (can_simplify(ctx, i)) {
+ ctx->worklist[ctx->worklist_end++] = i;
+ ctx->registers[i].visited = true;
+ }
+ }
+
+ while (true) {
+ /* Step 2: push onto the stack whatever we can */
+ while (ctx->worklist_start != ctx->worklist_end) {
+ push_stack(ctx, ctx->worklist[ctx->worklist_start++]);
+ }
+
+ if (ctx->stack_size < ctx->num_nodes_and_regs) {
+ /* Step 3: if there are still unsimplifiable nodes left, we need to
+ * optimistically push a node onto the stack. Choose the one with
+ * the smallest number of current neighbors, since that's the most
+ * likely to succeed.
+ */
+ unsigned min_conflicts = UINT_MAX;
+ unsigned best_reg = 0;
+ for (unsigned reg = 0; reg < ctx->num_nodes_and_regs; reg++) {
+ struct reg_info *info = &ctx->registers[reg];
+ if (info->visited)
+ continue;
+ if (info->phys_conflicts + info->node_conflicts < min_conflicts) {
+ best_reg = reg;
+ min_conflicts = info->phys_conflicts + info->node_conflicts;
+ }
+ }
+ gpir_debug("optimistic triggered\n");
+ ctx->registers[best_reg].visited = true;
+ push_stack(ctx, best_reg);
+ } else {
+ break;
+ }
+ }
+
+ /* Step 4: pop off the stack and assign colors */
+ for (int i = ctx->num_nodes_and_regs - 1; i >= 0; i--) {
+ unsigned idx = ctx->stack[i];
+ struct reg_info *reg = &ctx->registers[idx];
+
+ unsigned num_available_regs;
+ if (idx < ctx->comp->cur_reg) {
+ num_available_regs = GPIR_PHYSICAL_REG_NUM;
+ } else {
+ num_available_regs = GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM;
+ }
+
+ bool found = false;
+ unsigned start = i % num_available_regs;
+ for (unsigned j = 0; j < num_available_regs; j++) {
+ unsigned candidate = (j + start) % num_available_regs;
+ bool available = true;
+ util_dynarray_foreach(&reg->conflict_list, unsigned, conflict_idx) {
+ struct reg_info *conflict = &ctx->registers[*conflict_idx];
+ if (conflict->assigned_color >= 0 &&
+ conflict->assigned_color == (int) candidate) {
+ available = false;
+ break;
+ }
+ }
+
+ if (available) {
+ reg->assigned_color = candidate;
+ found = true;
break;
}
}
- /* TODO: spill */
- assert(i != GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM);
+ /* TODO: spilling */
+ if (!found) {
+ gpir_error("Failed to allocate registers\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void assign_regs(struct regalloc_ctx *ctx)
+{
+ list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
+ list_for_each_entry(gpir_node, node, &block->node_list, list) {
+ if (node->index >= 0) {
+ node->value_reg =
+ ctx->registers[ctx->comp->cur_reg + node->index].assigned_color;
+ }
+
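+ /* A physical-register color encodes a register/component pair:
+  * color / 4 is the register index, color % 4 the component.
+  */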
+ if (node->op == gpir_op_load_reg) {
+ gpir_load_node *load = gpir_node_to_load(node);
+ unsigned color = ctx->registers[load->reg->index].assigned_color;
+ load->index = color / 4;
+ load->component = color % 4;
+ }
+
+ if (node->op == gpir_op_store_reg) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ unsigned color = ctx->registers[store->reg->index].assigned_color;
+ store->index = color / 4;
+ store->component = color % 4;
+ node->value_reg = color;
+ }
+ }
+
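+ /* Record which physical registers are live out of the block, for the
+  * scheduler's live_physregs tracking.
+  */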
+ block->live_out_phys = 0;
+
+ int reg_idx;
+ BITSET_WORD tmp;
+ BITSET_FOREACH_SET(reg_idx, tmp, block->live_out, ctx->comp->cur_reg) {
+ if (BITSET_TEST(block->def_out, reg_idx)) {
+ block->live_out_phys |= (1ull << ctx->registers[reg_idx].assigned_color);
+ }
+ }
}
}
gpir_node *pred = dep->pred;
printf(" %d/%d", pred->index, pred->value_reg);
}
+ if (node->op == gpir_op_load_reg) {
+ gpir_load_node *load = gpir_node_to_load(node);
+ printf(" -/%d", 4 * load->index + load->component);
+ printf(" (%d)", load->reg->index);
+ } else if (node->op == gpir_op_store_reg) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ printf(" (%d)", store->reg->index);
+ }
printf("\n");
}
printf("----------------------------\n");
bool gpir_regalloc_prog(gpir_compiler *comp)
{
+ struct regalloc_ctx ctx;
+
+ ctx.mem_ctx = ralloc_context(NULL);
+ ctx.num_nodes_and_regs = comp->cur_reg + comp->cur_index;
+ ctx.bitset_words = BITSET_WORDS(ctx.num_nodes_and_regs);
+ ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
+ ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
+ ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
+ ctx.comp = comp;
+
+ ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, ctx.num_nodes_and_regs);
+ for (unsigned i = 0; i < ctx.num_nodes_and_regs; i++) {
+ ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD,
+ ctx.bitset_words);
+ util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx);
+ }
+
list_for_each_entry(gpir_block, block, &comp->block_list, list) {
- regalloc_block(block);
+ block->live_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
+ block->live_in = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
+ block->def_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
+ }
+
+ calc_liveness(&ctx);
+ calc_interference(&ctx);
+ if (!do_regalloc(&ctx)) {
+ ralloc_free(ctx.mem_ctx);
+ return false;
}
+ assign_regs(&ctx);
regalloc_print_result(comp);
+ ralloc_free(ctx.mem_ctx);
return true;
}
* schedule the instruction.
*/
int total_spill_needed;
+
+ /* For each physical register, a linked list of loads associated with it in
+ * this block. When we spill a value to a given register, and there are
+ * existing loads associated with it that haven't been scheduled yet, we
+ * have to make sure that the corresponding unspill happens after the last
+ * original use has happened, i.e. is scheduled before it.
+ */
+ struct list_head physreg_reads[GPIR_PHYSICAL_REG_NUM];
} sched_ctx;
static int gpir_min_dist_alu(gpir_dep *dep)
}
}
+ if (node->op == gpir_op_store_reg) {
+ /* This register may be loaded in the next basic block, in which case
+ * there still needs to be a 2 instruction gap. We do what the blob
+ * seems to do and simply disable stores in the last two instructions of
+ * the basic block.
+ *
+ * TODO: We may be able to do better than this, but we have to check
+ * first if storing a register works across branches.
+ */
+ if (instr->index < 2)
+ return false;
+ }
+
node->sched.instr = instr;
int max_node_spill_needed = INT_MAX;
ctx->live_physregs |= (1ull << physreg);
- /* TODO: when we support multiple basic blocks, there may be register
- * loads/stores to this register other than this one that haven't been
- * scheduled yet so we may need to insert write-after-read dependencies.
- */
gpir_store_node *store = gpir_node_create(ctx->block, gpir_op_store_reg);
store->index = physreg / 4;
store->component = physreg % 4;
}
node->sched.physreg_store = store;
gpir_node_add_dep(&store->node, node, GPIR_DEP_INPUT);
+
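+ /* The spill store overwrites this physreg, so it must come after any
+  * not-yet-scheduled loads of it (write-after-read).
+  */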
+ list_for_each_entry(gpir_load_node, load,
+ &ctx->physreg_reads[physreg], reg_link) {
+ gpir_node_add_dep(&store->node, &load->node, GPIR_DEP_WRITE_AFTER_READ);
+ if (load->node.sched.ready) {
+ list_del(&load->node.list);
+ load->node.sched.ready = false;
+ }
+ }
+
node->sched.ready = false;
schedule_insert_ready_list(ctx, &store->node);
}
list_inithead(&ctx.ready_list);
ctx.block = block;
ctx.ready_list_slots = 0;
- /* TODO initialize with block live out once we have proper liveness
- * tracking
- */
- ctx.live_physregs = 0;
+ ctx.live_physregs = block->live_out_phys;
+
+ for (unsigned i = 0; i < GPIR_PHYSICAL_REG_NUM; i++) {
+ list_inithead(&ctx.physreg_reads[i]);
+ }
/* construct the ready list from root nodes */
list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
+ /* Add to physreg_reads */
+ if (node->op == gpir_op_load_reg) {
+ gpir_load_node *load = gpir_node_to_load(node);
+ unsigned index = 4 * load->index + load->component;
+ list_addtail(&load->reg_link, &ctx.physreg_reads[index]);
+ }
+
if (gpir_node_is_root(node))
schedule_insert_ready_list(&ctx, node);
}
}
}
- /* Forward dependencies. We only need to add these for register loads,
- * since value registers already have an input dependency.
- */
- list_for_each_entry(gpir_node, node, &block->node_list, list) {
- if (node->op == gpir_op_load_reg) {
- gpir_load_node *load = gpir_node_to_load(node);
- unsigned index = 4 * load->index + load->component;
- if (last_written[index]) {
- gpir_node_add_dep(node, last_written[index], GPIR_DEP_READ_AFTER_WRITE);
- }
- }
-
- if (node->value_reg >= 0)
- last_written[node->value_reg] = node;
- }
-
memset(last_written, 0, sizeof(last_written));
/* False dependencies. For value registers, these exist only to make sure
if (last_written[index]) {
gpir_node_add_dep(last_written[index], node, GPIR_DEP_WRITE_AFTER_READ);
}
+ } else if (node->op == gpir_op_store_reg) {
+ gpir_store_node *store = gpir_node_to_store(node);
+ unsigned index = 4 * store->index + store->component;
+ last_written[index] = node;
} else {
add_fake_dep(node, node, last_written);
}