From e1261d3c493ff48348483a0084f3017c7e663dc0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 29 Sep 2010 12:08:11 -0700 Subject: [PATCH] i965: First cut at register allocation using graph coloring. The interference is totally bogus (maximal), so this is equivalent to our trivial register assignment before. As in, passes the same set of piglit tests. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 158 +++++++++++++++++++++++++-- 1 file changed, 151 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e91351cb6cc..4de64f09275 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -35,6 +35,7 @@ extern "C" { #include "program/prog_parameter.h" #include "program/prog_print.h" #include "program/prog_optimize.h" +#include "program/register_allocate.h" #include "program/sampler.h" #include "program/hash_table.h" #include "brw_context.h" @@ -448,6 +449,7 @@ public: void assign_curb_setup(); void assign_urb_setup(); void assign_regs(); + void assign_regs_trivial(); void generate_code(); void generate_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, @@ -1997,7 +1999,7 @@ fs_visitor::assign_urb_setup() } static void -trivial_assign_reg(int *reg_hw_locations, fs_reg *reg) +assign_reg(int *reg_hw_locations, fs_reg *reg) { if (reg->file == GRF && reg->reg != 0) { reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset; @@ -2006,7 +2008,7 @@ trivial_assign_reg(int *reg_hw_locations, fs_reg *reg) } void -fs_visitor::assign_regs() +fs_visitor::assign_regs_trivial() { int last_grf = 0; int hw_reg_mapping[this->virtual_grf_next]; @@ -2020,18 +2022,157 @@ fs_visitor::assign_regs() } last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1]; - /* FINISHME: trivial assignment of register numbers */ foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - trivial_assign_reg(hw_reg_mapping, &inst->dst); - trivial_assign_reg(hw_reg_mapping, &inst->src[0]); - trivial_assign_reg(hw_reg_mapping, &inst->src[1]); + assign_reg(hw_reg_mapping, &inst->dst); + assign_reg(hw_reg_mapping, &inst->src[0]); + assign_reg(hw_reg_mapping, &inst->src[1]); } this->grf_used = last_grf + 1; } +void +fs_visitor::assign_regs() +{ + int last_grf = 0; + int hw_reg_mapping[this->virtual_grf_next + 1]; + int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf; + int class_sizes[base_reg_count]; + int class_count = 0; + + /* Set up the register classes. + * + * The base registers store a scalar value. For texture samples, + * we get virtual GRFs composed of 4 contiguous hw register. For + * structures and arrays, we store them as contiguous larger things + * than that, though we should be able to do better most of the + * time. + */ + class_sizes[class_count++] = 1; + for (int r = 1; r < this->virtual_grf_next; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + int ra_reg_count = 0; + int class_base_reg[class_count]; + int class_reg_count[class_count]; + int classes[class_count]; + + for (int i = 0; i < class_count; i++) { + class_base_reg[i] = ra_reg_count; + class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); + ra_reg_count += class_reg_count[i]; + } + + struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); + for (int i = 0; i < class_count; i++) { + classes[i] = ra_alloc_reg_class(regs); + + for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { + ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); + } + + /* Add conflicts between our contiguous registers aliasing + * base regs and other register classes' contiguous registers + * that alias base regs, or the base regs themselves for classes[0]. + */ + for (int c = 0; c <= i; c++) { + for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) { + for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); + c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1); + c_r++) { + + if (0) { + printf("%d/%d conflicts %d/%d\n", + class_sizes[i], i_r, + class_sizes[c], c_r); + } + + ra_add_reg_conflict(regs, + class_base_reg[i] + i_r, + class_base_reg[c] + c_r); + } + } + } + } + + ra_set_finalize(regs); + + struct ra_graph *g = ra_alloc_interference_graph(regs, + this->virtual_grf_next); + /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 + * with nodes. + */ + ra_set_node_class(g, 0, classes[0]); + + /* FINISHME: Proper interference (live interval analysis) */ + for (int i = 1; i < this->virtual_grf_next; i++) { + for (int c = 0; c < class_count; c++) { + if (class_sizes[c] == this->virtual_grf_sizes[i]) { + ra_set_node_class(g, i, classes[c]); + break; + } + } + + for (int j = 1; j < i; j++) { + ra_add_node_interference(g, i, j); + } + } + + /* FINISHME: Handle spilling */ + if (!ra_allocate_no_spills(g)) { + fprintf(stderr, "Failed to allocate registers.\n"); + this->fail = true; + return; + } + + /* Get the chosen virtual registers for each node, and map virtual + * regs in the register classes back down to real hardware reg + * numbers. + */ + hw_reg_mapping[0] = 0; /* unused */ + for (int i = 1; i < this->virtual_grf_next; i++) { + int reg = ra_get_node_reg(g, i); + int hw_reg = -1; + + for (int c = 0; c < class_count; c++) { + if (reg >= class_base_reg[c] && + reg < class_base_reg[c] + class_reg_count[c] - 1) { + hw_reg = reg - class_base_reg[c]; + break; + } + } + + assert(hw_reg != -1); + hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg; + last_grf = MAX2(last_grf, + hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1); + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + assign_reg(hw_reg_mapping, &inst->dst); + assign_reg(hw_reg_mapping, &inst->src[0]); + assign_reg(hw_reg_mapping, &inst->src[1]); + } + + this->grf_used = last_grf + 1; + + talloc_free(g); + talloc_free(regs); +} + static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) { struct brw_reg brw_reg; @@ -2319,7 +2460,10 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.emit_fb_writes(); v.assign_curb_setup(); v.assign_urb_setup(); - v.assign_regs(); + if (0) + v.assign_regs_trivial(); + else + v.assign_regs(); } v.generate_code(); -- 2.30.2