From 447879eb88b8df41ad32cf4406cc636b112b72d9 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 10 Feb 2015 15:51:34 +0200 Subject: [PATCH] i965: Factor out virtual GRF allocation to a separate object. Right now virtual GRF book-keeping and allocation are performed in each visitor class separately (among a hundred other things), leading to duplicated logic in each visitor and preventing layering as it forces any code that manipulates i965 IR and needs to allocate virtual registers to depend on the specific visitor that happens to be used to translate from GLSL IR. v2: Use realloc()/free() to allocate VGRF book-keeping arrays (Connor). Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 77 +++++++--------- src/mesa/drivers/dri/i965/brw_fs.h | 8 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +- .../dri/i965/brw_fs_live_variables.cpp | 8 +- .../drivers/dri/i965/brw_fs_live_variables.h | 2 +- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 50 +++++------ .../dri/i965/brw_fs_register_coalesce.cpp | 8 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 47 +++++----- src/mesa/drivers/dri/i965/brw_ir_allocator.h | 87 +++++++++++++++++++ .../dri/i965/brw_schedule_instructions.cpp | 10 +-- src/mesa/drivers/dri/i965/brw_shader.h | 6 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 20 ++--- src/mesa/drivers/dri/i965/brw_vec4.h | 12 --- .../dri/i965/brw_vec4_copy_propagation.cpp | 8 +- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 2 +- .../dri/i965/brw_vec4_live_variables.cpp | 10 +-- .../dri/i965/brw_vec4_reg_allocate.cpp | 43 +++++---- .../drivers/dri/i965/brw_vec4_visitor.cpp | 36 ++------ 18 files changed, 235 insertions(+), 201 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_ir_allocator.h diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 200a494c4c9..45870356f93 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -424,7 +424,7 @@ 
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, assert(dst.width % 8 == 0); int regs_written = 4 * (dst.width / 8) * scale; - fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written), + fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type, dst.width); inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; @@ -688,7 +688,7 @@ fs_visitor::get_timestamp() 0), BRW_REGISTER_TYPE_UD)); - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4); + fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); fs_inst *mov = emit(MOV(dst, ts)); /* We want to read the 3 fields we care about even if it's not enabled in @@ -764,7 +764,7 @@ fs_visitor::emit_shader_time_end() fs_reg start = shader_start_time; start.negate = true; - fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1); + fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); emit(ADD(diff, start, shader_end_time)); /* If there were no instructions between the two timestamp gets, the diff @@ -1029,26 +1029,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) } } -int -fs_visitor::virtual_grf_alloc(int size) -{ - if (virtual_grf_array_size <= virtual_grf_count) { - if (virtual_grf_array_size == 0) - virtual_grf_array_size = 16; - else - virtual_grf_array_size *= 2; - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, - virtual_grf_array_size); - } - virtual_grf_sizes[virtual_grf_count] = size; - return virtual_grf_count++; -} - fs_reg fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width), + return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width), brw_type_for_base_type(type), dispatch_width); } @@ -1056,7 +1041,7 @@ fs_reg fs_visitor::vgrf(int num_components) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, virtual_grf_alloc(num_components * 
reg_width), + return fs_reg(GRF, alloc.allocate(num_components * reg_width), BRW_REGISTER_TYPE_F, dispatch_width); } @@ -1912,14 +1897,14 @@ fs_visitor::assign_vs_urb_setup() void fs_visitor::split_virtual_grfs() { - int num_vars = this->virtual_grf_count; + int num_vars = this->alloc.count; /* Count the total number of registers */ int reg_count = 0; int vgrf_to_reg[num_vars]; for (int i = 0; i < num_vars; i++) { vgrf_to_reg[i] = reg_count; - reg_count += virtual_grf_sizes[i]; + reg_count += alloc.sizes[i]; } /* An array of "split points". For each register slot, this indicates @@ -1935,14 +1920,14 @@ fs_visitor::split_virtual_grfs() foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == GRF) { int reg = vgrf_to_reg[inst->dst.reg]; - for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++) + for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++) split_points[reg + j] = true; } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == GRF) { int reg = vgrf_to_reg[inst->src[i].reg]; - for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++) + for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++) split_points[reg + j] = true; } } @@ -1988,13 +1973,13 @@ fs_visitor::split_virtual_grfs() int offset = 1; /* j > 0 case */ - for (int j = 1; j < virtual_grf_sizes[i]; j++) { + for (unsigned j = 1; j < alloc.sizes[i]; j++) { /* If this is a split point, reset the offset to 0 and allocate a * new virtual GRF for the previous offset many registers */ if (split_points[reg]) { assert(offset <= MAX_VGRF_SIZE); - int grf = virtual_grf_alloc(offset); + int grf = alloc.allocate(offset); for (int k = reg - offset; k < reg; k++) new_virtual_grf[k] = grf; offset = 0; @@ -2006,7 +1991,7 @@ fs_visitor::split_virtual_grfs() /* The last one gets the original register number */ assert(offset <= MAX_VGRF_SIZE); - virtual_grf_sizes[i] = offset; + alloc.sizes[i] = offset; for (int k = reg - offset; k < reg; k++) 
new_virtual_grf[k] = i; } @@ -2017,14 +2002,14 @@ fs_visitor::split_virtual_grfs() reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset; inst->dst.reg = new_virtual_grf[reg]; inst->dst.reg_offset = new_reg_offset[reg]; - assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]); + assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == GRF) { reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset; inst->src[i].reg = new_virtual_grf[reg]; inst->src[i].reg_offset = new_reg_offset[reg]; - assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]); + assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } } } @@ -2044,7 +2029,7 @@ bool fs_visitor::compact_virtual_grfs() { bool progress = false; - int remap_table[this->virtual_grf_count]; + int remap_table[this->alloc.count]; memset(remap_table, -1, sizeof(remap_table)); /* Mark which virtual GRFs are used. */ @@ -2060,7 +2045,7 @@ fs_visitor::compact_virtual_grfs() /* Compact the GRF arrays. */ int new_index = 0; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { if (remap_table[i] == -1) { /* We just found an unused register. This means that we are * actually going to compact something. 
@@ -2068,13 +2053,13 @@ fs_visitor::compact_virtual_grfs() progress = true; } else { remap_table[i] = new_index; - virtual_grf_sizes[new_index] = virtual_grf_sizes[i]; + alloc.sizes[new_index] = alloc.sizes[i]; invalidate_live_intervals(); ++new_index; } } - this->virtual_grf_count = new_index; + this->alloc.count = new_index; /* Patch all the instructions to use the newly renumbered registers */ foreach_block_and_inst(block, fs_inst, inst, cfg) { @@ -2458,8 +2443,8 @@ fs_visitor::opt_register_renaming() bool progress = false; int depth = 0; - int remap[virtual_grf_count]; - memset(remap, -1, sizeof(int) * virtual_grf_count); + int remap[alloc.count]; + memset(remap, -1, sizeof(int) * alloc.count); foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) { @@ -2483,12 +2468,12 @@ fs_visitor::opt_register_renaming() if (depth == 0 && inst->dst.file == GRF && - virtual_grf_sizes[inst->dst.reg] == inst->dst.width / 8 && + alloc.sizes[inst->dst.reg] == inst->dst.width / 8 && !inst->is_partial_write()) { if (remap[dst] == -1) { remap[dst] = dst; } else { - remap[dst] = virtual_grf_alloc(inst->dst.width / 8); + remap[dst] = alloc.allocate(inst->dst.width / 8); inst->dst.reg = remap[dst]; progress = true; } @@ -3030,7 +3015,7 @@ fs_visitor::lower_uniform_pull_constant_loads() */ if (brw->gen >= 9) { payload.reg_offset++; - virtual_grf_sizes[payload.reg] = 2; + alloc.sizes[payload.reg] = 2; } /* This is actually going to be a MOV, but since only the first dword @@ -3071,11 +3056,11 @@ fs_visitor::lower_load_payload() { bool progress = false; - int vgrf_to_reg[virtual_grf_count]; + int vgrf_to_reg[alloc.count]; int reg_count = 16; /* Leave room for MRF */ - for (int i = 0; i < virtual_grf_count; ++i) { + for (unsigned i = 0; i < alloc.count; ++i) { vgrf_to_reg[i] = reg_count; - reg_count += virtual_grf_sizes[i]; + reg_count += alloc.sizes[i]; } struct { @@ -3239,7 +3224,7 @@ 
fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "vgrf%d", inst->dst.reg); if (inst->dst.width != dispatch_width) fprintf(file, "@%d", inst->dst.width); - if (virtual_grf_sizes[inst->dst.reg] != inst->dst.width / 8 || + if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 || inst->dst.subreg_offset) fprintf(file, "+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset); @@ -3299,7 +3284,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "vgrf%d", inst->src[i].reg); if (inst->src[i].width != dispatch_width) fprintf(file, "@%d", inst->src[i].width); - if (virtual_grf_sizes[inst->src[i].reg] != inst->src[i].width / 8 || + if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 || inst->src[i].subreg_offset) fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); @@ -3550,9 +3535,9 @@ fs_visitor::calculate_register_pressure() regs_live_at_ip = rzalloc_array(mem_ctx, int, num_instructions); - for (int reg = 0; reg < virtual_grf_count; reg++) { + for (unsigned reg = 0; reg < alloc.count; reg++) { for (int ip = virtual_grf_start[reg]; ip <= virtual_grf_end[reg]; ip++) - regs_live_at_ip[ip] += virtual_grf_sizes[reg]; + regs_live_at_ip[ip] += alloc.sizes[reg]; } } @@ -3640,7 +3625,7 @@ fs_visitor::fixup_3src_null_dest() { foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { if (inst->is_3src() && inst->dst.is_null()) { - inst->dst = fs_reg(GRF, virtual_grf_alloc(dispatch_width / 8), + inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), inst->dst.type); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b95e2c03e13..2c9b705c07b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -321,7 +321,6 @@ public: void init(); fs_reg *variable_storage(ir_variable *var); - int virtual_grf_alloc(int size); fs_reg vgrf(const glsl_type *const type); fs_reg vgrf(int num_components); void 
import_uniforms(fs_visitor *v); @@ -643,9 +642,6 @@ public: int *param_size; - int *virtual_grf_sizes; - int virtual_grf_count; - int virtual_grf_array_size; int *virtual_grf_start; int *virtual_grf_end; brw::fs_live_variables *live_intervals; @@ -679,7 +675,7 @@ public: bool do_dual_src; int first_non_payload_grf; /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ - int max_grf; + unsigned max_grf; fs_reg *fp_temp_regs; fs_reg *fp_input_regs; @@ -730,7 +726,7 @@ public: fs_reg shader_start_time; fs_reg userplane[MAX_CLIP_PLANES]; - int grf_used; + unsigned grf_used; bool spilled_any_registers; const unsigned dispatch_width; /**< 8 or 16 */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 11cb327614c..ae069bb757a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -224,7 +224,7 @@ fs_visitor::opt_cse_local(bblock_t *block) assert(written % dst_width == 0); fs_reg orig_dst = entry->generator->dst; - fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), + fs_reg tmp = fs_reg(GRF, alloc.allocate(written), orig_dst.type, orig_dst.width); entry->tmp = tmp; entry->generator->dst = tmp; diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 189a119025d..968219bc074 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -278,17 +278,17 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg) { mem_ctx = ralloc_context(NULL); - num_vgrfs = v->virtual_grf_count; + num_vgrfs = v->alloc.count; num_vars = 0; var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs); for (int i = 0; i < num_vgrfs; i++) { var_from_vgrf[i] = num_vars; - num_vars += v->virtual_grf_sizes[i]; + num_vars += v->alloc.sizes[i]; } vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars); for (int i = 0; i < num_vgrfs; i++) { - for (int j = 0; j < 
v->virtual_grf_sizes[i]; j++) { + for (unsigned j = 0; j < v->alloc.sizes[i]; j++) { vgrf_from_var[var_from_vgrf[i] + j] = i; } } @@ -344,7 +344,7 @@ fs_visitor::calculate_live_intervals() if (this->live_intervals) return; - int num_vgrfs = this->virtual_grf_count; + int num_vgrfs = this->alloc.count; ralloc_free(this->virtual_grf_start); ralloc_free(this->virtual_grf_end); virtual_grf_start = ralloc_array(mem_ctx, int, num_vgrfs); diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h index a52f922d959..a9b61aa39e5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h @@ -77,7 +77,7 @@ public: /** * Map from any index in block_data to the virtual GRF containing it. * - * For virtual_grf_sizes of [1, 2, 3], vgrf_from_var would contain + * For alloc.sizes of [1, 2, 3], vgrf_from_var would contain * [0, 1, 1, 2, 2, 2]. */ int *vgrf_from_var; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index bcd657b2172..ebe0b12b098 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -31,7 +31,7 @@ #include "glsl/ir_optimization.h" static void -assign_reg(int *reg_hw_locations, fs_reg *reg) +assign_reg(unsigned *reg_hw_locations, fs_reg *reg) { if (reg->file == GRF) { assert(reg->reg_offset >= 0); @@ -43,17 +43,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg) void fs_visitor::assign_regs_trivial() { - int hw_reg_mapping[this->virtual_grf_count + 1]; - int i; + unsigned hw_reg_mapping[this->alloc.count + 1]; + unsigned i; int reg_width = dispatch_width / 8; /* Note that compressed instructions require alignment to 2 registers. 
*/ hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 1; i <= this->virtual_grf_count; i++) { + for (i = 1; i <= this->alloc.count; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + - this->virtual_grf_sizes[i - 1]); + this->alloc.sizes[i - 1]); } - this->grf_used = hw_reg_mapping[this->virtual_grf_count]; + this->grf_used = hw_reg_mapping[this->alloc.count]; foreach_block_and_inst(block, fs_inst, inst, cfg) { assign_reg(hw_reg_mapping, &inst->dst); @@ -66,7 +66,7 @@ fs_visitor::assign_regs_trivial() fail("Ran out of regs on trivial allocator (%d/%d)\n", this->grf_used, max_grf); } else { - this->virtual_grf_count = this->grf_used; + this->alloc.count = this->grf_used; } } @@ -427,7 +427,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * live between the start of the program and our last use of the payload * node. */ - for (int j = 0; j < this->virtual_grf_count; j++) { + for (unsigned j = 0; j < this->alloc.count; j++) { /* Note that we use a <= comparison, unlike virtual_grf_interferes(), * in order to not have to worry about the uniform issue described in * calculate_live_intervals(). @@ -515,7 +515,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) * that are used as conflicting with all virtual GRFs. */ if (mrf_used[i]) { - for (int j = 0; j < this->virtual_grf_count; j++) { + for (unsigned j = 0; j < this->alloc.count; j++) { ra_add_node_interference(g, first_mrf_node + i, j); } } @@ -533,12 +533,12 @@ fs_visitor::assign_regs(bool allow_spilling) * for reg_width == 2. 
*/ int reg_width = dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_count]; + unsigned hw_reg_mapping[this->alloc.count]; int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width); int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */ calculate_live_intervals(); - int node_count = this->virtual_grf_count; + int node_count = this->alloc.count; int first_payload_node = node_count; node_count += payload_node_count; int first_mrf_hack_node = node_count; @@ -547,8 +547,8 @@ fs_visitor::assign_regs(bool allow_spilling) struct ra_graph *g = ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count); - for (int i = 0; i < this->virtual_grf_count; i++) { - unsigned size = this->virtual_grf_sizes[i]; + for (unsigned i = 0; i < this->alloc.count; i++) { + unsigned size = this->alloc.sizes[i]; int c; assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) && @@ -572,7 +572,7 @@ fs_visitor::assign_regs(bool allow_spilling) ra_set_node_class(g, i, c); - for (int j = 0; j < i; j++) { + for (unsigned j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling) * highest register that works. */ if (inst->eot) { - int size = virtual_grf_sizes[inst->src[0].reg]; + int size = alloc.sizes[inst->src[0].reg]; int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1; ra_set_node_reg(g, inst->src[0].reg, reg); break; @@ -661,12 +661,12 @@ fs_visitor::assign_regs(bool allow_spilling) * numbers. 
*/ this->grf_used = payload_node_count; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg]; this->grf_used = MAX2(this->grf_used, - hw_reg_mapping[i] + this->virtual_grf_sizes[i]); + hw_reg_mapping[i] + this->alloc.sizes[i]); } foreach_block_and_inst(block, fs_inst, inst, cfg) { @@ -676,7 +676,7 @@ fs_visitor::assign_regs(bool allow_spilling) } } - this->virtual_grf_count = this->grf_used; + this->alloc.count = this->grf_used; ralloc_free(g); @@ -747,10 +747,10 @@ int fs_visitor::choose_spill_reg(struct ra_graph *g) { float loop_scale = 1.0; - float spill_costs[this->virtual_grf_count]; - bool no_spill[this->virtual_grf_count]; + float spill_costs[this->alloc.count]; + bool no_spill[this->alloc.count]; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; no_spill[i] = false; } @@ -811,7 +811,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) } } - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { if (!no_spill[i]) ra_set_node_spill_cost(g, i, spill_costs[i]); } @@ -822,7 +822,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) void fs_visitor::spill_reg(int spill_reg) { - int size = virtual_grf_sizes[spill_reg]; + int size = alloc.sizes[spill_reg]; unsigned int spill_offset = last_scratch; assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */ int spill_base_mrf = dispatch_width > 8 ? 
13 : 14; @@ -862,7 +862,7 @@ fs_visitor::spill_reg(int spill_reg) int regs_read = inst->regs_read(this, i); int subset_spill_offset = (spill_offset + REG_SIZE * inst->src[i].reg_offset); - fs_reg unspill_dst(GRF, virtual_grf_alloc(regs_read)); + fs_reg unspill_dst(GRF, alloc.allocate(regs_read)); inst->src[i].reg = unspill_dst.reg; inst->src[i].reg_offset = 0; @@ -876,7 +876,7 @@ fs_visitor::spill_reg(int spill_reg) inst->dst.reg == spill_reg) { int subset_spill_offset = (spill_offset + REG_SIZE * inst->dst.reg_offset); - fs_reg spill_src(GRF, virtual_grf_alloc(inst->regs_written)); + fs_reg spill_src(GRF, alloc.allocate(inst->regs_written)); inst->dst.reg = spill_src.reg; inst->dst.reg_offset = 0; diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 62788cd3091..09f0faddbdb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -66,7 +66,7 @@ is_nop_mov(const fs_inst *inst) static bool is_copy_payload(const fs_visitor *v, const fs_inst *inst) { - if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written) + if (v->alloc.sizes[inst->src[0].reg] != inst->regs_written) return false; fs_reg reg = inst->src[0]; @@ -94,8 +94,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) return false; } - if (v->virtual_grf_sizes[inst->src[0].reg] > - v->virtual_grf_sizes[inst->dst.reg]) + if (v->alloc.sizes[inst->src[0].reg] > + v->alloc.sizes[inst->dst.reg]) return false; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { @@ -179,7 +179,7 @@ fs_visitor::register_coalesce() if (reg_from != inst->src[0].reg) { reg_from = inst->src[0].reg; - src_size = virtual_grf_sizes[inst->src[0].reg]; + src_size = alloc.sizes[inst->src[0].reg]; assert(src_size <= MAX_VGRF_SIZE); assert(inst->src[0].width % 8 == 0); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
index 6cddcf5e7e9..2a36d942838 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1278,7 +1278,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, /* If last_rhs_inst wrote a different number of components than our LHS, * we can't safely rewrite it. */ - if (virtual_grf_sizes[dst.reg] != modify->regs_written) + if (alloc.sizes[dst.reg] != modify->regs_written) return false; /* Success! Rewrite the instruction. */ @@ -1461,7 +1461,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * this weirdness around to the expected layout. */ orig_dst = dst; - dst = fs_reg(GRF, virtual_grf_alloc(8), orig_dst.type); + dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type); } enum opcode opcode; @@ -1672,7 +1672,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * need to offset the Sampler State Pointer in the header. */ header_present = true; - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); length++; } @@ -1814,7 +1814,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, else mlen = length * reg_width; - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_F); emit(LOAD_PAYLOAD(src_payload, sources, length)); @@ -1926,7 +1926,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, * tracking to get the scaling factor. 
*/ if (brw->gen < 6 && is_rect) { - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(coord_components)); + fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components)); fs_reg src = coordinate; coordinate = dst; @@ -1985,7 +1985,7 @@ fs_reg fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) { int reg_width = dispatch_width / 8; - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(components * reg_width), + fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width), BRW_REGISTER_TYPE_F); fs_reg dest = vgrf(glsl_type::uvec4_type); fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components); @@ -2986,7 +2986,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4); - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ emit(MOV(sources[0], fs_reg(0u))) ->force_writemask_all = true; @@ -3020,7 +3020,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, } int mlen = 1 + (length - 1) * reg_width; - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD); emit(LOAD_PAYLOAD(src_payload, sources, length)); @@ -3041,7 +3041,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2); - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. 
*/ emit(MOV(sources[0], fs_reg(0u))) ->force_writemask_all = true; @@ -3060,7 +3060,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, emit(MOV(sources[1], offset)); int mlen = 1 + reg_width; - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD); fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2)); @@ -3280,7 +3280,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) int len = 0; for (unsigned i = 0; i < 4; ++i) { if (colors_enabled & (1 << i)) { - dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8), + dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8), color.type, color.width); inst = emit(MOV(dst[len], offset(color, i))); inst->saturate = key->clamp_fragment_color; @@ -3304,11 +3304,11 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) */ for (unsigned i = 0; i < 4; ++i) { if (colors_enabled & (1 << i)) { - dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); inst = emit(MOV(dst[i], half(offset(color, i), 0))); inst->saturate = key->clamp_fragment_color; - dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type); inst = emit(MOV(dst[i + 4], half(offset(color, i), 1))); inst->saturate = key->clamp_fragment_color; inst->force_sechalf = true; @@ -3409,7 +3409,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, length += 2; if (payload.aa_dest_stencil_reg) { - sources[length] = fs_reg(GRF, virtual_grf_alloc(1)); + sources[length] = fs_reg(GRF, alloc.allocate(1)); emit(MOV(sources[length], fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); length++; @@ -3423,7 +3423,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, /* Hand over gl_SampleMask. Only lower 16 bits are relevant. 
Since * it's unsinged single words, one vgrf is always 16-wide. */ - sources[length] = fs_reg(GRF, virtual_grf_alloc(1), + sources[length] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UW, 16); emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); length++; @@ -3437,7 +3437,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, length += setup_color_payload(sources + length, this->outputs[0], 0); } else if (color1.file == BAD_FILE) { if (src0_alpha.file != BAD_FILE) { - sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size), + sources[length] = fs_reg(GRF, alloc.allocate(reg_size), src0_alpha.type, src0_alpha.width); fs_inst *inst = emit(MOV(sources[length], src0_alpha)); inst->saturate = key->clamp_fragment_color; @@ -3486,7 +3486,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, /* Send from the GRF */ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); load = emit(LOAD_PAYLOAD(payload, sources, length)); - payload.reg = virtual_grf_alloc(load->regs_written); + payload.reg = alloc.allocate(load->regs_written); payload.width = dispatch_width; load->dst = payload; write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); @@ -3655,7 +3655,7 @@ fs_visitor::emit_urb_writes() * send to terminate the shader. */ if (vue_map->slots_valid == 0) { - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)))); inst->force_writemask_all = true; @@ -3688,7 +3688,7 @@ fs_visitor::emit_urb_writes() break; } - zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); emit(MOV(zero, fs_reg(0u))); sources[length++] = zero; @@ -3742,7 +3742,7 @@ fs_visitor::emit_urb_writes() * temp register and use that for the payload. 
*/ for (int i = 0; i < 4; i++) { - reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type); + reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); src = offset(this->outputs[varying], i); fs_inst *inst = emit(MOV(reg, src)); inst->saturate = true; @@ -3769,14 +3769,14 @@ fs_visitor::emit_urb_writes() emit_shader_time_end(); fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1), + fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), BRW_REGISTER_TYPE_F); /* We need WE_all on the MOV for the message header (the URB handles) * so do a MOV to a dummy register and set force_writemask_all on the * MOV. LOAD_PAYLOAD will preserve that. */ - fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1), + fs_reg dummy = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)))); @@ -3892,9 +3892,6 @@ fs_visitor::init() this->current_annotation = NULL; this->base_ir = NULL; - this->virtual_grf_sizes = NULL; - this->virtual_grf_count = 0; - this->virtual_grf_array_size = 0; this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_ir_allocator.h b/src/mesa/drivers/dri/i965/brw_ir_allocator.h new file mode 100644 index 00000000000..b1237ed38e7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ir_allocator.h @@ -0,0 +1,87 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_IR_ALLOCATOR_H +#define BRW_IR_ALLOCATOR_H + +#include "main/macros.h" + +namespace brw { + /** + * Simple allocator used to keep track of virtual GRFs. + */ + class simple_allocator { + public: + simple_allocator() : + sizes(NULL), offsets(NULL), count(0), total_size(0), capacity(0) + { + } + + ~simple_allocator() + { + free(offsets); + free(sizes); + } + + unsigned + allocate(unsigned size) + { + if (capacity <= count) { + capacity = MAX2(16, capacity * 2); + sizes = (unsigned *)realloc(sizes, capacity * sizeof(unsigned)); + offsets = (unsigned *)realloc(offsets, capacity * sizeof(unsigned)); + } + + sizes[count] = size; + offsets[count] = total_size; + total_size += size; + + return count++; + } + + /** + * Array of sizes for each allocation. The allocation unit is up to the + * back-end, but it's expected to be one scalar value in the FS back-end + * and one vec4 in the VEC4 back-end. + */ + unsigned *sizes; + + /** + * Array of offsets from the start of the VGRF space in allocation + * units. + */ + unsigned *offsets; + + /** Total number of VGRFs allocated. */ + unsigned count; + + /** Cumulative size in allocation units. 
*/ + unsigned total_size; + + private: + unsigned capacity; + }; +} + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 40b5715cccd..78666fd222f 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -544,9 +544,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) if (inst->dst.file == GRF) { if (remaining_grf_uses[inst->dst.reg] == 1) - benefit += v->virtual_grf_sizes[inst->dst.reg]; + benefit += v->alloc.sizes[inst->dst.reg]; if (!grf_active[inst->dst.reg]) - benefit -= v->virtual_grf_sizes[inst->dst.reg]; + benefit -= v->alloc.sizes[inst->dst.reg]; } for (int i = 0; i < inst->sources; i++) { @@ -554,9 +554,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) continue; if (remaining_grf_uses[inst->src[i].reg] == 1) - benefit += v->virtual_grf_sizes[inst->src[i].reg]; + benefit += v->alloc.sizes[inst->src[i].reg]; if (!grf_active[inst->src[i].reg]) - benefit -= v->virtual_grf_sizes[inst->src[i].reg]; + benefit -= v->alloc.sizes[inst->src[i].reg]; } return benefit; @@ -1503,7 +1503,7 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode) if (mode == SCHEDULE_POST) grf_count = grf_used; else - grf_count = virtual_grf_count; + grf_count = alloc.count; fs_instruction_scheduler sched(this, grf_count, mode); sched.run(cfg); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 5ad87d6278b..ab3ad60e02b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -27,6 +27,10 @@ #include "main/compiler.h" #include "glsl/ir.h" +#ifdef __cplusplus +#include "brw_ir_allocator.h" +#endif + #pragma once enum PACKED register_file { @@ -172,6 +176,8 @@ public: gl_shader_stage stage; + brw::simple_allocator alloc; + virtual void 
dump_instruction(backend_instruction *inst) = 0; virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0; virtual void dump_instructions(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 98fad6c8f87..f2339b399e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1209,7 +1209,7 @@ vec4_visitor::opt_register_coalesce() void vec4_visitor::split_virtual_grfs() { - int num_vars = this->virtual_grf_count; + int num_vars = this->alloc.count; int new_virtual_grf[num_vars]; bool split_grf[num_vars]; @@ -1217,7 +1217,7 @@ vec4_visitor::split_virtual_grfs() /* Try to split anything > 0 sized. */ for (int i = 0; i < num_vars; i++) { - split_grf[i] = this->virtual_grf_sizes[i] != 1; + split_grf[i] = this->alloc.sizes[i] != 1; } /* Check that the instructions are compatible with the registers we're trying @@ -1243,13 +1243,13 @@ vec4_visitor::split_virtual_grfs() if (!split_grf[i]) continue; - new_virtual_grf[i] = virtual_grf_alloc(1); - for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { - int reg = virtual_grf_alloc(1); + new_virtual_grf[i] = alloc.allocate(1); + for (unsigned j = 2; j < this->alloc.sizes[i]; j++) { + unsigned reg = alloc.allocate(1); assert(reg == new_virtual_grf[i] + j - 1); (void) reg; } - this->virtual_grf_sizes[i] = 1; + this->alloc.sizes[i] = 1; } foreach_block_and_inst(block, vec4_instruction, inst, cfg) { @@ -1432,7 +1432,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) /* Don't print .0; and only VGRFs have reg_offsets and sizes */ if (inst->src[i].reg_offset != 0 && inst->src[i].file == GRF && - virtual_grf_sizes[inst->src[i].reg] != 1) + alloc.sizes[inst->src[i].reg] != 1) fprintf(file, ".%d", inst->src[i].reg_offset); if (inst->src[i].file != IMM) { @@ -1834,9 +1834,9 @@ vec4_visitor::run() if (false) { /* Debug of register spilling: Go spill everything. 
*/ - const int grf_count = virtual_grf_count; - float spill_costs[virtual_grf_count]; - bool no_spill[virtual_grf_count]; + const int grf_count = alloc.count; + float spill_costs[alloc.count]; + bool no_spill[alloc.count]; evaluate_spill_costs(spill_costs, no_spill); for (int i = 0; i < grf_count; i++) { if (no_spill[i]) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 980544d3dbe..6b710c9bc31 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -275,9 +275,6 @@ public: const void *base_ir; const char *current_annotation; - int *virtual_grf_sizes; - int virtual_grf_count; - int virtual_grf_array_size; int first_non_payload_grf; unsigned int max_grf; int *virtual_grf_start; @@ -285,14 +282,6 @@ public: brw::vec4_live_variables *live_intervals; dst_reg userplane[MAX_CLIP_PLANES]; - /** - * This is the size to be used for an array with an element per - * reg_offset - */ - int virtual_grf_reg_count; - /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */ - int *virtual_grf_reg_map; - dst_reg *variable_storage(ir_variable *var); void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); @@ -347,7 +336,6 @@ public: bool run(void); void fail(const char *msg, ...); - int virtual_grf_alloc(int size); void setup_uniform_clipplane_values(); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 638d99a4c92..81567d2b295 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -346,7 +346,7 @@ bool vec4_visitor::opt_copy_propagation(bool do_constant_prop) { bool progress = false; - struct copy_entry entries[virtual_grf_reg_count]; + struct copy_entry entries[alloc.total_size]; memset(&entries, 0, 
sizeof(entries)); @@ -375,7 +375,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) inst->src[i].reladdr) continue; - int reg = (virtual_grf_reg_map[inst->src[i].reg] + + int reg = (alloc.offsets[inst->src[i].reg] + inst->src[i].reg_offset); /* Find the regs that each swizzle component came from. @@ -418,7 +418,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) /* Track available source registers. */ if (inst->dst.file == GRF) { const int reg = - virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset; + alloc.offsets[inst->dst.reg] + inst->dst.reg_offset; /* Update our destination's current channel values. For a direct copy, * the value is the newly propagated source. Otherwise, we don't know @@ -439,7 +439,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) if (inst->dst.reladdr) memset(&entries, 0, sizeof(entries)); else { - for (int i = 0; i < virtual_grf_reg_count; i++) { + for (unsigned i = 0; i < alloc.total_size; i++) { for (int j = 0; j < 4; j++) { if (is_channel_updated(inst, entries[i].value, j)){ entries[i].value[j] = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index ee50419dc9a..5fb8f3166ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -241,7 +241,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) * more -- a sure sign they'll fail operands_match(). 
*/ if (src->file == GRF) { - assert((src->reg * 4 + 3) < (virtual_grf_count * 4)); + assert((unsigned)(src->reg * 4 + 3) < (alloc.count * 4)); int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0], virtual_grf_end[src->reg * 4 + 1]), diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index 98350691db2..c562b2e6800 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -96,7 +96,7 @@ vec4_live_variables::setup_def_use() * variable, and thus qualify for being in def[]. */ if (inst->dst.file == GRF && - v->virtual_grf_sizes[inst->dst.reg] == 1 && + v->alloc.sizes[inst->dst.reg] == 1 && !inst->predicate) { for (int c = 0; c < 4; c++) { if (inst->dst.writemask & (1 << c)) { @@ -180,7 +180,7 @@ vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg) { mem_ctx = ralloc_context(NULL); - num_vars = v->virtual_grf_count * 4; + num_vars = v->alloc.count * 4; block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); bitset_words = BITSET_WORDS(num_vars); @@ -230,14 +230,14 @@ vec4_visitor::calculate_live_intervals() if (this->live_intervals) return; - int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4); - int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4); + int *start = ralloc_array(mem_ctx, int, this->alloc.count * 4); + int *end = ralloc_array(mem_ctx, int, this->alloc.count * 4); ralloc_free(this->virtual_grf_start); ralloc_free(this->virtual_grf_end); this->virtual_grf_start = start; this->virtual_grf_end = end; - for (int i = 0; i < this->virtual_grf_count * 4; i++) { + for (unsigned i = 0; i < this->alloc.count * 4; i++) { start[i] = MAX_INSTRUCTION; end[i] = -1; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index e8e2185ac1a..b944d454df6 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -45,15 +45,14 @@ assign(unsigned int *reg_hw_locations, backend_reg *reg) bool vec4_visitor::reg_allocate_trivial() { - unsigned int hw_reg_mapping[this->virtual_grf_count]; - bool virtual_grf_used[this->virtual_grf_count]; - int i; + unsigned int hw_reg_mapping[this->alloc.count]; + bool virtual_grf_used[this->alloc.count]; int next; /* Calculate which virtual GRFs are actually in use after whatever * optimization passes have occurred. */ - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { virtual_grf_used[i] = false; } @@ -61,18 +60,18 @@ vec4_visitor::reg_allocate_trivial() if (inst->dst.file == GRF) virtual_grf_used[inst->dst.reg] = true; - for (int i = 0; i < 3; i++) { + for (unsigned i = 0; i < 3; i++) { if (inst->src[i].file == GRF) virtual_grf_used[inst->src[i].reg] = true; } } hw_reg_mapping[0] = this->first_non_payload_grf; - next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; - for (i = 1; i < this->virtual_grf_count; i++) { + next = hw_reg_mapping[0] + this->alloc.sizes[0]; + for (unsigned i = 1; i < this->alloc.count; i++) { if (virtual_grf_used[i]) { hw_reg_mapping[i] = next; - next += this->virtual_grf_sizes[i]; + next += this->alloc.sizes[i]; } } prog_data->total_grf = next; @@ -176,7 +175,7 @@ bool vec4_visitor::reg_allocate() { struct intel_screen *screen = brw->intelScreen; - unsigned int hw_reg_mapping[virtual_grf_count]; + unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; /* Using the trivial allocator can be useful in debugging undefined @@ -187,19 +186,19 @@ vec4_visitor::reg_allocate() calculate_live_intervals(); - int node_count = virtual_grf_count; + int node_count = alloc.count; int first_payload_node = node_count; node_count += payload_reg_count; struct ra_graph *g = ra_alloc_interference_graph(screen->vec4_reg_set.regs, 
node_count); - for (int i = 0; i < virtual_grf_count; i++) { - int size = this->virtual_grf_sizes[i]; + for (unsigned i = 0; i < alloc.count; i++) { + int size = this->alloc.sizes[i]; assert(size >= 1 && size <= 2 && "Register allocation relies on split_virtual_grfs()."); ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]); - for (int j = 0; j < i; j++) { + for (unsigned j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -230,12 +229,12 @@ vec4_visitor::reg_allocate() * numbers. */ prog_data->total_grf = payload_reg_count; - for (int i = 0; i < virtual_grf_count; i++) { + for (unsigned i = 0; i < alloc.count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg]; prog_data->total_grf = MAX2(prog_data->total_grf, - hw_reg_mapping[i] + virtual_grf_sizes[i]); + hw_reg_mapping[i] + alloc.sizes[i]); } foreach_block_and_inst(block, vec4_instruction, inst, cfg) { @@ -255,9 +254,9 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) { float loop_scale = 1.0; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; - no_spill[i] = virtual_grf_sizes[i] != 1; + no_spill[i] = alloc.sizes[i] != 1; } /* Calculate costs for spilling nodes. 
Call it a cost of 1 per @@ -308,12 +307,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) int vec4_visitor::choose_spill_reg(struct ra_graph *g) { - float spill_costs[this->virtual_grf_count]; - bool no_spill[this->virtual_grf_count]; + float spill_costs[this->alloc.count]; + bool no_spill[this->alloc.count]; evaluate_spill_costs(spill_costs, no_spill); - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { if (!no_spill[i]) ra_set_node_spill_cost(g, i, spill_costs[i]); } @@ -324,7 +323,7 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g) void vec4_visitor::spill_reg(int spill_reg_nr) { - assert(virtual_grf_sizes[spill_reg_nr] == 1); + assert(alloc.sizes[spill_reg_nr] == 1); unsigned int spill_offset = c->last_scratch++; /* Generate spill/unspill instructions for the objects being spilled. */ @@ -332,7 +331,7 @@ vec4_visitor::spill_reg(int spill_reg_nr) for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { src_reg spill_reg = inst->src[i]; - inst->src[i].reg = virtual_grf_alloc(1); + inst->src[i].reg = alloc.allocate(1); dst_reg temp = dst_reg(inst->src[i]); emit_scratch_read(block, inst, temp, spill_reg, spill_offset); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e6a7ed06020..7d5221386cb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -616,31 +616,12 @@ type_size(const struct glsl_type *type) return 0; } -int -vec4_visitor::virtual_grf_alloc(int size) -{ - if (virtual_grf_array_size <= virtual_grf_count) { - if (virtual_grf_array_size == 0) - virtual_grf_array_size = 16; - else - virtual_grf_array_size *= 2; - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, - virtual_grf_array_size); - virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int, - virtual_grf_array_size); - } - 
virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count; - virtual_grf_reg_count += size; - virtual_grf_sizes[virtual_grf_count] = size; - return virtual_grf_count++; -} - src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type)); + this->reg = v->alloc.allocate(type_size(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -658,7 +639,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type) * size); + this->reg = v->alloc.allocate(type_size(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -670,7 +651,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type)); + this->reg = v->alloc.allocate(type_size(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -3372,7 +3353,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, void vec4_visitor::move_grf_array_access_to_scratch() { - int scratch_loc[this->virtual_grf_count]; + int scratch_loc[this->alloc.count]; memset(scratch_loc, -1, sizeof(scratch_loc)); /* First, calculate the set of virtual GRFs that need to be punted @@ -3383,7 +3364,7 @@ vec4_visitor::move_grf_array_access_to_scratch() if (inst->dst.file == GRF && inst->dst.reladdr && scratch_loc[inst->dst.reg] == -1) { scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[inst->dst.reg]; + c->last_scratch += this->alloc.sizes[inst->dst.reg]; } for (int i = 0 ; i < 3; i++) { @@ -3392,7 +3373,7 @@ vec4_visitor::move_grf_array_access_to_scratch() if (src->file == GRF && src->reladdr && scratch_loc[src->reg] == -1) { scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[src->reg]; + c->last_scratch += 
this->alloc.sizes[src->reg]; } } } @@ -3612,11 +3593,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; - this->virtual_grf_sizes = NULL; - this->virtual_grf_count = 0; - this->virtual_grf_reg_map = NULL; - this->virtual_grf_reg_count = 0; - this->virtual_grf_array_size = 0; this->live_intervals = NULL; this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; -- 2.30.2