assert(dst.width % 8 == 0);
int regs_written = 4 * (dst.width / 8) * scale;
- fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written),
+ fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
dst.type, dst.width);
inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
0),
BRW_REGISTER_TYPE_UD));
- fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4);
+ fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
fs_inst *mov = emit(MOV(dst, ts));
/* We want to read the 3 fields we care about even if it's not enabled in
fs_reg start = shader_start_time;
start.negate = true;
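/* With the start time negated, the ADD below computes end - start. */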
- fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1);
+ fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
emit(ADD(diff, start, shader_end_time));
/* If there were no instructions between the two timestamp gets, the diff
}
}
-int
-fs_visitor::virtual_grf_alloc(int size)
-{
- if (virtual_grf_array_size <= virtual_grf_count) {
- if (virtual_grf_array_size == 0)
- virtual_grf_array_size = 16;
- else
- virtual_grf_array_size *= 2;
- virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
- virtual_grf_array_size);
- }
- virtual_grf_sizes[virtual_grf_count] = size;
- return virtual_grf_count++;
-}
-
fs_reg
fs_visitor::vgrf(const glsl_type *const type)
{
int reg_width = dispatch_width / 8;
- return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width),
+ return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width),
brw_type_for_base_type(type), dispatch_width);
}
fs_visitor::vgrf(int num_components)
{
int reg_width = dispatch_width / 8;
- return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width),
+ return fs_reg(GRF, alloc.allocate(num_components * reg_width),
BRW_REGISTER_TYPE_F, dispatch_width);
}
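
Both helpers size the VGRF in hardware registers: one unit per scalar component per SIMD8 half. A hypothetical worked example, assuming the scalar back-end's type_size() counts a vec4 as 4 components:

/* SIMD8  (dispatch_width == 8,  reg_width == 1): vgrf(glsl_type::vec4_type)
 *        -> alloc.allocate(4 * 1) == 4 units.
 * SIMD16 (dispatch_width == 16, reg_width == 2): vgrf(glsl_type::vec4_type)
 *        -> alloc.allocate(4 * 2) == 8 units.
 */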
void
fs_visitor::split_virtual_grfs()
{
- int num_vars = this->virtual_grf_count;
+ int num_vars = this->alloc.count;
/* Count the total number of registers */
int reg_count = 0;
int vgrf_to_reg[num_vars];
for (int i = 0; i < num_vars; i++) {
vgrf_to_reg[i] = reg_count;
- reg_count += virtual_grf_sizes[i];
+ reg_count += alloc.sizes[i];
}
/* An array of "split points". For each register slot, this indicates
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->dst.file == GRF) {
int reg = vgrf_to_reg[inst->dst.reg];
- for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++)
+ for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++)
split_points[reg + j] = true;
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == GRF) {
int reg = vgrf_to_reg[inst->src[i].reg];
- for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++)
+ for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++)
split_points[reg + j] = true;
}
}
int offset = 1;
/* j > 0 case */
- for (int j = 1; j < virtual_grf_sizes[i]; j++) {
+ for (unsigned j = 1; j < alloc.sizes[i]; j++) {
/* If this is a split point, reset the offset to 0 and allocate a
* new virtual GRF covering the preceding "offset" registers
*/
if (split_points[reg]) {
assert(offset <= MAX_VGRF_SIZE);
- int grf = virtual_grf_alloc(offset);
+ int grf = alloc.allocate(offset);
for (int k = reg - offset; k < reg; k++)
new_virtual_grf[k] = grf;
offset = 0;
/* The last one gets the original register number */
assert(offset <= MAX_VGRF_SIZE);
- virtual_grf_sizes[i] = offset;
+ alloc.sizes[i] = offset;
for (int k = reg - offset; k < reg; k++)
new_virtual_grf[k] = i;
}
reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
inst->dst.reg = new_virtual_grf[reg];
inst->dst.reg_offset = new_reg_offset[reg];
- assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
+ assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == GRF) {
reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
inst->src[i].reg = new_virtual_grf[reg];
inst->src[i].reg_offset = new_reg_offset[reg];
- assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
+ assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
}
}
}
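
To make the remapping concrete, a sketch with invented sizes:

/* Hypothetical walkthrough: alloc.sizes == [1, 4] gives vgrf_to_reg == [0, 1]
 * and reg_count == 5.  If slot 3 is the only split point, the accumulated
 * offset reaches 2 at the split while walking VGRF 1's slots, a new
 * 2-register VGRF n is allocated for slots 1-2, and VGRF 1 itself keeps the
 * trailing two slots with alloc.sizes[1] reset to 2.  The result is
 * new_virtual_grf == [0, n, n, 1, 1] and new_reg_offset == [0, 0, 1, 0, 1],
 * which is exactly what the asserts above validate against alloc.sizes.
 */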
fs_visitor::compact_virtual_grfs()
{
bool progress = false;
- int remap_table[this->virtual_grf_count];
+ int remap_table[this->alloc.count];
memset(remap_table, -1, sizeof(remap_table));
/* Mark which virtual GRFs are used. */
/* Compact the GRF arrays. */
int new_index = 0;
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
if (remap_table[i] == -1) {
/* We just found an unused register. This means that we are
* actually going to compact something.
progress = true;
} else {
remap_table[i] = new_index;
- virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
+ alloc.sizes[new_index] = alloc.sizes[i];
invalidate_live_intervals();
++new_index;
}
}
- this->virtual_grf_count = new_index;
+ this->alloc.count = new_index;
/* Patch all the instructions to use the newly renumbered registers */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
bool progress = false;
int depth = 0;
- int remap[virtual_grf_count];
- memset(remap, -1, sizeof(int) * virtual_grf_count);
+ int remap[alloc.count];
+ memset(remap, -1, sizeof(int) * alloc.count);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) {
if (depth == 0 &&
inst->dst.file == GRF &&
- virtual_grf_sizes[inst->dst.reg] == inst->dst.width / 8 &&
+ alloc.sizes[inst->dst.reg] == inst->dst.width / 8 &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
- remap[dst] = virtual_grf_alloc(inst->dst.width / 8);
+ remap[dst] = alloc.allocate(inst->dst.width / 8);
inst->dst.reg = remap[dst];
progress = true;
}
*/
if (brw->gen >= 9) {
payload.reg_offset++;
- virtual_grf_sizes[payload.reg] = 2;
+ alloc.sizes[payload.reg] = 2;
}
/* This is actually going to be a MOV, but since only the first dword
{
bool progress = false;
- int vgrf_to_reg[virtual_grf_count];
+ int vgrf_to_reg[alloc.count];
int reg_count = 16; /* Leave room for MRF */
- for (int i = 0; i < virtual_grf_count; ++i) {
+ for (unsigned i = 0; i < alloc.count; ++i) {
vgrf_to_reg[i] = reg_count;
- reg_count += virtual_grf_sizes[i];
+ reg_count += alloc.sizes[i];
}
struct {
fprintf(file, "vgrf%d", inst->dst.reg);
if (inst->dst.width != dispatch_width)
fprintf(file, "@%d", inst->dst.width);
- if (virtual_grf_sizes[inst->dst.reg] != inst->dst.width / 8 ||
+ if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 ||
inst->dst.subreg_offset)
fprintf(file, "+%d.%d",
inst->dst.reg_offset, inst->dst.subreg_offset);
fprintf(file, "vgrf%d", inst->src[i].reg);
if (inst->src[i].width != dispatch_width)
fprintf(file, "@%d", inst->src[i].width);
- if (virtual_grf_sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
+ if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
inst->src[i].subreg_offset)
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
regs_live_at_ip = rzalloc_array(mem_ctx, int, num_instructions);
- for (int reg = 0; reg < virtual_grf_count; reg++) {
+ for (unsigned reg = 0; reg < alloc.count; reg++) {
for (int ip = virtual_grf_start[reg]; ip <= virtual_grf_end[reg]; ip++)
- regs_live_at_ip[ip] += virtual_grf_sizes[reg];
+ regs_live_at_ip[ip] += alloc.sizes[reg];
}
}
{
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
if (inst->is_3src() && inst->dst.is_null()) {
- inst->dst = fs_reg(GRF, virtual_grf_alloc(dispatch_width / 8),
+ inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
}
}
void init();
fs_reg *variable_storage(ir_variable *var);
- int virtual_grf_alloc(int size);
fs_reg vgrf(const glsl_type *const type);
fs_reg vgrf(int num_components);
void import_uniforms(fs_visitor *v);
int *param_size;
- int *virtual_grf_sizes;
- int virtual_grf_count;
- int virtual_grf_array_size;
int *virtual_grf_start;
int *virtual_grf_end;
brw::fs_live_variables *live_intervals;
bool do_dual_src;
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
- int max_grf;
+ unsigned max_grf;
fs_reg *fp_temp_regs;
fs_reg *fp_input_regs;
fs_reg shader_start_time;
fs_reg userplane[MAX_CLIP_PLANES];
- int grf_used;
+ unsigned grf_used;
bool spilled_any_registers;
const unsigned dispatch_width; /**< 8 or 16 */
assert(written % dst_width == 0);
fs_reg orig_dst = entry->generator->dst;
- fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+ fs_reg tmp = fs_reg(GRF, alloc.allocate(written),
orig_dst.type, orig_dst.width);
entry->tmp = tmp;
entry->generator->dst = tmp;
{
mem_ctx = ralloc_context(NULL);
- num_vgrfs = v->virtual_grf_count;
+ num_vgrfs = v->alloc.count;
num_vars = 0;
var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs);
for (int i = 0; i < num_vgrfs; i++) {
var_from_vgrf[i] = num_vars;
- num_vars += v->virtual_grf_sizes[i];
+ num_vars += v->alloc.sizes[i];
}
vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars);
for (int i = 0; i < num_vgrfs; i++) {
- for (int j = 0; j < v->virtual_grf_sizes[i]; j++) {
+ for (unsigned j = 0; j < v->alloc.sizes[i]; j++) {
vgrf_from_var[var_from_vgrf[i] + j] = i;
}
}
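
For concreteness, using the same hypothetical sizes as the vgrf_from_var comment below:

/* With v->alloc.sizes == [1, 2, 3]: num_vars == 6,
 * var_from_vgrf == [0, 1, 3] and vgrf_from_var == [0, 1, 1, 2, 2, 2].
 */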
if (this->live_intervals)
return;
- int num_vgrfs = this->virtual_grf_count;
+ int num_vgrfs = this->alloc.count;
ralloc_free(this->virtual_grf_start);
ralloc_free(this->virtual_grf_end);
virtual_grf_start = ralloc_array(mem_ctx, int, num_vgrfs);
/**
* Map from any index in block_data to the virtual GRF containing it.
*
- * For virtual_grf_sizes of [1, 2, 3], vgrf_from_var would contain
+ * For alloc.sizes of [1, 2, 3], vgrf_from_var would contain
* [0, 1, 1, 2, 2, 2].
*/
int *vgrf_from_var;
#include "glsl/ir_optimization.h"
static void
-assign_reg(int *reg_hw_locations, fs_reg *reg)
+assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
{
if (reg->file == GRF) {
assert(reg->reg_offset >= 0);
void
fs_visitor::assign_regs_trivial()
{
- int hw_reg_mapping[this->virtual_grf_count + 1];
- int i;
+ unsigned hw_reg_mapping[this->alloc.count + 1];
+ unsigned i;
int reg_width = dispatch_width / 8;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
- for (i = 1; i <= this->virtual_grf_count; i++) {
+ for (i = 1; i <= this->alloc.count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
- this->virtual_grf_sizes[i - 1]);
+ this->alloc.sizes[i - 1]);
}
- this->grf_used = hw_reg_mapping[this->virtual_grf_count];
+ this->grf_used = hw_reg_mapping[this->alloc.count];
foreach_block_and_inst(block, fs_inst, inst, cfg) {
assign_reg(hw_reg_mapping, &inst->dst);
fail("Ran out of regs on trivial allocator (%d/%d)\n",
this->grf_used, max_grf);
} else {
- this->virtual_grf_count = this->grf_used;
+ this->alloc.count = this->grf_used;
}
}
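
A sketch of the trivial mapping with made-up numbers:

/* Hypothetical: first_non_payload_grf == 3, reg_width == 2 and
 * alloc.sizes == [2, 1, 3].  Then hw_reg_mapping == [4, 6, 7, 10]
 * (4 == ALIGN(3, 2)), VGRF i lands at hw_reg_mapping[i], and
 * grf_used == hw_reg_mapping[alloc.count] == 10.
 */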
* live between the start of the program and our last use of the payload
* node.
*/
- for (int j = 0; j < this->virtual_grf_count; j++) {
+ for (unsigned j = 0; j < this->alloc.count; j++) {
/* Note that we use a <= comparison, unlike virtual_grf_interferes(),
* in order to not have to worry about the uniform issue described in
* calculate_live_intervals().
* that are used as conflicting with all virtual GRFs.
*/
if (mrf_used[i]) {
- for (int j = 0; j < this->virtual_grf_count; j++) {
+ for (unsigned j = 0; j < this->alloc.count; j++) {
ra_add_node_interference(g, first_mrf_node + i, j);
}
}
* for reg_width == 2.
*/
int reg_width = dispatch_width / 8;
- int hw_reg_mapping[this->virtual_grf_count];
+ unsigned hw_reg_mapping[this->alloc.count];
int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width);
int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */
calculate_live_intervals();
- int node_count = this->virtual_grf_count;
+ int node_count = this->alloc.count;
int first_payload_node = node_count;
node_count += payload_node_count;
int first_mrf_hack_node = node_count;
struct ra_graph *g =
ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count);
- for (int i = 0; i < this->virtual_grf_count; i++) {
- unsigned size = this->virtual_grf_sizes[i];
+ for (unsigned i = 0; i < this->alloc.count; i++) {
+ unsigned size = this->alloc.sizes[i];
int c;
assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) &&
ra_set_node_class(g, i, c);
- for (int j = 0; j < i; j++) {
+ for (unsigned j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
* highest register that works.
*/
if (inst->eot) {
- int size = virtual_grf_sizes[inst->src[0].reg];
+ int size = alloc.sizes[inst->src[0].reg];
int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
ra_set_node_reg(g, inst->src[0].reg, reg);
break;
* numbers.
*/
this->grf_used = payload_node_count;
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
int reg = ra_get_node_reg(g, i);
hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg];
this->grf_used = MAX2(this->grf_used,
- hw_reg_mapping[i] + this->virtual_grf_sizes[i]);
+ hw_reg_mapping[i] + this->alloc.sizes[i]);
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
}
}
- this->virtual_grf_count = this->grf_used;
+ this->alloc.count = this->grf_used;
ralloc_free(g);
fs_visitor::choose_spill_reg(struct ra_graph *g)
{
float loop_scale = 1.0;
- float spill_costs[this->virtual_grf_count];
- bool no_spill[this->virtual_grf_count];
+ float spill_costs[this->alloc.count];
+ bool no_spill[this->alloc.count];
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
spill_costs[i] = 0.0;
no_spill[i] = false;
}
}
}
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
void
fs_visitor::spill_reg(int spill_reg)
{
- int size = virtual_grf_sizes[spill_reg];
+ int size = alloc.sizes[spill_reg];
unsigned int spill_offset = last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
int spill_base_mrf = dispatch_width > 8 ? 13 : 14;
int regs_read = inst->regs_read(this, i);
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->src[i].reg_offset);
- fs_reg unspill_dst(GRF, virtual_grf_alloc(regs_read));
+ fs_reg unspill_dst(GRF, alloc.allocate(regs_read));
inst->src[i].reg = unspill_dst.reg;
inst->src[i].reg_offset = 0;
inst->dst.reg == spill_reg) {
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->dst.reg_offset);
- fs_reg spill_src(GRF, virtual_grf_alloc(inst->regs_written));
+ fs_reg spill_src(GRF, alloc.allocate(inst->regs_written));
inst->dst.reg = spill_src.reg;
inst->dst.reg_offset = 0;
static bool
is_copy_payload(const fs_visitor *v, const fs_inst *inst)
{
- if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written)
+ if (v->alloc.sizes[inst->src[0].reg] != inst->regs_written)
return false;
fs_reg reg = inst->src[0];
return false;
}
- if (v->virtual_grf_sizes[inst->src[0].reg] >
- v->virtual_grf_sizes[inst->dst.reg])
+ if (v->alloc.sizes[inst->src[0].reg] >
+ v->alloc.sizes[inst->dst.reg])
return false;
if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
if (reg_from != inst->src[0].reg) {
reg_from = inst->src[0].reg;
- src_size = virtual_grf_sizes[inst->src[0].reg];
+ src_size = alloc.sizes[inst->src[0].reg];
assert(src_size <= MAX_VGRF_SIZE);
assert(inst->src[0].width % 8 == 0);
/* If last_rhs_inst wrote a different number of components than our LHS,
* we can't safely rewrite it.
*/
- if (virtual_grf_sizes[dst.reg] != modify->regs_written)
+ if (alloc.sizes[dst.reg] != modify->regs_written)
return false;
/* Success! Rewrite the instruction. */
* this weirdness around to the expected layout.
*/
orig_dst = dst;
- dst = fs_reg(GRF, virtual_grf_alloc(8), orig_dst.type);
+ dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type);
}
enum opcode opcode;
* need to offset the Sampler State Pointer in the header.
*/
header_present = true;
- sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
length++;
}
else
mlen = length * reg_width;
- fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_F);
emit(LOAD_PAYLOAD(src_payload, sources, length));
* tracking to get the scaling factor.
*/
if (brw->gen < 6 && is_rect) {
- fs_reg dst = fs_reg(GRF, virtual_grf_alloc(coord_components));
+ fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components));
fs_reg src = coordinate;
coordinate = dst;
fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
{
int reg_width = dispatch_width / 8;
- fs_reg payload = fs_reg(GRF, virtual_grf_alloc(components * reg_width),
+ fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width),
BRW_REGISTER_TYPE_F);
fs_reg dest = vgrf(glsl_type::uvec4_type);
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components);
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4);
- sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Initialize the sample mask in the message header. */
emit(MOV(sources[0], fs_reg(0u)))
->force_writemask_all = true;
}
int mlen = 1 + (length - 1) * reg_width;
- fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD);
emit(LOAD_PAYLOAD(src_payload, sources, length));
fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
- sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* Initialize the sample mask in the message header. */
emit(MOV(sources[0], fs_reg(0u)))
->force_writemask_all = true;
emit(MOV(sources[1], offset));
int mlen = 1 + reg_width;
- fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
+ fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
BRW_REGISTER_TYPE_UD);
fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2));
int len = 0;
for (unsigned i = 0; i < 4; ++i) {
if (colors_enabled & (1 << i)) {
- dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8),
+ dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
color.type, color.width);
inst = emit(MOV(dst[len], offset(color, i)));
inst->saturate = key->clamp_fragment_color;
*/
for (unsigned i = 0; i < 4; ++i) {
if (colors_enabled & (1 << i)) {
- dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+ dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
inst = emit(MOV(dst[i], half(offset(color, i), 0)));
inst->saturate = key->clamp_fragment_color;
- dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+ dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
inst->saturate = key->clamp_fragment_color;
inst->force_sechalf = true;
length += 2;
if (payload.aa_dest_stencil_reg) {
- sources[length] = fs_reg(GRF, virtual_grf_alloc(1));
+ sources[length] = fs_reg(GRF, alloc.allocate(1));
emit(MOV(sources[length],
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
length++;
/* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
* it's unsigned single words, one vgrf is always 16-wide.
*/
- sources[length] = fs_reg(GRF, virtual_grf_alloc(1),
+ sources[length] = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UW, 16);
emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
length++;
length += setup_color_payload(sources + length, this->outputs[0], 0);
} else if (color1.file == BAD_FILE) {
if (src0_alpha.file != BAD_FILE) {
- sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size),
+ sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
src0_alpha.type, src0_alpha.width);
fs_inst *inst = emit(MOV(sources[length], src0_alpha));
inst->saturate = key->clamp_fragment_color;
/* Send from the GRF */
fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
load = emit(LOAD_PAYLOAD(payload, sources, length));
- payload.reg = virtual_grf_alloc(load->regs_written);
+ payload.reg = alloc.allocate(load->regs_written);
payload.width = dispatch_width;
load->dst = payload;
write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
* send to terminate the shader. */
if (vue_map->slots_valid == 0) {
- fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD))));
inst->force_writemask_all = true;
break;
}
- zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
+ zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
emit(MOV(zero, fs_reg(0u)));
sources[length++] = zero;
* temp register and use that for the payload.
*/
for (int i = 0; i < 4; i++) {
- reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type);
+ reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
src = offset(this->outputs[varying], i);
fs_inst *inst = emit(MOV(reg, src));
inst->saturate = true;
emit_shader_time_end();
fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
- fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1),
+ fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
BRW_REGISTER_TYPE_F);
/* We need WE_all on the MOV for the message header (the URB handles)
* so do a MOV to a dummy register and set force_writemask_all on the
* MOV. LOAD_PAYLOAD will preserve that.
*/
- fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1),
+ fs_reg dummy = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UD);
fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD))));
this->current_annotation = NULL;
this->base_ir = NULL;
- this->virtual_grf_sizes = NULL;
- this->virtual_grf_count = 0;
- this->virtual_grf_array_size = 0;
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals = NULL;
--- /dev/null
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_IR_ALLOCATOR_H
+#define BRW_IR_ALLOCATOR_H
+
+#include "main/macros.h"
+
+namespace brw {
+ /**
+ * Simple allocator used to keep track of virtual GRFs.
+ */
+ class simple_allocator {
+ public:
+ simple_allocator() :
+ sizes(NULL), offsets(NULL), count(0), total_size(0), capacity(0)
+ {
+ }
+
+ ~simple_allocator()
+ {
+ free(offsets);
+ free(sizes);
+ }
+
+ unsigned
+ allocate(unsigned size)
+ {
+ if (capacity <= count) {
+ capacity = MAX2(16, capacity * 2);
+ sizes = (unsigned *)realloc(sizes, capacity * sizeof(unsigned));
+ offsets = (unsigned *)realloc(offsets, capacity * sizeof(unsigned));
+ }
+
+ sizes[count] = size;
+ offsets[count] = total_size;
+ total_size += size;
+
+ return count++;
+ }
+
+ /**
+ * Array of sizes for each allocation. The allocation unit is up to the
+ * back-end, but it's expected to be one scalar value in the FS back-end
+ * and one vec4 in the VEC4 back-end.
+ */
+ unsigned *sizes;
+
+ /**
+ * Array of offsets from the start of the VGRF space in allocation
+ * units.
+ */
+ unsigned *offsets;
+
+ /** Total number of VGRFs allocated. */
+ unsigned count;
+
+ /** Cumulative size in allocation units. */
+ unsigned total_size;
+
+ private:
+ unsigned capacity;
+ };
+}
+
+#endif
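
For reference, a minimal usage sketch of the new class (hypothetical standalone snippet; assumes Mesa's include paths so that brw_ir_allocator.h resolves):

#include <assert.h>
#include "brw_ir_allocator.h"

static void
allocator_example(void)
{
   brw::simple_allocator alloc;

   unsigned a = alloc.allocate(2);   /* VGRF 0 occupies units 0-1 */
   unsigned b = alloc.allocate(3);   /* VGRF 1 occupies units 2-4 */

   assert(a == 0 && b == 1);
   assert(alloc.count == 2);
   assert(alloc.sizes[a] == 2 && alloc.sizes[b] == 3);
   assert(alloc.offsets[b] == 2);    /* cumulative sum of earlier sizes */
   assert(alloc.total_size == 5);
}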
if (inst->dst.file == GRF) {
if (remaining_grf_uses[inst->dst.reg] == 1)
- benefit += v->virtual_grf_sizes[inst->dst.reg];
+ benefit += v->alloc.sizes[inst->dst.reg];
if (!grf_active[inst->dst.reg])
- benefit -= v->virtual_grf_sizes[inst->dst.reg];
+ benefit -= v->alloc.sizes[inst->dst.reg];
}
for (int i = 0; i < inst->sources; i++) {
continue;
if (remaining_grf_uses[inst->src[i].reg] == 1)
- benefit += v->virtual_grf_sizes[inst->src[i].reg];
+ benefit += v->alloc.sizes[inst->src[i].reg];
if (!grf_active[inst->src[i].reg])
- benefit -= v->virtual_grf_sizes[inst->src[i].reg];
+ benefit -= v->alloc.sizes[inst->src[i].reg];
}
return benefit;
if (mode == SCHEDULE_POST)
grf_count = grf_used;
else
- grf_count = virtual_grf_count;
+ grf_count = alloc.count;
fs_instruction_scheduler sched(this, grf_count, mode);
sched.run(cfg);
#include "main/compiler.h"
#include "glsl/ir.h"
+#ifdef __cplusplus
+#include "brw_ir_allocator.h"
+#endif
+
#pragma once
enum PACKED register_file {
gl_shader_stage stage;
+ brw::simple_allocator alloc;
+
virtual void dump_instruction(backend_instruction *inst) = 0;
virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0;
virtual void dump_instructions();
void
vec4_visitor::split_virtual_grfs()
{
- int num_vars = this->virtual_grf_count;
+ int num_vars = this->alloc.count;
int new_virtual_grf[num_vars];
bool split_grf[num_vars];
/* Try to split anything > 0 sized. */
for (int i = 0; i < num_vars; i++) {
- split_grf[i] = this->virtual_grf_sizes[i] != 1;
+ split_grf[i] = this->alloc.sizes[i] != 1;
}
/* Check that the instructions are compatible with the registers we're trying
if (!split_grf[i])
continue;
- new_virtual_grf[i] = virtual_grf_alloc(1);
- for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
- int reg = virtual_grf_alloc(1);
+ new_virtual_grf[i] = alloc.allocate(1);
+ for (unsigned j = 2; j < this->alloc.sizes[i]; j++) {
+ unsigned reg = alloc.allocate(1);
assert(reg == new_virtual_grf[i] + j - 1);
(void) reg;
}
- this->virtual_grf_sizes[i] = 1;
+ this->alloc.sizes[i] = 1;
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
/* Don't print .0; and only VGRFs have reg_offsets and sizes */
if (inst->src[i].reg_offset != 0 &&
inst->src[i].file == GRF &&
- virtual_grf_sizes[inst->src[i].reg] != 1)
+ alloc.sizes[inst->src[i].reg] != 1)
fprintf(file, ".%d", inst->src[i].reg_offset);
if (inst->src[i].file != IMM) {
if (false) {
/* Debug of register spilling: Go spill everything. */
- const int grf_count = virtual_grf_count;
- float spill_costs[virtual_grf_count];
- bool no_spill[virtual_grf_count];
+ const int grf_count = alloc.count;
+ float spill_costs[alloc.count];
+ bool no_spill[alloc.count];
evaluate_spill_costs(spill_costs, no_spill);
for (int i = 0; i < grf_count; i++) {
if (no_spill[i])
const void *base_ir;
const char *current_annotation;
- int *virtual_grf_sizes;
- int virtual_grf_count;
- int virtual_grf_array_size;
int first_non_payload_grf;
unsigned int max_grf;
int *virtual_grf_start;
brw::vec4_live_variables *live_intervals;
dst_reg userplane[MAX_CLIP_PLANES];
- /**
- * This is the size to be used for an array with an element per
- * reg_offset
- */
- int virtual_grf_reg_count;
- /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
- int *virtual_grf_reg_map;
-
dst_reg *variable_storage(ir_variable *var);
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
bool run(void);
void fail(const char *msg, ...);
- int virtual_grf_alloc(int size);
void setup_uniform_clipplane_values();
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
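
The deleted virtual_grf_reg_count/virtual_grf_reg_map pair is subsumed by the allocator's total_size and offsets fields, which the copy-propagation hunk below indexes with. A sketch of the correspondence (hypothetical sizes):

/* For alloc.sizes == [1, 2, 3]:
 *   alloc.offsets    == [0, 1, 3]   (formerly virtual_grf_reg_map)
 *   alloc.total_size == 6           (formerly virtual_grf_reg_count)
 * so entries[] below still provides one copy_entry per reg_offset.
 */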
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
bool progress = false;
- struct copy_entry entries[virtual_grf_reg_count];
+ struct copy_entry entries[alloc.total_size];
memset(&entries, 0, sizeof(entries));
inst->src[i].reladdr)
continue;
- int reg = (virtual_grf_reg_map[inst->src[i].reg] +
+ int reg = (alloc.offsets[inst->src[i].reg] +
inst->src[i].reg_offset);
/* Find the regs that each swizzle component came from.
/* Track available source registers. */
if (inst->dst.file == GRF) {
const int reg =
- virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
+ alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;
/* Update our destination's current channel values. For a direct copy,
* the value is the newly propagated source. Otherwise, we don't know
if (inst->dst.reladdr)
memset(&entries, 0, sizeof(entries));
else {
- for (int i = 0; i < virtual_grf_reg_count; i++) {
+ for (unsigned i = 0; i < alloc.total_size; i++) {
for (int j = 0; j < 4; j++) {
if (is_channel_updated(inst, entries[i].value, j)){
entries[i].value[j] = NULL;
* more -- a sure sign they'll fail operands_match().
*/
if (src->file == GRF) {
- assert((src->reg * 4 + 3) < (virtual_grf_count * 4));
+ assert((unsigned)(src->reg * 4 + 3) < (alloc.count * 4));
int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
virtual_grf_end[src->reg * 4 + 1]),
* variable, and thus qualify for being in def[].
*/
if (inst->dst.file == GRF &&
- v->virtual_grf_sizes[inst->dst.reg] == 1 &&
+ v->alloc.sizes[inst->dst.reg] == 1 &&
!inst->predicate) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
{
mem_ctx = ralloc_context(NULL);
- num_vars = v->virtual_grf_count * 4;
+ num_vars = v->alloc.count * 4;
block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
bitset_words = BITSET_WORDS(num_vars);
if (this->live_intervals)
return;
- int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4);
- int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4);
+ int *start = ralloc_array(mem_ctx, int, this->alloc.count * 4);
+ int *end = ralloc_array(mem_ctx, int, this->alloc.count * 4);
ralloc_free(this->virtual_grf_start);
ralloc_free(this->virtual_grf_end);
this->virtual_grf_start = start;
this->virtual_grf_end = end;
- for (int i = 0; i < this->virtual_grf_count * 4; i++) {
+ for (unsigned i = 0; i < this->alloc.count * 4; i++) {
start[i] = MAX_INSTRUCTION;
end[i] = -1;
}
bool
vec4_visitor::reg_allocate_trivial()
{
- unsigned int hw_reg_mapping[this->virtual_grf_count];
- bool virtual_grf_used[this->virtual_grf_count];
- int i;
+ unsigned int hw_reg_mapping[this->alloc.count];
+ bool virtual_grf_used[this->alloc.count];
int next;
/* Calculate which virtual GRFs are actually in use after whatever
* optimization passes have occurred.
*/
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
virtual_grf_used[i] = false;
}
if (inst->dst.file == GRF)
virtual_grf_used[inst->dst.reg] = true;
- for (int i = 0; i < 3; i++) {
+ for (unsigned i = 0; i < 3; i++) {
if (inst->src[i].file == GRF)
virtual_grf_used[inst->src[i].reg] = true;
}
}
hw_reg_mapping[0] = this->first_non_payload_grf;
- next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
- for (i = 1; i < this->virtual_grf_count; i++) {
+ next = hw_reg_mapping[0] + this->alloc.sizes[0];
+ for (unsigned i = 1; i < this->alloc.count; i++) {
if (virtual_grf_used[i]) {
hw_reg_mapping[i] = next;
- next += this->virtual_grf_sizes[i];
+ next += this->alloc.sizes[i];
}
}
prog_data->total_grf = next;
vec4_visitor::reg_allocate()
{
struct intel_screen *screen = brw->intelScreen;
- unsigned int hw_reg_mapping[virtual_grf_count];
+ unsigned int hw_reg_mapping[alloc.count];
int payload_reg_count = this->first_non_payload_grf;
/* Using the trivial allocator can be useful in debugging undefined
calculate_live_intervals();
- int node_count = virtual_grf_count;
+ int node_count = alloc.count;
int first_payload_node = node_count;
node_count += payload_reg_count;
struct ra_graph *g =
ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count);
- for (int i = 0; i < virtual_grf_count; i++) {
- int size = this->virtual_grf_sizes[i];
+ for (unsigned i = 0; i < alloc.count; i++) {
+ int size = this->alloc.sizes[i];
assert(size >= 1 && size <= 2 &&
"Register allocation relies on split_virtual_grfs().");
ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]);
- for (int j = 0; j < i; j++) {
+ for (unsigned j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
* numbers.
*/
prog_data->total_grf = payload_reg_count;
- for (int i = 0; i < virtual_grf_count; i++) {
+ for (unsigned i = 0; i < alloc.count; i++) {
int reg = ra_get_node_reg(g, i);
hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg];
prog_data->total_grf = MAX2(prog_data->total_grf,
- hw_reg_mapping[i] + virtual_grf_sizes[i]);
+ hw_reg_mapping[i] + alloc.sizes[i]);
}
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
{
float loop_scale = 1.0;
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
spill_costs[i] = 0.0;
- no_spill[i] = virtual_grf_sizes[i] != 1;
+ no_spill[i] = alloc.sizes[i] != 1;
}
/* Calculate costs for spilling nodes. Call it a cost of 1 per
int
vec4_visitor::choose_spill_reg(struct ra_graph *g)
{
- float spill_costs[this->virtual_grf_count];
- bool no_spill[this->virtual_grf_count];
+ float spill_costs[this->alloc.count];
+ bool no_spill[this->alloc.count];
evaluate_spill_costs(spill_costs, no_spill);
- for (int i = 0; i < this->virtual_grf_count; i++) {
+ for (unsigned i = 0; i < this->alloc.count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
void
vec4_visitor::spill_reg(int spill_reg_nr)
{
- assert(virtual_grf_sizes[spill_reg_nr] == 1);
+ assert(alloc.sizes[spill_reg_nr] == 1);
unsigned int spill_offset = c->last_scratch++;
/* Generate spill/unspill instructions for the objects being spilled. */
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
src_reg spill_reg = inst->src[i];
- inst->src[i].reg = virtual_grf_alloc(1);
+ inst->src[i].reg = alloc.allocate(1);
dst_reg temp = dst_reg(inst->src[i]);
emit_scratch_read(block, inst, temp, spill_reg, spill_offset);
return 0;
}
-int
-vec4_visitor::virtual_grf_alloc(int size)
-{
- if (virtual_grf_array_size <= virtual_grf_count) {
- if (virtual_grf_array_size == 0)
- virtual_grf_array_size = 16;
- else
- virtual_grf_array_size *= 2;
- virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
- virtual_grf_array_size);
- virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
- virtual_grf_array_size);
- }
- virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
- virtual_grf_reg_count += size;
- virtual_grf_sizes[virtual_grf_count] = size;
- return virtual_grf_count++;
-}
-
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type));
+ this->reg = v->alloc.allocate(type_size(type));
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type) * size);
+ this->reg = v->alloc.allocate(type_size(type) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
init();
this->file = GRF;
- this->reg = v->virtual_grf_alloc(type_size(type));
+ this->reg = v->alloc.allocate(type_size(type));
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
void
vec4_visitor::move_grf_array_access_to_scratch()
{
- int scratch_loc[this->virtual_grf_count];
+ int scratch_loc[this->alloc.count];
memset(scratch_loc, -1, sizeof(scratch_loc));
/* First, calculate the set of virtual GRFs that need to be punted
if (inst->dst.file == GRF && inst->dst.reladdr &&
scratch_loc[inst->dst.reg] == -1) {
scratch_loc[inst->dst.reg] = c->last_scratch;
- c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
+ c->last_scratch += this->alloc.sizes[inst->dst.reg];
}
for (int i = 0 ; i < 3; i++) {
if (src->file == GRF && src->reladdr &&
scratch_loc[src->reg] == -1) {
scratch_loc[src->reg] = c->last_scratch;
- c->last_scratch += this->virtual_grf_sizes[src->reg];
+ c->last_scratch += this->alloc.sizes[src->reg];
}
}
}
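
A hedged example of the scratch bookkeeping (sizes invented):

/* Hypothetically, two reladdr-accessed VGRFs with alloc.sizes 2 and 3 get
 * scratch_loc 0 and 2 respectively, leaving c->last_scratch at 5.
 */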
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
- this->virtual_grf_sizes = NULL;
- this->virtual_grf_count = 0;
- this->virtual_grf_reg_map = NULL;
- this->virtual_grf_reg_count = 0;
- this->virtual_grf_array_size = 0;
this->live_intervals = NULL;
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;