foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->dst.file == VGRF) {
int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
- for (int j = 1; j < inst->regs_written; j++)
+ for (unsigned j = 1; j < regs_written(inst); j++)
split_points[reg + j] = false;
}
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE;
- for (int j = 1; j < inst->regs_read(i); j++)
+ for (unsigned j = 1; j < regs_read(inst, i); j++)
split_points[reg + j] = false;
}
}
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
- remap[dst] = alloc.allocate(inst->regs_written);
+ remap[dst] = alloc.allocate(regs_written(inst));
inst->dst.nr = remap[dst];
progress = true;
}
* regs_left bitset keeps track of the registers we haven't yet found a
* generating instruction for.
*/
- unsigned regs_left = (1 << inst->regs_read(0)) - 1;
+ unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
/* Found all generating instructions of our MRF's source value, so it
* should be safe to rewrite them to point to the MRF directly.
*/
- regs_left = (1 << inst->regs_read(0)) - 1;
+ regs_left = (1 << regs_read(inst, 0)) - 1;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
fs_inst *inst)
{
- int write_len = inst->regs_written;
+ int write_len = regs_written(inst);
int first_write_grf = inst->dst.nr;
bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
* dependency has more latency than a MOV.
*/
if (scan_inst->dst.file == VGRF) {
- for (int i = 0; i < scan_inst->regs_written; i++) {
+ for (unsigned i = 0; i < regs_written(scan_inst); i++) {
int reg = scan_inst->dst.nr + i;
if (reg >= first_write_grf &&
void
fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
{
- int write_len = inst->regs_written;
+ int write_len = regs_written(inst);
int first_write_grf = inst->dst.nr;
bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
assert(write_len < (int)sizeof(needs_dep) - 1);
/* Send from the GRF */
fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
- payload.nr = bld.shader->alloc.allocate(load->regs_written);
+ payload.nr = bld.shader->alloc.allocate(regs_written(load));
load->dst = payload;
inst->src[0] = payload;
}
inst->opcode = FS_OPCODE_FB_WRITE;
- inst->mlen = load->regs_written;
+ inst->mlen = regs_written(load);
inst->header_size = header_size;
}
unsigned grad_components)
{
const gen_device_info *devinfo = bld.shader->devinfo;
- int reg_width = bld.dispatch_width() / 8;
+ unsigned reg_width = bld.dispatch_width() / 8;
unsigned header_size = 0, length = 0;
fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
* and we have an explicit header, we need to set up the sampler
* writemask. It's reversed from normal: 1 means "don't write".
*/
- if (!inst->eot && inst->regs_written != 4 * reg_width) {
- assert((inst->regs_written % reg_width) == 0);
- unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf;
+ if (!inst->eot && regs_written(inst) != 4 * reg_width) {
+ assert(regs_written(inst) % reg_width == 0);
+ unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
inst->offset |= mask << 12;
}
}
static void
create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
{
- int written = inst->regs_written;
- int dst_width =
+ unsigned written = regs_written(inst);
+ unsigned dst_width =
DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
fs_inst *copy;
copy->force_writemask_all = inst->force_writemask_all;
copy->src[0].negate = negate;
}
- assert(copy->regs_written == written);
+ assert(regs_written(copy) == written);
}
bool
if (no_existing_temp && !entry->generator->dst.is_null()) {
const fs_builder ibld = fs_builder(this, block, entry->generator)
.at(block, entry->generator->next);
- int written = entry->generator->regs_written;
+ int written = regs_written(entry->generator);
entry->tmp = fs_reg(VGRF, alloc.allocate(written),
entry->generator->dst.type);
if (inst->dst.file == VGRF && !inst->has_side_effects()) {
bool result_live = false;
- if (inst->regs_written == 1) {
+ if (regs_written(inst) == 1) {
int var = live_intervals->var_from_reg(inst->dst);
result_live = BITSET_TEST(live, var);
} else {
int var = live_intervals->var_from_reg(inst->dst);
- for (int i = 0; i < inst->regs_written; i++) {
+ for (unsigned i = 0; i < regs_written(inst); i++) {
result_live = result_live || BITSET_TEST(live, var + i);
}
}
if (inst->dst.file == VGRF) {
if (!inst->is_partial_write()) {
int var = live_intervals->var_from_reg(inst->dst);
- for (int i = 0; i < inst->regs_written; i++) {
+ for (unsigned i = 0; i < regs_written(inst); i++) {
BITSET_CLEAR(live, var + i);
}
}
if (inst->src[i].file == VGRF) {
int var = live_intervals->var_from_reg(inst->src[i]);
- for (int j = 0; j < inst->regs_read(i); j++) {
+ for (unsigned j = 0; j < regs_read(inst, i); j++) {
BITSET_SET(live, var + j);
}
}
if (reg.file != VGRF)
continue;
- for (int j = 0; j < inst->regs_read(i); j++) {
+ for (unsigned j = 0; j < regs_read(inst, i); j++) {
setup_one_read(bd, inst, ip, reg);
reg.offset += REG_SIZE;
}
/* Set def[] for this instruction */
if (inst->dst.file == VGRF) {
fs_reg reg = inst->dst;
- for (int j = 0; j < inst->regs_written; j++) {
+ for (unsigned j = 0; j < regs_written(inst); j++) {
setup_one_write(bd, inst, ip, reg);
reg.offset += REG_SIZE;
}
if (node_nr >= payload_node_count)
continue;
- for (int j = 0; j < inst->regs_read(i); j++) {
+ for (unsigned j = 0; j < regs_read(inst, i); j++) {
payload_last_use_ip[node_nr + j] = use_ip;
- assert(node_nr + j < payload_node_count);
+ assert(node_nr + j < unsigned(payload_node_count));
}
}
}
for (unsigned int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF &&
inst->src[i].nr == spill_reg) {
- int regs_read = inst->regs_read(i);
+ int count = regs_read(inst, i);
int subset_spill_offset = spill_offset +
ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
- fs_reg unspill_dst(VGRF, alloc.allocate(regs_read));
+ fs_reg unspill_dst(VGRF, alloc.allocate(count));
inst->src[i].nr = unspill_dst.nr;
inst->src[i].offset %= REG_SIZE;
* hardware) up to the maximum supported block size.
*/
const unsigned width =
- MIN2(32, 1u << (ffs(MAX2(1, regs_read) * 8) - 1));
+ MIN2(32, 1u << (ffs(MAX2(1, count) * 8) - 1));
/* Set exec_all() on unspill messages under the (rather
* pessimistic) assumption that there is no one-to-one
* unspill destination is a block-local temporary.
*/
emit_unspill(ibld.exec_all().group(width, 0),
- unspill_dst, subset_spill_offset, regs_read);
+ unspill_dst, subset_spill_offset, count);
}
}
inst->dst.nr == spill_reg) {
int subset_spill_offset = spill_offset +
ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
- fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written));
+ fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst)));
inst->dst.nr = spill_src.nr;
inst->dst.offset %= REG_SIZE;
const fs_builder ubld = ibld.exec_all(!per_channel).group(width, 0);
/* If our write is going to affect just part of the
- * inst->regs_written(), then we need to unspill the destination
- * since we write back out all of the regs_written(). If the
- * original instruction had force_writemask_all set and is not a
- * partial write, there should be no need for the unspill since the
+ * regs_written(inst), then we need to unspill the destination since
+ * we write back out all of the regs_written(). If the original
+ * instruction had force_writemask_all set and is not a partial
+ * write, there should be no need for the unspill since the
* instruction will be overwriting the whole destination in any case.
*/
if (inst->is_partial_write() ||
(!inst->force_writemask_all && !per_channel))
emit_unspill(ubld, spill_src, subset_spill_offset,
- inst->regs_written);
+ regs_written(inst));
emit_spill(ubld.at(block, inst->next), spill_src,
- subset_spill_offset, inst->regs_written);
+ subset_spill_offset, regs_written(inst));
}
}
dst_reg_offset[i] = i;
}
mov[0] = inst;
- channels_remaining -= inst->regs_written;
+ channels_remaining -= regs_written(inst);
} else {
const int offset = inst->src[0].offset / REG_SIZE;
if (mov[offset]) {
if (inst->regs_written > 1)
dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
mov[offset] = inst;
- channels_remaining -= inst->regs_written;
+ channels_remaining -= regs_written(inst);
}
if (channels_remaining)
{
foreach_block_and_inst (block, fs_inst, inst, cfg) {
if (inst->dst.file == VGRF) {
- fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <=
+ fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
alloc.sizes[inst->dst.nr]);
}
for (unsigned i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
- fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <=
- (int)alloc.sizes[inst->src[i].nr]);
+ fsv_assert(inst->src[i].offset / REG_SIZE + regs_read(inst, i) <=
+ alloc.sizes[inst->src[i].nr]);
}
}
}
return inst;
}
+/**
+ * Return the number of dataflow registers written by the instruction (either
+ * fully or partially) counted from 'floor(reg_offset(inst->dst) /
+ * register_size)'. The somewhat arbitrary register size unit is 4B for the
+ * UNIFORM and IMM files and 32B for all other files.
+ */
+inline unsigned
+regs_written(const fs_inst *inst)
+{
+ /* XXX - Take into account register-misaligned offsets correctly. */
+ /* Currently a thin forwarding wrapper around the fs_inst::regs_written
+  * member, introduced as a free function so call sites can migrate before
+  * the member itself is reworked/removed.
+  */
+ return inst->regs_written;
+}
+
+/**
+ * Return the number of dataflow registers read by the instruction (either
+ * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
+ * register_size)'. The somewhat arbitrary register size unit is 4B for the
+ * UNIFORM and IMM files and 32B for all other files.
+ *
+ * \param i Index of the source operand whose read footprint is queried.
+ */
+inline unsigned
+regs_read(const fs_inst *inst, unsigned i)
+{
+ /* XXX - Take into account register-misaligned offsets correctly. */
+ /* Currently a thin forwarding wrapper around fs_inst::regs_read(i),
+  * introduced as a free function so call sites can migrate before the
+  * member method is reworked/removed.
+  */
+ return inst->regs_read(i);
+}
+
#endif
if (inst->src[i].nr >= hw_reg_count)
continue;
- for (int j = 0; j < inst->regs_read(i); j++)
+ for (unsigned j = 0; j < regs_read(inst, i); j++)
hw_reads_remaining[inst->src[i].nr + j]++;
}
}
reads_remaining[inst->src[i].nr]--;
} else if (inst->src[i].file == FIXED_GRF &&
inst->src[i].nr < hw_reg_count) {
- for (int off = 0; off < inst->regs_read(i); off++)
+ for (unsigned off = 0; off < regs_read(inst, i); off++)
hw_reads_remaining[inst->src[i].nr + off]--;
}
}
if (inst->src[i].file == FIXED_GRF &&
inst->src[i].nr < hw_reg_count) {
- for (int off = 0; off < inst->regs_read(i); off++) {
+ for (unsigned off = 0; off < regs_read(inst, i); off++) {
int reg = inst->src[i].nr + off;
if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
hw_reads_remaining[reg] == 1) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_read(i); r++)
+ for (unsigned r = 0; r < regs_read(inst, i); r++)
add_dep(last_grf_write[inst->src[i].nr + r], n);
} else {
- for (int r = 0; r < inst->regs_read(i); r++) {
+ for (unsigned r = 0; r < regs_read(inst, i); r++) {
add_dep(last_grf_write[inst->src[i].nr * 16 +
inst->src[i].offset / REG_SIZE + r], n);
}
}
} else if (inst->src[i].file == FIXED_GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_read(i); r++)
+ for (unsigned r = 0; r < regs_read(inst, i); r++)
add_dep(last_grf_write[inst->src[i].nr + r], n);
} else {
add_dep(last_fixed_grf_write, n);
/* write-after-write deps. */
if (inst->dst.file == VGRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_written; r++) {
+ for (unsigned r = 0; r < regs_written(inst); r++) {
add_dep(last_grf_write[inst->dst.nr + r], n);
last_grf_write[inst->dst.nr + r] = n;
}
} else {
- for (int r = 0; r < inst->regs_written; r++) {
+ for (unsigned r = 0; r < regs_written(inst); r++) {
add_dep(last_grf_write[inst->dst.nr * 16 +
inst->dst.offset / REG_SIZE + r], n);
last_grf_write[inst->dst.nr * 16 +
}
} else if (inst->dst.file == FIXED_GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_written; r++)
+ for (unsigned r = 0; r < regs_written(inst); r++)
last_grf_write[inst->dst.nr + r] = n;
} else {
last_fixed_grf_write = n;
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_read(i); r++)
+ for (unsigned r = 0; r < regs_read(inst, i); r++)
add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
} else {
- for (int r = 0; r < inst->regs_read(i); r++) {
+ for (unsigned r = 0; r < regs_read(inst, i); r++) {
add_dep(n, last_grf_write[inst->src[i].nr * 16 +
inst->src[i].offset / REG_SIZE + r], 0);
}
}
} else if (inst->src[i].file == FIXED_GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_read(i); r++)
+ for (unsigned r = 0; r < regs_read(inst, i); r++)
add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
} else {
add_dep(n, last_fixed_grf_write, 0);
*/
if (inst->dst.file == VGRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_written; r++)
+ for (unsigned r = 0; r < regs_written(inst); r++)
last_grf_write[inst->dst.nr + r] = n;
} else {
- for (int r = 0; r < inst->regs_written; r++) {
+ for (unsigned r = 0; r < regs_written(inst); r++) {
last_grf_write[inst->dst.nr * 16 +
inst->dst.offset / REG_SIZE + r] = n;
}
}
} else if (inst->dst.file == FIXED_GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < inst->regs_written; r++)
+ for (unsigned r = 0; r < regs_written(inst); r++)
last_grf_write[inst->dst.nr + r] = n;
} else {
last_fixed_grf_write = n;