case FIXED_GRF:
case MRF:
case ATTR:
- this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
- REG_SIZE);
+ this->size_written = dst.component_size(exec_size);
break;
case BAD_FILE:
- this->regs_written = 0;
+ this->size_written = 0;
break;
case IMM:
case UNIFORM:
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
vec4_result, surf_index, vec4_offset);
- inst->regs_written = 4 * bld.dispatch_width() / 8;
+ inst->size_written = 4 * bld.dispatch_width() / 8 * REG_SIZE;
if (type_sz(dst.type) == 8) {
shuffle_32bit_load_result_to_64bit_data(
bool
fs_inst::overwrites_reg(const fs_reg ®) const
{
- return reg.in_range(dst, regs_written);
+ return reg.in_range(dst, DIV_ROUND_UP(size_written, REG_SIZE));
}
bool
if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0)
return false;
- if (grf_alloc.sizes[reg.nr] != this->regs_written)
+ if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
return false;
for (int i = 0; i < this->sources; i++) {
for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) {
if (i == FB_WRITE_LOGICAL_SRC_COLOR0) {
if (!fb_write->src[i].equals(tex_inst->dst) ||
- fb_write->regs_read(i) != tex_inst->regs_written)
+ fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written)
return false;
} else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) {
if (fb_write->src[i].file != BAD_FILE)
tex_inst->offset |= fb_write->target << 24;
tex_inst->eot = true;
tex_inst->dst = ibld.null_reg_ud();
- tex_inst->regs_written = 0;
+ tex_inst->size_written = 0;
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
/* Marking EOT is sufficient, lower_logical_sends() will notice the EOT
if (depth == 0 &&
inst->dst.file == VGRF &&
- alloc.sizes[inst->dst.nr] == inst->regs_written &&
+ alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
unsigned regs_left = (1 << regs_read(inst, 0)) - 1;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
- if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+ if (regions_overlap(scan_inst->dst, scan_inst->size_written,
inst->src[0], inst->regs_read(0) * REG_SIZE)) {
/* Found the last thing to write our reg we want to turn
* into a compute-to-MRF.
* a time.
*/
if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
- scan_inst->dst.offset / REG_SIZE + scan_inst->regs_written >
+ scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
inst->src[0].offset / REG_SIZE + inst->regs_read(0))
break;
/* Clear the bits for any registers this instruction overwrites. */
regs_left &= ~mask_relative_to(
- inst->src[0], scan_inst->dst, scan_inst->regs_written);
+ inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+ REG_SIZE));
if (!regs_left)
break;
}
if (interfered)
break;
- if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
- inst->dst, inst->regs_written * REG_SIZE)) {
+ if (regions_overlap(scan_inst->dst, scan_inst->size_written,
+ inst->dst, inst->size_written)) {
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
- inst->dst, inst->regs_written * REG_SIZE)) {
+ inst->dst, inst->size_written)) {
/* Found a SEND instruction, which means that there are
* live values in MRFs from base_mrf to base_mrf +
* scan_inst->mlen - 1. Don't go pushing our MRF write up
regs_left = (1 << regs_read(inst, 0)) - 1;
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
- if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
+ if (regions_overlap(scan_inst->dst, scan_inst->size_written,
inst->src[0], inst->regs_read(0) * REG_SIZE)) {
/* Clear the bits for any registers this instruction overwrites. */
regs_left &= ~mask_relative_to(
- inst->src[0], scan_inst->dst, scan_inst->regs_written);
+ inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
+ REG_SIZE));
const unsigned rel_offset = (reg_offset(scan_inst->dst) -
reg_offset(inst->src[0])) / REG_SIZE;
/* Clear the COMPR4 bit if the generating instruction is not
* compressed.
*/
- if (scan_inst->regs_written < 2)
+ if (scan_inst->size_written < 2 * REG_SIZE)
scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
} else {
/* Clear out any MRF move records whose sources got overwritten. */
for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
if (last_mrf_move[i] &&
- regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+ regions_overlap(inst->dst, inst->size_written,
last_mrf_move[i]->src[0],
last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
last_mrf_move[i] = NULL;
* which is the one that is going to limit the overall execution size of
* the instruction due to this rule.
*/
- unsigned reg_count = inst->regs_written;
+ unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
for (unsigned i = 0; i < inst->sources; i++)
reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
*/
if (devinfo->gen < 8) {
for (unsigned i = 0; i < inst->sources; i++) {
- if (inst->regs_written == 2 &&
+ if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
inst->regs_read(i) != 0 && inst->regs_read(i) != 2 &&
!is_uniform(inst->src[i]) &&
!(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
- type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1))
- max_width = MIN2(max_width, inst->exec_size /
- inst->regs_written);
+ type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
+ const unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
+ max_width = MIN2(max_width, inst->exec_size / reg_count);
+ }
}
}
* In this situation we calculate the maximum size of the split
* instructions so they only ever write to a single register.
*/
- if (devinfo->gen < 8 && inst->regs_written > 1 &&
+ if (devinfo->gen < 8 && inst->size_written > REG_SIZE &&
!inst->force_writemask_all) {
- const unsigned channels_per_grf = inst->exec_size / inst->regs_written;
+ const unsigned channels_per_grf = inst->exec_size /
+ DIV_ROUND_UP(inst->size_written, REG_SIZE);
unsigned exec_type_size = 0;
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file != BAD_FILE)
* the results of multiple lowered instructions in order to make sure that
* they end up arranged correctly in the original destination region.
*/
- if (inst->regs_written * REG_SIZE >
- inst->dst.component_size(inst->exec_size))
+ if (inst->size_written > inst->dst.component_size(inst->exec_size))
return true;
/* If the lowered execution size is larger than the original the result of
* group which could cause one of the lowered instructions to overwrite
* the data read from the same source by other lowered instructions.
*/
- if (regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
+ if (regions_overlap(inst->dst, inst->size_written,
inst->src[i], inst->regs_read(i) * REG_SIZE) &&
!inst->dst.equals(inst->src[i]))
return true;
/* Specified channel group from the destination region. */
const fs_reg dst = horiz_offset(inst->dst, lbld.group());
- const unsigned dst_size = inst->regs_written * REG_SIZE /
- inst->dst.component_size(inst->exec_size);
+ const unsigned dst_size = inst->size_written /
+ inst->dst.component_size(inst->exec_size);
if (needs_dst_copy(lbld, inst)) {
const fs_reg tmp = lbld.vgrf(inst->dst.type, dst_size);
* original or the lowered instruction, whichever is lower.
*/
const unsigned n = DIV_ROUND_UP(inst->exec_size, lower_width);
- const unsigned dst_size = inst->regs_written * REG_SIZE /
+ const unsigned dst_size = inst->size_written /
inst->dst.component_size(inst->exec_size);
assert(!inst->writes_accumulator && !inst->mlen);
split_inst.src[j] = emit_unzip(lbld, block, inst, j);
split_inst.dst = emit_zip(lbld, block, inst);
- split_inst.regs_written = DIV_ROUND_UP(
- split_inst.dst.component_size(lower_width) * dst_size,
- REG_SIZE);
+ split_inst.size_written =
+ split_inst.dst.component_size(lower_width) * dst_size;
lbld.emit(split_inst);
}
switch (inst->dst.file) {
case VGRF:
fprintf(file, "vgrf%d", inst->dst.nr);
- if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
+ if (alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written ||
inst->dst.offset % REG_SIZE)
fprintf(file, "+%d.%d",
inst->dst.offset / REG_SIZE, inst->dst.offset % REG_SIZE);
{
instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
inst->header_size = header_size;
- inst->regs_written = header_size;
+ inst->size_written = header_size * REG_SIZE;
for (unsigned i = header_size; i < sources; i++) {
- inst->regs_written +=
- DIV_ROUND_UP(dispatch_width() * type_sz(src[i].type) *
- dst.stride, REG_SIZE);
+ inst->size_written +=
+ ALIGN(dispatch_width() * type_sz(src[i].type) * dst.stride,
+ REG_SIZE);
}
return inst;
struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
- uint8_t regs_written;
+ uint8_t size_written;
uint8_t regs_read;
enum opcode opcode;
bool saturate;
* that entry is writing.
*/
if (!region_contained_in(inst->src[arg], inst->regs_read(arg),
- entry->dst, entry->regs_written))
+ entry->dst, DIV_ROUND_UP(entry->size_written,
+ REG_SIZE)))
return false;
/* we can't generally copy-propagate UD negations because we
* that entry is writing.
*/
if (!region_contained_in(inst->src[i], inst->regs_read(i),
- entry->dst, entry->regs_written))
+ entry->dst, DIV_ROUND_UP(entry->size_written,
+ REG_SIZE)))
continue;
/* If the type sizes don't match each channel of the instruction is
/* kill the destination from the ACP */
if (inst->dst.file == VGRF) {
foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
- if (regions_overlap(entry->dst, entry->regs_written * REG_SIZE,
- inst->dst, inst->regs_written * REG_SIZE))
+ if (regions_overlap(entry->dst, entry->size_written,
+ inst->dst, inst->size_written))
entry->remove();
}
* _any_ of the registers that it reads
*/
if (regions_overlap(entry->src, entry->regs_read * REG_SIZE,
- inst->dst, inst->regs_written * REG_SIZE))
+ inst->dst, inst->size_written))
entry->remove();
}
}
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
- entry->regs_written = inst->regs_written;
+ entry->size_written = inst->size_written;
entry->regs_read = inst->regs_read(0);
entry->opcode = inst->opcode;
entry->saturate = inst->saturate;
for (int i = 0; i < inst->sources; i++) {
int effective_width = i < inst->header_size ? 8 : inst->exec_size;
assert(effective_width * type_sz(inst->src[i].type) % REG_SIZE == 0);
- int regs_written = effective_width *
- type_sz(inst->src[i].type) / REG_SIZE;
+ const unsigned size_written = effective_width *
+ type_sz(inst->src[i].type);
if (inst->src[i].file == VGRF) {
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->dst.offset += offset * REG_SIZE;
entry->src = inst->src[i];
- entry->regs_written = regs_written;
+ entry->size_written = size_written;
entry->regs_read = inst->regs_read(i);
entry->opcode = inst->opcode;
if (!entry->dst.equals(inst->src[i])) {
ralloc_free(entry);
}
}
- offset += regs_written;
+ offset += DIV_ROUND_UP(size_written, REG_SIZE);
}
}
}
a->dst.type == b->dst.type &&
a->offset == b->offset &&
a->mlen == b->mlen &&
- a->regs_written == b->regs_written &&
+ a->size_written == b->size_written &&
a->base_mrf == b->base_mrf &&
a->eot == b->eot &&
a->header_size == b->header_size &&
/* dest <- temp */
if (!inst->dst.is_null()) {
- assert(inst->regs_written == entry->generator->regs_written);
+ assert(inst->size_written == entry->generator->size_written);
assert(inst->dst.type == entry->tmp.type);
const fs_builder ibld(this, block, inst);
fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst,
struct brw_reg payload)
{
+ assert(inst->size_written % REG_SIZE == 0);
brw_wm_prog_data *prog_data =
reinterpret_cast<brw_wm_prog_data *>(this->prog_data);
const unsigned surf_index =
prog_data->binding_table.render_target_start + inst->target;
gen9_fb_READ(p, dst, payload, surf_index,
- inst->header_size, inst->regs_written,
+ inst->header_size, inst->size_written / REG_SIZE,
prog_data->persample_dispatch);
brw_mark_surface_used(&prog_data->base, surf_index);
struct brw_reg dst,
struct brw_reg header)
{
+ assert(inst->size_written % REG_SIZE == 0);
assert(header.file == BRW_GENERAL_REGISTER_FILE);
assert(header.type == BRW_REGISTER_TYPE_UD);
brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);
brw_inst_set_mlen(p->devinfo, send, inst->mlen);
- brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
+ brw_inst_set_rlen(p->devinfo, send, inst->size_written / REG_SIZE);
brw_inst_set_header_present(p->devinfo, send, true);
brw_inst_set_urb_global_offset(p->devinfo, send, inst->offset);
}
struct brw_reg surface_index,
struct brw_reg sampler_index)
{
+ assert(inst->size_written % REG_SIZE == 0);
int msg_type = -1;
uint32_t simd_mode;
uint32_t return_format;
surface + base_binding_table_index,
sampler % 16,
msg_type,
- inst->regs_written,
+ inst->size_written / REG_SIZE,
inst->mlen,
inst->header_size != 0,
simd_mode,
0 /* surface */,
0 /* sampler */,
msg_type,
- inst->regs_written,
+ inst->size_written / REG_SIZE,
inst->mlen /* mlen */,
inst->header_size != 0 /* header */,
simd_mode,
*/
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
assert(inst->mlen == 3);
- assert(inst->regs_written == 8);
+ assert(inst->size_written == 8 * REG_SIZE);
rlen = 8;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
struct brw_reg msg_data,
unsigned msg_type)
{
+ assert(inst->size_written % REG_SIZE == 0);
assert(msg_data.type == BRW_REGISTER_TYPE_UD);
brw_pixel_interpolator_query(p,
msg_type,
msg_data,
inst->mlen,
- inst->regs_written);
+ inst->size_written / REG_SIZE);
}
inst = bld.emit(opcode, dst, payload, desc);
inst->mlen = mlen;
/* 2 floats per slot returned */
- inst->regs_written = 2 * bld.dispatch_width() / 8;
+ inst->size_written = 2 * bld.dispatch_width() / 8 * REG_SIZE;
inst->pi_noperspective = interpolation == INTERP_MODE_NOPERSPECTIVE;
wm_prog_data->pulls_bary = true;
unsigned read_components = num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle);
- inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = read_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(tmp_dst, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp_dst,
icp_handle);
- inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = num_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
}
inst->offset = base_offset + offset_const->u32[0];
inst->mlen = 1;
if (first_component != 0) {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
payload);
- inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = read_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(tmp_dst, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp_dst,
payload);
- inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = num_components * type_sz(tmp_dst.type) / 4 * REG_SIZE;
}
inst->offset = base_offset;
inst->mlen = 2;
/* Read the whole VUE header (because of alignment) and read .w. */
fs_reg tmp = bld.vgrf(dst.type, 4);
inst->dst = tmp;
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
bld.MOV(dst, offset(tmp, bld, 3));
}
}
inst->offset = imm_offset;
inst->mlen = 2;
}
- inst->regs_written =
- ((num_components + first_component) * type_sz(dst.type) / 4);
+ inst->size_written =
+ ((num_components + first_component) * type_sz(dst.type) / 4) * REG_SIZE;
/* If we are reading 64-bit data using 32-bit read messages we need
* build proper 64-bit data elements by shuffling the low and high
if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
assert(type_sz(dst.type) < 8);
inst->dst = bld.vgrf(dst.type, 4);
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
bld.MOV(dst, offset(inst->dst, bld, 3));
}
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
inst->offset = 0;
inst->mlen = 1;
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
/* dst.xy = tmp.wz */
bld.MOV(dst, offset(tmp, bld, 3));
break;
}
case GL_TRIANGLES:
- /* DWord 4; hardcode offset = 1 and regs_written = 1 */
+ /* DWord 4; hardcode offset = 1 and size_written = REG_SIZE */
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
inst->offset = 1;
inst->mlen = 1;
- inst->regs_written = 1;
+ inst->size_written = REG_SIZE;
break;
case GL_ISOLINES:
/* All channels are undefined. */
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
inst->offset = 1;
inst->mlen = 1;
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
/* Reswizzle: WZYX */
fs_reg srcs[4] = {
fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
patch_handle);
- inst->regs_written = read_components;
+ inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) {
bld.MOV(offset(dst, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
patch_handle);
- inst->regs_written = instr->num_components;
+ inst->size_written = instr->num_components * REG_SIZE;
}
inst->offset = imm_offset;
inst->mlen = 1;
fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
payload);
- inst->regs_written = read_components;
+ inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) {
bld.MOV(offset(dst, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
payload);
- inst->regs_written = instr->num_components;
+ inst->size_written = instr->num_components * REG_SIZE;
}
inst->offset = imm_offset;
inst->mlen = 2;
fs_reg tmp = bld.vgrf(dest.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
patch_handle);
- inst->regs_written = read_components;
+ inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) {
bld.MOV(offset(dest, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest,
patch_handle);
- inst->regs_written = instr->num_components;
+ inst->size_written = instr->num_components * REG_SIZE;
}
inst->mlen = 1;
inst->offset = imm_offset;
}
inst->mlen = 2;
inst->offset = imm_offset;
- inst->regs_written =
- ((num_components + first_component) * type_sz(dest.type) / 4);
+ inst->size_written =
+ ((num_components + first_component) * type_sz(dest.type) / 4) *
+ REG_SIZE;
/* If we are reading 64-bit data using 32-bit read messages we need
* build proper 64-bit data elements by shuffling the low and high
STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS);
fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
- inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
- REG_SIZE;
+ inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
return inst;
}
assert(bld.shader->devinfo->gen >= 9);
fs_inst *inst = bld.emit(FS_OPCODE_FB_READ_LOGICAL, dst);
inst->target = target;
- inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
- REG_SIZE;
+ inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
return inst;
}
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
- ->regs_written = 2;
+ ->size_written = 2 * REG_SIZE;
break;
}
src_payload, brw_imm_ud(index));
inst->header_size = 0;
inst->mlen = 1;
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0));
brw_mark_surface_used(prog_data, index);
nir_ssa_def_components_read(&instr->dest.ssa):
(1 << dest_size) - 1;
assert(write_mask != 0); /* dead code should have been eliminated */
- inst->regs_written = util_last_bit(write_mask) * dispatch_width / 8;
+ inst->size_written = util_last_bit(write_mask) * dispatch_width / 8 * REG_SIZE;
} else {
- inst->regs_written = 4 * dispatch_width / 8;
+ inst->size_written = 4 * dispatch_width / 8 * REG_SIZE;
}
if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
}
if (inst->dst.file == VGRF)
- spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
+ spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE)
+ * loop_scale;
switch (inst->opcode) {
continue;
}
dst_reg_offset[offset] = inst->dst.offset / REG_SIZE;
- if (inst->regs_written > 1)
+ if (inst->size_written > REG_SIZE)
dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1;
mov[offset] = inst;
channels_remaining -= regs_written(inst);
const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
- inst->regs_written = rsize * bld.dispatch_width() / 8;
+ inst->size_written = rsize * bld.dispatch_width() / 8 * REG_SIZE;
inst->predicate = pred;
return dst;
}
/* We only care about one or two regs of response, but the sampler always
* writes 4/8.
*/
- inst->regs_written = 4 * dispatch_width / 8;
+ inst->size_written = 4 * dispatch_width / 8 * REG_SIZE;
return dest;
}
regs_written(const fs_inst *inst)
{
/* XXX - Take into account register-misaligned offsets correctly. */
- return inst->regs_written;
+ assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
+ return DIV_ROUND_UP(inst->size_written, REG_SIZE);
}
/**
* single-result send is probably actually reducing register
* pressure.
*/
- if (inst->regs_written <= inst->exec_size / 8 &&
- chosen_inst->regs_written > chosen_inst->exec_size / 8) {
+ if (inst->size_written <= inst->exec_size / 8 * REG_SIZE &&
+ chosen_inst->size_written > chosen_inst->exec_size / 8 * REG_SIZE) {
chosen = n;
continue;
- } else if (inst->regs_written > chosen_inst->regs_written) {
+ } else if (inst->size_written > chosen_inst->size_written) {
continue;
}
}
int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
uint8_t target; /**< MRT target. */
uint8_t regs_written; /**< Number of registers written by the instruction. */
+ unsigned size_written; /**< Data written to the destination register in bytes. */
enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
enum brw_conditional_mod conditional_mod; /**< BRW_CONDITIONAL_* */
fs_reg zero(brw_imm_f(0.0f));
bld.ADD(offset(dest, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dest, src2)
- ->regs_written = 4;
+ ->size_written = 4 * REG_SIZE;
bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
/* = Before =
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
bld.ADD(offset(dst0, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dst0, src2)
- ->regs_written = 4;
+ ->size_written = 4 * REG_SIZE;
set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));
/* = Before =