* be any component of a vector, and then we load 4 contiguous
* components starting from that.
*
- * We break down the const_offset to a portion added to the variable
- * offset and a portion done using reg_offset, which means that if you
- * have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
- * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
- * CSE can later notice that those loads are all the same and eliminate
- * the redundant ones.
+ * We break down the const_offset to a portion added to the variable offset
+ * and a portion done using fs_reg::offset, which means that if you have
+ * GLSL using something like "uniform vec4 a[20]; gl_FragColor = a[i]",
+ * we'll temporarily generate 4 vec4 loads from offset i * 4, and CSE can
+ * later notice that those loads are all the same and eliminate the
+ * redundant ones.
*/
fs_reg vec4_offset = vgrf(glsl_type::uint_type);
bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
vec4_result, surf_index, vec4_offset);
- inst->size_written = 4 * bld.dispatch_width() / 8 * REG_SIZE;
+ inst->size_written = 4 * vec4_result.component_size(inst->exec_size);
if (type_sz(dst.type) == 8) {
shuffle_32bit_load_result_to_64bit_data(
offset == inst->offset);
}
-bool
-fs_inst::overwrites_reg(const fs_reg &reg) const
-{
- return reg.in_range(dst, DIV_ROUND_UP(size_written, REG_SIZE));
-}
-
bool
fs_inst::is_send_from_grf() const
{
return false;
fs_reg reg = this->src[0];
- if (reg.file != VGRF || reg.offset / REG_SIZE != 0 || reg.stride == 0)
+ if (reg.file != VGRF || reg.offset != 0 || reg.stride != 1)
return false;
if (grf_alloc.sizes[reg.nr] * REG_SIZE != this->size_written)
stride == r.stride);
}
-fs_reg &
-fs_reg::set_smear(unsigned subreg)
-{
- assert(file != ARF && file != FIXED_GRF && file != IMM);
- offset = ROUND_DOWN_TO(offset, REG_SIZE) + subreg * type_sz(type);
- stride = 0;
- return *this;
-}
-
bool
fs_reg::is_contiguous() const
{
void
fs_visitor::emit_shader_time_begin()
{
- shader_start_time = get_timestamp(bld.annotate("shader time start"));
-
/* We want only the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
* which is plenty of time for our purposes. It is identical across the
* EUs, but since it's tracking GPU core speed it will increment at a
* varying rate as render P-states change.
*/
- shader_start_time.set_smear(0);
+ shader_start_time = component(
+ get_timestamp(bld.annotate("shader time start")), 0); /* keep only component 0 of the timestamp register (the low 32 bits discussed above) */
}
void
assert(end && ((fs_inst *) end)->eot);
const fs_builder ibld = bld.annotate("shader time end")
.exec_all().at(NULL, end);
-
- fs_reg shader_end_time = get_timestamp(ibld);
+ const fs_reg timestamp = get_timestamp(ibld);
/* We only use the low 32 bits of the timestamp - see
* emit_shader_time_begin()).
* else that might disrupt timing) by setting smear to 2 and checking if
* that field is != 0.
*/
- shader_end_time.set_smear(0);
+ const fs_reg shader_end_time = component(timestamp, 0);
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
- fs_reg reset = shader_end_time;
- reset.set_smear(2);
+ const fs_reg reset = component(timestamp, 2);
set_condmod(BRW_CONDITIONAL_Z,
ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u)));
ibld.IF(BRW_PREDICATE_NORMAL);
fs_reg start = shader_start_time;
start.negate = true;
- fs_reg diff = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- diff.set_smear(0);
-
+ const fs_reg diff = component(fs_reg(VGRF, alloc.allocate(1),
+ BRW_REGISTER_TYPE_UD),
+ 0);
const fs_builder cbld = ibld.group(1, 0);
cbld.group(1, 0).ADD(diff, start, shader_end_time);
}
}
-int
+unsigned
fs_inst::size_read(int arg) const
{
switch (opcode) {
case SHADER_OPCODE_MOV_INDIRECT:
if (arg == 0) {
assert(src[2].file == IMM);
- unsigned region_length = src[2].ud;
-
- if (src[0].file == UNIFORM) {
- assert(region_length % 4 == 0);
- return region_length;
- } else if (src[0].file == FIXED_GRF) {
- /* If the start of the region is not register aligned, then
- * there's some portion of the register that's technically
- * unread at the beginning.
- *
- * However, the register allocator works in terms of whole
- * registers, and does not use subnr. It assumes that the
- * read starts at the beginning of the register, and extends
- * regs_read() whole registers beyond that.
- *
- * To compensate, we extend the region length to include this
- * unread portion at the beginning.
- */
- if (src[0].subnr)
- region_length += src[0].subnr;
-
- return region_length;
- } else {
- assert(!"Invalid register file");
- }
+ return src[2].ud;
}
break;
brw_imm_v(0x44440000));
abld.AND(*reg, tmp, brw_imm_w(0xf));
} else {
- fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
- t1.set_smear(0);
- fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
+ const fs_reg t1 = component(fs_reg(VGRF, alloc.allocate(1),
+ BRW_REGISTER_TYPE_D), 0);
+ const fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->nr_pull_params = num_pull_constants;
- /* Up until now, the param[] array has been indexed by reg + reg_offset
+ /* Up until now, the param[] array has been indexed by reg + offset
* of UNIFORM registers. Move pull constants into pull_param[] and
* condense param[] to only contain the uniforms we chose to push.
*
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = VGRF;
inst->src[i].nr = dst.nr;
- inst->src[i].offset %= 4;
- inst->src[i].set_smear((pull_index & 3) * 4 /
- type_sz(inst->src[i].type));
+ inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4;
brw_mark_surface_used(prog_data, index);
}
/**
* Compute a bitmask with GRF granularity with a bit set for each GRF starting
- * from \p r which overlaps the region starting at \p r and spanning \p n GRF
- * units.
+ * from \p r.offset which overlaps the region starting at \p s.offset and
+ * spanning \p ds bytes.
+ *
+ * Bit i of the result stands for the i-th GRF counted from the start of
+ * \p r.  The region is first widened to whole GRFs: the sub-GRF part of the
+ * distance between \p r and \p s is added to \p ds before rounding up, so a
+ * region that straddles a GRF boundary sets a bit for every GRF it touches.
+ *
+ * \p r and \p s must be in the same register space, and the whole-GRF
+ * distance from \p r to \p s must be non-negative and smaller than the
+ * number of bits in an unsigned (both asserted).
+ *
+ * NOTE(review): the mask is built with a signed "1 << n", so a region
+ * covering as many GRFs as an int has bits would not be representable;
+ * callers presumably never pass regions that large -- confirm.  Also, a
+ * negative sub-GRF distance would truncate to shift == 0 and slip past the
+ * assertion -- presumably callers guarantee \p s does not start before
+ * \p r; verify.
*/
static inline unsigned
-mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned n)
+mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned ds)
{
- const int rel_offset = (reg_offset(s) - reg_offset(r)) / REG_SIZE;
+ const int rel_offset = reg_offset(s) - reg_offset(r); /* distance from r to s, no longer pre-divided by REG_SIZE (presumably bytes -- confirm against reg_offset()) */
+ const int shift = rel_offset / REG_SIZE; /* whole GRFs between r and s: index of the first bit to set */
+ const unsigned n = DIV_ROUND_UP(rel_offset % REG_SIZE + ds, REG_SIZE); /* GRFs touched, including a partially covered first one */
assert(reg_space(r) == reg_space(s) &&
- rel_offset >= 0 && rel_offset < int(8 * sizeof(unsigned)));
- return ((1 << n) - 1) << rel_offset;
+ shift >= 0 && shift < int(8 * sizeof(unsigned)));
+ return ((1 << n) - 1) << shift; /* n consecutive one bits, moved up to start at bit `shift` */
}
bool
* would need us to understand coalescing out more than one MOV at
* a time.
*/
- if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
- scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
- inst->src[0].offset / REG_SIZE + DIV_ROUND_UP(inst->size_read(0), REG_SIZE))
+ if (!region_contained_in(scan_inst->dst, scan_inst->size_written,
+ inst->src[0], inst->size_read(0)))
break;
/* SEND instructions can't have MRF as a destination. */
/* Clear the bits for any registers this instruction overwrites. */
regs_left &= ~mask_relative_to(
- inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
- REG_SIZE));
+ inst->src[0], scan_inst->dst, scan_inst->size_written);
if (!regs_left)
break;
}
inst->src[0], inst->size_read(0))) {
/* Clear the bits for any registers this instruction overwrites. */
regs_left &= ~mask_relative_to(
- inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written,
- REG_SIZE));
+ inst->src[0], scan_inst->dst, scan_inst->size_written);
- const unsigned rel_offset = (reg_offset(scan_inst->dst) -
- reg_offset(inst->src[0])) / REG_SIZE;
+ const unsigned rel_offset = reg_offset(scan_inst->dst) -
+ reg_offset(inst->src[0]);
if (inst->dst.nr & BRW_MRF_COMPR4) {
/* Apply the same address transformation done by the hardware
* for COMPR4 MRF writes.
*/
- assert(rel_offset < 2);
- scan_inst->dst.nr = inst->dst.nr + rel_offset * 4;
+ assert(rel_offset < 2 * REG_SIZE);
+ scan_inst->dst.nr = inst->dst.nr + rel_offset / REG_SIZE * 4;
/* Clear the COMPR4 bit if the generating instruction is not
* compressed.
/* Calculate the MRF number the result of this instruction is
* ultimately written to.
*/
- scan_inst->dst.nr = inst->dst.nr + rel_offset;
+ scan_inst->dst.nr = inst->dst.nr + rel_offset / REG_SIZE;
}
scan_inst->dst.file = MRF;
- scan_inst->dst.offset %= REG_SIZE;
+ scan_inst->dst.offset = inst->dst.offset + rel_offset % REG_SIZE;
scan_inst->saturate |= inst->saturate;
if (!regs_left)
break;
bool progress = false;
- /* Note that we're done with register allocation, so GRF fs_regs always
- * have a .reg_offset of 0.
- */
-
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->mlen != 0 && inst->dst.file == VGRF) {
insert_gen4_pre_send_dependency_workarounds(block, inst);
inst->dst.type);
if (devinfo->gen >= 7) {
- fs_reg src1_0_w = inst->src[1];
- fs_reg src1_1_w = inst->src[1];
-
if (inst->src[1].file == IMM) {
- src1_0_w.ud &= 0xffff;
- src1_1_w.ud >>= 16;
+ ibld.MUL(low, inst->src[0],
+ brw_imm_uw(inst->src[1].ud & 0xffff));
+ ibld.MUL(high, inst->src[0],
+ brw_imm_uw(inst->src[1].ud >> 16));
} else {
- src1_0_w.type = BRW_REGISTER_TYPE_UW;
- if (src1_0_w.stride != 0) {
- assert(src1_0_w.stride == 1);
- src1_0_w.stride = 2;
- }
-
- src1_1_w.type = BRW_REGISTER_TYPE_UW;
- if (src1_1_w.stride != 0) {
- assert(src1_1_w.stride == 1);
- src1_1_w.stride = 2;
- }
- src1_1_w.offset += type_sz(BRW_REGISTER_TYPE_UW);
+ ibld.MUL(low, inst->src[0],
+ subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 0));
+ ibld.MUL(high, inst->src[0],
+ subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
}
- ibld.MUL(low, inst->src[0], src1_0_w);
- ibld.MUL(high, inst->src[0], src1_1_w);
} else {
- fs_reg src0_0_w = inst->src[0];
- fs_reg src0_1_w = inst->src[0];
-
- src0_0_w.type = BRW_REGISTER_TYPE_UW;
- if (src0_0_w.stride != 0) {
- assert(src0_0_w.stride == 1);
- src0_0_w.stride = 2;
- }
-
- src0_1_w.type = BRW_REGISTER_TYPE_UW;
- if (src0_1_w.stride != 0) {
- assert(src0_1_w.stride == 1);
- src0_1_w.stride = 2;
- }
- src0_1_w.offset += type_sz(BRW_REGISTER_TYPE_UW);
-
- ibld.MUL(low, src0_0_w, inst->src[1]);
- ibld.MUL(high, src0_1_w, inst->src[1]);
+ ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
+ inst->src[1]);
+ ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
+ inst->src[1]);
}
- fs_reg dst = inst->dst;
- dst.type = BRW_REGISTER_TYPE_UW;
- dst.offset = ROUND_DOWN_TO(dst.offset, REG_SIZE) + 2;
- dst.stride = 2;
-
- high.type = BRW_REGISTER_TYPE_UW;
- high.stride = 2;
-
- low.type = BRW_REGISTER_TYPE_UW;
- low.offset = ROUND_DOWN_TO(low.offset, REG_SIZE) + 2;
- low.stride = 2;
-
- ibld.ADD(dst, low, high);
+ ibld.ADD(subscript(inst->dst, BRW_REGISTER_TYPE_UW, 1),
+ subscript(low, BRW_REGISTER_TYPE_UW, 1),
+ subscript(high, BRW_REGISTER_TYPE_UW, 0));
if (inst->conditional_mod || orig_dst.file == MRF) {
set_condmod(inst->conditional_mod,
*/
if (devinfo->gen < 8) {
for (unsigned i = 0; i < inst->sources; i++) {
- if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 &&
- inst->size_read(i) != 0 && DIV_ROUND_UP(inst->size_read(i), REG_SIZE) != 2 &&
+ if (inst->size_written > REG_SIZE &&
+ inst->size_read(i) != 0 && inst->size_read(i) <= REG_SIZE &&
!is_uniform(inst->src[i]) &&
!(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 &&
type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) {
switch (inst->dst.file) {
case VGRF:
fprintf(file, "vgrf%d", inst->dst.nr);
- if (alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written ||
- inst->dst.offset % REG_SIZE)
- fprintf(file, "+%d.%d",
- inst->dst.offset / REG_SIZE, inst->dst.offset % REG_SIZE);
break;
case FIXED_GRF:
fprintf(file, "g%d", inst->dst.nr);
fprintf(file, "(null)");
break;
case UNIFORM:
- fprintf(file, "***u%d***", inst->dst.nr + inst->dst.offset / 4);
+ fprintf(file, "***u%d***", inst->dst.nr);
break;
case ATTR:
- fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.offset / REG_SIZE);
+ fprintf(file, "***attr%d***", inst->dst.nr);
break;
case ARF:
switch (inst->dst.nr) {
fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
break;
}
- if (inst->dst.subnr)
- fprintf(file, "+%d", inst->dst.subnr);
break;
case IMM:
unreachable("not reached");
}
+
+ if (inst->dst.offset ||
+ (inst->dst.file == VGRF &&
+ alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
+ const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE);
+ fprintf(file, "+%d.%d", inst->dst.offset / reg_size,
+ inst->dst.offset % reg_size);
+ }
+
if (inst->dst.stride != 1)
fprintf(file, "<%u>", inst->dst.stride);
fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
switch (inst->src[i].file) {
case VGRF:
fprintf(file, "vgrf%d", inst->src[i].nr);
- if (alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i) ||
- inst->src[i].offset % REG_SIZE != 0)
- fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE,
- inst->src[i].offset % REG_SIZE);
break;
case FIXED_GRF:
fprintf(file, "g%d", inst->src[i].nr);
fprintf(file, "***m%d***", inst->src[i].nr);
break;
case ATTR:
- fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].offset / REG_SIZE);
+ fprintf(file, "attr%d", inst->src[i].nr);
break;
case UNIFORM:
- fprintf(file, "u%d", inst->src[i].nr + inst->src[i].offset / 4);
- if (inst->src[i].offset % 4 != 0) {
- fprintf(file, "+%d.%d", inst->src[i].offset / 4,
- inst->src[i].offset % 4);
- }
+ fprintf(file, "u%d", inst->src[i].nr);
break;
case BAD_FILE:
fprintf(file, "(null)");
fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
break;
}
- if (inst->src[i].subnr)
- fprintf(file, "+%d", inst->src[i].subnr);
break;
}
+
+ if (inst->src[i].offset ||
+ (inst->src[i].file == VGRF &&
+ alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
+ const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE);
+ fprintf(file, "+%d.%d", inst->src[i].offset / reg_size,
+ inst->src[i].offset % reg_size);
+ }
+
if (inst->src[i].abs)
fprintf(file, "|");