* be any component of a vector, and then we load 4 contiguous
* components starting from that.
*
- * We break down the const_offset to a portion added to the variable
- * offset and a portion done using reg_offset, which means that if you
- * have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
- * a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
- * CSE can later notice that those loads are all the same and eliminate
- * the redundant ones.
+ * We break down the const_offset to a portion added to the variable offset
+ * and a portion done using fs_reg::offset, which means that if you have
+ * GLSL using something like "uniform vec4 a[20]; gl_FragColor = a[i]",
+ * we'll temporarily generate 4 vec4 loads from offset i * 4, and CSE can
+ * later notice that those loads are all the same and eliminate the
+ * redundant ones.
*/
fs_reg vec4_offset = vgrf(glsl_type::uint_type);
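/* Keep only the vec4-aligned part of the constant offset here; the bytes
* within the vec4 are applied to the result through fs_reg::offset later.
*/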
bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
vec4_result, surf_index, vec4_offset);
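/* The message writes back one full vec4 of data per channel. */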
- inst->size_written = 4 * bld.dispatch_width() / 8 * REG_SIZE;
+ inst->size_written = 4 * vec4_result.component_size(inst->exec_size);
if (type_sz(dst.type) == 8) {
shuffle_32bit_load_result_to_64bit_data(
stride == r.stride);
}
-fs_reg &
-fs_reg::set_smear(unsigned subreg)
-{
- assert(file != ARF && file != FIXED_GRF && file != IMM);
- offset = ROUND_DOWN_TO(offset, REG_SIZE) + subreg * type_sz(type);
- stride = 0;
- return *this;
-}
-
bool
fs_reg::is_contiguous() const
{
void
fs_visitor::emit_shader_time_begin()
{
- shader_start_time = get_timestamp(bld.annotate("shader time start"));
-
/* We want only the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2 GHz, it will roll over every ~3 seconds,
* which is plenty of time for our purposes. It is identical across the
* EUs, but since it's tracking GPU core speed it will increment at a
* varying rate as render P-states change.
*/
- shader_start_time.set_smear(0);
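+ /* component() selects a single channel of a register: it moves the byte
+ * offset to that channel and sets the stride to 0.
+ */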
+ shader_start_time = component(
+ get_timestamp(bld.annotate("shader time start")), 0);
}
void
assert(end && ((fs_inst *) end)->eot);
const fs_builder ibld = bld.annotate("shader time end")
.exec_all().at(NULL, end);
-
- fs_reg shader_end_time = get_timestamp(ibld);
+ const fs_reg timestamp = get_timestamp(ibld);
/* We only use the low 32 bits of the timestamp - see
* emit_shader_time_begin().
*
* We could also check if render P-states have changed (or anything
* else that might disrupt timing) by reading component 2 of the
* timestamp and checking if that field is != 0.
*/
- shader_end_time.set_smear(0);
+ const fs_reg shader_end_time = component(timestamp, 0);
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
- fs_reg reset = shader_end_time;
- reset.set_smear(2);
+ const fs_reg reset = component(timestamp, 2);
set_condmod(BRW_CONDITIONAL_Z,
ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u)));
ibld.IF(BRW_PREDICATE_NORMAL);
fs_reg start = shader_start_time;
start.negate = true;
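/* Negating the start time lets the ADD below compute end - start. */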
- fs_reg diff = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- diff.set_smear(0);
-
+ const fs_reg diff = component(fs_reg(VGRF, alloc.allocate(1),
+ BRW_REGISTER_TYPE_UD),
+ 0);
const fs_builder cbld = ibld.group(1, 0);
cbld.group(1, 0).ADD(diff, start, shader_end_time);
brw_imm_v(0x44440000));
abld.AND(*reg, tmp, brw_imm_w(0xf));
} else {
- fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
- t1.set_smear(0);
- fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
+ const fs_reg t1 = component(fs_reg(VGRF, alloc.allocate(1),
+ BRW_REGISTER_TYPE_D), 0);
+ const fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->nr_pull_params = num_pull_constants;
- /* Up until now, the param[] array has been indexed by reg + reg_offset
+ /* Up until now, the param[] array has been indexed by reg + offset
* of UNIFORM registers. Move pull constants into pull_param[] and
* condense param[] to only contain the uniforms we chose to push.
*
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = VGRF;
inst->src[i].nr = dst.nr;
- inst->src[i].offset %= 4;
- inst->src[i].set_smear((pull_index & 3) * 4 /
- type_sz(inst->src[i].type));
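+ /* The pull constant lives at dword (pull_index & 3) of the fetched vec4;
+ * keep any sub-dword byte offset from the original access.
+ */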
+ inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4;
brw_mark_surface_used(prog_data, index);
}
* would need us to understand coalescing out more than one MOV at
* a time.
*/
- if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE ||
- scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) >
- inst->src[0].offset / REG_SIZE + DIV_ROUND_UP(inst->size_read(0), REG_SIZE))
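+ /* Bail unless everything scan_inst writes lies within the region that
+ * inst reads from its first source.
+ */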
+ if (!region_contained_in(scan_inst->dst, scan_inst->size_written,
+ inst->src[0], inst->size_read(0)))
break;
/* SEND instructions can't have MRF as a destination. */
bool progress = false;
- /* Note that we're done with register allocation, so GRF fs_regs always
- * have a .reg_offset of 0.
- */
-
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->mlen != 0 && inst->dst.file == VGRF) {
insert_gen4_pre_send_dependency_workarounds(block, inst);
switch (inst->dst.file) {
case VGRF:
fprintf(file, "vgrf%d", inst->dst.nr);
- if (alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written ||
- inst->dst.offset % REG_SIZE)
- fprintf(file, "+%d.%d",
- inst->dst.offset / REG_SIZE, inst->dst.offset % REG_SIZE);
break;
case FIXED_GRF:
fprintf(file, "g%d", inst->dst.nr);
fprintf(file, "(null)");
break;
case UNIFORM:
- fprintf(file, "***u%d***", inst->dst.nr + inst->dst.offset / 4);
+ fprintf(file, "***u%d***", inst->dst.nr);
break;
case ATTR:
- fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.offset / REG_SIZE);
+ fprintf(file, "***attr%d***", inst->dst.nr);
break;
case ARF:
switch (inst->dst.nr) {
fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
break;
}
- if (inst->dst.subnr)
- fprintf(file, "+%d", inst->dst.subnr);
break;
case IMM:
unreachable("not reached");
}
+
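+ /* UNIFORM registers are addressed in 4-byte elements; every other file
+ * is counted in 32-byte GRF units.
+ */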
+ if (inst->dst.offset ||
+ (inst->dst.file == VGRF &&
+ alloc.sizes[inst->dst.nr] * REG_SIZE != inst->size_written)) {
+ const unsigned reg_size = (inst->dst.file == UNIFORM ? 4 : REG_SIZE);
+ fprintf(file, "+%d.%d", inst->dst.offset / reg_size,
+ inst->dst.offset % reg_size);
+ }
+
if (inst->dst.stride != 1)
fprintf(file, "<%u>", inst->dst.stride);
fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
switch (inst->src[i].file) {
case VGRF:
fprintf(file, "vgrf%d", inst->src[i].nr);
- if (alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i) ||
- inst->src[i].offset % REG_SIZE != 0)
- fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE,
- inst->src[i].offset % REG_SIZE);
break;
case FIXED_GRF:
fprintf(file, "g%d", inst->src[i].nr);
fprintf(file, "***m%d***", inst->src[i].nr);
break;
case ATTR:
- fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].offset / REG_SIZE);
+ fprintf(file, "attr%d", inst->src[i].nr);
break;
case UNIFORM:
- fprintf(file, "u%d", inst->src[i].nr + inst->src[i].offset / 4);
- if (inst->src[i].offset % 4 != 0) {
- fprintf(file, "+%d.%d", inst->src[i].offset / 4,
- inst->src[i].offset % 4);
- }
+ fprintf(file, "u%d", inst->src[i].nr);
break;
case BAD_FILE:
fprintf(file, "(null)");
fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
break;
}
- if (inst->src[i].subnr)
- fprintf(file, "+%d", inst->src[i].subnr);
break;
}
+
+ if (inst->src[i].offset ||
+ (inst->src[i].file == VGRF &&
+ alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i))) {
+ const unsigned reg_size = (inst->src[i].file == UNIFORM ? 4 : REG_SIZE);
+ fprintf(file, "+%d.%d", inst->src[i].offset / reg_size,
+ inst->src[i].offset % reg_size);
+ }
+
if (inst->src[i].abs)
fprintf(file, "|");