bld.MOV(wpos, this->pixel_y);
} else {
fs_reg pixel_y = this->pixel_y;
- float offset = (pixel_center_integer ? 0.0 : 0.5);
+ float offset = (pixel_center_integer ? 0.0f : 0.5f);
if (flip) {
pixel_y.negate = true;
- offset += key->drawable_height - 1.0;
+ offset += key->drawable_height - 1.0f;
}
bld.ADD(wpos, pixel_y, fs_reg(offset));
continue;
/* Set up the annotation tracking for new generated instructions. */
- const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
- .at(block, inst);
+ const fs_builder ibld(this, block, inst);
fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
fs_reg dst = vgrf(glsl_type::float_type);
inst->src[i].reladdr = NULL;
inst->src[i].stride = 1;
} else {
+ const fs_builder ubld = ibld.exec_all().group(8, 0);
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
- ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, surf_index, offset);
inst->src[i].set_smear(pull_index & 3);
}
return false;
/* Look for a texturing instruction immediately before the final FB_WRITE. */
- fs_inst *fb_write = (fs_inst *) cfg->blocks[cfg->num_blocks - 1]->end();
+ bblock_t *block = cfg->blocks[cfg->num_blocks - 1];
+ fs_inst *fb_write = (fs_inst *)block->end();
assert(fb_write->eot);
assert(fb_write->opcode == FS_OPCODE_FB_WRITE);
assert(!tex_inst->eot); /* We can't get here twice */
assert((tex_inst->offset & (0xff << 24)) == 0);
+ const fs_builder ibld(this, block, tex_inst);
+
tex_inst->offset |= fb_write->target << 24;
tex_inst->eot = true;
- tex_inst->dst = bld.null_reg_ud();
+ tex_inst->dst = ibld.null_reg_ud();
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
/* If a header is present, marking the eot is sufficient. Otherwise, we need
if (tex_inst->header_size != 0)
return true;
- fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F,
- load_payload->sources + 1);
+ fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
+ load_payload->sources + 1);
fs_reg *new_sources =
ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);
if (block->start() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i);
+ DEP_RESOLVE_MOV(fs_builder(this, block, inst),
+ first_write_grf + i);
}
return;
}
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
needs_dep[reg - first_write_grf]) {
- DEP_RESOLVE_MOV(bld.at(block, inst), reg);
+ DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg);
needs_dep[reg - first_write_grf] = false;
if (scan_inst->exec_size == 16)
needs_dep[reg - first_write_grf + 1] = false;
if (block->end() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i);
+ DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+ first_write_grf + i);
}
return;
}
scan_inst->dst.reg >= first_write_grf &&
scan_inst->dst.reg < first_write_grf + write_len &&
needs_dep[scan_inst->dst.reg - first_write_grf]) {
- DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg);
+ DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+ scan_inst->dst.reg);
needs_dep[scan_inst->dst.reg - first_write_grf] = false;
}
if (dst.file == MRF)
dst.reg = dst.reg & ~BRW_MRF_COMPR4;
- const fs_builder hbld = bld.exec_all().group(8, 0).at(block, inst);
+ const fs_builder ibld(this, block, inst);
+ const fs_builder hbld = ibld.exec_all().group(8, 0);
for (uint8_t i = 0; i < inst->header_size; i++) {
if (inst->src[i].file != BAD_FILE) {
dst = offset(dst, hbld, 1);
}
- const fs_builder ibld = bld.exec_all(inst->force_writemask_all)
- .group(inst->exec_size, inst->force_sechalf)
- .at(block, inst);
-
if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
bool progress = false;
/* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
- * directly, but Cherryview cannot.
+ * directly, but CHV/BXT cannot.
*/
- if (devinfo->gen >= 8 && !devinfo->is_cherryview)
+ if (devinfo->gen >= 8 && !devinfo->is_cherryview && !devinfo->is_broxton)
return false;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
inst->dst.type != BRW_REGISTER_TYPE_UD))
continue;
- const fs_builder ibld = bld.at(block, inst);
+ const fs_builder ibld(this, block, inst);
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
}
}
+/**
+ * Initialize the header present in some typed and untyped surface
+ * messages.
+ */
+static fs_reg
+emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
+{
+ fs_builder ubld = bld.exec_all().group(8, 0);
+ const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.MOV(dst, fs_reg(0));
+ ubld.MOV(component(dst, 7), sample_mask);
+ return dst;
+}
+
static void
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &sample_mask)
const fs_reg &addr = inst->src[0];
const fs_reg &src = inst->src[1];
const fs_reg &surface = inst->src[2];
- const fs_reg &dims = inst->src[3];
+ const UNUSED fs_reg &dims = inst->src[3];
const fs_reg &arg = inst->src[4];
- assert(!"Not implemented");
+ /* Calculate the total number of components of the payload. */
+ const unsigned addr_sz = inst->components_read(0);
+ const unsigned src_sz = inst->components_read(1);
+ const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1);
+ const unsigned sz = header_sz + addr_sz + src_sz;
+
+ /* Allocate space for the payload. */
+ fs_reg *const components = new fs_reg[sz];
+ const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+ unsigned n = 0;
+
+ /* Construct the payload. */
+ if (header_sz)
+ components[n++] = emit_surface_header(bld, sample_mask);
+
+ for (unsigned i = 0; i < addr_sz; i++)
+ components[n++] = offset(addr, bld, i);
+
+ for (unsigned i = 0; i < src_sz; i++)
+ components[n++] = offset(src, bld, i);
+
+ bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+
+ /* Update the original instruction. */
+ inst->opcode = op;
+ inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+ inst->header_size = header_sz;
+
+ inst->src[0] = payload;
+ inst->src[1] = surface;
+ inst->src[2] = arg;
+ inst->resize_sources(3);
+
+ delete[] components;
}
bool
bool progress = false;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
- const fs_builder ibld = bld.exec_all(inst->force_writemask_all)
- .group(inst->exec_size, inst->force_sechalf)
- .at(block, inst);
+ const fs_builder ibld(this, block, inst);
switch (inst->opcode) {
case FS_OPCODE_FB_WRITE_LOGICAL:
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
- fs_reg());
+ fs_reg(0xffff));
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
- fs_reg());
+ ibld.sample_mask_reg());
break;
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_ATOMIC,
- fs_reg());
+ ibld.sample_mask_reg());
break;
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_READ,
- fs_reg());
+ fs_reg(0xffff));
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_WRITE,
- fs_reg());
+ ibld.sample_mask_reg());
break;
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_ATOMIC,
- fs_reg());
+ ibld.sample_mask_reg());
break;
default:
else
return inst->exec_size;
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ return 8;
+
default:
return inst->exec_size;
}
const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
if (lower_width != inst->exec_size) {
- /* Builder matching the original instruction. */
+ /* Builder matching the original instruction. We may also need to
+ * emit an instruction of width larger than the original, set the
+ * execution size of the builder to the highest of both for now so
+ * we're sure that both cases can be handled.
+ */
const fs_builder ibld = bld.at(block, inst)
.exec_all(inst->force_writemask_all)
- .group(inst->exec_size, inst->force_sechalf);
+ .group(MAX2(inst->exec_size, lower_width),
+ inst->force_sechalf);
/* Split the copies in chunks of the execution width of either the
* original or the lowered instruction, whichever is lower.
split_inst.exec_size = lower_width;
split_inst.eot = inst->eot && i == n - 1;
- /* Set exec_all if the lowered width is higher than the original
- * to avoid breaking the compiler invariant that no control
- * flow-masked instruction is wider than the shader's
- * dispatch_width. Then transform the sources and destination and
- * emit the lowered instruction.
+ /* Select the correct channel enables for the i-th group, then
+ * transform the sources and destination and emit the lowered
+ * instruction.
*/
- const fs_builder lbld = ibld.exec_all(lower_width > inst->exec_size)
- .group(lower_width, i);
+ const fs_builder lbld = ibld.group(lower_width, i);
for (unsigned j = 0; j < inst->sources; j++) {
if (inst->src[j].file != BAD_FILE &&
* Ideally optimization passes wouldn't be part of the visitor so they
* wouldn't have access to bld at all, but they do, so just in case some
* pass forgets to ask for a location explicitly set it to NULL here to
- * make it trip.
+ * make it trip. The dispatch width is initialized to a bogus value to
+ * make sure that optimizations set the execution controls explicitly to
+ * match the code they are manipulating instead of relying on the defaults.
*/
- bld = bld.at(NULL, NULL);
+ bld = fs_builder(this, 64);
split_virtual_grfs();