bld.MOV(wpos, this->pixel_y);
} else {
fs_reg pixel_y = this->pixel_y;
- float offset = (pixel_center_integer ? 0.0 : 0.5);
+ float offset = (pixel_center_integer ? 0.0f : 0.5f);
if (flip) {
pixel_y.negate = true;
- offset += key->drawable_height - 1.0;
+ offset += key->drawable_height - 1.0f;
}
bld.ADD(wpos, pixel_y, fs_reg(offset));
return false;
/* Look for a texturing instruction immediately before the final FB_WRITE. */
- fs_inst *fb_write = (fs_inst *) cfg->blocks[cfg->num_blocks - 1]->end();
+ bblock_t *block = cfg->blocks[cfg->num_blocks - 1];
+ fs_inst *fb_write = (fs_inst *)block->end();
assert(fb_write->eot);
assert(fb_write->opcode == FS_OPCODE_FB_WRITE);
assert(!tex_inst->eot); /* We can't get here twice */
assert((tex_inst->offset & (0xff << 24)) == 0);
+ const fs_builder ibld(this, block, tex_inst);
+
tex_inst->offset |= fb_write->target << 24;
tex_inst->eot = true;
- tex_inst->dst = bld.null_reg_ud();
+ tex_inst->dst = ibld.null_reg_ud();
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
/* If a header is present, marking the eot is sufficient. Otherwise, we need
if (tex_inst->header_size != 0)
return true;
- fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F,
- load_payload->sources + 1);
+ fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
+ load_payload->sources + 1);
fs_reg *new_sources =
ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);
if (block->start() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i);
+ DEP_RESOLVE_MOV(fs_builder(this, block, inst),
+ first_write_grf + i);
}
return;
}
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
needs_dep[reg - first_write_grf]) {
- DEP_RESOLVE_MOV(bld.at(block, inst), reg);
+ DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg);
needs_dep[reg - first_write_grf] = false;
if (scan_inst->exec_size == 16)
needs_dep[reg - first_write_grf + 1] = false;
if (block->end() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i);
+ DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+ first_write_grf + i);
}
return;
}
scan_inst->dst.reg >= first_write_grf &&
scan_inst->dst.reg < first_write_grf + write_len &&
needs_dep[scan_inst->dst.reg - first_write_grf]) {
- DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg);
+ DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+ scan_inst->dst.reg);
needs_dep[scan_inst->dst.reg - first_write_grf] = false;
}
if (dst.file == MRF)
dst.reg = dst.reg & ~BRW_MRF_COMPR4;
- const fs_builder hbld = bld.exec_all().group(8, 0).at(block, inst);
+ const fs_builder ibld(this, block, inst);
+ const fs_builder hbld = ibld.exec_all().group(8, 0);
for (uint8_t i = 0; i < inst->header_size; i++) {
if (inst->src[i].file != BAD_FILE) {
dst = offset(dst, hbld, 1);
}
- const fs_builder ibld = bld.exec_all(inst->force_writemask_all)
- .group(inst->exec_size, inst->force_sechalf)
- .at(block, inst);
-
if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
bool progress = false;
/* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
- * directly, but Cherryview cannot.
+ * directly, but CHV/BXT cannot.
*/
- if (devinfo->gen >= 8 && !devinfo->is_cherryview)
+ if (devinfo->gen >= 8 && !devinfo->is_cherryview && !devinfo->is_broxton)
return false;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
inst->dst.type != BRW_REGISTER_TYPE_UD))
continue;
- const fs_builder ibld = bld.at(block, inst);
+ const fs_builder ibld(this, block, inst);
/* The MUL instruction isn't commutative. On Gen <= 6, only the low
* 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
bool progress = false;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
- const fs_builder ibld = bld.exec_all(inst->force_writemask_all)
- .group(inst->exec_size, inst->force_sechalf)
- .at(block, inst);
+ const fs_builder ibld(this, block, inst);
switch (inst->opcode) {
case FS_OPCODE_FB_WRITE_LOGICAL:
const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
if (lower_width != inst->exec_size) {
- /* Builder matching the original instruction. */
+ /* Builder matching the original instruction. We may also need to
+ * emit an instruction wider than the original, so set the execution
+ * size of the builder to the larger of the two for now to make sure
+ * that both cases can be handled.
+ */
const fs_builder ibld = bld.at(block, inst)
.exec_all(inst->force_writemask_all)
- .group(inst->exec_size, inst->force_sechalf);
+ .group(MAX2(inst->exec_size, lower_width),
+ inst->force_sechalf);
/* Split the copies in chunks of the execution width of either the
* original or the lowered instruction, whichever is lower.
split_inst.exec_size = lower_width;
split_inst.eot = inst->eot && i == n - 1;
- /* Set exec_all if the lowered width is higher than the original
- * to avoid breaking the compiler invariant that no control
- * flow-masked instruction is wider than the shader's
- * dispatch_width. Then transform the sources and destination and
- * emit the lowered instruction.
+ /* Select the correct channel enables for the i-th group, then
+ * transform the sources and destination and emit the lowered
+ * instruction.
*/
- const fs_builder lbld = ibld.exec_all(lower_width > inst->exec_size)
- .group(lower_width, i);
+ const fs_builder lbld = ibld.group(lower_width, i);
for (unsigned j = 0; j < inst->sources; j++) {
if (inst->src[j].file != BAD_FILE &&
* Ideally optimization passes wouldn't be part of the visitor so they
* wouldn't have access to bld at all, but they do, so just in case some
* pass forgets to ask for a location explicitly set it to NULL here to
- * make it trip.
+ * make it trip. The dispatch width is initialized to a bogus value to
+ * make sure that optimizations set the execution controls explicitly to
+ * match the code they are manipulating instead of relying on the defaults.
*/
- bld = bld.at(NULL, NULL);
+ bld = fs_builder(this, 64);
split_virtual_grfs();