mul->src[1].stride *= 2;
} else if (devinfo->gen == 7 && !devinfo->is_haswell &&
- inst->force_sechalf) {
+ inst->group > 0) {
/* Among other things the quarter control bits influence which
* accumulator register is used by the hardware for instructions
* that access the accumulator implicitly (e.g. MACH). A
* to get the result masked correctly according to the current
* channel enables.
*/
- mach->force_sechalf = false;
+ mach->group = 0;
mach->force_writemask_all = true;
mach->dst = ibld.vgrf(inst->dst.type);
ibld.MOV(inst->dst, mach->dst);
sample_mask.stride *= 2;
bld.exec_all().annotate("FB write oMask")
- .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
- inst->force_sechalf),
+ .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW),
+ inst->group),
sample_mask);
length++;
}
* execution size of the builder to the highest of both for now so
* we're sure that both cases can be handled.
*/
+ const unsigned max_width = MAX2(inst->exec_size, lower_width);
const fs_builder ibld = bld.at(block, inst)
.exec_all(inst->force_writemask_all)
- .group(MAX2(inst->exec_size, lower_width),
- inst->force_sechalf);
+ .group(max_width, inst->group / max_width);
/* Split the copies in chunks of the execution width of either the
* original or the lowered instruction, whichever is lower.
if (inst->force_writemask_all)
fprintf(file, "NoMask ");
- if (dispatch_width == 16 && inst->exec_size == 8) {
- if (inst->force_sechalf)
- fprintf(file, "2ndhalf ");
- else
- fprintf(file, "1sthalf ");
- }
+ if (inst->exec_size != dispatch_width)
+ fprintf(file, "group%d ", inst->group);
fprintf(file, "\n");
}
fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
shader(shader), block(block), cursor(inst),
_dispatch_width(inst->exec_size),
- _group(inst->force_sechalf ? 8 : 0),
+ _group(inst->group),
force_writemask_all(inst->force_writemask_all)
{
annotation.str = inst->annotation;
return _dispatch_width;
}
+ /**
+ * Get the channel group in use, i.e. the index of its first channel.
+ */
+ unsigned
+ group() const
+ {
+ return _group;
+ }
+
/**
* Allocate a virtual register of natural vector size (one for this IR)
* and SIMD width. \p n gives the amount of space to allocate in
assert(inst->exec_size <= 32);
assert(inst->exec_size == dispatch_width() ||
force_writemask_all);
- assert(_group == 0 || _group == 8);
- inst->force_sechalf = (_group == 8);
+ inst->group = _group;
inst->force_writemask_all = force_writemask_all;
inst->annotation = annotation.str;
inst->ir = annotation.ir;
return a->opcode == b->opcode &&
a->force_writemask_all == b->force_writemask_all &&
a->exec_size == b->exec_size &&
- a->force_sechalf == b->force_sechalf &&
+ a->group == b->group &&
a->saturate == b->saturate &&
a->predicate == b->predicate &&
a->predicate_inverse == b->predicate_inverse &&
copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
copy = bld.MOV(inst->dst, src);
- copy->force_sechalf = inst->force_sechalf;
+ copy->group = inst->group;
copy->force_writemask_all = inst->force_writemask_all;
copy->src[0].negate = negate;
}
if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
else if (prog_data->dual_src_blend) {
- if (!inst->force_sechalf)
+ if (!inst->group)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
brw_set_default_compression(p, lower_size > 8);
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
- brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size * i);
+ brw_set_default_group(p, inst->group + lower_size * i);
brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
const bool compressed =
inst->dst.component_size(inst->exec_size) > REG_SIZE;
brw_set_default_compression(p, compressed);
- brw_set_default_group(p, inst->force_sechalf ? 8 : 0);
+ brw_set_default_group(p, inst->group);
for (unsigned int i = 0; i < inst->sources; i++) {
src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen,
brw_set_default_exec_size(p, cvt(inst->exec_size) - 1);
assert(inst->force_writemask_all || inst->exec_size >= 8);
+ assert(inst->force_writemask_all || inst->group % inst->exec_size == 0);
assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen));
assert(inst->mlen <= BRW_MAX_MSG_LENGTH);
/* Check that the MOVs are the right form. */
if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
then_mov[i]->exec_size != else_mov[i]->exec_size ||
- then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
+ then_mov[i]->group != else_mov[i]->group ||
then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
then_mov[i]->is_partial_write() ||
else_mov[i]->is_partial_write() ||
*/
uint8_t exec_size;
+ /**
+ * Channel group from the hardware execution and predication mask that
+ * should be applied to the instruction. The subset of channel enable
+ * signals (calculated from the EU control flow and predication state)
+ * given by [group, group + exec_size) will be used to mask GRF writes and
+ * any other side effects of the instruction.
+ */
+ uint8_t group;
+
bool eot:1;
- bool force_sechalf:1;
bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */
};
-/**
- * Set second-half quarter control on \p inst.
- */
-static inline fs_inst *
-set_sechalf(fs_inst *inst)
-{
- inst->force_sechalf = true;
- return inst;
-}
-
/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.