lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &sample_mask)
{
+ const gen_device_info *devinfo = bld.shader->devinfo;
+
/* Get the logical send arguments. */
const fs_reg &addr = inst->src[0];
const fs_reg &src = inst->src[1];
/* Calculate the total number of components of the payload. */
const unsigned addr_sz = inst->components_read(0);
const unsigned src_sz = inst->components_read(1);
- const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1);
+ /* From the BDW PRM Volume 7, page 147:
+ *
+ * "For the Data Cache Data Port*, the header must be present for the
+ * following message types: [...] Typed read/write/atomics"
+ *
+ * Earlier generations have a similar wording. Because of this restriction
+ * we don't attempt to implement sample masks via predication for such
+ * messages prior to Gen9, since we have to provide a header anyway. On
+ * Gen11+ the header has been removed so we can only use predication.
+ */
+ const unsigned header_sz = devinfo->gen < 9 &&
+ (op == SHADER_OPCODE_TYPED_SURFACE_READ ||
+ op == SHADER_OPCODE_TYPED_SURFACE_WRITE ||
+ op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0;
const unsigned sz = header_sz + addr_sz + src_sz;
/* Allocate space for the payload. */
bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+ /* Predicate the instruction on the sample mask if no header is
+ * provided.
+ */
+ if (!header_sz && sample_mask.file != BAD_FILE &&
+ sample_mask.file != IMM) {
+ const fs_builder ubld = bld.group(1, 0).exec_all();
+ if (inst->predicate) {
+ assert(inst->predicate == BRW_PREDICATE_NORMAL);
+ assert(!inst->predicate_inverse);
+ assert(inst->flag_subreg < 2);
+ /* Combine the sample mask with the existing predicate by using a
+ * vertical predication mode.
+ */
+ inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
+ ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
+ sample_mask.type),
+ sample_mask);
+ } else {
+ inst->flag_subreg = 2;
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = false;
+ ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+ sample_mask);
+ }
+ }
+
/* Update the original instruction. */
inst->opcode = op;
inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;