+ const bool is_typed_access =
+ inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL ||
+ inst->opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL ||
+ inst->opcode == SHADER_OPCODE_TYPED_ATOMIC_LOGICAL;
+
+ /* From the BDW PRM Volume 7, page 147:
+ *
+ * "For the Data Cache Data Port*, the header must be present for the
+ * following message types: [...] Typed read/write/atomics"
+ *
+ * Earlier generations have a similar wording. Because of this restriction
+ * we don't attempt to implement sample masks via predication for such
+ * messages prior to Gen9, since we have to provide a header anyway. On
+ * Gen11+ the header has been removed so we can only use predication.
+ */
+ const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
+
+ const bool has_side_effects = inst->has_side_effects();
+ fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
+ fs_reg(brw_imm_d(0xffff));
+
+ fs_reg payload, payload2;
+ unsigned mlen, ex_mlen = 0;
+ if (devinfo->gen >= 9) {
+ /* We have split sends on gen9 and above */
+ assert(header_sz == 0);
+ payload = bld.move_to_vgrf(addr, addr_sz);
+ payload2 = bld.move_to_vgrf(src, src_sz);
+ mlen = addr_sz * (inst->exec_size / 8);
+ ex_mlen = src_sz * (inst->exec_size / 8);
+ } else {
+ /* Allocate space for the payload. */
+ const unsigned sz = header_sz + addr_sz + src_sz;
+ payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+ fs_reg *const components = new fs_reg[sz];
+ unsigned n = 0;
+
+ /* Construct the payload. */
+ if (header_sz)
+ components[n++] = emit_surface_header(bld, sample_mask);
+
+ for (unsigned i = 0; i < addr_sz; i++)
+ components[n++] = offset(addr, bld, i);
+
+ for (unsigned i = 0; i < src_sz; i++)
+ components[n++] = offset(src, bld, i);
+
+ bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+ mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+
+ delete[] components;
+ }
+
+ /* Predicate the instruction on the sample mask if no header is
+ * provided.
+ */
+ if (!header_sz && sample_mask.file != BAD_FILE &&
+ sample_mask.file != IMM) {
+ const fs_builder ubld = bld.group(1, 0).exec_all();
+ if (inst->predicate) {
+ assert(inst->predicate == BRW_PREDICATE_NORMAL);
+ assert(!inst->predicate_inverse);
+ assert(inst->flag_subreg < 2);
+ /* Combine the sample mask with the existing predicate by using a
+ * vertical predication mode.
+ */
+ inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
+ ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
+ sample_mask.type),
+ sample_mask);
+ } else {
+ inst->flag_subreg = 2;
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = false;
+ ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+ sample_mask);
+ }
+ }
+
+ uint32_t sfid;
+ switch (inst->opcode) {
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+ case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ /* Byte scattered opcodes go through the normal data cache */
+ sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ /* Untyped Surface messages go through the data cache but the SFID value
+ * changed on Haswell.
+ */
+ sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
+ HSW_SFID_DATAPORT_DATA_CACHE_1 :
+ GEN7_SFID_DATAPORT_DATA_CACHE);
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ /* Typed surface messages go through the render cache on IVB and the
+ * data cache on HSW+.
+ */
+ sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
+ HSW_SFID_DATAPORT_DATA_CACHE_1 :
+ GEN6_SFID_DATAPORT_RENDER_CACHE);
+ break;
+
+ default:
+ unreachable("Unsupported surface opcode");
+ }
+
+ uint32_t desc;
+ switch (inst->opcode) {
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* num_channels */
+ false /* write */);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* num_channels */
+ true /* write */);
+ break;
+
+ case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
+ arg.ud, /* bit_size */
+ false /* write */);
+ break;