intel/eu: Add a mechanism for emitting relocatable constant MOVs
[mesa.git] / src / intel / compiler / brw_eu_emit.c
index 365630b703a4775f15623e4a7ba64ccce93a6777..c9c180b1fcc9bdad7c623cf50fd51b0085e0ab33 100644 (file)
@@ -55,6 +55,7 @@ gen6_resolve_implied_move(struct brw_codegen *p,
       return;
 
    if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
+      assert(devinfo->gen < 12);
       brw_push_insn_state(p);
       brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -91,51 +92,92 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
 
    if (dest.file == BRW_MESSAGE_REGISTER_FILE)
       assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
-   else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
+   else if (dest.file == BRW_GENERAL_REGISTER_FILE)
       assert(dest.nr < 128);
 
-   gen7_convert_mrf_to_grf(p, &dest);
+   /* The hardware has a restriction where a destination of size Byte with
+    * a stride of 1 is only allowed for a packed byte MOV. For any other
+    * instruction, the stride must be at least 2, even when the destination
+    * is the NULL register.
+    */
+   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+       dest.nr == BRW_ARF_NULL &&
+       type_sz(dest.type) == 1 &&
+       dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
+      dest.hstride = BRW_HORIZONTAL_STRIDE_2;
+   }
 
-   brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
-   brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
+   gen7_convert_mrf_to_grf(p, &dest);
 
-   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+   if (devinfo->gen >= 12 &&
+       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+      assert(dest.subnr == 0);
+      assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
+             (dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+              dest.vstride == dest.width + 1));
+      assert(!dest.negate && !dest.abs);
+      brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
       brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
 
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-         brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
-        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
-           dest.hstride = BRW_HORIZONTAL_STRIDE_1;
-         brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
-      } else {
-         brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
-         brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
-         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
-             dest.file == BRW_MESSAGE_REGISTER_FILE) {
-            assert(dest.writemask != 0);
-         }
-        /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
-         *    Although Dst.HorzStride is a don't care for Align16, HW needs
-         *    this to be programmed as "01".
-         */
-         brw_inst_set_dst_hstride(devinfo, inst, 1);
-      }
+   } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+      assert(devinfo->gen < 12);
+      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+      assert(dest.subnr % 16 == 0);
+      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+             dest.vstride == dest.width + 1);
+      assert(!dest.negate && !dest.abs);
+      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
    } else {
-      brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
+      brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
+      brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
 
-      /* These are different sizes in align1 vs align16:
-       */
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-         brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
-                                       dest.indirect_offset);
-        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
-           dest.hstride = BRW_HORIZONTAL_STRIDE_1;
-         brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+      if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+         brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+
+         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+            brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
+            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+         } else {
+            brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+            brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
+            if (dest.file == BRW_GENERAL_REGISTER_FILE ||
+                dest.file == BRW_MESSAGE_REGISTER_FILE) {
+               assert(dest.writemask != 0);
+            }
+            /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
+             *    Although Dst.HorzStride is a don't care for Align16, HW needs
+             *    this to be programmed as "01".
+             */
+            brw_inst_set_dst_hstride(devinfo, inst, 1);
+         }
       } else {
-         brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
-                                        dest.indirect_offset);
-        /* even ignored in da16, still need to set as '01' */
-         brw_inst_set_dst_hstride(devinfo, inst, 1);
+         brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
+
+         /* These are different sizes in align1 vs align16:
+          */
+         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+            brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
+                                          dest.indirect_offset);
+            if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+               dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+            brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
+         } else {
+            brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
+                                           dest.indirect_offset);
+            /* even ignored in da16, still need to set as '01' */
+            brw_inst_set_dst_hstride(devinfo, inst, 1);
+         }
       }
    }
 
@@ -170,13 +212,16 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 
    if (reg.file == BRW_MESSAGE_REGISTER_FILE)
       assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
-   else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+   else if (reg.file == BRW_GENERAL_REGISTER_FILE)
       assert(reg.nr < 128);
 
    gen7_convert_mrf_to_grf(p, &reg);
 
-   if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
-                             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+   if (devinfo->gen >= 6 &&
+       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
       /* Any source modifiers or regions will be ignored, since this just
        * identifies the MRF/GRF to start reading the message contents from.
        * Check for some likely failures.
@@ -186,84 +231,110 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
    }
 
-   brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
-   brw_inst_set_src0_abs(devinfo, inst, reg.abs);
-   brw_inst_set_src0_negate(devinfo, inst, reg.negate);
-   brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
-
-   if (reg.file == BRW_IMMEDIATE_VALUE) {
-      if (reg.type == BRW_REGISTER_TYPE_DF ||
-          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
-         brw_inst_set_imm_df(devinfo, inst, reg.df);
-      else if (reg.type == BRW_REGISTER_TYPE_UQ ||
-               reg.type == BRW_REGISTER_TYPE_Q)
-         brw_inst_set_imm_uq(devinfo, inst, reg.u64);
-      else
-         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
-
-      if (type_sz(reg.type) < 8) {
-         brw_inst_set_src1_reg_file(devinfo, inst,
-                                    BRW_ARCHITECTURE_REGISTER_FILE);
-         brw_inst_set_src1_reg_hw_type(devinfo, inst,
-                                       brw_inst_src0_reg_hw_type(devinfo, inst));
-      }
+   if (devinfo->gen >= 12 &&
+       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+      assert(reg.file != BRW_IMMEDIATE_VALUE);
+      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+      assert(reg.subnr == 0);
+      assert(has_scalar_region(reg) ||
+             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+              reg.vstride == reg.width + 1));
+      assert(!reg.negate && !reg.abs);
+      brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
+      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+
+   } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
+      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+      assert(reg.subnr % 16 == 0);
+      assert(has_scalar_region(reg) ||
+             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+              reg.vstride == reg.width + 1));
+      assert(!reg.negate && !reg.abs);
+      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
    } else {
-      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
-         brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
-         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-             brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
-        } else {
-            brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
-        }
+      brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
+      brw_inst_set_src0_abs(devinfo, inst, reg.abs);
+      brw_inst_set_src0_negate(devinfo, inst, reg.negate);
+      brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
+
+      if (reg.file == BRW_IMMEDIATE_VALUE) {
+         if (reg.type == BRW_REGISTER_TYPE_DF ||
+             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
+            brw_inst_set_imm_df(devinfo, inst, reg.df);
+         else if (reg.type == BRW_REGISTER_TYPE_UQ ||
+                  reg.type == BRW_REGISTER_TYPE_Q)
+            brw_inst_set_imm_uq(devinfo, inst, reg.u64);
+         else
+            brw_inst_set_imm_ud(devinfo, inst, reg.ud);
+
+         if (devinfo->gen < 12 && type_sz(reg.type) < 8) {
+            brw_inst_set_src1_reg_file(devinfo, inst,
+                                       BRW_ARCHITECTURE_REGISTER_FILE);
+            brw_inst_set_src1_reg_hw_type(devinfo, inst,
+                                          brw_inst_src0_reg_hw_type(devinfo, inst));
+         }
       } else {
-         brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
+         if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+            brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+                brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
+            } else {
+               brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+            }
+         } else {
+            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
 
-         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-            brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
-        } else {
-            brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
-        }
-      }
+            if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+               brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
+            } else {
+               brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
+            }
+         }
 
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-        if (reg.width == BRW_WIDTH_1 &&
-             brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
-            brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
-            brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
-            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
-        } else {
-            brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
-            brw_inst_set_src0_width(devinfo, inst, reg.width);
-            brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
-        }
-      } else {
-         brw_inst_set_src0_da16_swiz_x(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
-         brw_inst_set_src0_da16_swiz_y(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
-         brw_inst_set_src0_da16_swiz_z(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
-         brw_inst_set_src0_da16_swiz_w(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
-
-         if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
-            /* This is an oddity of the fact we're using the same
-             * descriptions for registers in align_16 as align_1:
-             */
-            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
-         } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
-                    reg.type == BRW_REGISTER_TYPE_DF &&
-                    reg.vstride == BRW_VERTICAL_STRIDE_2) {
-            /* From SNB PRM:
-             *
-             * "For Align16 access mode, only encodings of 0000 and 0011
-             *  are allowed. Other codes are reserved."
-             *
-             * Presumably the DevSNB behavior applies to IVB as well.
-             */
-            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+            if (reg.width == BRW_WIDTH_1 &&
+                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
+               brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
+               brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
+               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
+            } else {
+               brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
+               brw_inst_set_src0_width(devinfo, inst, reg.width);
+               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+            }
          } else {
-            brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+            brw_inst_set_src0_da16_swiz_x(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
+            brw_inst_set_src0_da16_swiz_y(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
+            brw_inst_set_src0_da16_swiz_z(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
+            brw_inst_set_src0_da16_swiz_w(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
+
+            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
+               /* This is an oddity of the fact we're using the same
+                * descriptions for registers in align_16 as align_1:
+                */
+               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+            } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+                       reg.type == BRW_REGISTER_TYPE_DF &&
+                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
+               /* From SNB PRM:
+                *
+                * "For Align16 access mode, only encodings of 0000 and 0011
+                *  are allowed. Other codes are reserved."
+                *
+                * Presumably the DevSNB behavior applies to IVB as well.
+                */
+               brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+            } else {
+               brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
+            }
          }
       }
    }
@@ -275,130 +346,124 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 {
    const struct gen_device_info *devinfo = p->devinfo;
 
-   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+   if (reg.file == BRW_GENERAL_REGISTER_FILE)
       assert(reg.nr < 128);
 
-   /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
-    *
-    *    "Accumulator registers may be accessed explicitly as src0
-    *    operands only."
-    */
-   assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
-          reg.nr != BRW_ARF_ACCUMULATOR);
-
-   gen7_convert_mrf_to_grf(p, &reg);
-   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC ||
+       (devinfo->gen >= 12 &&
+        (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+         brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC))) {
+      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
+             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+      assert(reg.subnr == 0);
+      assert(has_scalar_region(reg) ||
+             (reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+              reg.vstride == reg.width + 1));
+      assert(!reg.negate && !reg.abs);
+      brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
+      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
+   } else {
+      /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
+       *
+       *    "Accumulator registers may be accessed explicitly as src0
+       *    operands only."
+       */
+      assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+             reg.nr != BRW_ARF_ACCUMULATOR);
 
-   brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
-   brw_inst_set_src1_abs(devinfo, inst, reg.abs);
-   brw_inst_set_src1_negate(devinfo, inst, reg.negate);
+      gen7_convert_mrf_to_grf(p, &reg);
+      assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
-   /* Only src1 can be immediate in two-argument instructions.
-    */
-   assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
+      brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
+      brw_inst_set_src1_abs(devinfo, inst, reg.abs);
+      brw_inst_set_src1_negate(devinfo, inst, reg.negate);
 
-   if (reg.file == BRW_IMMEDIATE_VALUE) {
-      /* two-argument instructions can only use 32-bit immediates */
-      assert(type_sz(reg.type) < 8);
-      brw_inst_set_imm_ud(devinfo, inst, reg.ud);
-   } else {
-      /* This is a hardware restriction, which may or may not be lifted
-       * in the future:
+      /* Only src1 can be immediate in two-argument instructions.
        */
-      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
-      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
+      assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
 
-      brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-         brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
+      if (reg.file == BRW_IMMEDIATE_VALUE) {
+         /* two-argument instructions can only use 32-bit immediates */
+         assert(type_sz(reg.type) < 8);
+         brw_inst_set_imm_ud(devinfo, inst, reg.ud);
       } else {
-         brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
-      }
+         /* This is a hardware restriction, which may or may not be lifted
+          * in the future:
+          */
+         assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+         /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-        if (reg.width == BRW_WIDTH_1 &&
-             brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
-            brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
-            brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
-            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
-        } else {
-            brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
-            brw_inst_set_src1_width(devinfo, inst, reg.width);
-            brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
-        }
-      } else {
-         brw_inst_set_src1_da16_swiz_x(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
-         brw_inst_set_src1_da16_swiz_y(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
-         brw_inst_set_src1_da16_swiz_z(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
-         brw_inst_set_src1_da16_swiz_w(devinfo, inst,
-            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
-
-         if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
-            /* This is an oddity of the fact we're using the same
-             * descriptions for registers in align_16 as align_1:
-             */
-            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
-         } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
-                    reg.type == BRW_REGISTER_TYPE_DF &&
-                    reg.vstride == BRW_VERTICAL_STRIDE_2) {
-            /* From SNB PRM:
-             *
-             * "For Align16 access mode, only encodings of 0000 and 0011
-             *  are allowed. Other codes are reserved."
-             *
-             * Presumably the DevSNB behavior applies to IVB as well.
-             */
-            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+         brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
+         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+            brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
+         } else {
+            brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+         }
+
+         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+            if (reg.width == BRW_WIDTH_1 &&
+                brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
+               brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
+               brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
+               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
+            } else {
+               brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
+               brw_inst_set_src1_width(devinfo, inst, reg.width);
+               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+            }
          } else {
-            brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+            brw_inst_set_src1_da16_swiz_x(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
+            brw_inst_set_src1_da16_swiz_y(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
+            brw_inst_set_src1_da16_swiz_z(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
+            brw_inst_set_src1_da16_swiz_w(devinfo, inst,
+               BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
+
+            if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
+               /* This is an oddity of the fact we're using the same
+                * descriptions for registers in align_16 as align_1:
+                */
+               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+            } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+                       reg.type == BRW_REGISTER_TYPE_DF &&
+                       reg.vstride == BRW_VERTICAL_STRIDE_2) {
+               /* From SNB PRM:
+                *
+                * "For Align16 access mode, only encodings of 0000 and 0011
+                *  are allowed. Other codes are reserved."
+                *
+                * Presumably the DevSNB behavior applies to IVB as well.
+                */
+               brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
+            } else {
+               brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
+            }
          }
       }
    }
 }
 
 /**
- * Set the Message Descriptor and Extended Message Descriptor fields
- * for SEND messages.
- *
- * \note This zeroes out the Function Control bits, so it must be called
- *       \b before filling out any message-specific data.  Callers can
- *       choose not to fill in irrelevant bits; they will be zero.
+ * Specify the descriptor and extended descriptor immediate for a SEND(C)
+ * message instruction.
  */
 void
-brw_set_message_descriptor(struct brw_codegen *p,
-                          brw_inst *inst,
-                          enum brw_message_target sfid,
-                          unsigned msg_length,
-                          unsigned response_length,
-                          bool header_present,
-                          bool end_of_thread)
+brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
+                unsigned desc, unsigned ex_desc)
 {
    const struct gen_device_info *devinfo = p->devinfo;
-
-   brw_set_src1(p, inst, brw_imm_d(0));
-
-   /* For indirect sends, `inst` will not be the SEND/SENDC instruction
-    * itself; instead, it will be a MOV/OR into the address register.
-    *
-    * In this case, we avoid setting the extended message descriptor bits,
-    * since they go on the later SEND/SENDC instead and if set here would
-    * instead clobber the conditionalmod bits.
-    */
-   unsigned opcode = brw_inst_opcode(devinfo, inst);
-   if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
-      brw_inst_set_sfid(devinfo, inst, sfid);
-   }
-
-   brw_inst_set_mlen(devinfo, inst, msg_length);
-   brw_inst_set_rlen(devinfo, inst, response_length);
-   brw_inst_set_eot(devinfo, inst, end_of_thread);
-
-   if (devinfo->gen >= 5) {
-      brw_inst_set_header_present(devinfo, inst, header_present);
-   }
+   assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC);
+   if (devinfo->gen < 12)
+      brw_inst_set_src1_file_type(devinfo, inst,
+                                  BRW_IMMEDIATE_VALUE, BRW_REGISTER_TYPE_UD);
+   brw_inst_set_send_desc(devinfo, inst, desc);
+   if (devinfo->gen >= 9)
+      brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
 }
 
 static void brw_set_math_message( struct brw_codegen *p,
@@ -436,9 +501,10 @@ static void brw_set_math_message( struct brw_codegen *p,
       break;
    }
 
+   brw_set_desc(p, inst, brw_message_desc(
+                   devinfo, msg_length, response_length, false));
 
-   brw_set_message_descriptor(p, inst, BRW_SFID_MATH,
-                             msg_length, response_length, false, false);
+   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MATH);
    brw_inst_set_math_msg_function(devinfo, inst, function);
    brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
    brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
@@ -456,8 +522,11 @@ static void brw_set_ff_sync_message(struct brw_codegen *p,
 {
    const struct gen_device_info *devinfo = p->devinfo;
 
-   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
-                             1, response_length, true, end_of_thread);
+   brw_set_desc(p, insn, brw_message_desc(
+                   devinfo, 1, response_length, true));
+
+   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
+   brw_inst_set_eot(devinfo, insn, end_of_thread);
    brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
    brw_inst_set_urb_allocate(devinfo, insn, allocate);
    /* The following fields are not used by FF_SYNC: */
@@ -481,9 +550,11 @@ static void brw_set_urb_message( struct brw_codegen *p,
    assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
    assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
 
-   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
-                             msg_length, response_length, true,
-                              flags & BRW_URB_WRITE_EOT);
+   brw_set_desc(p, insn, brw_message_desc(
+                   devinfo, msg_length, response_length, true));
+
+   brw_inst_set_sfid(devinfo, insn, BRW_SFID_URB);
+   brw_inst_set_eot(devinfo, insn, !!(flags & BRW_URB_WRITE_EOT));
 
    if (flags & BRW_URB_WRITE_OWORD) {
       assert(msg_length == 2); /* header + one OWORD of data */
@@ -508,91 +579,6 @@ static void brw_set_urb_message( struct brw_codegen *p,
    }
 }
 
-void
-brw_set_dp_write_message(struct brw_codegen *p,
-                        brw_inst *insn,
-                        unsigned binding_table_index,
-                        unsigned msg_control,
-                        unsigned msg_type,
-                         unsigned target_cache,
-                        unsigned msg_length,
-                        bool header_present,
-                        unsigned last_render_target,
-                        unsigned response_length,
-                        unsigned end_of_thread,
-                        unsigned send_commit_msg)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
-                          BRW_SFID_DATAPORT_WRITE);
-
-   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
-                             header_present, end_of_thread);
-
-   brw_inst_set_binding_table_index(devinfo, insn, binding_table_index);
-   brw_inst_set_dp_write_msg_type(devinfo, insn, msg_type);
-   brw_inst_set_dp_write_msg_control(devinfo, insn, msg_control);
-   brw_inst_set_rt_last(devinfo, insn, last_render_target);
-   if (devinfo->gen < 7) {
-      brw_inst_set_dp_write_commit(devinfo, insn, send_commit_msg);
-   }
-
-   if (devinfo->gen >= 11)
-      brw_inst_set_null_rt(devinfo, insn, false);
-}
-
-void
-brw_set_dp_read_message(struct brw_codegen *p,
-                       brw_inst *insn,
-                       unsigned binding_table_index,
-                       unsigned msg_control,
-                       unsigned msg_type,
-                       unsigned target_cache,
-                       unsigned msg_length,
-                        bool header_present,
-                       unsigned response_length)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
-                          BRW_SFID_DATAPORT_READ);
-
-   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
-                             header_present, false);
-
-   brw_inst_set_binding_table_index(devinfo, insn, binding_table_index);
-   brw_inst_set_dp_read_msg_type(devinfo, insn, msg_type);
-   brw_inst_set_dp_read_msg_control(devinfo, insn, msg_control);
-   if (devinfo->gen < 6)
-      brw_inst_set_dp_read_target_cache(devinfo, insn, target_cache);
-}
-
-void
-brw_set_sampler_message(struct brw_codegen *p,
-                        brw_inst *inst,
-                        unsigned binding_table_index,
-                        unsigned sampler,
-                        unsigned msg_type,
-                        unsigned response_length,
-                        unsigned msg_length,
-                        unsigned header_present,
-                        unsigned simd_mode,
-                        unsigned return_format)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-
-   brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length,
-                             response_length, header_present, false);
-
-   brw_inst_set_binding_table_index(devinfo, inst, binding_table_index);
-   brw_inst_set_sampler(devinfo, inst, sampler);
-   brw_inst_set_sampler_msg_type(devinfo, inst, msg_type);
-   if (devinfo->gen >= 5) {
-      brw_inst_set_sampler_simd_mode(devinfo, inst, simd_mode);
-   } else if (devinfo->gen == 4 && !devinfo->is_g4x) {
-      brw_inst_set_sampler_return_format(devinfo, inst, return_format);
-   }
-}
-
 static void
 gen7_set_dp_scratch_message(struct brw_codegen *p,
                             brw_inst *inst,
@@ -608,11 +594,13 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
    const struct gen_device_info *devinfo = p->devinfo;
    assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
           (devinfo->gen >= 8 && num_regs == 8));
-   const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
+   const unsigned block_size = (devinfo->gen >= 8 ? util_logbase2(num_regs) :
                                 num_regs - 1);
 
-   brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
-                              mlen, rlen, header_present, false);
+   brw_set_desc(p, inst, brw_message_desc(
+                   devinfo, mlen, rlen, header_present));
+
+   brw_inst_set_sfid(devinfo, inst, GEN7_SFID_DATAPORT_DATA_CACHE);
    brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
    brw_inst_set_scratch_read_write(devinfo, inst, write);
    brw_inst_set_scratch_type(devinfo, inst, dword);
@@ -631,6 +619,8 @@ brw_inst_set_state(const struct gen_device_info *devinfo,
    brw_inst_set_compression(devinfo, insn, state->compressed);
    brw_inst_set_access_mode(devinfo, insn, state->access_mode);
    brw_inst_set_mask_control(devinfo, insn, state->mask_control);
+   if (devinfo->gen >= 12)
+      brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(state->swsb));
    brw_inst_set_saturate(devinfo, insn, state->saturate);
    brw_inst_set_pred_control(devinfo, insn, state->predicate);
    brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);
@@ -650,20 +640,62 @@ brw_inst_set_state(const struct gen_device_info *devinfo,
       brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
 }
 
-#define next_insn brw_next_insn
-brw_inst *
-brw_next_insn(struct brw_codegen *p, unsigned opcode)
+static brw_inst *
+brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned align)
 {
-   const struct gen_device_info *devinfo = p->devinfo;
-   brw_inst *insn;
+   assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
+   assert(util_is_power_of_two_or_zero(align));
+   const unsigned align_insn = MAX2(align / sizeof(brw_inst), 1);
+   const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
+   const unsigned new_nr_insn = start_insn + nr_insn;
 
-   if (p->nr_insn + 1 > p->store_size) {
-      p->store_size <<= 1;
+   if (p->store_size < new_nr_insn) {
+      p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
       p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
    }
 
-   p->next_insn_offset += 16;
-   insn = &p->store[p->nr_insn++];
+   /* Memset any padding due to alignment to 0.  We don't want to be hashing
+    * or caching a bunch of random bits we got from a memory allocation.
+    */
+   if (p->nr_insn < start_insn) {
+      memset(&p->store[p->nr_insn], 0,
+             (start_insn - p->nr_insn) * sizeof(brw_inst));
+   }
+
+   assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
+   p->nr_insn = new_nr_insn;
+   p->next_insn_offset = new_nr_insn * sizeof(brw_inst);
+
+   return &p->store[start_insn];
+}
+
+void
+brw_realign(struct brw_codegen *p, unsigned align)
+{
+   brw_append_insns(p, 0, align);
+}
+
+int
+brw_append_data(struct brw_codegen *p, void *data,
+                unsigned size, unsigned align)
+{
+   unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
+   void *dst = brw_append_insns(p, nr_insn, align);
+   memcpy(dst, data, size);
+
+   /* If it's not a whole number of instructions, memset the end */
+   if (size < nr_insn * sizeof(brw_inst))
+      memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);
+
+   return dst - (void *)p->store;
+}
+
+#define next_insn brw_next_insn
+brw_inst *
+brw_next_insn(struct brw_codegen *p, unsigned opcode)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));
 
    memset(insn, 0, sizeof(*insn));
    brw_inst_set_opcode(devinfo, insn, opcode);
@@ -710,12 +742,17 @@ get_3src_subreg_nr(struct brw_reg reg)
 }
 
 static enum gen10_align1_3src_vertical_stride
-to_3src_align1_vstride(enum brw_vertical_stride vstride)
+to_3src_align1_vstride(const struct gen_device_info *devinfo,
+                       enum brw_vertical_stride vstride)
 {
    switch (vstride) {
    case BRW_VERTICAL_STRIDE_0:
       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
+   case BRW_VERTICAL_STRIDE_1:
+      assert(devinfo->gen >= 12);
+      return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
    case BRW_VERTICAL_STRIDE_2:
+      assert(devinfo->gen < 12);
       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
    case BRW_VERTICAL_STRIDE_4:
       return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
@@ -755,9 +792,14 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
    gen7_convert_mrf_to_grf(p, &dest);
 
    assert(dest.nr < 128);
-   assert(src0.nr < 128);
-   assert(src1.nr < 128);
-   assert(src2.nr < 128);
+
+   if (devinfo->gen >= 10)
+      assert(!(src0.file == BRW_IMMEDIATE_VALUE &&
+               src2.file == BRW_IMMEDIATE_VALUE));
+
+   assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
+   assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
+   assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
@@ -767,14 +809,19 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
       assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
              dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
 
-      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
-         brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
-                                           BRW_ALIGN1_3SRC_ACCUMULATOR);
-         brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
-      } else {
-         brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
-                                           BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
+      if (devinfo->gen >= 12) {
+         brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dest.file);
          brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+      } else {
+         if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE) {
+            brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
+                                              BRW_ALIGN1_3SRC_ACCUMULATOR);
+            brw_inst_set_3src_dst_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+         } else {
+            brw_inst_set_3src_a1_dst_reg_file(devinfo, inst,
+                                              BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
+            brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
+         }
       }
       brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, dest.subnr / 8);
 
@@ -793,27 +840,26 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
       brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
       brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
 
-      brw_inst_set_3src_a1_src0_vstride(devinfo, inst,
-                                        to_3src_align1_vstride(src0.vstride));
-      brw_inst_set_3src_a1_src1_vstride(devinfo, inst,
-                                        to_3src_align1_vstride(src1.vstride));
-      /* no vstride on src2 */
-
-      brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
-                                        to_3src_align1_hstride(src0.hstride));
-      brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
-                                        to_3src_align1_hstride(src1.hstride));
-      brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
-                                        to_3src_align1_hstride(src2.hstride));
-
-      brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
-      if (src0.type == BRW_REGISTER_TYPE_NF) {
-         brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+      if (src0.file == BRW_IMMEDIATE_VALUE) {
+         brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
       } else {
-         brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
+         brw_inst_set_3src_a1_src0_vstride(
+            devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
+         brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
+                                           to_3src_align1_hstride(src0.hstride));
+         brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, src0.subnr);
+         if (src0.type == BRW_REGISTER_TYPE_NF) {
+            brw_inst_set_3src_src0_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
+         } else {
+            brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
+         }
+         brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
+         brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
       }
-      brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
-      brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
+      brw_inst_set_3src_a1_src1_vstride(
+         devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
+      brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
+                                        to_3src_align1_hstride(src1.hstride));
 
       brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, src1.subnr);
       if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
@@ -824,10 +870,17 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
       brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
       brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
 
-      brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
-      brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
-      brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
-      brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
+      if (src2.file == BRW_IMMEDIATE_VALUE) {
+         brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
+      } else {
+         brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
+                                           to_3src_align1_hstride(src2.hstride));
+         /* no vstride on src2 */
+         brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, src2.subnr);
+         brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
+         brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
+         brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
+      }
 
       assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
              src0.file == BRW_IMMEDIATE_VALUE ||
@@ -838,31 +891,49 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
       assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
              src2.file == BRW_IMMEDIATE_VALUE);
 
-      brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
-                                         src0.file == BRW_GENERAL_REGISTER_FILE ?
-                                         BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
-                                         BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
-      brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
-                                         src1.file == BRW_GENERAL_REGISTER_FILE ?
-                                         BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
-                                         BRW_ALIGN1_3SRC_ACCUMULATOR);
-      brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
-                                         src2.file == BRW_GENERAL_REGISTER_FILE ?
-                                         BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
-                                         BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+      if (devinfo->gen >= 12) {
+         if (src0.file == BRW_IMMEDIATE_VALUE) {
+            brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
+         } else {
+            brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
+         }
+
+         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
+
+         if (src2.file == BRW_IMMEDIATE_VALUE) {
+            brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
+         } else {
+            brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
+         }
+      } else {
+         brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
+                                            src0.file == BRW_GENERAL_REGISTER_FILE ?
+                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+                                            BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+         brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
+                                            src1.file == BRW_GENERAL_REGISTER_FILE ?
+                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+                                            BRW_ALIGN1_3SRC_ACCUMULATOR);
+         brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
+                                            src2.file == BRW_GENERAL_REGISTER_FILE ?
+                                            BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
+                                            BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
+      }
+
    } else {
       assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
              dest.file == BRW_MESSAGE_REGISTER_FILE);
       assert(dest.type == BRW_REGISTER_TYPE_F  ||
              dest.type == BRW_REGISTER_TYPE_DF ||
              dest.type == BRW_REGISTER_TYPE_D  ||
-             dest.type == BRW_REGISTER_TYPE_UD);
+             dest.type == BRW_REGISTER_TYPE_UD ||
+             (dest.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 8));
       if (devinfo->gen == 6) {
          brw_inst_set_3src_a16_dst_reg_file(devinfo, inst,
                                             dest.file == BRW_MESSAGE_REGISTER_FILE);
       }
       brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
-      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
+      brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
       brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
 
       assert(src0.file == BRW_GENERAL_REGISTER_FILE);
@@ -901,6 +972,22 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
           */
          brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
          brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
+
+         /* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
+          *
+          *    "Three source instructions can use operands with mixed-mode
+          *     precision. When SrcType field is set to :f or :hf it defines
+          *     precision for source 0 only, and fields Src1Type and Src2Type
+          *     define precision for other source operands:
+          *
+          *     0b = :f. Single precision Float (32-bit).
+          *     1b = :hf. Half precision Float (16-bit)."
+          */
+         if (src1.type == BRW_REGISTER_TYPE_HF)
+            brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
+
+         if (src2.type == BRW_REGISTER_TYPE_HF)
+            brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
       }
    }
 
@@ -934,7 +1021,15 @@ brw_inst *brw_##OP(struct brw_codegen *p,         \
              struct brw_reg src0,                      \
              struct brw_reg src1,                      \
              struct brw_reg src2)                      \
-{                                                      \
+{                                                       \
+   if (p->current->access_mode == BRW_ALIGN_16) {       \
+      if (src0.vstride == BRW_VERTICAL_STRIDE_0)        \
+         src0.swizzle = BRW_SWIZZLE_XXXX;               \
+      if (src1.vstride == BRW_VERTICAL_STRIDE_0)        \
+         src1.swizzle = BRW_SWIZZLE_XXXX;               \
+      if (src2.vstride == BRW_VERTICAL_STRIDE_0)        \
+         src2.swizzle = BRW_SWIZZLE_XXXX;               \
+   }                                                    \
    return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);        \
 }
 
@@ -955,37 +1050,19 @@ brw_inst *brw_##OP(struct brw_codegen *p,         \
       assert(src0.type == BRW_REGISTER_TYPE_DF);                \
       assert(src1.type == BRW_REGISTER_TYPE_DF);                \
       assert(src2.type == BRW_REGISTER_TYPE_DF);                \
+   }                                                            \
+                                                                \
+   if (p->current->access_mode == BRW_ALIGN_16) {               \
+      if (src0.vstride == BRW_VERTICAL_STRIDE_0)                \
+         src0.swizzle = BRW_SWIZZLE_XXXX;                       \
+      if (src1.vstride == BRW_VERTICAL_STRIDE_0)                \
+         src1.swizzle = BRW_SWIZZLE_XXXX;                       \
+      if (src2.vstride == BRW_VERTICAL_STRIDE_0)                \
+         src2.swizzle = BRW_SWIZZLE_XXXX;                       \
    }                                                            \
    return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
 }
 
-/* Rounding operations (other than RNDD) require two instructions - the first
- * stores a rounded value (possibly the wrong way) in the dest register, but
- * also sets a per-channel "increment bit" in the flag register.  A predicated
- * add of 1.0 fixes dest to contain the desired result.
- *
- * Sandybridge and later appear to round correctly without an ADD.
- */
-#define ROUND(OP)                                                            \
-void brw_##OP(struct brw_codegen *p,                                         \
-             struct brw_reg dest,                                            \
-             struct brw_reg src)                                             \
-{                                                                            \
-   const struct gen_device_info *devinfo = p->devinfo;                                       \
-   brw_inst *rnd, *add;                                                              \
-   rnd = next_insn(p, BRW_OPCODE_##OP);                                              \
-   brw_set_dest(p, rnd, dest);                                               \
-   brw_set_src0(p, rnd, src);                                                \
-                                                                             \
-   if (devinfo->gen < 6) {                                                           \
-      /* turn on round-increments */                                         \
-      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);            \
-      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                         \
-      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);          \
-   }                                                                         \
-}
-
-
 ALU2(SEL)
 ALU1(NOT)
 ALU2(AND)
@@ -995,9 +1072,14 @@ ALU2(SHR)
 ALU2(SHL)
 ALU1(DIM)
 ALU2(ASR)
+ALU2(ROL)
+ALU2(ROR)
 ALU3(CSEL)
 ALU1(FRC)
 ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDU)
+ALU1(RNDZ)
 ALU2(MAC)
 ALU2(MACH)
 ALU1(LZD)
@@ -1017,17 +1099,14 @@ ALU1(CBIT)
 ALU2(ADDC)
 ALU2(SUBB)
 
-ROUND(RNDZ)
-ROUND(RNDE)
-
 brw_inst *
 brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
 {
    const struct gen_device_info *devinfo = p->devinfo;
 
    /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
-    * To avoid the problems that causes, we use a <1,2,0> source region to read
-    * each element twice.
+    * To avoid the problems that causes, we use an <X,2,0> source region to
+    * read each element twice.
     */
    if (devinfo->gen == 7 && !devinfo->is_haswell &&
        brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
@@ -1036,11 +1115,8 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
         src0.type == BRW_REGISTER_TYPE_D ||
         src0.type == BRW_REGISTER_TYPE_UD) &&
        !has_scalar_region(src0)) {
-      assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
-             src0.width == BRW_WIDTH_4 &&
-             src0.hstride == BRW_HORIZONTAL_STRIDE_1);
-
-      src0.vstride = BRW_VERTICAL_STRIDE_1;
+      assert(src0.vstride == src0.width + src0.hstride);
+      src0.vstride = src0.hstride;
       src0.width = BRW_WIDTH_2;
       src0.hstride = BRW_HORIZONTAL_STRIDE_0;
    }
@@ -1186,9 +1262,12 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
    }
 
    if (needs_zero_fill) {
-      brw_inst_set_no_dd_clear(devinfo, inst, true);
+      if (devinfo->gen < 12)
+         brw_inst_set_no_dd_clear(devinfo, inst, true);
+      brw_set_default_swsb(p, tgl_swsb_null());
       inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
-      brw_inst_set_no_dd_check(devinfo, inst, true);
+      if (devinfo->gen < 12)
+         brw_inst_set_no_dd_check(devinfo, inst, true);
    }
 
    brw_pop_insn_state(p);
@@ -1234,9 +1313,11 @@ void brw_NOP(struct brw_codegen *p)
    brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP);
 }
 
-
-
-
+void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
+{
+   brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
+   brw_inst_set_cond_modifier(p->devinfo, insn, func);
+}
 
 /***********************************************************************
  * Comparisons, if/else/endif
@@ -1340,7 +1421,8 @@ brw_IF(struct brw_codegen *p, unsigned execute_size)
       brw_inst_set_uip(devinfo, insn, 0);
    } else {
       brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
-      brw_set_src0(p, insn, brw_imm_d(0));
+      if (devinfo->gen < 12)
+         brw_set_src0(p, insn, brw_imm_d(0));
       brw_inst_set_jip(devinfo, insn, 0);
       brw_inst_set_uip(devinfo, insn, 0);
    }
@@ -1540,7 +1622,8 @@ brw_ELSE(struct brw_codegen *p)
       brw_inst_set_uip(devinfo, insn, 0);
    } else {
       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-      brw_set_src0(p, insn, brw_imm_d(0));
+      if (devinfo->gen < 12)
+         brw_set_src0(p, insn, brw_imm_d(0));
       brw_inst_set_jip(devinfo, insn, 0);
       brw_inst_set_uip(devinfo, insn, 0);
    }
@@ -1686,18 +1769,27 @@ brw_CONT(struct brw_codegen *p)
 }
 
 brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    brw_inst *insn;
 
    insn = next_insn(p, BRW_OPCODE_HALT);
    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-   if (devinfo->gen >= 8) {
-      brw_set_src0(p, insn, brw_imm_d(0x0));
-   } else {
+   if (devinfo->gen < 6) {
+      /* From the Gen4 PRM:
+       *
+       *    "IP register must be put (for example, by the assembler) at <dst>
+       *    and <src0> locations.
+       */
+      brw_set_dest(p, insn, brw_ip_reg());
+      brw_set_src0(p, insn, brw_ip_reg());
+      brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+   } else if (devinfo->gen < 8) {
       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+   } else if (devinfo->gen < 12) {
+      brw_set_src0(p, insn, brw_imm_d(0x0));
    }
 
    brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
@@ -1793,7 +1885,8 @@ brw_WHILE(struct brw_codegen *p)
 
       if (devinfo->gen >= 8) {
          brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
-         brw_set_src0(p, insn, brw_imm_d(0));
+         if (devinfo->gen < 12)
+            brw_set_src0(p, insn, brw_imm_d(0));
          brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
       } else if (devinfo->gen == 7) {
          brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -1961,8 +2054,10 @@ void gen6_math(struct brw_codegen *p,
       assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
              (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
    } else {
-      assert(src0.type == BRW_REGISTER_TYPE_F);
-      assert(src1.type == BRW_REGISTER_TYPE_F);
+      assert(src0.type == BRW_REGISTER_TYPE_F ||
+             (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
+      assert(src1.type == BRW_REGISTER_TYPE_F ||
+             (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
    }
 
    /* Source modifiers are ignored for extended math instructions on Gen6. */
@@ -2010,7 +2105,8 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
    const unsigned target_cache =
       (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
        devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
-       BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+       BRW_SFID_DATAPORT_WRITE);
+   const struct tgl_swsb swsb = brw_get_default_swsb(p);
    uint32_t msg_type;
 
    if (devinfo->gen >= 6)
@@ -2030,11 +2126,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
       brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
 
       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
       /* set message header global offset field (reg 0, element 2) */
       brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_swsb(p, tgl_swsb_null());
       brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
@@ -2042,6 +2140,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
              brw_imm_ud(offset));
 
       brw_pop_insn_state(p);
+      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
    }
 
    {
@@ -2051,6 +2150,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);
 
+      brw_inst_set_sfid(devinfo, insn, target_cache);
       brw_inst_set_compression(devinfo, insn, false);
 
       if (brw_inst_exec_size(devinfo, insn) >= 16)
@@ -2090,18 +2190,12 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
       else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
-      brw_set_dp_write_message(p,
-                              insn,
-                               brw_scratch_surface_idx(p),
-                              BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
-                              msg_type,
-                               target_cache,
-                              mlen,
-                              true, /* header_present */
-                              0, /* not a render target */
-                              send_commit_msg, /* response_length */
-                              0, /* eot */
-                              send_commit_msg);
+      brw_set_desc(p, insn,
+                   brw_message_desc(devinfo, mlen, send_commit_msg, true) |
+                   brw_dp_write_desc(devinfo, brw_scratch_surface_idx(p),
+                                     BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
+                                     msg_type, 0, /* not a render target */
+                                     send_commit_msg));
    }
 }
 
@@ -2121,6 +2215,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
                             unsigned offset)
 {
    const struct gen_device_info *devinfo = p->devinfo;
+   const struct tgl_swsb swsb = brw_get_default_swsb(p);
 
    if (devinfo->gen >= 6)
       offset /= 16;
@@ -2143,10 +2238,11 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
    const unsigned target_cache =
       (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
        devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
-       BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+       BRW_SFID_DATAPORT_READ);
 
    {
       brw_push_insn_state(p);
+      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
       brw_set_default_exec_size(p, BRW_EXECUTE_8);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -2155,14 +2251,17 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
 
       /* set message header global offset field (reg 0, element 2) */
       brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_swsb(p, tgl_swsb_null());
       brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
 
       brw_pop_insn_state(p);
+      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
    }
 
    {
       brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
+      brw_inst_set_sfid(devinfo, insn, target_cache);
       assert(brw_inst_pred_control(devinfo, insn) == 0);
       brw_inst_set_compression(devinfo, insn, false);
 
@@ -2174,15 +2273,12 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
          brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
       }
 
-      brw_set_dp_read_message(p,
-                             insn,
-                              brw_scratch_surface_idx(p),
-                             BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
-                             BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-                             target_cache,
-                             1, /* msg_length */
-                              true, /* header_present */
-                             rlen);
+      brw_set_desc(p, insn,
+                   brw_message_desc(devinfo, 1, rlen, true) |
+                   brw_dp_read_desc(devinfo, brw_scratch_surface_idx(p),
+                                    BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
+                                    BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+                                    BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
    }
 }
 
@@ -2234,8 +2330,9 @@ void brw_oword_block_read(struct brw_codegen *p,
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned target_cache =
       (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
-       BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+       BRW_SFID_DATAPORT_READ);
    const unsigned exec_size = 1 << brw_get_default_exec_size(p);
+   const struct tgl_swsb swsb = brw_get_default_swsb(p);
 
    /* On newer hardware, offset is in units of owords. */
    if (devinfo->gen >= 6)
@@ -2250,10 +2347,12 @@ void brw_oword_block_read(struct brw_codegen *p,
 
    brw_push_insn_state(p);
    brw_set_default_exec_size(p, BRW_EXECUTE_8);
+   brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
    /* set message header global offset field (reg 0, element 2) */
    brw_set_default_exec_size(p, BRW_EXECUTE_1);
+   brw_set_default_swsb(p, tgl_swsb_null());
    brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
@@ -2261,8 +2360,12 @@ void brw_oword_block_read(struct brw_codegen *p,
           brw_imm_ud(offset));
    brw_pop_insn_state(p);
 
+   brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
+
    brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
+   brw_inst_set_sfid(devinfo, insn, target_cache);
+
    /* cast dest to a uword[8] vector */
    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
 
@@ -2274,13 +2377,12 @@ void brw_oword_block_read(struct brw_codegen *p,
       brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
    }
 
-   brw_set_dp_read_message(p, insn, bind_table_index,
-                           BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
-                          BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
-                          target_cache,
-                          1, /* msg_length */
-                           true, /* header_present */
-                          DIV_ROUND_UP(exec_size, 8)); /* response_length */
+   brw_set_desc(p, insn,
+                brw_message_desc(devinfo, 1, DIV_ROUND_UP(exec_size, 8), true) |
+                brw_dp_read_desc(devinfo, bind_table_index,
+                                 BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
+                                 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+                                 BRW_DATAPORT_READ_TARGET_DATA_CACHE));
 
    brw_pop_insn_state(p);
 }
@@ -2300,7 +2402,7 @@ brw_fb_WRITE(struct brw_codegen *p,
    const struct gen_device_info *devinfo = p->devinfo;
    const unsigned target_cache =
       (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
-       BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+       BRW_SFID_DATAPORT_WRITE);
    brw_inst *insn;
    unsigned msg_type;
    struct brw_reg dest, src0;
@@ -2315,6 +2417,7 @@ brw_fb_WRITE(struct brw_codegen *p,
    } else {
       insn = next_insn(p, BRW_OPCODE_SEND);
    }
+   brw_inst_set_sfid(devinfo, insn, target_cache);
    brw_inst_set_compression(devinfo, insn, false);
 
    if (devinfo->gen >= 6) {
@@ -2332,18 +2435,13 @@ brw_fb_WRITE(struct brw_codegen *p,
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
-   brw_set_dp_write_message(p,
-                           insn,
-                           binding_table_index,
-                           msg_control,
-                           msg_type,
-                            target_cache,
-                           msg_length,
-                           header_present,
-                           last_render_target,
-                           response_length,
-                           eot,
-                           0 /* send_commit_msg */);
+   brw_set_desc(p, insn,
+                brw_message_desc(devinfo, msg_length, response_length,
+                                 header_present) |
+                brw_dp_write_desc(devinfo, binding_table_index, msg_control,
+                                  msg_type, last_render_target,
+                                  0 /* send_commit_msg */));
+   brw_inst_set_eot(devinfo, insn, eot);
 
    return insn;
 }
@@ -2363,14 +2461,16 @@ gen9_fb_READ(struct brw_codegen *p,
       brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1;
    brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
 
+   brw_inst_set_sfid(devinfo, insn, GEN6_SFID_DATAPORT_RENDER_CACHE);
    brw_set_dest(p, insn, dst);
    brw_set_src0(p, insn, payload);
-   brw_set_dp_read_message(p, insn, binding_table_index,
-                           per_sample << 5 | msg_subtype,
-                           GEN9_DATAPORT_RC_RENDER_TARGET_READ,
-                           GEN6_SFID_DATAPORT_RENDER_CACHE,
-                           msg_length, true /* header_present */,
-                           response_length);
+   brw_set_desc(
+      p, insn,
+      brw_message_desc(devinfo, msg_length, response_length, true) |
+      brw_dp_read_desc(devinfo, binding_table_index,
+                       per_sample << 5 | msg_subtype,
+                       GEN9_DATAPORT_RC_RENDER_TARGET_READ,
+                       BRW_DATAPORT_READ_TARGET_RENDER_CACHE));
    brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);
 
    return insn;
@@ -2401,6 +2501,7 @@ void brw_SAMPLE(struct brw_codegen *p,
       gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_inst_set_sfid(devinfo, insn, BRW_SFID_SAMPLER);
    brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */
 
    /* From the 965 PRM (volume 4, part 1, section 14.2.41):
@@ -2422,15 +2523,11 @@ void brw_SAMPLE(struct brw_codegen *p,
 
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
-   brw_set_sampler_message(p, insn,
-                           binding_table_index,
-                           sampler,
-                           msg_type,
-                           response_length,
-                           msg_length,
-                           header_present,
-                           simd_mode,
-                           return_format);
+   brw_set_desc(p, insn,
+                brw_message_desc(devinfo, msg_length, response_length,
+                                 header_present) |
+                brw_sampler_desc(devinfo, binding_table_index, sampler,
+                                 msg_type, simd_mode, return_format));
 }
 
 /* Adjust the message header's sampler state pointer to
@@ -2470,12 +2567,15 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
 
       struct brw_reg temp = get_element_ud(header, 3);
 
+      brw_push_insn_state(p);
       brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
+      brw_set_default_swsb(p, tgl_swsb_regdist(1));
       brw_SHL(p, temp, temp, brw_imm_ud(4));
       brw_ADD(p,
               get_element_ud(header, 3),
               get_element_ud(brw_vec8_grf(0, 0), 3),
               temp);
+      brw_pop_insn_state(p);
    }
 }
 
@@ -2531,33 +2631,28 @@ void brw_urb_WRITE(struct brw_codegen *p,
                       swizzle);
 }
 
-struct brw_inst *
+void
 brw_send_indirect_message(struct brw_codegen *p,
                           unsigned sfid,
                           struct brw_reg dst,
                           struct brw_reg payload,
-                          struct brw_reg desc)
+                          struct brw_reg desc,
+                          unsigned desc_imm,
+                          bool eot)
 {
    const struct gen_device_info *devinfo = p->devinfo;
    struct brw_inst *send;
-   int setup;
 
    dst = retype(dst, BRW_REGISTER_TYPE_UW);
 
    assert(desc.type == BRW_REGISTER_TYPE_UD);
 
-   /* We hold on to the setup instruction (the SEND in the direct case, the OR
-    * in the indirect case) by its index in the instruction store.  The
-    * pointer returned by next_insn() may become invalid if emitting the SEND
-    * in the indirect case reallocs the store.
-    */
-
    if (desc.file == BRW_IMMEDIATE_VALUE) {
-      setup = p->nr_insn;
       send = next_insn(p, BRW_OPCODE_SEND);
-      brw_set_src1(p, send, desc);
-
+      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+      brw_set_desc(p, send, desc.ud | desc_imm);
    } else {
+      const struct tgl_swsb swsb = brw_get_default_swsb(p);
       struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
 
       brw_push_insn_state(p);
@@ -2565,44 +2660,157 @@ brw_send_indirect_message(struct brw_codegen *p,
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_exec_size(p, BRW_EXECUTE_1);
       brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
 
       /* Load the indirect descriptor to an address register using OR so the
-       * caller can specify additional descriptor bits with the usual
-       * brw_set_*_message() helper functions.
+       * caller can specify additional descriptor bits with the desc_imm
+       * immediate.
        */
-      setup = p->nr_insn;
-      brw_OR(p, addr, desc, brw_imm_ud(0));
+      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
 
       brw_pop_insn_state(p);
 
+      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
       send = next_insn(p, BRW_OPCODE_SEND);
-      brw_set_src1(p, send, addr);
-   }
+      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
 
-   if (dst.width < BRW_EXECUTE_8)
-      brw_inst_set_exec_size(devinfo, send, dst.width);
+      if (devinfo->gen >= 12)
+         brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
+      else
+         brw_set_src1(p, send, addr);
+   }
 
    brw_set_dest(p, send, dst);
-   brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
    brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
+}
+
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+                                unsigned sfid,
+                                struct brw_reg dst,
+                                struct brw_reg payload0,
+                                struct brw_reg payload1,
+                                struct brw_reg desc,
+                                unsigned desc_imm,
+                                struct brw_reg ex_desc,
+                                unsigned ex_desc_imm,
+                                bool eot)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   struct brw_inst *send;
+
+   dst = retype(dst, BRW_REGISTER_TYPE_UW);
+
+   assert(desc.type == BRW_REGISTER_TYPE_UD);
 
-   return &p->store[setup];
+   if (desc.file == BRW_IMMEDIATE_VALUE) {
+      desc.ud |= desc_imm;
+   } else {
+      const struct tgl_swsb swsb = brw_get_default_swsb(p);
+      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
+
+      /* Load the indirect descriptor to an address register using OR so the
+       * caller can specify additional descriptor bits with the desc_imm
+       * immediate.
+       */
+      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
+
+      brw_pop_insn_state(p);
+      desc = addr;
+
+      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
+   }
+
+   if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
+       (devinfo->gen >= 12 || (ex_desc.ud & INTEL_MASK(15, 12)) == 0)) {
+      ex_desc.ud |= ex_desc_imm;
+   } else {
+      const struct tgl_swsb swsb = brw_get_default_swsb(p);
+      struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
+
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
+
+      /* Load the indirect extended descriptor to an address register using OR
+       * so the caller can specify additional descriptor bits with the
+       * desc_imm immediate.
+       *
+       * Even though the instruction dispatcher always pulls the SFID and EOT
+       * fields from the instruction itself, actual external unit which
+       * processes the message gets the SFID and EOT from the extended
+       * descriptor which comes from the address register.  If we don't OR
+       * those two bits in, the external unit may get confused and hang.
+       */
+      unsigned imm_part = ex_desc_imm | sfid | eot << 5;
+
+      if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+         /* ex_desc bits 15:12 don't exist in the instruction encoding prior
+          * to Gen12, so we may have fallen back to an indirect extended
+          * descriptor.
+          */
+         brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
+      } else {
+         brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
+      }
+
+      brw_pop_insn_state(p);
+      ex_desc = addr;
+
+      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
+   }
+
+   send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
+
+   if (desc.file == BRW_IMMEDIATE_VALUE) {
+      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
+      brw_inst_set_send_desc(devinfo, send, desc.ud);
+   } else {
+      assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(desc.nr == BRW_ARF_ADDRESS);
+      assert(desc.subnr == 0);
+      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
+   }
+
+   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
+      brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
+   } else {
+      assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(ex_desc.nr == BRW_ARF_ADDRESS);
+      assert((ex_desc.subnr & 0x3) == 0);
+      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
+      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
+   }
+
+   brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_eot(devinfo, send, eot);
 }
 
-static struct brw_inst *
+static void
 brw_send_indirect_surface_message(struct brw_codegen *p,
                                   unsigned sfid,
                                   struct brw_reg dst,
                                   struct brw_reg payload,
                                   struct brw_reg surface,
-                                  unsigned message_len,
-                                  unsigned response_len,
-                                  bool header_present)
+                                  unsigned desc_imm)
 {
-   const struct gen_device_info *devinfo = p->devinfo;
-   struct brw_inst *insn;
-
    if (surface.file != BRW_IMMEDIATE_VALUE) {
+      const struct tgl_swsb swsb = brw_get_default_swsb(p);
       struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
 
       brw_push_insn_state(p);
@@ -2610,26 +2818,23 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_set_default_exec_size(p, BRW_EXECUTE_1);
       brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
 
       /* Mask out invalid bits from the surface index to avoid hangs e.g. when
        * some surface array is accessed out of bounds.
        */
-      insn = brw_AND(p, addr,
-                     suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
-                               BRW_GET_SWZ(surface.swizzle, 0)),
-                     brw_imm_ud(0xff));
+      brw_AND(p, addr,
+              suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
+                        BRW_GET_SWZ(surface.swizzle, 0)),
+              brw_imm_ud(0xff));
 
       brw_pop_insn_state(p);
 
       surface = addr;
+      brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
    }
 
-   insn = brw_send_indirect_message(p, sfid, dst, payload, surface);
-   brw_inst_set_mlen(devinfo, insn, message_len);
-   brw_inst_set_rlen(devinfo, insn, response_len);
-   brw_inst_set_header_present(devinfo, insn, header_present);
-
-   return insn;
+   brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
 }
 
 static bool
@@ -2678,6 +2883,8 @@ brw_find_next_block_end(struct brw_codegen *p, int start_offset)
       case BRW_OPCODE_HALT:
          if (depth == 0)
             return offset;
+      default:
+         break;
       }
    }
 
@@ -2783,6 +2990,9 @@ brw_set_uip_jip(struct brw_codegen *p, int start_offset)
          assert(brw_inst_uip(devinfo, insn) != 0);
          assert(brw_inst_jip(devinfo, insn) != 0);
         break;
+
+      default:
+         break;
       }
    }
 }
@@ -2838,73 +3048,35 @@ brw_svb_write(struct brw_codegen *p,
    const unsigned target_cache =
       (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
        devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
-       BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+       BRW_SFID_DATAPORT_WRITE);
    brw_inst *insn;
 
    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
+   brw_inst_set_sfid(devinfo, insn, target_cache);
    brw_set_dest(p, insn, dest);
    brw_set_src0(p, insn, src0);
-   brw_set_src1(p, insn, brw_imm_d(0));
-   brw_set_dp_write_message(p, insn,
-                            binding_table_index,
-                            0, /* msg_control: ignored */
-                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
-                            target_cache,
-                            1, /* msg_length */
-                            true, /* header_present */
-                            0, /* last_render_target: ignored */
-                            send_commit_msg, /* response_length */
-                            0, /* end_of_thread */
-                            send_commit_msg); /* send_commit_msg */
+   brw_set_desc(p, insn,
+                brw_message_desc(devinfo, 1, send_commit_msg, true) |
+                brw_dp_write_desc(devinfo, binding_table_index,
+                                  0, /* msg_control: ignored */
+                                  GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
+                                  0, /* last_render_target: ignored */
+                                  send_commit_msg)); /* send_commit_msg */
 }
 
 static unsigned
 brw_surface_payload_size(struct brw_codegen *p,
                          unsigned num_channels,
-                         bool has_simd4x2,
-                         bool has_simd16)
+                         unsigned exec_size /**< 0 for SIMD4x2 */)
 {
-   if (has_simd4x2 && brw_get_default_access_mode(p) == BRW_ALIGN_16)
-      return 1;
-   else if (has_simd16 && brw_get_default_exec_size(p) == BRW_EXECUTE_16)
-      return 2 * num_channels;
-   else
+   if (exec_size == 0)
+      return 1; /* SIMD4x2 */
+   else if (exec_size <= 8)
       return num_channels;
-}
-
-static void
-brw_set_dp_untyped_atomic_message(struct brw_codegen *p,
-                                  brw_inst *insn,
-                                  unsigned atomic_op,
-                                  bool response_expected)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   unsigned msg_control =
-      atomic_op | /* Atomic Operation Type: BRW_AOP_* */
-      (response_expected ? 1 << 5 : 0); /* Return data expected */
-
-   if (devinfo->gen >= 8 || devinfo->is_haswell) {
-      if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-         if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
-            msg_control |= 1 << 4; /* SIMD8 mode */
-
-         brw_inst_set_dp_msg_type(devinfo, insn,
-                                  HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP);
-      } else {
-         brw_inst_set_dp_msg_type(devinfo, insn,
-            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2);
-      }
-   } else {
-      brw_inst_set_dp_msg_type(devinfo, insn,
-                               GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);
-
-      if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
-         msg_control |= 1 << 4; /* SIMD8 mode */
-   }
-
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+   else
+      return 2 * num_channels;
 }
 
 void
@@ -2922,6 +3094,16 @@ brw_untyped_atomic(struct brw_codegen *p,
                           HSW_SFID_DATAPORT_DATA_CACHE_1 :
                           GEN7_SFID_DATAPORT_DATA_CACHE);
    const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
+   /* SIMD4x2 untyped atomic instructions only exist on HSW+ */
+   const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
+   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
+                              has_simd4x2 ? 0 : 8;
+   const unsigned response_length =
+      brw_surface_payload_size(p, response_expected, exec_size);
+   const unsigned desc =
+      brw_message_desc(devinfo, msg_length, response_length, header_present) |
+      brw_dp_untyped_atomic_desc(devinfo, exec_size, atomic_op,
+                                 response_expected);
    /* Mask out unused components -- This is especially important in Align16
     * mode on generations that don't have native support for SIMD4x2 atomics,
     * because unused but enabled components will cause the dataport to perform
@@ -2929,37 +3111,9 @@ brw_untyped_atomic(struct brw_codegen *p,
     * uninitialized Y, Z and W coordinates of the payload.
     */
    const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
-      brw_surface_payload_size(p, response_expected,
-                               devinfo->gen >= 8 || devinfo->is_haswell, true),
-      header_present);
 
-   brw_set_dp_untyped_atomic_message(
-      p, insn, atomic_op, response_expected);
-}
-
-static void
-brw_set_dp_untyped_surface_read_message(struct brw_codegen *p,
-                                        struct brw_inst *insn,
-                                        unsigned num_channels)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   /* Set mask of 32-bit channels to drop. */
-   unsigned msg_control = 0xf & (0xf << num_channels);
-
-   if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-      if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
-         msg_control |= 1 << 4; /* SIMD16 mode */
-      else
-         msg_control |= 2 << 4; /* SIMD8 mode */
-   }
-
-   brw_inst_set_dp_msg_type(devinfo, insn,
-                            (devinfo->gen >= 8 || devinfo->is_haswell ?
-                             HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ :
-                             GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ));
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+   brw_send_indirect_surface_message(p, sfid, brw_writemask(dst, mask),
+                                     payload, surface, desc);
 }
 
 void
@@ -2974,41 +3128,15 @@ brw_untyped_surface_read(struct brw_codegen *p,
    const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                           HSW_SFID_DATAPORT_DATA_CACHE_1 :
                           GEN7_SFID_DATAPORT_DATA_CACHE);
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, dst, payload, surface, msg_length,
-      brw_surface_payload_size(p, num_channels, true, true),
-      false);
-
-   brw_set_dp_untyped_surface_read_message(
-      p, insn, num_channels);
-}
-
-static void
-brw_set_dp_untyped_surface_write_message(struct brw_codegen *p,
-                                         struct brw_inst *insn,
-                                         unsigned num_channels)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   /* Set mask of 32-bit channels to drop. */
-   unsigned msg_control = 0xf & (0xf << num_channels);
-
-   if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-      if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
-         msg_control |= 1 << 4; /* SIMD16 mode */
-      else
-         msg_control |= 2 << 4; /* SIMD8 mode */
-   } else {
-      if (devinfo->gen >= 8 || devinfo->is_haswell)
-         msg_control |= 0 << 4; /* SIMD4x2 mode */
-      else
-         msg_control |= 2 << 4; /* SIMD8 mode */
-   }
+   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
+   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) : 0;
+   const unsigned response_length =
+      brw_surface_payload_size(p, num_channels, exec_size);
+   const unsigned desc =
+      brw_message_desc(devinfo, msg_length, response_length, false) |
+      brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, false);
 
-   brw_inst_set_dp_msg_type(devinfo, insn,
-                            devinfo->gen >= 8 || devinfo->is_haswell ?
-                             HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
-                             GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE);
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
+   brw_send_indirect_surface_message(p, sfid, dst, payload, surface, desc);
 }
 
 void
@@ -3024,279 +3152,33 @@ brw_untyped_surface_write(struct brw_codegen *p,
                           HSW_SFID_DATAPORT_DATA_CACHE_1 :
                           GEN7_SFID_DATAPORT_DATA_CACHE);
    const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
+   /* SIMD4x2 untyped surface write instructions only exist on HSW+ */
+   const bool has_simd4x2 = devinfo->gen >= 8 || devinfo->is_haswell;
+   const unsigned exec_size = align1 ? 1 << brw_get_default_exec_size(p) :
+                              has_simd4x2 ? 0 : 8;
+   const unsigned desc =
+      brw_message_desc(devinfo, msg_length, 0, header_present) |
+      brw_dp_untyped_surface_rw_desc(devinfo, exec_size, num_channels, true);
    /* Mask out unused components -- See comment in brw_untyped_atomic(). */
-   const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
-                          WRITEMASK_X : WRITEMASK_XYZW;
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, brw_writemask(brw_null_reg(), mask),
-      payload, surface, msg_length, 0, header_present);
+   const unsigned mask = !has_simd4x2 && !align1 ? WRITEMASK_X : WRITEMASK_XYZW;
 
-   brw_set_dp_untyped_surface_write_message(
-      p, insn, num_channels);
-}
-
-static unsigned
-brw_byte_scattered_data_element_from_bit_size(unsigned bit_size)
-{
-   switch (bit_size) {
-   case 8:
-      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
-   case 16:
-      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
-   case 32:
-      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
-   default:
-      unreachable("Unsupported bit_size for byte scattered messages");
-   }
-}
-
-
-void
-brw_byte_scattered_read(struct brw_codegen *p,
-                        struct brw_reg dst,
-                        struct brw_reg payload,
-                        struct brw_reg surface,
-                        unsigned msg_length,
-                        unsigned bit_size)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   assert(devinfo->gen > 7 || devinfo->is_haswell);
-   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-   const unsigned sfid =  GEN7_SFID_DATAPORT_DATA_CACHE;
-
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, dst, payload, surface, msg_length,
-      brw_surface_payload_size(p, 1, true, true),
-      false);
-
-   unsigned msg_control =
-      brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
-
-   if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
-      msg_control |= 1; /* SIMD16 mode */
-   else
-      msg_control |= 0; /* SIMD8 mode */
-
-   brw_inst_set_dp_msg_type(devinfo, insn,
-                            HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ);
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
-                         struct brw_reg payload,
-                         struct brw_reg surface,
-                         unsigned msg_length,
-                         unsigned bit_size,
-                         bool header_present)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   assert(devinfo->gen > 7 || devinfo->is_haswell);
-   assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
-   const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, brw_writemask(brw_null_reg(), WRITEMASK_XYZW),
-      payload, surface, msg_length, 0, header_present);
-
-   unsigned msg_control =
-      brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
-
-   if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
-      msg_control |= 1;
-   else
-      msg_control |= 0;
-
-   brw_inst_set_dp_msg_type(devinfo, insn,
-                            HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE);
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-static void
-brw_set_dp_typed_atomic_message(struct brw_codegen *p,
-                                struct brw_inst *insn,
-                                unsigned atomic_op,
-                                bool response_expected)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   unsigned msg_control =
-      atomic_op | /* Atomic Operation Type: BRW_AOP_* */
-      (response_expected ? 1 << 5 : 0); /* Return data expected */
-
-   if (devinfo->gen >= 8 || devinfo->is_haswell) {
-      if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-         if ((brw_get_default_group(p) / 8) % 2 == 1)
-            msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
-
-         brw_inst_set_dp_msg_type(devinfo, insn,
-                                  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
-      } else {
-         brw_inst_set_dp_msg_type(devinfo, insn,
-                                  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);
-      }
-
-   } else {
-      brw_inst_set_dp_msg_type(devinfo, insn,
-                               GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);
-
-      if ((brw_get_default_group(p) / 8) % 2 == 1)
-         msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
-   }
-
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_typed_atomic(struct brw_codegen *p,
-                 struct brw_reg dst,
-                 struct brw_reg payload,
-                 struct brw_reg surface,
-                 unsigned atomic_op,
-                 unsigned msg_length,
-                 bool response_expected,
-                 bool header_present) {
-   const struct gen_device_info *devinfo = p->devinfo;
-   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
-                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
-                          GEN6_SFID_DATAPORT_RENDER_CACHE);
-   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
-   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
-   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
-      brw_surface_payload_size(p, response_expected,
-                               devinfo->gen >= 8 || devinfo->is_haswell, false),
-      header_present);
-
-   brw_set_dp_typed_atomic_message(
-      p, insn, atomic_op, response_expected);
-}
-
-static void
-brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
-                                      struct brw_inst *insn,
-                                      unsigned num_channels)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   /* Set mask of unused channels. */
-   unsigned msg_control = 0xf & (0xf << num_channels);
-
-   if (devinfo->gen >= 8 || devinfo->is_haswell) {
-      if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-         if ((brw_get_default_group(p) / 8) % 2 == 1)
-            msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
-         else
-            msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
-      }
-
-      brw_inst_set_dp_msg_type(devinfo, insn,
-                               HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ);
-   } else {
-      if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-         if ((brw_get_default_group(p) / 8) % 2 == 1)
-            msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
-      }
-
-      brw_inst_set_dp_msg_type(devinfo, insn,
-                               GEN7_DATAPORT_RC_TYPED_SURFACE_READ);
-   }
-
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_typed_surface_read(struct brw_codegen *p,
-                       struct brw_reg dst,
-                       struct brw_reg payload,
-                       struct brw_reg surface,
-                       unsigned msg_length,
-                       unsigned num_channels,
-                       bool header_present)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
-                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
-                          GEN6_SFID_DATAPORT_RENDER_CACHE);
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, dst, payload, surface, msg_length,
-      brw_surface_payload_size(p, num_channels,
-                               devinfo->gen >= 8 || devinfo->is_haswell, false),
-      header_present);
-
-   brw_set_dp_typed_surface_read_message(
-      p, insn, num_channels);
-}
-
-static void
-brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
-                                       struct brw_inst *insn,
-                                       unsigned num_channels)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   /* Set mask of unused channels. */
-   unsigned msg_control = 0xf & (0xf << num_channels);
-
-   if (devinfo->gen >= 8 || devinfo->is_haswell) {
-      if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-         if ((brw_get_default_group(p) / 8) % 2 == 1)
-            msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
-         else
-            msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
-      }
-
-      brw_inst_set_dp_msg_type(devinfo, insn,
-                               HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE);
-
-   } else {
-      if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
-         if ((brw_get_default_group(p) / 8) % 2 == 1)
-            msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
-      }
-
-      brw_inst_set_dp_msg_type(devinfo, insn,
-                               GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE);
-   }
-
-   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
-}
-
-void
-brw_typed_surface_write(struct brw_codegen *p,
-                        struct brw_reg payload,
-                        struct brw_reg surface,
-                        unsigned msg_length,
-                        unsigned num_channels,
-                        bool header_present)
-{
-   const struct gen_device_info *devinfo = p->devinfo;
-   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
-                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
-                          GEN6_SFID_DATAPORT_RENDER_CACHE);
-   const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
-   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
-   const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
-                          WRITEMASK_X : WRITEMASK_XYZW);
-   struct brw_inst *insn = brw_send_indirect_surface_message(
-      p, sfid, brw_writemask(brw_null_reg(), mask),
-      payload, surface, msg_length, 0, header_present);
-
-   brw_set_dp_typed_surface_write_message(
-      p, insn, num_channels);
+   brw_send_indirect_surface_message(p, sfid, brw_writemask(brw_null_reg(), mask),
+                                     payload, surface, desc);
 }
 
 static void
 brw_set_memory_fence_message(struct brw_codegen *p,
                              struct brw_inst *insn,
                              enum brw_message_target sfid,
-                             bool commit_enable)
+                             bool commit_enable,
+                             unsigned bti)
 {
    const struct gen_device_info *devinfo = p->devinfo;
 
-   brw_set_message_descriptor(p, insn, sfid,
-                              1 /* message length */,
-                              (commit_enable ? 1 : 0) /* response length */,
-                              true /* header present */,
-                              false);
+   brw_set_desc(p, insn, brw_message_desc(
+                   devinfo, 1, (commit_enable ? 1 : 0), true));
+
+   brw_inst_set_sfid(devinfo, insn, sfid);
 
    switch (sfid) {
    case GEN6_SFID_DATAPORT_RENDER_CACHE:
@@ -3311,54 +3193,34 @@ brw_set_memory_fence_message(struct brw_codegen *p,
 
    if (commit_enable)
       brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
+
+   assert(devinfo->gen >= 11 || bti == 0);
+   brw_inst_set_binding_table_index(devinfo, insn, bti);
 }
 
 void
 brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
-                 enum opcode send_op)
+                 struct brw_reg src,
+                 enum opcode send_op,
+                 enum brw_message_target sfid,
+                 bool commit_enable,
+                 unsigned bti)
 {
    const struct gen_device_info *devinfo = p->devinfo;
-   const bool commit_enable =
-      devinfo->gen >= 10 || /* HSD ES # 1404612949 */
-      (devinfo->gen == 7 && !devinfo->is_haswell);
-   struct brw_inst *insn;
 
-   brw_push_insn_state(p);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_set_default_exec_size(p, BRW_EXECUTE_1);
-   dst = vec1(dst);
+   dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
+   src = retype(vec1(src), BRW_REGISTER_TYPE_UD);
 
    /* Set dst as destination for dependency tracking, the MEMORY_FENCE
     * message doesn't write anything back.
     */
-   insn = next_insn(p, send_op);
-   dst = retype(dst, BRW_REGISTER_TYPE_UW);
+   struct brw_inst *insn = next_insn(p, send_op);
+   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
    brw_set_dest(p, insn, dst);
-   brw_set_src0(p, insn, dst);
-   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
-                                commit_enable);
-
-   if (devinfo->gen == 7 && !devinfo->is_haswell) {
-      /* IVB does typed surface access through the render cache, so we need to
-       * flush it too.  Use a different register so both flushes can be
-       * pipelined by the hardware.
-       */
-      insn = next_insn(p, send_op);
-      brw_set_dest(p, insn, offset(dst, 1));
-      brw_set_src0(p, insn, offset(dst, 1));
-      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
-                                   commit_enable);
-
-      /* Now write the response of the second message into the response of the
-       * first to trigger a pipeline stall -- This way future render and data
-       * cache messages will be properly ordered with respect to past data and
-       * render cache messages.
-       */
-      brw_MOV(p, dst, offset(dst, 1));
-   }
-
-   brw_pop_insn_state(p);
+   brw_set_src0(p, insn, src);
+   brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
 }
 
 void
@@ -3372,24 +3234,24 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              unsigned response_length)
 {
    const struct gen_device_info *devinfo = p->devinfo;
-   struct brw_inst *insn;
    const uint16_t exec_size = brw_get_default_exec_size(p);
+   const unsigned slot_group = brw_get_default_group(p) / 16;
+   const unsigned simd_mode = (exec_size == BRW_EXECUTE_16);
+   const unsigned desc =
+      brw_message_desc(devinfo, msg_length, response_length, false) |
+      brw_pixel_interp_desc(devinfo, mode, noperspective, simd_mode,
+                            slot_group);
 
    /* brw_send_indirect_message will automatically use a direct send message
     * if data is actually immediate.
     */
-   insn = brw_send_indirect_message(p,
-                                    GEN7_SFID_PIXEL_INTERPOLATOR,
-                                    dest,
-                                    mrf,
-                                    vec1(data));
-   brw_inst_set_mlen(devinfo, insn, msg_length);
-   brw_inst_set_rlen(devinfo, insn, response_length);
-
-   brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
-   brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
-   brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
-   brw_inst_set_pi_message_type(devinfo, insn, mode);
+   brw_send_indirect_message(p,
+                             GEN7_SFID_PIXEL_INTERPOLATOR,
+                             dest,
+                             mrf,
+                             vec1(data),
+                             desc,
+                             false);
 }
 
 void
@@ -3406,6 +3268,14 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
 
    brw_push_insn_state(p);
 
+   /* The flag register is only used on Gen7 in align1 mode, so avoid setting
+    * unnecessary bits in the instruction words, get the information we need
+    * and reset the default flag register. This allows more instructions to be
+    * compacted.
+    */
+   const unsigned flag_subreg = p->current->flag_subreg;
+   brw_set_default_flag_reg(p, 0, 0);
+
    if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
@@ -3429,6 +3299,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
              * hardware.
              */
             brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
+            brw_set_default_swsb(p, tgl_swsb_regdist(1));
             brw_AND(p, vec1(dst), exec_mask, vec1(dst));
             exec_mask = vec1(dst);
          }
@@ -3439,8 +3310,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
           */
          inst = brw_FBL(p, vec1(dst), exec_mask);
       } else {
-         const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2,
-                                                  p->current->flag_subreg % 2);
+         const struct brw_reg flag = brw_flag_subreg(flag_subreg);
 
          brw_set_default_exec_size(p, BRW_EXECUTE_1);
          brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
@@ -3460,6 +3330,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
             brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control);
             brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
             brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
+            brw_inst_set_flag_reg_nr(devinfo, inst, flag_subreg / 2);
+            brw_inst_set_flag_subreg_nr(devinfo, inst, flag_subreg % 2);
          }
 
          /* Find the first bit set in the exec_size-wide portion of the flag
@@ -3531,9 +3403,18 @@ brw_broadcast(struct brw_codegen *p,
        * asserting would be mean.
        */
       const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
-      brw_MOV(p, dst,
-              (align1 ? stride(suboffset(src, i), 0, 1, 0) :
-               stride(suboffset(src, 4 * i), 0, 4, 1)));
+      src = align1 ? stride(suboffset(src, i), 0, 1, 0) :
+                     stride(suboffset(src, 4 * i), 0, 4, 1);
+
+      if (type_sz(src.type) > 4 && !devinfo->has_64bit_float) {
+         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+                    subscript(src, BRW_REGISTER_TYPE_D, 0));
+         brw_set_default_swsb(p, tgl_swsb_null());
+         brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+                    subscript(src, BRW_REGISTER_TYPE_D, 1));
+      } else {
+         brw_MOV(p, dst, src);
+      }
    } else {
       /* From the Haswell PRM section "Register Region Restrictions":
        *
@@ -3563,7 +3444,7 @@ brw_broadcast(struct brw_codegen *p,
          /* Take into account the component size and horizontal stride. */
          assert(src.vstride == src.hstride + src.width);
          brw_SHL(p, addr, vec1(idx),
-                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
+                 brw_imm_ud(util_logbase2(type_sz(src.type)) +
                             src.hstride - 1));
 
          /* We can only address up to limit bytes using the indirect
@@ -3571,15 +3452,19 @@ brw_broadcast(struct brw_codegen *p,
           * register is above this limit.
           */
          if (offset >= limit) {
+            brw_set_default_swsb(p, tgl_swsb_regdist(1));
             brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
             offset = offset % limit;
          }
 
          brw_pop_insn_state(p);
 
+         brw_set_default_swsb(p, tgl_swsb_regdist(1));
+
          /* Use indirect addressing to fetch the specified component. */
          if (type_sz(src.type) > 4 &&
-             (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+             (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
+              !devinfo->has_64bit_float)) {
             /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
              *
              *    "When source or destination datatype is 64b or operation is
@@ -3595,6 +3480,7 @@ brw_broadcast(struct brw_codegen *p,
             brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
                        retype(brw_vec1_indirect(addr.subnr, offset),
                               BRW_REGISTER_TYPE_D));
+            brw_set_default_swsb(p, tgl_swsb_null());
             brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
                        retype(brw_vec1_indirect(addr.subnr, offset + 4),
                               BRW_REGISTER_TYPE_D));
@@ -3645,10 +3531,11 @@ void brw_shader_time_add(struct brw_codegen *p,
                          struct brw_reg payload,
                          uint32_t surf_index)
 {
-   const unsigned sfid = (p->devinfo->gen >= 8 || p->devinfo->is_haswell ?
+   const struct gen_device_info *devinfo = p->devinfo;
+   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                           HSW_SFID_DATAPORT_DATA_CACHE_1 :
                           GEN7_SFID_DATAPORT_DATA_CACHE);
-   assert(p->devinfo->gen >= 7);
+   assert(devinfo->gen >= 7);
 
    brw_push_insn_state(p);
    brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -3663,10 +3550,12 @@ void brw_shader_time_add(struct brw_codegen *p,
                                       BRW_ARF_NULL, 0));
    brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                       payload.nr, 0));
-   brw_set_src1(p, send, brw_imm_ud(0));
-   brw_set_message_descriptor(p, send, sfid, 2, 0, false, false);
-   brw_inst_set_binding_table_index(p->devinfo, send, surf_index);
-   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false);
+   brw_set_desc(p, send, (brw_message_desc(devinfo, 2, 0, false) |
+                          brw_dp_untyped_atomic_desc(devinfo, 1, BRW_AOP_ADD,
+                                                     false)));
+
+   brw_inst_set_sfid(devinfo, send, sfid);
+   brw_inst_set_binding_table_index(devinfo, send, surf_index);
 
    brw_pop_insn_state(p);
 }
@@ -3689,14 +3578,9 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src)
    brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
    brw_set_src0(p, inst, src);
    brw_set_src1(p, inst, brw_null_reg());
+   brw_set_desc(p, inst, brw_message_desc(devinfo, 1, 0, false));
 
-   brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY,
-                              1 /* msg_length */,
-                              0 /* response_length */,
-                              false /* header_present */,
-                              false /* end_of_thread */);
-
-   brw_inst_set_gateway_notify(devinfo, inst, 1);
+   brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
    brw_inst_set_gateway_subfuncid(devinfo, inst,
                                   BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
 
@@ -3725,37 +3609,78 @@ brw_WAIT(struct brw_codegen *p)
    brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
 }
 
-/**
- * Changes the floating point rounding mode updating the control register
- * field defined at cr0.0[5-6] bits. This function supports the changes to
- * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations.
- * Only RTNE and RTZ rounding are enabled at nir.
- */
 void
-brw_rounding_mode(struct brw_codegen *p,
-                  enum brw_rnd_mode mode)
-{
-   const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT;
-
-   if (bits != BRW_CR0_RND_MODE_MASK) {
-      brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
-                               brw_imm_ud(~BRW_CR0_RND_MODE_MASK));
-      brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
-
-      /* From the Skylake PRM, Volume 7, page 760:
-       *  "Implementation Restriction on Register Access: When the control
-       *   register is used as an explicit source and/or destination, hardware
-       *   does not ensure execution pipeline coherency. Software must set the
-       *   thread control field to ‘switch’ for an instruction that uses
-       *   control register as an explicit operand."
-       */
-      brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
-    }
+brw_float_controls_mode(struct brw_codegen *p,
+                        unsigned mode, unsigned mask)
+{
+   /* From the Skylake PRM, Volume 7, page 760:
+    *  "Implementation Restriction on Register Access: When the control
+    *   register is used as an explicit source and/or destination, hardware
+    *   does not ensure execution pipeline coherency. Software must set the
+    *   thread control field to ‘switch’ for an instruction that uses
+    *   control register as an explicit operand."
+    *
+    * On Gen12+ this is implemented in terms of SWSB annotations instead.
+    */
+   brw_set_default_swsb(p, tgl_swsb_regdist(1));
 
-   if (bits) {
-      brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
-                              brw_imm_ud(bits));
-      brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
+   brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
+                            brw_imm_ud(~mask));
+   brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
+   if (p->devinfo->gen < 12)
       brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
+
+   if (mode) {
+      brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
+                                 brw_imm_ud(mode));
+      brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
+      if (p->devinfo->gen < 12)
+         brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
    }
+
+   if (p->devinfo->gen >= 12)
+      brw_SYNC(p, TGL_SYNC_NOP);
+}
+
+void
+brw_update_reloc_imm(const struct gen_device_info *devinfo,
+                     brw_inst *inst,
+                     uint32_t value)
+{
+   /* Sanity check that the instruction is a MOV of an immediate */
+   assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV);
+   assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
+
+   /* If it was compacted, we can't safely rewrite */
+   assert(brw_inst_cmpt_control(devinfo, inst) == 0);
+
+   brw_inst_set_imm_ud(devinfo, inst, value);
+}
+
+/* A default value for constants that will be patched at run-time.
+ * We pick an arbitrary value that prevents instruction compaction.
+ */
+#define DEFAULT_PATCH_IMM 0x4a7cc037
+
+void
+brw_MOV_reloc_imm(struct brw_codegen *p,
+                  struct brw_reg dst,
+                  enum brw_reg_type src_type,
+                  uint32_t id)
+{
+   assert(type_sz(src_type) == 4);
+   assert(type_sz(dst.type) == 4);
+
+   if (p->num_relocs + 1 > p->reloc_array_size) {
+      p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
+      p->relocs = reralloc(p->mem_ctx, p->relocs,
+                           struct brw_shader_reloc, p->reloc_array_size);
+   }
+
+   p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
+      .id = id,
+      .offset = p->next_insn_offset,
+   };
+
+   brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
 }