intel/disasm: Change visibility of has_uip and has_jip
[mesa.git] / src / intel / compiler / brw_eu.h
index d69bc5526cdcfc4ea33310f259fabc94d010974d..18567155fd66336d385db4e4abb567eeeb6d021c 100644 (file)
@@ -59,6 +59,9 @@ struct brw_insn_state {
    /* One of BRW_MASK_* */
    unsigned mask_control:1;
 
+   /* Scheduling info for Gen12+ */
+   struct tgl_swsb swsb;
+
    bool saturate:1;
 
    /* One of BRW_ALIGN_* */
@@ -139,6 +142,7 @@ void brw_push_insn_state( struct brw_codegen *p );
 unsigned brw_get_default_exec_size(struct brw_codegen *p);
 unsigned brw_get_default_group(struct brw_codegen *p);
 unsigned brw_get_default_access_mode(struct brw_codegen *p);
+struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
 void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
 void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
 void brw_set_default_saturate( struct brw_codegen *p, bool enable );
@@ -150,19 +154,25 @@ void brw_inst_set_group(const struct gen_device_info *devinfo,
                         brw_inst *inst, unsigned group);
 void brw_set_default_group(struct brw_codegen *p, unsigned group);
 void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
-void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc );
+void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
 void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
 void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
 void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
+void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);
 
 void brw_init_codegen(const struct gen_device_info *, struct brw_codegen *p,
                      void *mem_ctx);
+bool brw_has_jip(const struct gen_device_info *devinfo, enum opcode opcode);
+bool brw_has_uip(const struct gen_device_info *devinfo, enum opcode opcode);
 int brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
                          const struct brw_inst *inst, bool is_compacted);
 void brw_disassemble(const struct gen_device_info *devinfo,
                      const void *assembly, int start, int end, FILE *out);
 const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );
 
+bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
+                               const char *identifier);
+
 brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
 void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
 void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);
@@ -191,9 +201,6 @@ brw_inst *brw_##OP(struct brw_codegen *p,   \
              struct brw_reg src1,              \
              struct brw_reg src2);
 
-#define ROUND(OP) \
-void brw_##OP(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0);
-
 ALU1(MOV)
 ALU2(SEL)
 ALU1(NOT)
@@ -204,6 +211,8 @@ ALU2(SHR)
 ALU2(SHL)
 ALU1(DIM)
 ALU2(ASR)
+ALU2(ROL)
+ALU2(ROR)
 ALU3(CSEL)
 ALU1(F32TO16)
 ALU1(F16TO32)
@@ -212,6 +221,9 @@ ALU2(AVG)
 ALU2(MUL)
 ALU1(FRC)
 ALU1(RNDD)
+ALU1(RNDE)
+ALU1(RNDU)
+ALU1(RNDZ)
 ALU2(MAC)
 ALU2(MACH)
 ALU1(LZD)
@@ -234,50 +246,13 @@ ALU2(ADDC)
 ALU2(SUBB)
 ALU2(MAC)
 
-ROUND(RNDZ)
-ROUND(RNDE)
-
 #undef ALU1
 #undef ALU2
 #undef ALU3
-#undef ROUND
 
 
 /* Helpers for SEND instruction:
  */
-void brw_set_sampler_message(struct brw_codegen *p,
-                             brw_inst *insn,
-                             unsigned binding_table_index,
-                             unsigned sampler,
-                             unsigned msg_type,
-                             unsigned response_length,
-                             unsigned msg_length,
-                             unsigned header_present,
-                             unsigned simd_mode,
-                             unsigned return_format);
-
-void brw_set_dp_read_message(struct brw_codegen *p,
-                            brw_inst *insn,
-                            unsigned binding_table_index,
-                            unsigned msg_control,
-                            unsigned msg_type,
-                            unsigned target_cache,
-                            unsigned msg_length,
-                             bool header_present,
-                            unsigned response_length);
-
-void brw_set_dp_write_message(struct brw_codegen *p,
-                             brw_inst *insn,
-                             unsigned binding_table_index,
-                             unsigned msg_control,
-                             unsigned msg_type,
-                              unsigned target_cache,
-                             unsigned msg_length,
-                             bool header_present,
-                             unsigned last_render_target,
-                             unsigned response_length,
-                             unsigned end_of_thread,
-                             unsigned send_commit_msg);
 
 /**
  * Construct a message descriptor immediate with the specified common
@@ -299,6 +274,694 @@ brw_message_desc(const struct gen_device_info *devinfo,
    }
 }
 
+/**
+ * Extract the mlen field from message descriptor \p desc.  The field is read
+ * from bits 28:25 on gen5 and later, and from bits 23:20 on earlier parts.
+ */
+static inline unsigned
+brw_message_desc_mlen(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   const bool gen5_layout = devinfo->gen >= 5;
+
+   return gen5_layout ? GET_BITS(desc, 28, 25) : GET_BITS(desc, 23, 20);
+}
+
+/**
+ * Extract the rlen field from message descriptor \p desc: bits 19:16 before
+ * gen5, bits 24:20 from gen5 onwards.
+ */
+static inline unsigned
+brw_message_desc_rlen(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   if (devinfo->gen < 5)
+      return GET_BITS(desc, 19, 16);
+
+   return GET_BITS(desc, 24, 20);
+}
+
+/**
+ * Report the header-present flag (bit 19) of message descriptor \p desc.
+ * Asserts that the device is gen5 or later.
+ */
+static inline bool
+brw_message_desc_header_present(ASSERTED const struct gen_device_info *devinfo,
+                                uint32_t desc)
+{
+   assert(devinfo->gen >= 5);
+
+   const bool header_present = GET_BITS(desc, 19, 19);
+   return header_present;
+}
+
+/**
+ * Build an extended message descriptor that carries \p ex_msg_length in
+ * bits 9:6.  \p devinfo is not consulted.
+ */
+static inline unsigned
+brw_message_ex_desc(UNUSED const struct gen_device_info *devinfo,
+                    unsigned ex_msg_length)
+{
+   const unsigned ex_desc = SET_BITS(ex_msg_length, 9, 6);
+
+   return ex_desc;
+}
+
+/**
+ * Read the ex_mlen field (bits 9:6) out of extended message descriptor
+ * \p ex_desc.  \p devinfo is not consulted.
+ */
+static inline unsigned
+brw_message_ex_desc_ex_mlen(UNUSED const struct gen_device_info *devinfo,
+                            uint32_t ex_desc)
+{
+   const unsigned ex_mlen = GET_BITS(ex_desc, 9, 6);
+
+   return ex_mlen;
+}
+
+/**
+ * Construct a URB message descriptor immediate from the given function
+ * controls.  Only gen7 and later are handled; older generations reach
+ * unreachable().
+ *
+ * gen8+ layout: msg_type 3:0, global_offset 14:4, channel_mask_present 15,
+ *               per_slot_offset_present 17.
+ * gen7 layout:  msg_type 3:0, global_offset 13:3, per_slot_offset_present 16;
+ *               channel_mask_present is asserted to be false.
+ *
+ * NOTE(review): in the gen7 layout the msg_type and global_offset ranges both
+ * include bit 3 -- confirm against the hardware documentation.
+ */
+static inline uint32_t
+brw_urb_desc(const struct gen_device_info *devinfo,
+             unsigned msg_type,
+             bool per_slot_offset_present,
+             bool channel_mask_present,
+             unsigned global_offset)
+{
+   if (devinfo->gen >= 8) {
+      return (SET_BITS(msg_type, 3, 0) |
+              SET_BITS(global_offset, 14, 4) |
+              SET_BITS(channel_mask_present, 15, 15) |
+              SET_BITS(per_slot_offset_present, 17, 17));
+   }
+
+   if (devinfo->gen >= 7) {
+      /* This layout packs no channel-mask bit, so none may be requested. */
+      assert(!channel_mask_present);
+      return (SET_BITS(msg_type, 3, 0) |
+              SET_BITS(global_offset, 13, 3) |
+              SET_BITS(per_slot_offset_present, 16, 16));
+   }
+
+   unreachable("unhandled URB write generation");
+}
+
+/**
+ * Read the msg_type field (bits 3:0) of URB descriptor \p desc.  Asserts
+ * that the device is gen7 or later.
+ */
+static inline uint32_t
+brw_urb_desc_msg_type(ASSERTED const struct gen_device_info *devinfo,
+                      uint32_t desc)
+{
+   assert(devinfo->gen >= 7);
+
+   const uint32_t msg_type = GET_BITS(desc, 3, 0);
+   return msg_type;
+}
+
+/**
+ * Construct a sampler message descriptor immediate.
+ *
+ * The binding table index (7:0) and sampler (11:8) fields are common to all
+ * generations; the remaining fields depend on the device:
+ *  - gen7+:     msg_type 16:12, simd_mode 18:17
+ *  - gen5-6:    msg_type 15:12, simd_mode 17:16
+ *  - g4x:       msg_type 15:12
+ *  - otherwise: return_format 13:12, msg_type 15:14
+ * Arguments that have no field in the selected layout are ignored.
+ */
+static inline uint32_t
+brw_sampler_desc(const struct gen_device_info *devinfo,
+                 unsigned binding_table_index,
+                 unsigned sampler,
+                 unsigned msg_type,
+                 unsigned simd_mode,
+                 unsigned return_format)
+{
+   const unsigned common = (SET_BITS(sampler, 11, 8) |
+                            SET_BITS(binding_table_index, 7, 0));
+
+   if (devinfo->gen >= 7) {
+      return (common | SET_BITS(simd_mode, 18, 17) |
+              SET_BITS(msg_type, 16, 12));
+   }
+
+   if (devinfo->gen >= 5) {
+      return (common | SET_BITS(simd_mode, 17, 16) |
+              SET_BITS(msg_type, 15, 12));
+   }
+
+   if (devinfo->is_g4x)
+      return common | SET_BITS(msg_type, 15, 12);
+
+   return (common | SET_BITS(msg_type, 15, 14) |
+           SET_BITS(return_format, 13, 12));
+}
+
+/**
+ * Read the binding table index field (bits 7:0) of sampler descriptor
+ * \p desc.  \p devinfo is not consulted.
+ */
+static inline unsigned
+brw_sampler_desc_binding_table_index(UNUSED const struct gen_device_info *devinfo,
+                                     uint32_t desc)
+{
+   const unsigned bti = GET_BITS(desc, 7, 0);
+
+   return bti;
+}
+
+/**
+ * Read the sampler field (bits 11:8) of sampler descriptor \p desc.
+ * \p devinfo is not consulted.
+ */
+static inline unsigned
+brw_sampler_desc_sampler(UNUSED const struct gen_device_info *devinfo, uint32_t desc)
+{
+   const unsigned sampler = GET_BITS(desc, 11, 8);
+
+   return sampler;
+}
+
+/**
+ * Read the msg_type field of sampler descriptor \p desc: bits 16:12 on
+ * gen7+, bits 15:12 on gen5-6 and g4x, bits 15:14 otherwise.
+ */
+static inline unsigned
+brw_sampler_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   if (devinfo->gen >= 7)
+      return GET_BITS(desc, 16, 12);
+
+   const bool four_bit_field = devinfo->gen >= 5 || devinfo->is_g4x;
+
+   return four_bit_field ? GET_BITS(desc, 15, 12) : GET_BITS(desc, 15, 14);
+}
+
+/**
+ * Read the simd_mode field of sampler descriptor \p desc: bits 18:17 on
+ * gen7+, bits 17:16 on gen5-6.  Asserts that the device is gen5 or later.
+ */
+static inline unsigned
+brw_sampler_desc_simd_mode(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   assert(devinfo->gen >= 5);
+
+   return devinfo->gen >= 7 ? GET_BITS(desc, 18, 17) : GET_BITS(desc, 17, 16);
+}
+
+/**
+ * Read the return_format field (bits 13:12) of sampler descriptor \p desc.
+ * Asserts that the device is gen4 and not g4x.
+ */
+static inline unsigned
+brw_sampler_desc_return_format(ASSERTED const struct gen_device_info *devinfo,
+                               uint32_t desc)
+{
+   assert(devinfo->gen == 4 && !devinfo->is_g4x);
+
+   const unsigned return_format = GET_BITS(desc, 13, 12);
+   return return_format;
+}
+
+/**
+ * Construct a dataport message descriptor for gen6 and later.
+ *
+ * Prior to gen6 things are too inconsistent for a common helper; use the
+ * dp_read/write_desc helpers there instead (enforced by an assertion).
+ *
+ * binding_table_index always goes in bits 7:0.  The other fields are:
+ *  - gen8+: msg_control 13:8, msg_type 18:14
+ *  - gen7:  msg_control 13:8, msg_type 17:14
+ *  - gen6:  msg_control 12:8, msg_type 16:13
+ */
+static inline uint32_t
+brw_dp_desc(const struct gen_device_info *devinfo,
+            unsigned binding_table_index,
+            unsigned msg_type,
+            unsigned msg_control)
+{
+   assert(devinfo->gen >= 6);
+
+   const unsigned bti_bits = SET_BITS(binding_table_index, 7, 0);
+
+   if (devinfo->gen >= 8) {
+      return (bti_bits | SET_BITS(msg_type, 18, 14) |
+              SET_BITS(msg_control, 13, 8));
+   }
+
+   if (devinfo->gen >= 7) {
+      return (bti_bits | SET_BITS(msg_type, 17, 14) |
+              SET_BITS(msg_control, 13, 8));
+   }
+
+   return (bti_bits | SET_BITS(msg_type, 16, 13) |
+           SET_BITS(msg_control, 12, 8));
+}
+
+/**
+ * Read the binding table index field (bits 7:0) of dataport descriptor
+ * \p desc.  \p devinfo is not consulted.
+ */
+static inline unsigned
+brw_dp_desc_binding_table_index(UNUSED const struct gen_device_info *devinfo,
+                                uint32_t desc)
+{
+   const unsigned bti = GET_BITS(desc, 7, 0);
+
+   return bti;
+}
+
+/**
+ * Read the msg_type field of dataport descriptor \p desc: bits 18:14 on
+ * gen8+, 17:14 on gen7, 16:13 on gen6.  Asserts gen6 or later.
+ */
+static inline unsigned
+brw_dp_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   assert(devinfo->gen >= 6);
+
+   if (devinfo->gen >= 8)
+      return GET_BITS(desc, 18, 14);
+
+   return devinfo->gen >= 7 ? GET_BITS(desc, 17, 14) : GET_BITS(desc, 16, 13);
+}
+
+/**
+ * Read the msg_control field of dataport descriptor \p desc: bits 13:8 on
+ * gen7+, bits 12:8 on gen6.  Asserts gen6 or later.
+ */
+static inline unsigned
+brw_dp_desc_msg_control(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   assert(devinfo->gen >= 6);
+
+   return devinfo->gen >= 7 ? GET_BITS(desc, 13, 8) : GET_BITS(desc, 12, 8);
+}
+
+/**
+ * Construct a message descriptor immediate with the specified dataport read
+ * function controls.
+ *
+ * On gen6+ this defers to brw_dp_desc() and \p target_cache is not encoded.
+ * On older parts binding_table_index goes in bits 7:0 and target_cache in
+ * bits 15:14, with:
+ *  - gen5 / g4x: msg_control 10:8, msg_type 13:11
+ *  - otherwise:  msg_control 11:8, msg_type 13:12
+ */
+static inline uint32_t
+brw_dp_read_desc(const struct gen_device_info *devinfo,
+                 unsigned binding_table_index,
+                 unsigned msg_control,
+                 unsigned msg_type,
+                 unsigned target_cache)
+{
+   if (devinfo->gen >= 6)
+      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
+
+   if (devinfo->gen >= 5 || devinfo->is_g4x) {
+      return (SET_BITS(target_cache, 15, 14) |
+              SET_BITS(msg_type, 13, 11) |
+              SET_BITS(msg_control, 10, 8) |
+              SET_BITS(binding_table_index, 7, 0));
+   }
+
+   return (SET_BITS(target_cache, 15, 14) |
+           SET_BITS(msg_type, 13, 12) |
+           SET_BITS(msg_control, 11, 8) |
+           SET_BITS(binding_table_index, 7, 0));
+}
+
+/**
+ * Read the msg_type field of a dataport read descriptor.  gen6+ defers to
+ * brw_dp_desc_msg_type(); gen5 and g4x use bits 13:11, older parts 13:12.
+ */
+static inline unsigned
+brw_dp_read_desc_msg_type(const struct gen_device_info *devinfo, uint32_t desc)
+{
+   if (devinfo->gen >= 6)
+      return brw_dp_desc_msg_type(devinfo, desc);
+
+   const bool g4x_or_gen5 = devinfo->gen >= 5 || devinfo->is_g4x;
+
+   return g4x_or_gen5 ? GET_BITS(desc, 13, 11) : GET_BITS(desc, 13, 12);
+}
+
+/**
+ * Read the msg_control field of a dataport read descriptor.  gen6+ defers to
+ * brw_dp_desc_msg_control(); gen5 and g4x use bits 10:8, older parts 11:8.
+ */
+static inline unsigned
+brw_dp_read_desc_msg_control(const struct gen_device_info *devinfo,
+                             uint32_t desc)
+{
+   if (devinfo->gen >= 6)
+      return brw_dp_desc_msg_control(devinfo, desc);
+
+   const bool g4x_or_gen5 = devinfo->gen >= 5 || devinfo->is_g4x;
+
+   return g4x_or_gen5 ? GET_BITS(desc, 10, 8) : GET_BITS(desc, 11, 8);
+}
+
+/**
+ * Construct a message descriptor immediate with the specified dataport write
+ * function controls.
+ *
+ * \p send_commit_msg may only be set on gen6 and earlier (asserted).
+ *
+ * On gen6+ the common fields come from brw_dp_desc(), and last_render_target
+ * (bit 12) and send_commit_msg (bit 17) are ORed on top.  Before gen6 the
+ * layout is binding_table_index 7:0, msg_control 11:8, last_render_target
+ * bit 11, msg_type 14:12 and send_commit_msg bit 15.
+ *
+ * NOTE(review): in both layouts last_render_target lands on a bit that is
+ * also inside the msg_control range -- presumably callers never set both;
+ * confirm against callers.
+ */
+static inline uint32_t
+brw_dp_write_desc(const struct gen_device_info *devinfo,
+                  unsigned binding_table_index,
+                  unsigned msg_control,
+                  unsigned msg_type,
+                  unsigned last_render_target,
+                  unsigned send_commit_msg)
+{
+   assert(devinfo->gen <= 6 || !send_commit_msg);
+
+   if (devinfo->gen < 6) {
+      return (SET_BITS(send_commit_msg, 15, 15) |
+              SET_BITS(msg_type, 14, 12) |
+              SET_BITS(last_render_target, 11, 11) |
+              SET_BITS(msg_control, 11, 8) |
+              SET_BITS(binding_table_index, 7, 0));
+   }
+
+   const uint32_t dp_desc =
+      brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
+
+   return (dp_desc |
+           SET_BITS(last_render_target, 12, 12) |
+           SET_BITS(send_commit_msg, 17, 17));
+}
+
+/**
+ * Read the msg_type field of a dataport write descriptor.  gen6+ defers to
+ * brw_dp_desc_msg_type(); older parts use bits 14:12.
+ */
+static inline unsigned
+brw_dp_write_desc_msg_type(const struct gen_device_info *devinfo,
+                           uint32_t desc)
+{
+   if (devinfo->gen < 6)
+      return GET_BITS(desc, 14, 12);
+
+   return brw_dp_desc_msg_type(devinfo, desc);
+}
+
+/**
+ * Read the msg_control field of a dataport write descriptor.  gen6+ defers
+ * to brw_dp_desc_msg_control(); older parts use bits 11:8.
+ */
+static inline unsigned
+brw_dp_write_desc_msg_control(const struct gen_device_info *devinfo,
+                              uint32_t desc)
+{
+   if (devinfo->gen < 6)
+      return GET_BITS(desc, 11, 8);
+
+   return brw_dp_desc_msg_control(devinfo, desc);
+}
+
+/**
+ * Read the last_render_target flag of a dataport write descriptor: bit 12
+ * on gen6+, bit 11 on older parts.
+ */
+static inline bool
+brw_dp_write_desc_last_render_target(const struct gen_device_info *devinfo,
+                                     uint32_t desc)
+{
+   const bool gen6_layout = devinfo->gen >= 6;
+
+   return gen6_layout ? GET_BITS(desc, 12, 12) : GET_BITS(desc, 11, 11);
+}
+
+/**
+ * Read the write-commit flag of a dataport write descriptor: bit 17 when
+ * gen >= 6, bit 15 otherwise.  Asserts that the device is gen6 or earlier.
+ */
+static inline bool
+brw_dp_write_desc_write_commit(const struct gen_device_info *devinfo,
+                               uint32_t desc)
+{
+   assert(devinfo->gen <= 6);
+
+   const bool gen6_layout = devinfo->gen >= 6;
+
+   return gen6_layout ? GET_BITS(desc, 17, 17) : GET_BITS(desc, 15, 15);
+}
+
+/**
+ * Construct a message descriptor immediate with the specified dataport
+ * surface function controls.  Asserts gen7 or later.
+ *
+ * The descriptor is built with a binding table index of zero; the real
+ * index is expected to be ORed in later.
+ */
+static inline uint32_t
+brw_dp_surface_desc(const struct gen_device_info *devinfo,
+                    unsigned msg_type,
+                    unsigned msg_control)
+{
+   assert(devinfo->gen >= 7);
+
+   /* Placeholder only: we'll OR in the binding table index later */
+   const unsigned placeholder_bti = 0;
+
+   return brw_dp_desc(devinfo, placeholder_bti, msg_type, msg_control);
+}
+
+/**
+ * Construct a dataport surface descriptor for an untyped atomic message.
+ *
+ * \param exec_size          0 for SIMD4x2; otherwise at most 8, or exactly 16
+ * \param atomic_op          stored in msg_control bits 3:0
+ * \param response_expected  stored in msg_control bit 5
+ *
+ * msg_control bit 4 is set when 0 < exec_size <= 8.  On gen8+ and Haswell a
+ * separate message type is selected for the SIMD4x2 case.
+ */
+static inline uint32_t
+brw_dp_untyped_atomic_desc(const struct gen_device_info *devinfo,
+                           unsigned exec_size, /**< 0 for SIMD4x2 */
+                           unsigned atomic_op,
+                           bool response_expected)
+{
+   assert(exec_size <= 8 || exec_size == 16);
+
+   const bool simd4x2 = exec_size == 0;
+
+   unsigned msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
+   if (devinfo->gen >= 8 || devinfo->is_haswell) {
+      msg_type = simd4x2 ? HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2 :
+                           HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
+   }
+
+   const bool one_to_eight = !simd4x2 && exec_size <= 8;
+   const unsigned msg_control = (SET_BITS(response_expected, 5, 5) |
+                                 SET_BITS(one_to_eight, 4, 4) |
+                                 SET_BITS(atomic_op, 3, 0));
+
+   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+/**
+ * Construct a dataport surface descriptor for an untyped float atomic
+ * message.  Asserts gen9 or later and a non-zero \p exec_size that is
+ * either at most 8 or exactly 16.
+ *
+ * msg_control carries atomic_op in bits 1:0, a flag for exec_size <= 8 in
+ * bit 4 and response_expected in bit 5.
+ */
+static inline uint32_t
+brw_dp_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
+                                 unsigned exec_size,
+                                 unsigned atomic_op,
+                                 bool response_expected)
+{
+   assert(exec_size <= 8 || exec_size == 16);
+   assert(devinfo->gen >= 9);
+   assert(exec_size > 0);
+
+   const bool at_most_eight = exec_size <= 8;
+   const unsigned msg_control = (SET_BITS(response_expected, 5, 5) |
+                                 SET_BITS(at_most_eight, 4, 4) |
+                                 SET_BITS(atomic_op, 1, 0));
+
+   return brw_dp_surface_desc(devinfo,
+                              GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP,
+                              msg_control);
+}
+
+/**
+ * Compute a 4-bit channel mask in which the low \p num_channels bits are
+ * clear and the remaining bits (up to bit 3) are set; e.g. 1 -> 0xe and
+ * 4 -> 0x0.  See also MDC_CMASK in the SKL PRM Vol 2d.
+ */
+static inline unsigned
+brw_mdc_cmask(unsigned num_channels)
+{
+   const int all_channels = 0xf;
+   const int shifted = all_channels << num_channels;
+
+   return shifted & all_channels;
+}
+
+/**
+ * Construct a dataport surface descriptor for an untyped surface read or
+ * write message.
+ *
+ * \param exec_size     0 for SIMD4x2; otherwise at most 8, or exactly 16
+ * \param num_channels  converted with brw_mdc_cmask() into msg_control 3:0
+ * \param write         selects the write message type instead of the read
+ *
+ * msg_control bits 5:4 hold the SIMD mode: 0 for exec_size 0, 2 for
+ * exec_size <= 8 and 1 otherwise.
+ */
+static inline uint32_t
+brw_dp_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
+                               unsigned exec_size, /**< 0 for SIMD4x2 */
+                               unsigned num_channels,
+                               bool write)
+{
+   assert(exec_size <= 8 || exec_size == 16);
+
+   const bool hsw_plus = devinfo->gen >= 8 || devinfo->is_haswell;
+
+   unsigned msg_type;
+   if (hsw_plus) {
+      msg_type = write ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
+                         HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
+   } else {
+      msg_type = write ? GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE :
+                         GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
+   }
+
+   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
+   const bool gen7_non_hsw = devinfo->gen == 7 && !devinfo->is_haswell;
+   if (write && gen7_non_hsw && exec_size == 0)
+      exec_size = 8;
+
+   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
+   unsigned simd_mode;
+   if (exec_size == 0)
+      simd_mode = 0; /* SIMD4x2 */
+   else if (exec_size <= 8)
+      simd_mode = 2;
+   else
+      simd_mode = 1;
+
+   const unsigned msg_control = (SET_BITS(simd_mode, 5, 4) |
+                                 SET_BITS(brw_mdc_cmask(num_channels), 3, 0));
+
+   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+/**
+ * Map an element size in bits (8, 16 or 32) to the matching
+ * GEN7_BYTE_SCATTERED_DATA_ELEMENT_* value.  Any other size is treated as
+ * unreachable.
+ */
+static inline unsigned
+brw_mdc_ds(unsigned bit_size)
+{
+   if (bit_size == 8)
+      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
+
+   if (bit_size == 16)
+      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
+
+   if (bit_size == 32)
+      return GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
+
+   unreachable("Unsupported bit_size for byte scattered messages");
+}
+
+/**
+ * Construct a dataport surface descriptor for a byte scattered read or
+ * write message.  Asserts gen8+ or Haswell, and a non-zero \p exec_size
+ * that is either at most 8 or exactly 16.
+ *
+ * msg_control bit 0 flags exec_size == 16 and bits 3:2 hold the data size
+ * returned by brw_mdc_ds() for \p bit_size.
+ */
+static inline uint32_t
+brw_dp_byte_scattered_rw_desc(const struct gen_device_info *devinfo,
+                              unsigned exec_size,
+                              unsigned bit_size,
+                              bool write)
+{
+   assert(exec_size <= 8 || exec_size == 16);
+   assert(devinfo->gen > 7 || devinfo->is_haswell);
+
+   unsigned msg_type = HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;
+   if (write)
+      msg_type = HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE;
+
+   assert(exec_size > 0);
+
+   const bool wide = exec_size == 16;
+   const unsigned msg_control = (SET_BITS(wide, 0, 0) |
+                                 SET_BITS(brw_mdc_ds(bit_size), 3, 2));
+
+   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+/**
+ * Construct a dataport surface descriptor for a dword scattered read or
+ * write message.  \p exec_size must be 8 or 16 (asserted).
+ *
+ * msg_control sets bit 1 (legacy SIMD mode) unconditionally and bit 0 when
+ * exec_size is 16.
+ *
+ * NOTE(review): the descriptor is built through brw_dp_surface_desc(), which
+ * asserts gen >= 7, so the pre-gen7 message types chosen below can only be
+ * reached when assertions are compiled out -- confirm the intent.
+ */
+static inline uint32_t
+brw_dp_dword_scattered_rw_desc(const struct gen_device_info *devinfo,
+                               unsigned exec_size,
+                               bool write)
+{
+   assert(exec_size == 8 || exec_size == 16);
+
+   unsigned msg_type;
+   if (!write) {
+      if (devinfo->gen >= 7)
+         msg_type = GEN7_DATAPORT_DC_DWORD_SCATTERED_READ;
+      else if (devinfo->gen > 4 || devinfo->is_g4x)
+         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
+      else
+         msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
+   } else {
+      msg_type = devinfo->gen >= 6 ?
+                 GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE :
+                 BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
+   }
+
+   const bool wide = exec_size == 16;
+   const unsigned msg_control = (SET_BITS(wide, 0, 0) |
+                                 SET_BITS(1, 1, 1)); /* Legacy SIMD Mode */
+
+   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+/**
+ * Construct a dataport descriptor for an A64 untyped surface read or write
+ * message, using GEN8_BTI_STATELESS_NON_COHERENT as the binding table index.
+ * Asserts gen8 or later.
+ *
+ * \param exec_size     0 for SIMD4x2; otherwise at most 8, or exactly 16
+ * \param num_channels  converted with brw_mdc_cmask() into msg_control 3:0
+ * \param write         selects the write message type instead of the read
+ */
+static inline uint32_t
+brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
+                                   unsigned exec_size, /**< 0 for SIMD4x2 */
+                                   unsigned num_channels,
+                                   bool write)
+{
+   assert(exec_size <= 8 || exec_size == 16);
+   assert(devinfo->gen >= 8);
+
+   unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;
+   if (write)
+      msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE;
+
+   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
+   unsigned simd_mode;
+   if (exec_size == 0)
+      simd_mode = 0; /* SIMD4x2 */
+   else if (exec_size <= 8)
+      simd_mode = 2;
+   else
+      simd_mode = 1;
+
+   const unsigned msg_control = (SET_BITS(simd_mode, 5, 4) |
+                                 SET_BITS(brw_mdc_cmask(num_channels), 3, 0));
+
+   return brw_dp_desc(devinfo, GEN8_BTI_STATELESS_NON_COHERENT,
+                      msg_type, msg_control);
+}
+
+/**
+ * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
+ * Skylake PRM).
+ *
+ * \param elems  element count; must be 1, 2, 4 or 8
+ * \return 0, 1, 2 or 3 respectively (i.e. log2 of \p elems).  Any other
+ *         count is treated as unreachable.
+ */
+static inline uint32_t
+brw_mdc_a64_ds(unsigned elems)
+{
+   switch (elems) {
+   case 1:  return 0;
+   case 2:  return 1;
+   case 4:  return 2;
+   case 8:  return 3;
+   default:
+      unreachable("Unsupported element count for A64 scattered message");
+   }
+}
+
+/**
+ * Construct a dataport descriptor for an A64 byte scattered read or write
+ * message, using GEN8_BTI_STATELESS_NON_COHERENT as the binding table index.
+ * Asserts gen8 or later and an \p exec_size that is at most 8 or exactly 16.
+ *
+ * msg_control carries the byte subtype in bits 1:0, the data size from
+ * brw_mdc_a64_ds(bit_size / 8) in bits 3:2 and a flag for exec_size == 16
+ * in bit 4.
+ */
+static inline uint32_t
+brw_dp_a64_byte_scattered_rw_desc(const struct gen_device_info *devinfo,
+                                  unsigned exec_size, /**< 0 for SIMD4x2 */
+                                  unsigned bit_size,
+                                  bool write)
+{
+   assert(exec_size <= 8 || exec_size == 16);
+   assert(devinfo->gen >= 8);
+
+   unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;
+   if (write)
+      msg_type = GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE;
+
+   const unsigned bytes_per_elem = bit_size / 8;
+   const bool wide = exec_size == 16;
+
+   const unsigned msg_control =
+      (SET_BITS(GEN8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
+       SET_BITS(brw_mdc_a64_ds(bytes_per_elem), 3, 2) |
+       SET_BITS(wide, 4, 4));
+
+   return brw_dp_desc(devinfo, GEN8_BTI_STATELESS_NON_COHERENT,
+                      msg_type, msg_control);
+}
+
+/**
+ * Construct a dataport descriptor for an A64 untyped atomic message, using
+ * GEN8_BTI_STATELESS_NON_COHERENT as the binding table index.
+ *
+ * Asserts \p exec_size == 8, gen8 or later, and a \p bit_size of 32 or 64.
+ * msg_control carries atomic_op in bits 3:0, a 64-bit flag in bit 4 and
+ * response_expected in bit 5.
+ */
+static inline uint32_t
+brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo,
+                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
+                               unsigned bit_size,
+                               unsigned atomic_op,
+                               bool response_expected)
+{
+   assert(exec_size == 8);
+   assert(devinfo->gen >= 8);
+   assert(bit_size == 32 || bit_size == 64);
+
+   const bool is_64bit = bit_size == 64;
+   const unsigned msg_control = (SET_BITS(response_expected, 5, 5) |
+                                 SET_BITS(is_64bit, 4, 4) |
+                                 SET_BITS(atomic_op, 3, 0));
+
+   return brw_dp_desc(devinfo, GEN8_BTI_STATELESS_NON_COHERENT,
+                      GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP,
+                      msg_control);
+}
+
+/**
+ * Construct a dataport descriptor for an A64 untyped float atomic message,
+ * using GEN8_BTI_STATELESS_NON_COHERENT as the binding table index.
+ *
+ * Asserts \p exec_size == 8 and gen9 or later.  msg_control carries
+ * atomic_op in bits 1:0 and response_expected in bit 5.
+ */
+static inline uint32_t
+brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
+                                     ASSERTED unsigned exec_size,
+                                     unsigned atomic_op,
+                                     bool response_expected)
+{
+   /* exec_size == 8 also guarantees a non-zero execution size. */
+   assert(exec_size == 8);
+   assert(devinfo->gen >= 9);
+
+   const unsigned msg_control = (SET_BITS(response_expected, 5, 5) |
+                                 SET_BITS(atomic_op, 1, 0));
+
+   return brw_dp_desc(devinfo, GEN8_BTI_STATELESS_NON_COHERENT,
+                      GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP,
+                      msg_control);
+}
+
+/**
+ * Construct a dataport surface descriptor for a typed atomic message.
+ *
+ * \param exec_size          0 selects the SIMD4x2 message type, which is
+ *                           only available on gen8+ / Haswell
+ * \param exec_group         must be a multiple of 8, and 0 when exec_size
+ *                           is 0
+ * \param atomic_op          stored in msg_control bits 3:0
+ * \param response_expected  stored in msg_control bit 5
+ *
+ * msg_control bit 4 is set when (exec_group / 8) is odd.
+ */
+static inline uint32_t
+brw_dp_typed_atomic_desc(const struct gen_device_info *devinfo,
+                         unsigned exec_size,
+                         unsigned exec_group,
+                         unsigned atomic_op,
+                         bool response_expected)
+{
+   assert(exec_size > 0 || exec_group == 0);
+   assert(exec_group % 8 == 0);
+
+   unsigned msg_type;
+   if (!(devinfo->gen >= 8 || devinfo->is_haswell)) {
+      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
+      assert(exec_size > 0);
+      msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP;
+   } else if (exec_size == 0) {
+      msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
+   } else {
+      msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
+   }
+
+   const unsigned group_parity = (exec_group / 8) % 2;
+   const bool high_sample_mask = group_parity == 1;
+
+   const unsigned msg_control = (SET_BITS(response_expected, 5, 5) |
+                                 SET_BITS(high_sample_mask, 4, 4) |
+                                 SET_BITS(atomic_op, 3, 0));
+
+   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+/**
+ * Construct a dataport surface descriptor for a typed surface read or write
+ * message.
+ *
+ * \param exec_size     at most 8; 0 means SIMD4x2, which is only accepted on
+ *                      gen8+ / Haswell
+ * \param exec_group    must be a multiple of 8, and 0 when exec_size is 0
+ * \param num_channels  converted with brw_mdc_cmask() into msg_control 3:0
+ * \param write         selects the write message type instead of the read
+ *
+ * On gen8+ / Haswell msg_control bits 5:4 hold the slot group (0 for
+ * SIMD4x2, otherwise 1 plus the parity of exec_group / 8); on other parts
+ * bit 5 holds that parity directly.
+ */
+static inline uint32_t
+brw_dp_typed_surface_rw_desc(const struct gen_device_info *devinfo,
+                             unsigned exec_size,
+                             unsigned exec_group,
+                             unsigned num_channels,
+                             bool write)
+{
+   assert(exec_size > 0 || exec_group == 0);
+   assert(exec_group % 8 == 0);
+
+   /* Typed surface reads and writes don't support SIMD16 */
+   assert(exec_size <= 8);
+
+   const bool hsw_plus = devinfo->gen >= 8 || devinfo->is_haswell;
+   const unsigned group_parity = (exec_group / 8) % 2;
+   const unsigned cmask = brw_mdc_cmask(num_channels);
+
+   unsigned msg_type;
+   if (hsw_plus) {
+      msg_type = write ? HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE :
+                         HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
+   } else {
+      msg_type = write ? GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE :
+                         GEN7_DATAPORT_RC_TYPED_SURFACE_READ;
+   }
+
+   unsigned msg_control;
+   if (hsw_plus) {
+      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
+      unsigned slot_group = 0; /* SIMD4x2 */
+      if (exec_size != 0)
+         slot_group = 1 + group_parity;
+
+      msg_control = SET_BITS(cmask, 3, 0) | SET_BITS(slot_group, 5, 4);
+   } else {
+      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
+      assert(exec_size > 0);
+
+      msg_control = SET_BITS(cmask, 3, 0) | SET_BITS(group_parity, 5, 5);
+   }
+
+   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
+}
+
+/**
+ * Construct a message descriptor immediate with the specified pixel
+ * interpolator function controls.  \p devinfo is not consulted.
+ *
+ * Layout: slot_group bit 11, msg_type bits 13:12, noperspective bit 14,
+ * simd_mode bit 16.
+ */
+static inline uint32_t
+brw_pixel_interp_desc(UNUSED const struct gen_device_info *devinfo,
+                      unsigned msg_type,
+                      bool noperspective,
+                      unsigned simd_mode,
+                      unsigned slot_group)
+{
+   const unsigned noperspective_bit = noperspective ? 1 : 0;
+
+   return (SET_BITS(simd_mode, 16, 16) |
+           SET_BITS(noperspective_bit, 14, 14) |
+           SET_BITS(msg_type, 13, 12) |
+           SET_BITS(slot_group, 11, 11));
+}
 
 void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
@@ -313,18 +976,28 @@ void brw_urb_WRITE(struct brw_codegen *p,
 /**
  * Send message to shared unit \p sfid with a possibly indirect descriptor \p
  * desc.  If \p desc is not an immediate it will be transparently loaded to an
- * address register using an OR instruction.  The returned instruction can be
- * passed as argument to the usual brw_set_*_message() functions in order to
- * specify any additional descriptor bits -- If \p desc is an immediate this
- * will be the SEND instruction itself, otherwise it will be the OR
- * instruction.
+ * address register using an OR instruction.
  */
-struct brw_inst *
+void
 brw_send_indirect_message(struct brw_codegen *p,
                           unsigned sfid,
                           struct brw_reg dst,
                           struct brw_reg payload,
-                          struct brw_reg desc);
+                          struct brw_reg desc,
+                          unsigned desc_imm,
+                          bool eot);
+
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+                                unsigned sfid,
+                                struct brw_reg dst,
+                                struct brw_reg payload0,
+                                struct brw_reg payload1,
+                                struct brw_reg desc,
+                                unsigned desc_imm,
+                                struct brw_reg ex_desc,
+                                unsigned ex_desc_imm,
+                                bool eot);
 
 void brw_ff_sync(struct brw_codegen *p,
                   struct brw_reg dest,
@@ -462,7 +1135,7 @@ brw_inst *brw_WHILE(struct brw_codegen *p);
 
 brw_inst *brw_BREAK(struct brw_codegen *p);
 brw_inst *brw_CONT(struct brw_codegen *p);
-brw_inst *gen6_HALT(struct brw_codegen *p);
+brw_inst *brw_HALT(struct brw_codegen *p);
 
 /* Forward jumps:
  */
@@ -475,6 +1148,8 @@ void brw_NOP(struct brw_codegen *p);
 
 void brw_WAIT(struct brw_codegen *p);
 
+void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);
+
 /* Special case: there is never a destination, execution size will be
  * taken from src0:
  */
@@ -510,53 +1185,14 @@ brw_untyped_surface_write(struct brw_codegen *p,
                           unsigned num_channels,
                           bool header_present);
 
-void
-brw_typed_atomic(struct brw_codegen *p,
-                 struct brw_reg dst,
-                 struct brw_reg payload,
-                 struct brw_reg surface,
-                 unsigned atomic_op,
-                 unsigned msg_length,
-                 bool response_expected,
-                 bool header_present);
-
-void
-brw_typed_surface_read(struct brw_codegen *p,
-                       struct brw_reg dst,
-                       struct brw_reg payload,
-                       struct brw_reg surface,
-                       unsigned msg_length,
-                       unsigned num_channels,
-                       bool header_present);
-
-void
-brw_typed_surface_write(struct brw_codegen *p,
-                        struct brw_reg payload,
-                        struct brw_reg surface,
-                        unsigned msg_length,
-                        unsigned num_channels,
-                        bool header_present);
-
-void
-brw_byte_scattered_read(struct brw_codegen *p,
-                        struct brw_reg dst,
-                        struct brw_reg payload,
-                        struct brw_reg surface,
-                        unsigned msg_length,
-                        unsigned bit_size);
-
-void
-brw_byte_scattered_write(struct brw_codegen *p,
-                         struct brw_reg payload,
-                         struct brw_reg surface,
-                         unsigned msg_length,
-                         unsigned bit_size,
-                         bool header_present);
-
 void
 brw_memory_fence(struct brw_codegen *p,
                  struct brw_reg dst,
-                 enum opcode send_op);
+                 struct brw_reg src,
+                 enum opcode send_op,
+                 enum brw_message_target sfid,
+                 bool commit_enable,
+                 unsigned bti);
 
 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
@@ -580,8 +1216,8 @@ brw_broadcast(struct brw_codegen *p,
               struct brw_reg idx);
 
 void
-brw_rounding_mode(struct brw_codegen *p,
-                  enum brw_rnd_mode mode);
+brw_float_controls_mode(struct brw_codegen *p,
+                        unsigned mode, unsigned mask);
 
 /***********************************************************************
  * brw_eu_util.c:
@@ -624,8 +1260,8 @@ brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
 
 void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
 
-enum brw_conditional_mod brw_negate_cmod(uint32_t cmod);
-enum brw_conditional_mod brw_swap_cmod(uint32_t cmod);
+enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
+enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
 
 /* brw_eu_compact.c */
 void brw_init_compaction_tables(const struct gen_device_info *devinfo);
@@ -640,6 +1276,9 @@ void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
                                  brw_inst *orig, brw_inst *uncompacted);
 
 /* brw_eu_validate.c */
+bool brw_validate_instruction(const struct gen_device_info *devinfo,
+                              const brw_inst *inst, int offset,
+                              struct disasm_info *disasm);
 bool brw_validate_instructions(const struct gen_device_info *devinfo,
                                const void *assembly, int start_offset, int end_offset,
                                struct disasm_info *disasm);
@@ -656,32 +1295,46 @@ next_offset(const struct gen_device_info *devinfo, void *store, int offset)
 }
 
 struct opcode_desc {
-   /* The union is an implementation detail used by brw_opcode_desc() to handle
-    * opcodes that have been reused for different instructions across hardware
-    * generations.
-    *
-    * The gens field acts as a tag. If it is non-zero, name points to a string
-    * containing the instruction mnemonic. If it is zero, the table field is
-    * valid and either points to a secondary opcode_desc table with 'size'
-    * elements or is NULL and no such instruction exists for the opcode.
-    */
-   union {
-      struct {
-         char    *name;
-         int      nsrc;
-      };
-      struct {
-         const struct opcode_desc *table;
-         unsigned size;
-      };
-   };
-   int      ndst;
-   int      gens;
+   unsigned ir;
+   unsigned hw;
+   const char *name;
+   int nsrc;
+   int ndst;
+   int gens;
 };
 
 const struct opcode_desc *
 brw_opcode_desc(const struct gen_device_info *devinfo, enum opcode opcode);
 
+const struct opcode_desc *
+brw_opcode_desc_from_hw(const struct gen_device_info *devinfo, unsigned hw);
+
+/**
+ * Return the hw field of the opcode_desc that brw_opcode_desc() yields for
+ * \p opcode on \p devinfo.
+ *
+ * NOTE(review): the descriptor pointer is dereferenced without a NULL check;
+ * presumably callers only pass opcodes that exist on the device -- confirm.
+ */
+static inline unsigned
+brw_opcode_encode(const struct gen_device_info *devinfo, enum opcode opcode)
+{
+   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
+
+   return desc->hw;
+}
+
+/**
+ * Return the ir field, as an enum opcode, of the opcode_desc that
+ * brw_opcode_desc_from_hw() yields for \p hw.  When no descriptor is found
+ * the result is BRW_OPCODE_ILLEGAL.
+ */
+static inline enum opcode
+brw_opcode_decode(const struct gen_device_info *devinfo, unsigned hw)
+{
+   const struct opcode_desc *desc = brw_opcode_desc_from_hw(devinfo, hw);
+
+   if (!desc)
+      return BRW_OPCODE_ILLEGAL;
+
+   return (enum opcode)desc->ir;
+}
+
+/**
+ * Encode \p opcode with brw_opcode_encode() and store the result in \p inst
+ * through brw_inst_set_hw_opcode().
+ */
+static inline void
+brw_inst_set_opcode(const struct gen_device_info *devinfo,
+                    brw_inst *inst, enum opcode opcode)
+{
+   const unsigned hw_opcode = brw_opcode_encode(devinfo, opcode);
+
+   brw_inst_set_hw_opcode(devinfo, inst, hw_opcode);
+}
+
+/**
+ * Read the hardware opcode of \p inst with brw_inst_hw_opcode() and decode
+ * it with brw_opcode_decode().
+ */
+static inline enum opcode
+brw_inst_opcode(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   const unsigned hw_opcode = brw_inst_hw_opcode(devinfo, inst);
+
+   return brw_opcode_decode(devinfo, hw_opcode);
+}
+
 static inline bool
 is_3src(const struct gen_device_info *devinfo, enum opcode opcode)
 {