From 2572c2771d0cab0b9bc489d354ede44dfc88547b Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Tue, 29 Aug 2017 18:29:29 -0700
Subject: [PATCH] i965: Validate "Special Requirements for Handling Double Precision Data Types"

I did not implement:

 - CNL's restriction on 64-bit int + align16, because I don't think we'll
   ever use this combination regardless of hardware generation.

 - The restriction on immediate DF -> F conversions, because there's no
   reason to ever generate that, and I don't even know how DF -> F
   conversions are supposed to work in Align16 since (1) the dst stride must
   be 1, but (2) the dst stride would have to be 2 for src and dst strides
   to be aligned.
---
 src/intel/compiler/brw_eu_validate.c    | 169 +++++
 src/intel/compiler/test_eu_validate.cpp | 623 ++++++++++++++++++++++++
 2 files changed, 792 insertions(+)

diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index 8fcc5293666..8568f712d77 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -1085,6 +1085,174 @@ vector_immediate_restrictions(const struct gen_device_info *devinfo,
    return error_msg;
 }
 
+static struct string
+special_requirements_for_handling_double_precision_data_types(
+   const struct gen_device_info *devinfo,
+   const brw_inst *inst)
+{
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (num_sources == 3 || num_sources == 0)
+      return (struct string){};
+
+   enum brw_reg_type exec_type = execution_type(devinfo, inst);
+   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
+
+   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
+   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
+   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
+   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
+   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
+
+   bool is_integer_dword_multiply =
+      devinfo->gen >= 8 &&
+      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
+      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
+       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
+      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
+       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);
+
+   if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply)
+      return (struct string){};
+
+   for (unsigned i = 0; i < num_sources; i++) {
+      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
+      bool is_scalar_region;
+      enum brw_reg_file file;
+      enum brw_reg_type type;
+
+#define DO_SRC(n) \
+      if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \
+          BRW_IMMEDIATE_VALUE) \
+         continue; \
+ \
+      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \
+      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \
+      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \
+      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \
+      file = brw_inst_src ## n ## _reg_file(devinfo, inst); \
+      type = brw_inst_src ## n ## _type(devinfo, inst); \
+      type_size = brw_reg_type_to_size(type); \
+      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \
+      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
+      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)
+
+      if (i == 0) {
+         DO_SRC(0);
+      } else {
+         DO_SRC(1);
+      }
+#undef DO_SRC
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, regioning in Align1 must follow these rules:
+       *
+       *    1. Source and Destination horizontal stride must be aligned to the
+       *       same qword.
+       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
+       *    3. Source and Destination offset must be the same, except the case
+       *       of scalar source.
+       *
+       * We assume that the restriction applies to GLK as well.
+       */
+      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
+          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+         unsigned src_stride = hstride * type_size;
+         unsigned dst_stride = dst_hstride * dst_type_size;
+
+         ERROR_IF(!is_scalar_region &&
+                  (src_stride % 8 != 0 ||
+                   dst_stride % 8 != 0 ||
+                   src_stride != dst_stride),
+                  "Source and destination horizontal stride must be equal and a "
+                  "multiple of a qword when the execution type is 64-bit");
+
+         ERROR_IF(vstride != width * hstride,
+                  "Vstride must be Width * Hstride when the execution type is "
+                  "64-bit");
+
+         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
+                  "Source and destination offset must be the same when the "
+                  "execution type is 64-bit");
+      }
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, indirect addressing must not be used.
+       *
+       * We assume that the restriction applies to GLK as well.
+       */
+      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
+                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
+                  "Indirect addressing is not allowed when the execution type "
+                  "is 64-bit");
+      }
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    ARF registers must never be used with 64b datatype or when
+       *    operation is integer DWord multiply.
+       *
+       * We assume that the restriction applies to GLK as well.
+       *
+       * We assume that the restriction does not apply to the null register.
+       */
+      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
+                  brw_inst_acc_wr_control(devinfo, inst) ||
+                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
+                   reg != BRW_ARF_NULL) ||
+                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
+                   dst_reg != BRW_ARF_NULL),
+                  "Architecture registers cannot be used when the execution "
+                  "type is 64-bit");
+      }
+   }
+
+   /* The PRMs say that for BDW, SKL:
+    *
+    *    If Align16 is required for an operation with QW destination and non-QW
+    *    source datatypes, the execution size cannot exceed 2.
+    *
+    * We assume that the restriction applies to all Gen8+ parts.
+    */
+   if (devinfo->gen >= 8) {
+      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
+      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
+      unsigned src1_type_size = brw_reg_type_to_size(src1_type);
+
+      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
+               dst_type_size == 8 &&
+               (src0_type_size != 8 || src1_type_size != 8) &&
+               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
+               "In Align16 exec size cannot exceed 2 with a QWord destination "
+               "and a non-QWord source");
+   }
+
+   /* The PRMs say that for CHV, BXT:
+    *
+    *    When source or destination datatype is 64b or operation is integer
+    *    DWord multiply, DepCtrl must not be used.
+    *
+    * We assume that the restriction applies to GLK as well.
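+    *
+    * (A clarifying note, not from the PRMs: DepCtrl here refers to the
+    * NoDDClr/NoDDChk destination dependency control bits, which are read
+    * below via brw_inst_no_dd_clear() and brw_inst_no_dd_check().)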
+    */
+   if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
+               brw_inst_no_dd_clear(devinfo, inst),
+               "DepCtrl is not allowed when the execution type is 64-bit");
+   }
+
+   return error_msg;
+}
+
 bool
 brw_validate_instructions(const struct gen_device_info *devinfo,
                           void *assembly, int start_offset, int end_offset,
@@ -1113,6 +1281,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
       CHECK(general_restrictions_on_region_parameters);
       CHECK(region_alignment_rules);
       CHECK(vector_immediate_restrictions);
+      CHECK(special_requirements_for_handling_double_precision_data_types);
    }
 
    if (error_msg.str && annotation) {
diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
index 4e0317ec74d..e8b175beea7 100644
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -138,6 +138,7 @@ validate(struct brw_codegen *p)
 #define last_inst (&p->store[p->nr_insn - 1])
 #define g0 brw_vec8_grf(0, 0)
+#define acc0 brw_acc_reg(8)
 #define null brw_null_reg()
 #define zero brw_imm_f(0.0f)
@@ -935,3 +936,625 @@ TEST_P(validation_test, vector_immediate_destination_stride)
       clear_instructions(p);
    }
 }
+
+TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      unsigned dst_subreg;
+      unsigned dst_stride;
+
+      enum brw_reg_type src_type;
+      unsigned src_subreg;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type, \
+             src_subreg, src_vstride, src_width, src_hstride, expected_result) \
+      { \
+         BRW_OPCODE_##opcode, \
+         BRW_EXECUTE_##exec_size, \
+         BRW_REGISTER_TYPE_##dst_type, \
+         dst_subreg, \
+         BRW_HORIZONTAL_STRIDE_##dst_stride, \
+         BRW_REGISTER_TYPE_##src_type, \
+         src_subreg, \
+         BRW_VERTICAL_STRIDE_##src_vstride, \
+         BRW_WIDTH_##src_width, \
+         BRW_HORIZONTAL_STRIDE_##src_hstride, \
+         expected_result, \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
+      INST(MOV, 4, Q, 0, 1, Q, 0, 4, 4, 1, true ),
+      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
+
+      INST(MOV, 4, DF, 0, 1, F, 0, 8, 4, 2, true ),
+      INST(MOV, 4, Q, 0, 1, D, 0, 8, 4, 2, true ),
+      INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
+
+      INST(MOV, 4, F, 0, 2, DF, 0, 4, 4, 1, true ),
+      INST(MOV, 4, D, 0, 2, Q, 0, 4, 4, 1, true ),
+      INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
+
+      INST(MUL, 8, D, 0, 2, D, 0, 8, 4, 2, true ),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
+
+      /* Something with subreg nrs */
+      INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
+      INST(MOV, 2, Q, 8, 1, Q, 8, 2, 2, 1, true ),
+      INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
+
+      INST(MUL, 2, D, 4, 2, D, 4, 4, 2, 2, true ),
+      INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, regioning in Align1 must follow these rules:
+       *
+       *    1. Source and Destination horizontal stride must be aligned to the
+       *       same qword.
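+       *
+       *       (A clarifying note, not from the PRMs: hstride times the type
+       *       size in bytes must be the same for source and destination and
+       *       a multiple of 8, unless the source is a scalar.)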
+       */
+      INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
+      INST(MOV, 4, Q, 0, 2, Q, 0, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
+
+      INST(MOV, 4, DF, 0, 2, F, 0, 8, 4, 2, false),
+      INST(MOV, 4, Q, 0, 2, D, 0, 8, 4, 2, false),
+      INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
+
+      INST(MOV, 4, DF, 0, 2, F, 0, 4, 4, 1, false),
+      INST(MOV, 4, Q, 0, 2, D, 0, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
+
+      INST(MUL, 4, D, 0, 2, D, 0, 4, 4, 1, false),
+      INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
+
+      INST(MUL, 4, D, 0, 1, D, 0, 8, 4, 2, false),
+      INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
+
+      /* 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
+      INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
+      INST(MOV, 4, Q, 0, 1, Q, 0, 0, 2, 1, false),
+      INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
+
+      INST(MOV, 4, DF, 0, 1, F, 0, 0, 2, 2, false),
+      INST(MOV, 4, Q, 0, 1, D, 0, 0, 2, 2, false),
+      INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
+
+      INST(MOV, 8, F, 0, 2, DF, 0, 0, 2, 1, false),
+      INST(MOV, 8, D, 0, 2, Q, 0, 0, 2, 1, false),
+      INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
+
+      INST(MUL, 8, D, 0, 2, D, 0, 0, 4, 2, false),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
+
+      INST(MUL, 8, D, 0, 2, D, 0, 0, 4, 2, false),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
+
+      /* 3. Source and Destination offset must be the same, except the case
+       *    of scalar source.
+       */
+      INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
+      INST(MOV, 2, Q, 8, 1, Q, 0, 2, 2, 1, false),
+      INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
+
+      INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
+      INST(MOV, 2, Q, 0, 1, Q, 8, 2, 2, 1, false),
+      INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
+
+      INST(MUL, 4, D, 4, 2, D, 0, 4, 2, 2, false),
+      INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
+
+      INST(MUL, 4, D, 0, 2, D, 4, 4, 2, 2, false),
+      INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
+
+      INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
+      INST(MOV, 2, Q, 8, 1, Q, 0, 0, 1, 0, true ),
+      INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
+
+      INST(MOV, 2, DF, 8, 1, F, 4, 0, 1, 0, true ),
+      INST(MOV, 2, Q, 8, 1, D, 4, 0, 1, 0, true ),
+      INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
+
+      INST(MUL, 4, D, 4, 1, D, 0, 0, 1, 0, true ),
+      INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
+
+      INST(MUL, 4, D, 0, 1, D, 4, 0, 1, 0, true ),
+      INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
+      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, qword_low_power_no_indirect_addressing)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      bool dst_is_indirect;
+      unsigned dst_stride;
+
+      enum brw_reg_type src_type;
+      bool src_is_indirect;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride, \
+             src_type, src_is_indirect, src_vstride, src_width, src_hstride, \
+             expected_result) \
+      { \
+         BRW_OPCODE_##opcode, \
+         BRW_EXECUTE_##exec_size, \
+         BRW_REGISTER_TYPE_##dst_type, \
+         dst_is_indirect, \
+         BRW_HORIZONTAL_STRIDE_##dst_stride, \
+         BRW_REGISTER_TYPE_##src_type, \
+         src_is_indirect, \
+         BRW_VERTICAL_STRIDE_##src_vstride, \
+         BRW_WIDTH_##src_width, \
+         BRW_HORIZONTAL_STRIDE_##src_hstride, \
+         expected_result, \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
+      INST(MOV, 4, Q, 0, 1, Q, 0, 4, 4, 1, true ),
+      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
+
+      INST(MUL, 8, D, 0, 2, D, 0, 8, 4, 2, true ),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
+
+      INST(MOV, 4, F, 1, 1, F, 0, 4, 4, 1, true ),
+      INST(MOV, 4, F, 0, 1, F, 1, 4, 4, 1, true ),
+      INST(MOV, 4, F, 1, 1, F, 1, 4, 4, 1, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, indirect addressing must not be used.
+       */
+      INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
+      INST(MOV, 4, Q, 1, 1, Q, 0, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
+
+      INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
+      INST(MOV, 4, Q, 0, 1, Q, 1, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
+
+      INST(MOV, 4, DF, 1, 1, F, 0, 8, 4, 2, false),
+      INST(MOV, 4, Q, 1, 1, D, 0, 8, 4, 2, false),
+      INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
+
+      INST(MOV, 4, DF, 0, 1, F, 1, 8, 4, 2, false),
+      INST(MOV, 4, Q, 0, 1, D, 1, 8, 4, 2, false),
+      INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
+
+      INST(MOV, 4, F, 1, 2, DF, 0, 4, 4, 1, false),
+      INST(MOV, 4, D, 1, 2, Q, 0, 4, 4, 1, false),
+      INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
+
+      INST(MOV, 4, F, 0, 2, DF, 1, 4, 4, 1, false),
+      INST(MOV, 4, D, 0, 2, Q, 1, 4, 4, 1, false),
+      INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
+
+      INST(MUL, 8, D, 1, 2, D, 0, 8, 4, 2, false),
+      INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
+
+      INST(MUL, 8, D, 0, 2, D, 1, 8, 4, 2, false),
+      INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
+      brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, qword_low_power_no_64bit_arf)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      struct brw_reg dst;
+      enum brw_reg_type dst_type;
+      unsigned dst_stride;
+
+      struct brw_reg src;
+      enum brw_reg_type src_type;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool acc_wr;
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst, dst_type, dst_stride, \
+             src, src_type, src_vstride, src_width, src_hstride, \
+             acc_wr, expected_result) \
+      { \
+         BRW_OPCODE_##opcode, \
+         BRW_EXECUTE_##exec_size, \
+         dst, \
+         BRW_REGISTER_TYPE_##dst_type, \
+         BRW_HORIZONTAL_STRIDE_##dst_stride, \
+         src, \
+         BRW_REGISTER_TYPE_##src_type, \
+         BRW_VERTICAL_STRIDE_##src_vstride, \
+         BRW_WIDTH_##src_width, \
+         BRW_HORIZONTAL_STRIDE_##src_hstride, \
+         acc_wr, \
+         expected_result, \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, g0, DF, 1, g0, F, 4, 2, 2, 0, true ),
+      INST(MOV, 4, g0, F, 2, g0, DF, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, g0, Q, 1, g0, D, 4, 2, 2, 0, true ),
+      INST(MOV, 4, g0, D, 2, g0, Q, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, g0, UQ, 1, g0, UD, 4, 2, 2, 0, true ),
+      INST(MOV, 4, g0, UD, 2, g0, UQ, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, null, F, 1, g0, F, 4, 4, 1, 0, true ),
+      INST(MOV, 4, acc0, F, 1, g0, F, 4, 4, 1, 0, true ),
+      INST(MOV, 4, g0, F, 1, acc0, F, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, null, D, 1, g0, D, 4, 4, 1, 0, true ),
+      INST(MOV, 4, acc0, D, 1, g0, D, 4, 4, 1, 0, true ),
+      INST(MOV, 4, g0, D, 1, acc0, D, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, null, UD, 1, g0, UD, 4, 4, 1, 0, true ),
+      INST(MOV, 4, acc0, UD, 1, g0, UD, 4, 4, 1, 0, true ),
+      INST(MOV, 4, g0, UD, 1, acc0, UD, 4, 4, 1, 0, true ),
+
+      INST(MUL, 4, g0, D, 2, g0, D, 4, 2, 2, 0, true ),
+      INST(MUL, 4, g0, UD, 2, g0, UD, 4, 2, 2, 0, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    ARF registers must never be used with 64b datatype or when
+       *    operation is integer DWord multiply.
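+       *
+       * (A clarifying note, not from the PRMs: acc0 below is the
+       * accumulator, an ARF register; the null register is assumed to be
+       * exempt and is exercised separately further down.)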
+       */
+      INST(MOV, 4, acc0, DF, 1, g0, F, 4, 2, 2, 0, false),
+      INST(MOV, 4, g0, DF, 1, acc0, F, 4, 2, 2, 0, false),
+
+      INST(MOV, 4, acc0, Q, 1, g0, D, 4, 2, 2, 0, false),
+      INST(MOV, 4, g0, Q, 1, acc0, D, 4, 2, 2, 0, false),
+
+      INST(MOV, 4, acc0, UQ, 1, g0, UD, 4, 2, 2, 0, false),
+      INST(MOV, 4, g0, UQ, 1, acc0, UD, 4, 2, 2, 0, false),
+
+      INST(MOV, 4, acc0, F, 2, g0, DF, 4, 4, 1, 0, false),
+      INST(MOV, 4, g0, F, 2, acc0, DF, 4, 4, 1, 0, false),
+
+      INST(MOV, 4, acc0, D, 2, g0, Q, 4, 4, 1, 0, false),
+      INST(MOV, 4, g0, D, 2, acc0, Q, 4, 4, 1, 0, false),
+
+      INST(MOV, 4, acc0, UD, 2, g0, UQ, 4, 4, 1, 0, false),
+      INST(MOV, 4, g0, UD, 2, acc0, UQ, 4, 4, 1, 0, false),
+
+      INST(MUL, 4, acc0, D, 2, g0, D, 4, 2, 2, 0, false),
+      INST(MUL, 4, acc0, UD, 2, g0, UD, 4, 2, 2, 0, false),
+      /* MUL cannot have integer accumulator sources, so don't test that */
+
+      /* We assume that the restriction does not apply to the null register */
+      INST(MOV, 4, null, DF, 1, g0, F, 4, 2, 2, 0, true ),
+      INST(MOV, 4, null, Q, 1, g0, D, 4, 2, 2, 0, true ),
+      INST(MOV, 4, null, UQ, 1, g0, UD, 4, 2, 2, 0, true ),
+
+      /* Check implicit accumulator write control */
+      INST(MOV, 4, null, DF, 1, g0, F, 4, 2, 2, 1, false),
+      INST(MUL, 4, null, DF, 1, g0, F, 4, 2, 2, 1, false),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(inst[i].dst, inst[i].dst_type),
+                    retype(inst[i].src, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(inst[i].dst, inst[i].dst_type),
+                    retype(inst[i].src, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+         brw_inst_set_opcode(&devinfo, last_inst, inst[i].opcode);
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+      brw_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+
+   /* MAC implicitly reads the accumulator */
+   brw_MAC(p, retype(g0, BRW_REGISTER_TYPE_DF),
+              retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF),
+              retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF));
+   if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+      EXPECT_FALSE(validate(p));
+   } else {
+      EXPECT_TRUE(validate(p));
+   }
+}
+
+TEST_P(validation_test, align16_64_bit_integer)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src_type;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, src_type, expected_result) \
+      { \
+         BRW_OPCODE_##opcode, \
+         BRW_EXECUTE_##exec_size, \
+         BRW_REGISTER_TYPE_##dst_type, \
+         BRW_REGISTER_TYPE_##src_type, \
+         expected_result, \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 2, Q, D, true ),
+      INST(MOV, 2, UQ, UD, true ),
+      INST(MOV, 2, DF, F, true ),
+
+      INST(ADD, 2, Q, D, true ),
+      INST(ADD, 2, UQ, UD, true ),
+      INST(ADD, 2, DF, F, true ),
+
+      /* The PRMs say that for BDW, SKL:
+       *
+       *    If Align16 is required for an
+       *    operation with QW destination and non-QW source datatypes, the
+       *    execution size cannot exceed 2.
+       */
+
+      INST(MOV, 4, Q, D, false),
+      INST(MOV, 4, UQ, UD, false),
+      INST(MOV, 4, DF, F, false),
+
+      INST(ADD, 4, Q, D, false),
+      INST(ADD, 4, UQ, UD, false),
+      INST(ADD, 4, DF, F, false),
+
+#undef INST
+   };
+
+   /* 64-bit integer types exist on Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_ADD);
+         brw_ADD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(g0, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, qword_low_power_no_depctrl)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      unsigned dst_stride;
+
+      enum brw_reg_type src_type;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool no_dd_check;
+      bool no_dd_clear;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, dst_stride, \
+             src_type, src_vstride, src_width, src_hstride, \
+             no_dd_check, no_dd_clear, expected_result) \
+      { \
+         BRW_OPCODE_##opcode, \
+         BRW_EXECUTE_##exec_size, \
+         BRW_REGISTER_TYPE_##dst_type, \
+         BRW_HORIZONTAL_STRIDE_##dst_stride, \
+         BRW_REGISTER_TYPE_##src_type, \
+         BRW_VERTICAL_STRIDE_##src_vstride, \
+         BRW_WIDTH_##src_width, \
+         BRW_HORIZONTAL_STRIDE_##src_hstride, \
+         no_dd_check, \
+         no_dd_clear, \
+         expected_result, \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, DF, 1, F, 8, 4, 2, 0, 0, true ),
+      INST(MOV, 4, Q, 1, D, 8, 4, 2, 0, 0, true ),
+      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
+
+      INST(MOV, 4, F, 2, DF, 4, 4, 1, 0, 0, true ),
+      INST(MOV, 4, D, 2, Q, 4, 4, 1, 0, 0, true ),
+      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
+
+      INST(MUL, 8, D, 2, D, 8, 4, 2, 0, 0, true ),
+      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
+
+      INST(MOV, 4, F, 1, F, 4, 4, 1, 1, 1, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, DepCtrl must not be used.
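+       *
+       * (A clarifying note, not from the PRMs: DepCtrl corresponds to the
+       * no_dd_check/no_dd_clear bits set in the loop below.)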
+       */
+      INST(MOV, 4, DF, 1, F, 8, 4, 2, 1, 0, false),
+      INST(MOV, 4, Q, 1, D, 8, 4, 2, 1, 0, false),
+      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
+
+      INST(MOV, 4, F, 2, DF, 4, 4, 1, 1, 0, false),
+      INST(MOV, 4, D, 2, Q, 4, 4, 1, 1, 0, false),
+      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
+
+      INST(MOV, 4, DF, 1, F, 8, 4, 2, 0, 1, false),
+      INST(MOV, 4, Q, 1, D, 8, 4, 2, 0, 1, false),
+      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
+
+      INST(MOV, 4, F, 2, DF, 4, 4, 1, 0, 1, false),
+      INST(MOV, 4, D, 2, Q, 4, 4, 1, 0, 1, false),
+      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
+
+      INST(MUL, 8, D, 2, D, 8, 4, 2, 1, 0, false),
+      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
+
+      INST(MUL, 8, D, 2, D, 8, 4, 2, 0, 1, false),
+      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      brw_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
+      brw_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+}
-- 
2.30.2