i965: Validate destination restrictions with vector immediates
author Matt Turner <mattst88@gmail.com>
Fri, 28 Jul 2017 01:29:50 +0000 (18:29 -0700)
committer Matt Turner <mattst88@gmail.com>
Mon, 21 Aug 2017 21:05:23 +0000 (14:05 -0700)
Reviewed-by: Scott D Phillips <scott.d.phillips@intel.com>
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_eu_validate.c
src/intel/compiler/test_eu_validate.cpp

index 8a6ec035ccd6c541d6dee2b15379591fcc0a0495..6673e0741a8309d710510844bfe88e2f08b2c262 100644 (file)
@@ -279,19 +279,8 @@ validate_reg(const struct gen_device_info *devinfo,
    const int execsize_for_reg[] = {1, 2, 4, 8, 16, 32};
    int width, hstride, vstride, execsize;
 
-   if (reg.file == BRW_IMMEDIATE_VALUE) {
-      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
-       * mean the destination has to be 128-bit aligned and the
-       * destination horiz stride has to be a word.
-       */
-      if (reg.type == BRW_REGISTER_TYPE_V) {
-         unsigned UNUSED elem_size = brw_element_size(devinfo, inst, dst);
-         assert(hstride_for_reg[brw_inst_dst_hstride(devinfo, inst)] *
-                elem_size == 2);
-      }
-
+   if (reg.file == BRW_IMMEDIATE_VALUE)
       return;
-   }
 
    if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
        reg.file == BRW_ARF_NULL)
index 827cd707c700a9b0ca10289d9225dca5b3deab34..7f0595e6f8e7ab0c73851b143cf477c28e6d4f01 100644 (file)
@@ -1036,6 +1036,66 @@ region_alignment_rules(const struct gen_device_info *devinfo,
    return error_msg;
 }
 
+/**
+ * Validates the destination restrictions that apply when an instruction uses
+ * a vector immediate (V, UV or VF) as its last source.
+ *
+ * Only one- and two-source instructions are examined; for these the last
+ * source (src0 or src1 respectively) is the one inspected.  Instructions with
+ * zero or three sources, or whose last source is not an immediate, return an
+ * empty string without further checks.
+ *
+ * Returns error_msg, which starts out empty and is handed to the ERROR_IF
+ * checks below (ERROR_IF is defined outside this function; presumably it
+ * records the message when its condition holds).
+ */
+static struct string
+vector_immediate_restrictions(const struct gen_device_info *devinfo,
+                              const brw_inst *inst)
+{
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (num_sources == 3 || num_sources == 0)
+      return (struct string){};
+
+   unsigned file = num_sources == 1 ?
+                   brw_inst_src0_reg_file(devinfo, inst) :
+                   brw_inst_src1_reg_file(devinfo, inst);
+   if (file != BRW_IMMEDIATE_VALUE)
+      return (struct string){};
+
+   unsigned dst_type_size = brw_element_size(devinfo, inst, dst);
+   /* The subregister number is only read in Align1 mode; otherwise it is
+    * treated as 0 here.
+    */
+   unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
+                         brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
+   /* The hstride field is an encoding: a non-zero value n selects a stride of
+    * 1 << (n - 1) elements.  An encoded 0 must be decoded separately (as a
+    * stride of 0), because shifting by (0 - 1) is undefined behaviour.  A
+    * stride of 0 can never satisfy the word/dword checks below, so such a
+    * destination is still rejected for vector immediates.
+    */
+   unsigned dst_hstride = brw_inst_dst_hstride(devinfo, inst);
+   unsigned dst_stride = dst_hstride != 0 ? 1u << (dst_hstride - 1) : 0;
+   unsigned type = num_sources == 1 ?
+                   brw_inst_src0_reg_type(devinfo, inst) :
+                   brw_inst_src1_reg_type(devinfo, inst);
+
+   /* The PRMs say:
+    *
+    *    When an immediate vector is used in an instruction, the destination
+    *    must be 128-bit aligned with destination horizontal stride equivalent
+    *    to a word for an immediate integer vector (v) and equivalent to a
+    *    DWord for an immediate float vector (vf).
+    *
+    * The text has not been updated for the addition of the immediate unsigned
+    * integer vector type (uv) on SNB, but presumably the same restriction
+    * applies.
+    */
+   switch (type) {
+   case BRW_HW_REG_IMM_TYPE_V:
+   case BRW_HW_REG_IMM_TYPE_UV:
+   case BRW_HW_REG_IMM_TYPE_VF:
+      /* dst_subreg is compared against 128 / 8 = 16, i.e. it is in bytes. */
+      ERROR_IF(dst_subreg % (128 / 8) != 0,
+               "Destination must be 128-bit aligned in order to use immediate "
+               "vector types");
+
+      /* Element size times element stride gives the byte distance between
+       * consecutive destination elements.
+       */
+      if (type == BRW_HW_REG_IMM_TYPE_VF) {
+         ERROR_IF(dst_type_size * dst_stride != 4,
+                  "Destination must have stride equivalent to dword in order "
+                  "to use the VF type");
+      } else {
+         ERROR_IF(dst_type_size * dst_stride != 2,
+                  "Destination must have stride equivalent to word in order "
+                  "to use the V or UV type");
+      }
+      break;
+   default:
+      break;
+   }
+
+   return error_msg;
+}
+
 bool
 brw_validate_instructions(const struct gen_device_info *devinfo,
                           void *assembly, int start_offset, int end_offset,
@@ -1063,6 +1123,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
          CHECK(general_restrictions_based_on_operand_types);
          CHECK(general_restrictions_on_region_parameters);
          CHECK(region_alignment_rules);
+         CHECK(vector_immediate_restrictions);
       }
 
       if (error_msg.str && annotation) {
index 09f4cc142a53357e858dab5e5c39194ba1179c18..b43c41704b41fb674d5821290cad02859a1bf876 100644 (file)
@@ -132,6 +132,7 @@ validate(struct brw_codegen *p)
 #define last_inst    (&p->store[p->nr_insn - 1])
 #define g0           brw_vec8_grf(0, 0)
 #define null         brw_null_reg()
+/* Float immediate 0.0f; the vector-immediate tests retype() it to V/UV/VF. */
+#define zero         brw_imm_f(0.0f)
 
 static void
 clear_instructions(struct brw_codegen *p)
@@ -844,5 +845,83 @@ TEST_P(validation_test, byte_destination_relaxed_alignment)
    } else {
       EXPECT_FALSE(validate(p));
    }
+}
+
+TEST_P(validation_test, vector_immediate_destination_alignment)
+{
+   static const struct {
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src_type;
+      unsigned subnr;
+      unsigned exec_size;
+      bool expected_result;
+   } move[] = {
+      { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF,  0, BRW_EXECUTE_4, true  },
+      { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, 16, BRW_EXECUTE_4, true  },
+      { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF,  1, BRW_EXECUTE_4, false },
+
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V,   0, BRW_EXECUTE_8, true  },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V,  16, BRW_EXECUTE_8, true  },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V,   1, BRW_EXECUTE_8, false },
 
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV,  0, BRW_EXECUTE_8, true  },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, 16, BRW_EXECUTE_8, true  },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV,  1, BRW_EXECUTE_8, false },
+   };
+
+   for (unsigned i = 0; i < sizeof(move) / sizeof(move[0]); i++) {
+      /* UV type is Gen6+ */
+      if (devinfo.gen < 6 &&
+          move[i].src_type == BRW_REGISTER_TYPE_UV)
+         continue;
+
+      brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type));
+      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, move[i].subnr);
+      brw_inst_set_exec_size(&devinfo, last_inst, move[i].exec_size);
+
+      EXPECT_EQ(move[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, vector_immediate_destination_stride)
+{
+   static const struct {
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src_type;
+      unsigned stride;
+      bool expected_result;
+   } move[] = {
+      { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true  },
+      { BRW_REGISTER_TYPE_F, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
+      { BRW_REGISTER_TYPE_D, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true  },
+      { BRW_REGISTER_TYPE_D, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, true  },
+      { BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_VF, BRW_HORIZONTAL_STRIDE_4, true  },
+
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V,  BRW_HORIZONTAL_STRIDE_1, true  },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V,  BRW_HORIZONTAL_STRIDE_2, false },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_V,  BRW_HORIZONTAL_STRIDE_4, false },
+      { BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_V,  BRW_HORIZONTAL_STRIDE_2, true  },
+
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_1, true  },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, false },
+      { BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_4, false },
+      { BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, true  },
+   };
+
+   for (unsigned i = 0; i < sizeof(move) / sizeof(move[0]); i++) {
+      /* UV type is Gen6+ */
+      if (devinfo.gen < 6 &&
+          move[i].src_type == BRW_REGISTER_TYPE_UV)
+         continue;
+
+      brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type));
+      brw_inst_set_dst_hstride(&devinfo, last_inst, move[i].stride);
+
+      EXPECT_EQ(move[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
 }