From 7376d57a9c6ae69bc47bbbfe5d3b1a0ed0639227 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 1 Feb 2019 11:41:33 +0100
Subject: [PATCH] intel/compiler: validate region restrictions for half-float
 conversions

v2:
 - Consider implicit conversions in 2-src instructions too (Curro)
 - For restrictions that involve destination stride requirements
   only validate them for Align1, since Align16 always requires
   packed data.
 - Skip general rule for the dst/execution type size ratio for
   mixed float instructions on CHV and SKL+, these have their own
   set of rules that we'll be validated separately.

v3 (Curro):
 - Do not check src1 type in single-source instructions.
 - Check restriction on src1.
 - Remove invalid test.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/intel/compiler/brw_eu_validate.c    | 155 +++++++++++++++++++++++-
 src/intel/compiler/test_eu_validate.cpp | 116 ++++++++++++++++++
 2 files changed, 270 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index bd0e48a5e5c..54bffb3af03 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -469,6 +469,66 @@ is_packed(unsigned vstride, unsigned width, unsigned hstride)
    return false;
 }
 
+/**
+ * Returns whether an instruction is an explicit or implicit conversion
+ * to/from half-float.
+ */
+static bool
+is_half_float_conversion(const struct gen_device_info *devinfo,
+                         const brw_inst *inst)
+{
+   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+
+   if (dst_type != src0_type &&
+       (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
+      return true;
+   } else if (num_sources > 1) {
+      enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
+      return dst_type != src1_type &&
+            (dst_type == BRW_REGISTER_TYPE_HF ||
+             src1_type == BRW_REGISTER_TYPE_HF);
+   }
+
+   return false;
+}
+
+/*
+ * Returns whether an instruction is using mixed float operation mode
+ */
+static bool
+is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   if (devinfo->gen < 8)
+      return false;
+
+   if (inst_is_send(devinfo, inst))
+      return false;
+
+   unsigned opcode = brw_inst_opcode(devinfo, inst);
+   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
+   if (desc->ndst == 0)
+      return false;
+
+   /* FIXME: support 3-src instructions */
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   assert(num_sources < 3);
+
+   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+   enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+
+   if (num_sources == 1)
+      return types_are_mixed_float(src0_type, dst_type);
+
+   enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
+
+   return types_are_mixed_float(src0_type, src1_type) ||
+          types_are_mixed_float(src0_type, dst_type) ||
+          types_are_mixed_float(src1_type, dst_type);
+}
+
 /**
  * Checks restrictions listed in "General Restrictions Based on Operand Types"
  * in the "Register Region Restrictions" section.
@@ -539,7 +599,100 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
        exec_type_size == 8 && dst_type_size == 4)
       dst_type_size = 8;
 
-   if (exec_type_size > dst_type_size) {
+   if (is_half_float_conversion(devinfo, inst)) {
+      /**
+       * A helper to validate used in the validation of the following restriction
+       * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
+       *
+       *    "There is no direct conversion from HF to DF or DF to HF.
+       *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
+       *
+       * Even if these restrictions are listed for the MOV instruction, we
+       * validate this more generally, since there is the possibility
+       * of implicit conversions from other instructions, such us implicit
+       * conversion from integer to HF with the ADD instruction in SKL+.
+       */
+      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+      enum brw_reg_type src1_type = num_sources > 1 ?
+                                    brw_inst_src1_type(devinfo, inst) : 0;
+      ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
+               (type_sz(src0_type) == 8 ||
+                (num_sources > 1 && type_sz(src1_type) == 8)),
+               "There are no direct conversions between 64-bit types and HF");
+
+      ERROR_IF(type_sz(dst_type) == 8 &&
+               (src0_type == BRW_REGISTER_TYPE_HF ||
+                (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
+               "There are no direct conversions between 64-bit types and HF");
+
+      /* From the BDW+ PRM:
+       *
+       *   "Conversion between Integer and HF (Half Float) must be
+       *    DWord-aligned and strided by a DWord on the destination."
+       *
+       * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
+       *
+       *   "There is a relaxed alignment rule for word destinations. When
+       *    the destination type is word (UW, W, HF), destination data types
+       *    can be aligned to either the lowest word or the second lowest
+       *    word of the execution channel. This means the destination data
+       *    words can be either all in the even word locations or all in the
+       *    odd word locations."
+       *
+       * We do not implement the second rule as is though, since empirical
+       * testing shows inconsistencies:
+       *   - It suggests that packed 16-bit is not allowed, which is not true.
+       *   - It suggests that conversions from Q/DF to W (which need to be
+       *     64-bit aligned on the destination) are not possible, which is
+       *     not true.
+       *
+       * So from this rule we only validate the implication that conversions
+       * from F to HF need to be DWord strided (except in Align1 mixed
+       * float mode where packed fp16 destination is allowed so long as the
+       * destination is oword-aligned).
+       *
+       * Finally, we only validate this for Align1 because Align16 always
+       * requires packed destinations, so these restrictions can't possibly
+       * apply to Align16 mode.
+       */
+      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+         if ((dst_type == BRW_REGISTER_TYPE_HF &&
+              (brw_reg_type_is_integer(src0_type) ||
+               (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
+             (brw_reg_type_is_integer(dst_type) &&
+              (src0_type == BRW_REGISTER_TYPE_HF ||
+               (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
+            ERROR_IF(dst_stride * dst_type_size != 4,
+                     "Conversions between integer and half-float must be "
+                     "strided by a DWord on the destination");
+
+            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+            ERROR_IF(subreg % 4 != 0,
+                     "Conversions between integer and half-float must be "
+                     "aligned to a DWord on the destination");
+         } else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
+                    dst_type == BRW_REGISTER_TYPE_HF) {
+            unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+            ERROR_IF(dst_stride != 2 &&
+                     !(is_mixed_float(devinfo, inst) &&
+                       dst_stride == 1 && subreg % 16 == 0),
+                     "Conversions to HF must have either all words in even "
+                     "word locations or all words in odd word locations or "
+                     "be mixed-float with Oword-aligned packed destination");
+         }
+      }
+   }
+
+   /* There are special regioning rules for mixed-float mode in CHV and SKL that
+    * override the general rule for the ratio of sizes of the destination type
+    * and the execution type. We will add validation for those in a later patch.
+    */
+   bool validate_dst_size_and_exec_size_ratio =
+      !is_mixed_float(devinfo, inst) ||
+      !(devinfo->is_cherryview || devinfo->gen >= 9);
+
+   if (validate_dst_size_and_exec_size_ratio &&
+       exec_type_size > dst_type_size) {
       if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
          ERROR_IF(dst_stride * dst_type_size != exec_type_size,
                   "Destination stride must be equal to the ratio of the sizes "
diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
index 73300b23122..3fdbecb003b 100644
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -848,6 +848,122 @@ TEST_P(validation_test, byte_destination_relaxed_alignment)
    }
 }
 
+TEST_P(validation_test, half_float_conversion)
+{
+   static const struct {
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src_type;
+      unsigned dst_stride;
+      unsigned dst_subnr;
+      bool expected_result_bdw;
+      bool expected_result_chv_gen9;
+   } inst[] = {
+#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result)  \
+      {                                                                     \
+         BRW_REGISTER_TYPE_##dst_type,                                      \
+         BRW_REGISTER_TYPE_##src_type,                                      \
+         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
+         dst_subnr,                                                         \
+         expected_result,                                                   \
+         expected_result,                                                   \
+      }
+#define INST_S(dst_type, src_type, dst_stride, dst_subnr,                   \
+               expected_result_bdw, expected_result_chv_gen9)               \
+      {                                                                     \
+         BRW_REGISTER_TYPE_##dst_type,                                      \
+         BRW_REGISTER_TYPE_##src_type,                                      \
+         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
+         dst_subnr,                                                         \
+         expected_result_bdw,                                               \
+         expected_result_chv_gen9,                                          \
+      }
+
+      /* MOV to half-float destination */
+      INST_C(HF,  B, 1, 0, false),
+      INST_C(HF,  W, 1, 0, false),
+      INST_C(HF, HF, 1, 0, true),
+      INST_C(HF, HF, 1, 2, true),
+      INST_C(HF,  D, 1, 0, false),
+      INST_S(HF,  F, 1, 0, false, true),
+      INST_C(HF,  Q, 1, 0, false),
+      INST_C(HF,  B, 2, 0, true),
+      INST_C(HF,  B, 2, 2, false),
+      INST_C(HF,  W, 2, 0, true),
+      INST_C(HF,  W, 2, 2, false),
+      INST_C(HF, HF, 2, 0, true),
+      INST_C(HF, HF, 2, 2, true),
+      INST_C(HF,  D, 2, 0, true),
+      INST_C(HF,  D, 2, 2, false),
+      INST_C(HF,  F, 2, 0, true),
+      INST_S(HF,  F, 2, 2, false, true),
+      INST_C(HF,  Q, 2, 0, false),
+      INST_C(HF, DF, 2, 0, false),
+      INST_C(HF,  B, 4, 0, false),
+      INST_C(HF,  W, 4, 0, false),
+      INST_C(HF, HF, 4, 0, true),
+      INST_C(HF, HF, 4, 2, true),
+      INST_C(HF,  D, 4, 0, false),
+      INST_C(HF,  F, 4, 0, false),
+      INST_C(HF,  Q, 4, 0, false),
+      INST_C(HF, DF, 4, 0, false),
+
+      /* MOV from half-float source */
+      INST_C( B, HF, 1, 0, false),
+      INST_C( W, HF, 1, 0, false),
+      INST_C( D, HF, 1, 0, true),
+      INST_C( D, HF, 1, 4, true),
+      INST_C( F, HF, 1, 0, true),
+      INST_C( F, HF, 1, 4, true),
+      INST_C( Q, HF, 1, 0, false),
+      INST_C(DF, HF, 1, 0, false),
+      INST_C( B, HF, 2, 0, false),
+      INST_C( W, HF, 2, 0, true),
+      INST_C( W, HF, 2, 2, false),
+      INST_C( D, HF, 2, 0, false),
+      INST_C( F, HF, 2, 0, true),
+      INST_C( B, HF, 4, 0, true),
+      INST_C( B, HF, 4, 1, false),
+      INST_C( W, HF, 4, 0, false),
+
+#undef INST_C
+#undef INST_S
+   };
+
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (!devinfo.has_64bit_types &&
+          (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) {
+         continue;
+      }
+
+      brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
+
+      brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
+
+      if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
+         brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+         brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
+      } else {
+         brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+         brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+      }
+
+      if (devinfo.is_cherryview || devinfo.gen >= 9)
+         EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
+      else
+         EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
 TEST_P(validation_test, vector_immediate_destination_alignment)
 {
    static const struct {
-- 
2.30.2