intel/compiler: don't use byte operands for src1 on ICL
authorLionel Landwerlin <lionel.g.landwerlin@intel.com>
Wed, 19 Jun 2019 12:09:35 +0000 (05:09 -0700)
committerLionel Landwerlin <lionel.g.landwerlin@intel.com>
Sat, 29 Jun 2019 12:56:09 +0000 (12:56 +0000)
The simulator complains about using byte operands, we also have
documentation telling us.

Note that add operations on bytes seems to work fine on HW (like ADD).
Using dwords operands with CMP & SEL fixes the following tests :

   dEQP-VK.spirv_assembly.type.vec*.i8.*

v2: Drop the GLK changes (Matt)
    Add validator tests (Matt)

v3: Drop GLK ref (Matt)
    Don't mix float/integer in MAD (Matt)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com> (v1)
Reviewed-by: Matt Turner <mattst88@gmail.com>
BSpec: 3017
Cc: <mesa-stable@lists.freedesktop.org>
src/intel/compiler/brw_eu_validate.c
src/intel/compiler/brw_fs_builder.h
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/test_eu_validate.cpp

index 943f724e60f0f23e0798ed7c558d13a12eb12dad..203280570aaf29329f7f3ab386455f174316e232 100644 (file)
@@ -289,6 +289,18 @@ sources_not_null(const struct gen_device_info *devinfo,
    return error_msg;
 }
 
+static struct string
+alignment_supported(const struct gen_device_info *devinfo,
+                    const brw_inst *inst)
+{
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
+            "Align16 not supported");
+
+   return error_msg;
+}
+
 static bool
 inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst)
 {
@@ -600,17 +612,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
    unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
    struct string error_msg = { .str = NULL, .len = 0 };
 
+   if (devinfo->gen >= 11) {
+      if (num_sources == 3) {
+         ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
+                  brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
+                  "Byte data type is not supported for src1/2 register regioning. This includes "
+                  "byte broadcast as well.");
+      }
+      if (num_sources == 2) {
+         ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1,
+                  "Byte data type is not supported for src1 register regioning. This includes "
+                  "byte broadcast as well.");
+      }
+   }
+
    if (num_sources == 3)
-      return (struct string){};
+      return error_msg;
 
    if (inst_is_send(devinfo, inst))
-      return (struct string){};
+      return error_msg;
 
    if (exec_size == 1)
-      return (struct string){};
+      return error_msg;
 
    if (desc->ndst == 0)
-      return (struct string){};
+      return error_msg;
 
    /* The PRMs say:
     *
@@ -635,12 +661,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
 
    if (dst_type_is_byte) {
       if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
-         if (!inst_is_raw_move(devinfo, inst)) {
+         if (!inst_is_raw_move(devinfo, inst))
             ERROR("Only raw MOV supports a packed-byte destination");
-            return error_msg;
-         } else {
-            return (struct string){};
-         }
+         return error_msg;
       }
    }
 
@@ -1823,6 +1846,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
       } else {
          CHECK(sources_not_null);
          CHECK(send_restrictions);
+         CHECK(alignment_supported);
          CHECK(general_restrictions_based_on_operand_types);
          CHECK(general_restrictions_on_region_parameters);
          CHECK(special_restrictions_for_mixed_float_mode);
index 9655c2ef55477625e3710d4526f98882f7b8adc7..b781f1257ba4f050abed024617e3934b35078d22 100644 (file)
@@ -322,10 +322,11 @@ namespace brw {
          case SHADER_OPCODE_INT_REMAINDER:
             return emit(instruction(opcode, dispatch_width(), dst,
                                     fix_math_operand(src0),
-                                    fix_math_operand(src1)));
+                                    fix_math_operand(fix_byte_src(src1))));
 
          default:
-            return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
+            return emit(instruction(opcode, dispatch_width(), dst,
+                                    src0, fix_byte_src(src1)));
 
          }
       }
@@ -344,12 +345,12 @@ namespace brw {
          case BRW_OPCODE_LRP:
             return emit(instruction(opcode, dispatch_width(), dst,
                                     fix_3src_operand(src0),
-                                    fix_3src_operand(src1),
-                                    fix_3src_operand(src2)));
+                                    fix_3src_operand(fix_byte_src(src1)),
+                                    fix_3src_operand(fix_byte_src(src2))));
 
          default:
             return emit(instruction(opcode, dispatch_width(), dst,
-                                    src0, src1, src2));
+                                    src0, fix_byte_src(src1), fix_byte_src(src2)));
          }
       }
 
@@ -399,8 +400,11 @@ namespace brw {
       {
          assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
 
-         return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
-                                     fix_unsigned_negate(src1)));
+         /* In some cases we can't have bytes as operand for src1, so use the
+          * same type for both operand.
+          */
+         return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)),
+                                     fix_unsigned_negate(fix_byte_src(src1))));
       }
 
       /**
@@ -657,8 +661,8 @@ namespace brw {
                             emit(BRW_OPCODE_CSEL,
                                  retype(dst, BRW_REGISTER_TYPE_F),
                                  retype(src0, BRW_REGISTER_TYPE_F),
-                                 retype(src1, BRW_REGISTER_TYPE_F),
-                                 src2));
+                                 retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F),
+                                 fix_byte_src(src2)));
       }
 
       /**
@@ -719,6 +723,22 @@ namespace brw {
 
       backend_shader *shader;
 
+      /**
+       * Byte sized operands are not supported for src1 on Gen11+.
+       */
+      src_reg
+      fix_byte_src(const src_reg &src) const
+      {
+         if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) ||
+             type_sz(src.type) != 1)
+            return src;
+
+         dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ?
+                             BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D);
+         MOV(temp, src);
+         return src_reg(temp);
+      }
+
    private:
       /**
        * Workaround for negation of UD registers.  See comment in
index e0818caae13b1dda525a64acb87dabf58863e53d..b9d42b6e26e5184054b67b2ade3f1ba4d25a3800 100644 (file)
@@ -1340,9 +1340,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
    case nir_op_ine32: {
       fs_reg dest = result;
 
+      /* On Gen11 we have an additional issue being that src1 cannot be a byte
+       * type. So we convert both operands for the comparison.
+       */
+      fs_reg temp_op[2];
+      temp_op[0] = bld.fix_byte_src(op[0]);
+      temp_op[1] = bld.fix_byte_src(op[1]);
+
       const uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
       if (bit_size != 32)
-         dest = bld.vgrf(op[0].type, 1);
+         dest = bld.vgrf(temp_op[0].type, 1);
 
       brw_conditional_mod cond;
       switch (instr->op) {
@@ -1363,7 +1370,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       default:
          unreachable("bad opcode");
       }
-      bld.CMP(dest, op[0], op[1], cond);
+      bld.CMP(dest, temp_op[0], temp_op[1], cond);
 
       if (bit_size > 32) {
          bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0));
index 65326416064a313e52547bf2ba3573b74853713c..efdae4fd79bb8eb5bd70894f0710ba3639f5ca14 100644 (file)
@@ -2372,3 +2372,124 @@ TEST_P(validation_test, qword_low_power_no_depctrl)
       clear_instructions(p);
    }
 }
+
+TEST_P(validation_test, gen11_no_byte_src_1_2)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned access_mode;
+
+      enum brw_reg_type dst_type;
+      struct {
+         enum brw_reg_type type;
+         unsigned vstride;
+         unsigned width;
+         unsigned hstride;
+      } srcs[3];
+
+      int  gen;
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, access_mode, dst_type,                             \
+             src0_type, src0_vstride, src0_width, src0_hstride,         \
+             src1_type, src1_vstride, src1_width, src1_hstride,         \
+             src2_type,                                                 \
+             gen, expected_result)                                      \
+      {                                                                 \
+         BRW_OPCODE_##opcode,                                           \
+         BRW_ALIGN_##access_mode,                                       \
+         BRW_REGISTER_TYPE_##dst_type,                                  \
+         {                                                              \
+            {                                                           \
+               BRW_REGISTER_TYPE_##src0_type,                           \
+               BRW_VERTICAL_STRIDE_##src0_vstride,                      \
+               BRW_WIDTH_##src0_width,                                  \
+               BRW_HORIZONTAL_STRIDE_##src0_hstride,                    \
+            },                                                          \
+            {                                                           \
+               BRW_REGISTER_TYPE_##src1_type,                           \
+               BRW_VERTICAL_STRIDE_##src1_vstride,                      \
+               BRW_WIDTH_##src1_width,                                  \
+               BRW_HORIZONTAL_STRIDE_##src1_hstride,                    \
+            },                                                          \
+            {                                                           \
+               BRW_REGISTER_TYPE_##src2_type,                           \
+            },                                                          \
+         },                                                             \
+         gen,                                                           \
+         expected_result,                                               \
+      }
+
+      /* Passes on < 11 */
+      INST(MOV, 16,  F, B, 2, 4, 0, UD, 0, 4, 0,  D,  8, true ),
+      INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0,  D,  7, true ),
+      INST(MAD, 16,  D, B, 0, 4, 0, UB, 0, 1, 0,  B, 10, true ),
+
+      /* Fails on 11+ */
+      INST(MAD,  1, UB, W, 1, 1, 0,  D, 0, 4, 0,  B, 11, false ),
+      INST(MAD,  1, UB, W, 1, 1, 1, UB, 1, 1, 0,  W, 11, false ),
+      INST(ADD,  1,  W, W, 1, 4, 1,  B, 1, 1, 0,  D, 11, false ),
+
+      /* Passes on 11+ */
+      INST(MOV,  1,  W, B, 8, 8, 1,  D, 8, 8, 1,  D, 11, true ),
+      INST(ADD,  1, UD, B, 8, 8, 1,  W, 8, 8, 1,  D, 11, true ),
+      INST(MAD,  1,  B, B, 0, 1, 0,  D, 0, 4, 0,  W, 11, true ),
+
+#undef INST
+   };
+
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      /* Skip instruction not meant for this gen. */
+      if (devinfo.gen != inst[i].gen)
+         continue;
+
+      brw_push_insn_state(p);
+
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
+      brw_set_default_access_mode(p, inst[i].access_mode);
+
+      switch (inst[i].opcode) {
+      case BRW_OPCODE_MOV:
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type));
+         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         break;
+      case BRW_OPCODE_ADD:
+         brw_ADD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type),
+                    retype(g0, inst[i].srcs[1].type));
+         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
+         brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
+         brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
+         break;
+      case BRW_OPCODE_MAD:
+         brw_MAD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type),
+                    retype(g0, inst[i].srcs[1].type),
+                    retype(g0, inst[i].srcs[2].type));
+         brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         break;
+      default:
+         unreachable("invalid opcode");
+      }
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
+      brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
+
+      brw_pop_insn_state(p);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}