#include <gtest/gtest.h>
#include "brw_eu.h"
+#include "brw_eu_defines.h"
+#include "util/bitset.h"
#include "util/ralloc.h"
-enum subgen {
- IS_G45 = 1,
- IS_BYT,
- IS_HSW,
- IS_CHV,
- IS_BXT,
- IS_KBL,
- IS_GLK,
- IS_CFL,
-};
-
+/* One entry per platform under test. Only the short platform name is kept
+ * now; SetUp() maps it to a PCI device id and fills the full gen_device_info
+ * from that, so the old explicit gen/subgen fields are removed.
+ */
static const struct gen_info {
const char *name;
- int gen;
- enum subgen subgen;
} gens[] = {
- { "brw", 4 },
- { "g45", 4, IS_G45 },
- { "ilk", 5 },
- { "snb", 6 },
- { "ivb", 7 },
- { "byt", 7, IS_BYT },
- { "hsw", 7, IS_HSW },
- { "bdw", 8 },
- { "chv", 8, IS_CHV },
- { "skl", 9 },
- { "bxt", 9, IS_BXT },
- { "kbl", 9, IS_KBL },
- { "glk", 9, IS_GLK },
- { "cfl", 9, IS_CFL },
- { "cnl", 10 },
+ { "brw", },
+ { "g4x", },
+ { "ilk", },
+ { "snb", },
+ { "ivb", },
+ { "byt", },
+ { "hsw", },
+ { "bdw", },
+ { "chv", },
+ { "skl", },
+ { "bxt", },
+ { "kbl", },
+ { "aml", },
+ { "glk", },
+ { "cfl", },
+ { "whl", },
+ { "cnl", },
+ { "icl", },
+ { "tgl", },
};
class validation_test: public ::testing::TestWithParam<struct gen_info> {
void validation_test::SetUp()
{
struct gen_info info = GetParam();
+ /* Resolve the short platform name ("skl", "icl", ...) to a PCI id and
+ * let the device-info code populate every devinfo field consistently,
+ * instead of hand-setting gen and the is_* subgen flags.
+ */
+ int devid = gen_device_name_to_pci_device_id(info.name);
- devinfo.gen = info.gen;
- devinfo.is_g4x = info.subgen == IS_G45;
- devinfo.is_baytrail = info.subgen == IS_BYT;
- devinfo.is_haswell = info.subgen == IS_HSW;
- devinfo.is_cherryview = info.subgen == IS_CHV;
- devinfo.is_broxton = info.subgen == IS_BXT;
- devinfo.is_kabylake = info.subgen == IS_KBL;
- devinfo.is_geminilake = info.subgen == IS_GLK;
- devinfo.is_coffeelake = info.subgen == IS_CFL;
+ gen_get_device_info_from_pci_id(devid, &devinfo);
brw_init_codegen(&devinfo, p, p);
}
validate(struct brw_codegen *p)
{
const bool print = getenv("TEST_DEBUG");
+ /* disasm_initialize() now returns a heap-allocated disasm_info (ralloc'd),
+ * so the struct is handled through a pointer and freed directly with
+ * ralloc_free(disasm) rather than via a mem_ctx member.
+ */
- struct disasm_info disasm = disasm_initialize(p->devinfo, NULL);
+ struct disasm_info *disasm = disasm_initialize(p->devinfo, NULL);
if (print) {
- disasm_new_inst_group(&disasm, 0);
- disasm_new_inst_group(&disasm, p->next_insn_offset);
+ disasm_new_inst_group(disasm, 0);
+ disasm_new_inst_group(disasm, p->next_insn_offset);
}
bool ret = brw_validate_instructions(p->devinfo, p->store, 0,
- p->next_insn_offset, &disasm);
+ p->next_insn_offset, disasm);
if (print) {
- dump_assembly(p->store, &disasm);
+ dump_assembly(p->store, disasm);
}
- ralloc_free(disasm.mem_ctx);
+ ralloc_free(disasm);
return ret;
}
* reserved on Gen 7
* "goto" on Gen8+
*/
- brw_next_insn(p, 46);
+ brw_next_insn(p, brw_opcode_decode(&devinfo, 46));
if (devinfo.gen == 7) {
EXPECT_FALSE(validate(p));
}
}
+/* Exec size is a 3-bit field; encodings above BRW_EXECUTE_32 have no
+ * defined meaning and must be rejected by the validator.
+ */
+TEST_P(validation_test, invalid_exec_size_encoding)
+{
+ const struct {
+ enum brw_execution_size exec_size;
+ bool expected_result;
+ } test_case[] = {
+ { BRW_EXECUTE_1, true },
+ { BRW_EXECUTE_2, true },
+ { BRW_EXECUTE_4, true },
+ { BRW_EXECUTE_8, true },
+ { BRW_EXECUTE_16, true },
+ { BRW_EXECUTE_32, true },
+
+ { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 1), false },
+ { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 2), false },
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
+ brw_MOV(p, g0, g0);
+
+ brw_inst_set_exec_size(&devinfo, last_inst, test_case[i].exec_size);
+ brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
+ brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_GENERAL_REGISTER_FILE, BRW_REGISTER_TYPE_W);
+
+ /* Give the source a region that is legal for the exec size under test,
+ * so only the exec-size encoding can cause a validation failure.
+ */
+ if (test_case[i].exec_size == BRW_EXECUTE_1) {
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
+ } else {
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+ }
+
+ EXPECT_EQ(test_case[i].expected_result, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* MRF (message register file) was removed from the ISA after Gen6, so its
+ * encoding must validate only on Gen6 and earlier. Skipped on Gen12+, where
+ * the register-file field shrank to one bit and the encoding doesn't exist.
+ */
+TEST_P(validation_test, invalid_file_encoding)
+{
+ /* Register file on Gen12 is only one bit */
+ if (devinfo.gen >= 12)
+ return;
+
+ brw_MOV(p, g0, g0);
+ brw_inst_set_dst_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
+
+ if (devinfo.gen > 6) {
+ EXPECT_FALSE(validate(p));
+ } else {
+ EXPECT_TRUE(validate(p));
+ }
+
+ clear_instructions(p);
+
+ /* Repeat with MRF as a source, using the math instruction form that
+ * exists on the generation under test.
+ */
+ if (devinfo.gen < 6) {
+ gen4_math(p, g0, BRW_MATH_FUNCTION_SIN, 0, g0, BRW_MATH_PRECISION_FULL);
+ } else {
+ gen6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
+ }
+ brw_inst_set_src0_file_type(&devinfo, last_inst, BRW_MESSAGE_REGISTER_FILE, BRW_REGISTER_TYPE_F);
+
+ if (devinfo.gen > 6) {
+ EXPECT_FALSE(validate(p));
+ } else {
+ EXPECT_TRUE(validate(p));
+ }
+}
+
+/* Walk every possible hardware type encoding for GRF and immediate operands:
+ * encodings reachable from a BRW_REGISTER_TYPE_* that is legal on this
+ * generation/file must validate; every remaining bit pattern must be
+ * rejected. NOTE(review): IMM / FIXED_GRF here are presumably aliases of
+ * BRW_IMMEDIATE_VALUE / BRW_GENERAL_REGISTER_FILE — confirm against
+ * brw_reg.h.
+ */
+TEST_P(validation_test, invalid_type_encoding)
+{
+ enum brw_reg_file files[2] = {
+ BRW_GENERAL_REGISTER_FILE,
+ BRW_IMMEDIATE_VALUE,
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
+ const enum brw_reg_file file = files[i];
+ const int num_bits = devinfo.gen >= 8 ? 4 : 3;
+ const int num_encodings = 1 << num_bits;
+
+ /* The data types are encoded into <num_bits> bits to be used in hardware
+ * instructions, so keep a record in a bitset the invalid patterns so
+ * they can be verified to be invalid when used.
+ */
+ BITSET_DECLARE(invalid_encodings, num_encodings);
+
+ const struct {
+ enum brw_reg_type type;
+ bool expected_result;
+ } test_case[] = {
+ { BRW_REGISTER_TYPE_NF, devinfo.gen == 11 && file != IMM },
+ { BRW_REGISTER_TYPE_DF, devinfo.has_64bit_float && (devinfo.gen >= 8 || file != IMM) },
+ { BRW_REGISTER_TYPE_F, true },
+ { BRW_REGISTER_TYPE_HF, devinfo.gen >= 8 },
+ { BRW_REGISTER_TYPE_VF, file == IMM },
+ { BRW_REGISTER_TYPE_Q, devinfo.has_64bit_int },
+ { BRW_REGISTER_TYPE_UQ, devinfo.has_64bit_int },
+ { BRW_REGISTER_TYPE_D, true },
+ { BRW_REGISTER_TYPE_UD, true },
+ { BRW_REGISTER_TYPE_W, true },
+ { BRW_REGISTER_TYPE_UW, true },
+ { BRW_REGISTER_TYPE_B, file == FIXED_GRF },
+ { BRW_REGISTER_TYPE_UB, file == FIXED_GRF },
+ { BRW_REGISTER_TYPE_V, file == IMM },
+ { BRW_REGISTER_TYPE_UV, devinfo.gen >= 6 && file == IMM },
+ };
+
+ /* Initially assume all hardware encodings are invalid */
+ BITSET_ONES(invalid_encodings);
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_4);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
+ if (test_case[i].expected_result) {
+ unsigned hw_type = brw_reg_type_to_hw_type(&devinfo, file, test_case[i].type);
+ if (hw_type != INVALID_REG_TYPE) {
+ /* ... and remove valid encodings from the set */
+ assert(BITSET_TEST(invalid_encodings, hw_type));
+ BITSET_CLEAR(invalid_encodings, hw_type);
+ }
+
+ if (file == FIXED_GRF) {
+ struct brw_reg g = retype(g0, test_case[i].type);
+ brw_MOV(p, g, g);
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+ } else {
+ enum brw_reg_type t;
+
+ /* Vector-immediate types (V/UV/VF) are only legal as immediate
+ * sources, so pick a matching scalar type for the destination.
+ */
+ switch (test_case[i].type) {
+ case BRW_REGISTER_TYPE_V:
+ t = BRW_REGISTER_TYPE_W;
+ break;
+ case BRW_REGISTER_TYPE_UV:
+ t = BRW_REGISTER_TYPE_UW;
+ break;
+ case BRW_REGISTER_TYPE_VF:
+ t = BRW_REGISTER_TYPE_F;
+ break;
+ default:
+ t = test_case[i].type;
+ break;
+ }
+
+ struct brw_reg g = retype(g0, t);
+ brw_MOV(p, g, retype(brw_imm_w(0), test_case[i].type));
+ }
+
+ EXPECT_TRUE(validate(p));
+
+ clear_instructions(p);
+ }
+ }
+
+ /* The remaining encodings in invalid_encodings do not have a mapping
+ * from BRW_REGISTER_TYPE_* and must be invalid. Verify that invalid
+ * encodings are rejected by the validator.
+ */
+ int e;
+ BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
+ if (file == FIXED_GRF) {
+ brw_MOV(p, g0, g0);
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+ } else {
+ brw_MOV(p, g0, brw_imm_w(0));
+ }
+ brw_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
+ brw_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);
+
+ EXPECT_FALSE(validate(p));
+
+ clear_instructions(p);
+ }
+ }
+}
+
+/* Same exhaustive-encoding check as invalid_type_encoding, but for the
+ * narrower type field of 3-source instructions in align16 mode (2 bits
+ * pre-Gen8, 3 bits on Gen8-10).
+ */
+TEST_P(validation_test, invalid_type_encoding_3src_a16)
+{
+ /* 3-src instructions in align16 mode only supported on Gen6-10 */
+ if (devinfo.gen < 6 || devinfo.gen > 10)
+ return;
+
+ const int num_bits = devinfo.gen >= 8 ? 3 : 2;
+ const int num_encodings = 1 << num_bits;
+
+ /* The data types are encoded into <num_bits> bits to be used in hardware
+ * instructions, so keep a record in a bitset the invalid patterns so
+ * they can be verified to be invalid when used.
+ */
+ BITSET_DECLARE(invalid_encodings, num_encodings);
+
+ const struct {
+ enum brw_reg_type type;
+ bool expected_result;
+ } test_case[] = {
+ { BRW_REGISTER_TYPE_DF, devinfo.gen >= 7 },
+ { BRW_REGISTER_TYPE_F, true },
+ { BRW_REGISTER_TYPE_HF, devinfo.gen >= 8 },
+ { BRW_REGISTER_TYPE_D, devinfo.gen >= 7 },
+ { BRW_REGISTER_TYPE_UD, devinfo.gen >= 7 },
+ };
+
+ /* Initially assume all hardware encodings are invalid */
+ BITSET_ONES(invalid_encodings);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_exec_size(p, BRW_EXECUTE_4);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
+ if (test_case[i].expected_result) {
+ unsigned hw_type = brw_reg_type_to_a16_hw_3src_type(&devinfo, test_case[i].type);
+ if (hw_type != INVALID_HW_REG_TYPE) {
+ /* ... and remove valid encodings from the set */
+ assert(BITSET_TEST(invalid_encodings, hw_type));
+ BITSET_CLEAR(invalid_encodings, hw_type);
+ }
+
+ /* Use MAD for float types and BFE for integer types so the opcode
+ * itself is always legal for the operand type.
+ */
+ struct brw_reg g = retype(g0, test_case[i].type);
+ if (!brw_reg_type_is_integer(test_case[i].type)) {
+ brw_MAD(p, g, g, g, g);
+ } else {
+ brw_BFE(p, g, g, g, g);
+ }
+
+ EXPECT_TRUE(validate(p));
+
+ clear_instructions(p);
+ }
+ }
+
+ /* The remaining encodings in invalid_encodings do not have a mapping
+ * from BRW_REGISTER_TYPE_* and must be invalid. Verify that invalid
+ * encodings are rejected by the validator.
+ */
+ int e;
+ BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
+ for (unsigned i = 0; i < 2; i++) {
+ if (i == 0) {
+ brw_MAD(p, g0, g0, g0, g0);
+ } else {
+ brw_BFE(p, g0, g0, g0, g0);
+ }
+
+ brw_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
+ brw_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);
+
+ EXPECT_FALSE(validate(p));
+
+ clear_instructions(p);
+
+ /* Skip the BFE variant on Gen6, which has no integer 3-src ops. */
+ if (devinfo.gen == 6)
+ break;
+ }
+ }
+}
+
+/* Exhaustive type-encoding check for 3-source instructions in align1 mode
+ * (Gen10+). Here the effective encoding is the 3-bit hardware type combined
+ * with the 1-bit execution type (float vs. int), so the bitset covers the
+ * 4-bit product space.
+ */
+TEST_P(validation_test, invalid_type_encoding_3src_a1)
+{
+ /* 3-src instructions in align1 mode only supported on Gen10+ */
+ if (devinfo.gen < 10)
+ return;
+
+ const int num_bits = 3 + 1 /* for exec_type */;
+ const int num_encodings = 1 << num_bits;
+
+ /* The data types are encoded into <num_bits> bits to be used in hardware
+ * instructions, so keep a record in a bitset the invalid patterns so
+ * they can be verified to be invalid when used.
+ */
+ BITSET_DECLARE(invalid_encodings, num_encodings);
+
+ const struct {
+ enum brw_reg_type type;
+ unsigned exec_type;
+ bool expected_result;
+ } test_case[] = {
+#define E(x) ((unsigned)BRW_ALIGN1_3SRC_EXEC_TYPE_##x)
+ { BRW_REGISTER_TYPE_NF, E(FLOAT), devinfo.gen == 11 },
+ { BRW_REGISTER_TYPE_DF, E(FLOAT), devinfo.has_64bit_float },
+ { BRW_REGISTER_TYPE_F, E(FLOAT), true },
+ { BRW_REGISTER_TYPE_HF, E(FLOAT), true },
+ { BRW_REGISTER_TYPE_D, E(INT), true },
+ { BRW_REGISTER_TYPE_UD, E(INT), true },
+ { BRW_REGISTER_TYPE_W, E(INT), true },
+ { BRW_REGISTER_TYPE_UW, E(INT), true },
+
+ /* There are no ternary instructions that can operate on B-type sources
+ * on Gen11-12. Src1/Src2 cannot be B-typed either.
+ */
+ { BRW_REGISTER_TYPE_B, E(INT), devinfo.gen == 10 },
+ { BRW_REGISTER_TYPE_UB, E(INT), devinfo.gen == 10 },
+ };
+
+ /* Initially assume all hardware encodings are invalid */
+ BITSET_ONES(invalid_encodings);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_exec_size(p, BRW_EXECUTE_4);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
+ if (test_case[i].expected_result) {
+ unsigned hw_type = brw_reg_type_to_a1_hw_3src_type(&devinfo, test_case[i].type);
+ unsigned hw_exec_type = hw_type | (test_case[i].exec_type << 3);
+ if (hw_type != INVALID_HW_REG_TYPE) {
+ /* ... and remove valid encodings from the set */
+ assert(BITSET_TEST(invalid_encodings, hw_exec_type));
+ BITSET_CLEAR(invalid_encodings, hw_exec_type);
+ }
+
+ struct brw_reg g = retype(g0, test_case[i].type);
+ if (!brw_reg_type_is_integer(test_case[i].type)) {
+ brw_MAD(p, g, g, g, g);
+ } else {
+ brw_BFE(p, g, g, g, g);
+ }
+
+ EXPECT_TRUE(validate(p));
+
+ clear_instructions(p);
+ }
+ }
+
+ /* The remaining encodings in invalid_encodings do not have a mapping
+ * from BRW_REGISTER_TYPE_* and must be invalid. Verify that invalid
+ * encodings are rejected by the validator.
+ */
+ int e;
+ BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
+ /* Unpack hw type and exec type from the combined 4-bit encoding. */
+ const unsigned hw_type = e & 0x7;
+ const unsigned exec_type = e >> 3;
+
+ for (unsigned i = 0; i < 2; i++) {
+ if (i == 0) {
+ brw_MAD(p, g0, g0, g0, g0);
+ brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
+ } else {
+ brw_CSEL(p, g0, g0, g0, g0);
+ brw_inst_set_3src_cond_modifier(&devinfo, last_inst, BRW_CONDITIONAL_NZ);
+ brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
+ }
+
+ brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, exec_type);
+ brw_inst_set_3src_a1_dst_hw_type (&devinfo, last_inst, hw_type);
+ brw_inst_set_3src_a1_src0_hw_type(&devinfo, last_inst, hw_type);
+ brw_inst_set_3src_a1_src1_hw_type(&devinfo, last_inst, hw_type);
+ brw_inst_set_3src_a1_src2_hw_type(&devinfo, last_inst, hw_type);
+
+ EXPECT_FALSE(validate(p));
+
+ clear_instructions(p);
+ }
+ }
+}
+
+/* 3-src instructions must use align16 mode on Gen6-9 and align1 on Gen11+;
+ * Gen10 supports both (both rows validate there). Gen12+ has no access-mode
+ * bit at all, so the test is skipped.
+ */
+TEST_P(validation_test, 3src_inst_access_mode)
+{
+ /* 3-src instructions only supported on Gen6+ */
+ if (devinfo.gen < 6)
+ return;
+
+ /* No access mode bit on Gen12+ */
+ if (devinfo.gen >= 12)
+ return;
+
+ const struct {
+ unsigned mode;
+ bool expected_result;
+ } test_case[] = {
+ { BRW_ALIGN_1, devinfo.gen >= 10 },
+ { BRW_ALIGN_16, devinfo.gen <= 10 },
+ };
+
+ for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
+ /* Pre-Gen10 the assembler only emits align16 3-src forms, so force
+ * that default before overriding the mode bit below.
+ */
+ if (devinfo.gen < 10)
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+ brw_MAD(p, g0, g0, g0, g0);
+ brw_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
+
+ EXPECT_EQ(test_case[i].expected_result, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
/* When the Execution Data Type is wider than the destination data type, the
* destination must [...] specify a HorzStride equal to the ratio in sizes of
* the two data types.
clear_instructions(p);
+ /* Align16 does not exist on Gen11+ */
+ if (devinfo.gen >= 11)
+ return;
+
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_ADD(p, g0, g0, g0);
/* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
{
+ /* Align16 does not exist on Gen11+ */
+ if (devinfo.gen >= 11)
+ return;
+
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_ADD(p, g0, g0, g0);
/* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
{
+ /* Align16 does not exist on Gen11+ */
+ if (devinfo.gen >= 11)
+ return;
+
const struct {
enum brw_vertical_stride vstride;
bool expected_result;
brw_set_default_access_mode(p, BRW_ALIGN_16);
- for (unsigned i = 0; i < sizeof(vstride) / sizeof(vstride[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
brw_ADD(p, g0, g0, g0);
brw_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
clear_instructions(p);
}
- for (unsigned i = 0; i < sizeof(vstride) / sizeof(vstride[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
brw_ADD(p, g0, g0, g0);
brw_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
{ BRW_REGISTER_TYPE_B , BRW_REGISTER_TYPE_D , 0, 0, 0, false },
};
- for (unsigned i = 0; i < sizeof(move) / sizeof(move[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
brw_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
brw_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
brw_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
}
}
+/* Conversions between byte and any 64-bit type (Q/UQ/DF) are disallowed,
+ * regardless of destination stride; every table row expects rejection.
+ * Rows whose 64-bit source type the platform lacks are skipped entirely.
+ */
+TEST_P(validation_test, byte_64bit_conversion)
+{
+ static const struct {
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src_type;
+ unsigned dst_stride;
+ bool expected_result;
+ } inst[] = {
+#define INST(dst_type, src_type, dst_stride, expected_result) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ expected_result, \
+ }
+
+ INST(B, Q, 1, false),
+ INST(B, UQ, 1, false),
+ INST(B, DF, 1, false),
+ INST(UB, Q, 1, false),
+ INST(UB, UQ, 1, false),
+ INST(UB, DF, 1, false),
+
+ INST(B, Q, 2, false),
+ INST(B, UQ, 2, false),
+ INST(B , DF, 2, false),
+ INST(UB, Q, 2, false),
+ INST(UB, UQ, 2, false),
+ INST(UB, DF, 2, false),
+
+ INST(B, Q, 4, false),
+ INST(B, UQ, 4, false),
+ INST(B, DF, 4, false),
+ INST(UB, Q, 4, false),
+ INST(UB, UQ, 4, false),
+ INST(UB, DF, 4, false),
+
+#undef INST
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ if (!devinfo.has_64bit_float &&
+ inst[i].src_type == BRW_REGISTER_TYPE_DF)
+ continue;
+
+ if (!devinfo.has_64bit_int &&
+ (inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].src_type == BRW_REGISTER_TYPE_UQ))
+ continue;
+
+ brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+ EXPECT_EQ(inst[i].expected_result, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* Check conversions to and from half-float across destination stride and
+ * subregister offsets. INST_C rows have the same expectation everywhere;
+ * INST_S rows differ between BDW and CHV/Gen9+, which relax some of the
+ * F<->HF packing restrictions.
+ */
+TEST_P(validation_test, half_float_conversion)
+{
+ static const struct {
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src_type;
+ unsigned dst_stride;
+ unsigned dst_subnr;
+ bool expected_result_bdw;
+ bool expected_result_chv_gen9;
+ } inst[] = {
+#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ dst_subnr, \
+ expected_result, \
+ expected_result, \
+ }
+#define INST_S(dst_type, src_type, dst_stride, dst_subnr, \
+ expected_result_bdw, expected_result_chv_gen9) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ dst_subnr, \
+ expected_result_bdw, \
+ expected_result_chv_gen9, \
+ }
+
+ /* MOV to half-float destination */
+ INST_C(HF, B, 1, 0, false),
+ INST_C(HF, W, 1, 0, false),
+ INST_C(HF, HF, 1, 0, true),
+ INST_C(HF, HF, 1, 2, true),
+ INST_C(HF, D, 1, 0, false),
+ INST_S(HF, F, 1, 0, false, true),
+ INST_C(HF, Q, 1, 0, false),
+ INST_C(HF, B, 2, 0, true),
+ INST_C(HF, B, 2, 2, false),
+ INST_C(HF, W, 2, 0, true),
+ INST_C(HF, W, 2, 2, false),
+ INST_C(HF, HF, 2, 0, true),
+ INST_C(HF, HF, 2, 2, true),
+ INST_C(HF, D, 2, 0, true),
+ INST_C(HF, D, 2, 2, false),
+ INST_C(HF, F, 2, 0, true),
+ INST_S(HF, F, 2, 2, false, true),
+ INST_C(HF, Q, 2, 0, false),
+ INST_C(HF, DF, 2, 0, false),
+ INST_C(HF, B, 4, 0, false),
+ INST_C(HF, W, 4, 0, false),
+ INST_C(HF, HF, 4, 0, true),
+ INST_C(HF, HF, 4, 2, true),
+ INST_C(HF, D, 4, 0, false),
+ INST_C(HF, F, 4, 0, false),
+ INST_C(HF, Q, 4, 0, false),
+ INST_C(HF, DF, 4, 0, false),
+
+ /* MOV from half-float source */
+ INST_C( B, HF, 1, 0, false),
+ INST_C( W, HF, 1, 0, false),
+ INST_C( D, HF, 1, 0, true),
+ INST_C( D, HF, 1, 4, true),
+ INST_C( F, HF, 1, 0, true),
+ INST_C( F, HF, 1, 4, true),
+ INST_C( Q, HF, 1, 0, false),
+ INST_C(DF, HF, 1, 0, false),
+ INST_C( B, HF, 2, 0, false),
+ INST_C( W, HF, 2, 0, true),
+ INST_C( W, HF, 2, 2, false),
+ INST_C( D, HF, 2, 0, false),
+ INST_C( F, HF, 2, 0, true),
+ INST_C( B, HF, 4, 0, true),
+ INST_C( B, HF, 4, 1, false),
+ INST_C( W, HF, 4, 0, false),
+
+#undef INST_C
+#undef INST_S
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ /* Skip rows using 64-bit types the platform doesn't support. */
+ if (!devinfo.has_64bit_float &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+ inst[i].src_type == BRW_REGISTER_TYPE_DF))
+ continue;
+
+ if (!devinfo.has_64bit_int &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+ inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].src_type == BRW_REGISTER_TYPE_UQ))
+ continue;
+
+ brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
+
+ brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+ brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
+
+ /* Give the source a legal region for its element size so only the
+ * conversion rule under test can fail validation.
+ */
+ if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
+ } else {
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+ }
+
+ if (devinfo.is_cherryview || devinfo.gen >= 9)
+ EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
+ else
+ EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* In mixed-float (F/HF) instructions, indirect addressing is forbidden on
+ * sources but still allowed on the destination; rows with src0_indirect set
+ * must be rejected, dst_indirect alone must not.
+ */
+TEST_P(validation_test, mixed_float_source_indirect_addressing)
+{
+ static const struct {
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src0_type;
+ enum brw_reg_type src1_type;
+ unsigned dst_stride;
+ bool dst_indirect;
+ bool src0_indirect;
+ bool expected_result;
+ } inst[] = {
+#define INST(dst_type, src0_type, src1_type, \
+ dst_stride, dst_indirect, src0_indirect, expected_result) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src0_type, \
+ BRW_REGISTER_TYPE_##src1_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ dst_indirect, \
+ src0_indirect, \
+ expected_result, \
+ }
+
+ /* Source and dest are mixed float: indirect src addressing not allowed */
+ INST(HF, F, F, 2, false, false, true),
+ INST(HF, F, F, 2, true, false, true),
+ INST(HF, F, F, 2, false, true, false),
+ INST(HF, F, F, 2, true, true, false),
+ INST( F, HF, F, 1, false, false, true),
+ INST( F, HF, F, 1, true, false, true),
+ INST( F, HF, F, 1, false, true, false),
+ INST( F, HF, F, 1, true, true, false),
+
+ INST(HF, HF, F, 2, false, false, true),
+ INST(HF, HF, F, 2, true, false, true),
+ INST(HF, HF, F, 2, false, true, false),
+ INST(HF, HF, F, 2, true, true, false),
+ INST( F, F, HF, 1, false, false, true),
+ INST( F, F, HF, 1, true, false, true),
+ INST( F, F, HF, 1, false, true, false),
+ INST( F, F, HF, 1, true, true, false),
+
+#undef INST
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ brw_ADD(p, retype(g0, inst[i].dst_type),
+ retype(g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+
+ brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+ brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
+
+ EXPECT_EQ(inst[i].expected_result, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* SIMD16 mixed-float operation is only allowed when the destination is
+ * strided (non-packed) HF; packed-HF and F destinations are limited to
+ * SIMD8 in mixed mode.
+ */
+TEST_P(validation_test, mixed_float_align1_simd16)
+{
+ static const struct {
+ unsigned exec_size;
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src0_type;
+ enum brw_reg_type src1_type;
+ unsigned dst_stride;
+ bool expected_result;
+ } inst[] = {
+#define INST(exec_size, dst_type, src0_type, src1_type, \
+ dst_stride, expected_result) \
+ { \
+ BRW_EXECUTE_##exec_size, \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src0_type, \
+ BRW_REGISTER_TYPE_##src1_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ expected_result, \
+ }
+
+ /* No SIMD16 in mixed mode when destination is packed f16 */
+ INST( 8, HF, F, HF, 2, true),
+ INST(16, HF, HF, F, 2, true),
+ INST(16, HF, HF, F, 1, false),
+ INST(16, HF, F, HF, 1, false),
+
+ /* No SIMD16 in mixed mode when destination is f32 */
+ INST( 8, F, HF, F, 1, true),
+ INST( 8, F, F, HF, 1, true),
+ INST(16, F, HF, F, 1, false),
+ INST(16, F, F, HF, 1, false),
+
+#undef INST
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ brw_ADD(p, retype(g0, inst[i].dst_type),
+ retype(g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+
+ brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+ EXPECT_EQ(inst[i].expected_result, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* With a packed HF destination in mixed-float mode, reading the accumulator
+ * is not allowed (any subnr). BDW additionally rejects the packed-HF
+ * destination itself, hence separate bdw vs. chv/skl expectations.
+ */
+TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
+{
+ static const struct {
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src0_type;
+ enum brw_reg_type src1_type;
+ unsigned dst_stride;
+ bool read_acc;
+ unsigned subnr;
+ bool expected_result_bdw;
+ bool expected_result_chv_skl;
+ } inst[] = {
+#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr, \
+ expected_result_bdw, expected_result_chv_skl) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src0_type, \
+ BRW_REGISTER_TYPE_##src1_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ read_acc, \
+ subnr, \
+ expected_result_bdw, \
+ expected_result_chv_skl, \
+ }
+
+ /* Destination is not packed */
+ INST(HF, HF, F, 2, true, 0, true, true),
+ INST(HF, HF, F, 2, true, 2, true, true),
+ INST(HF, HF, F, 2, true, 4, true, true),
+ INST(HF, HF, F, 2, true, 8, true, true),
+ INST(HF, HF, F, 2, true, 16, true, true),
+
+ /* Destination is packed, we don't read acc */
+ INST(HF, HF, F, 1, false, 0, false, true),
+ INST(HF, HF, F, 1, false, 2, false, true),
+ INST(HF, HF, F, 1, false, 4, false, true),
+ INST(HF, HF, F, 1, false, 8, false, true),
+ INST(HF, HF, F, 1, false, 16, false, true),
+
+ /* Destination is packed, we read acc */
+ INST(HF, HF, F, 1, true, 0, false, false),
+ INST(HF, HF, F, 1, true, 2, false, false),
+ INST(HF, HF, F, 1, true, 4, false, false),
+ INST(HF, HF, F, 1, true, 8, false, false),
+ INST(HF, HF, F, 1, true, 16, false, false),
+
+#undef INST
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ brw_ADD(p, retype(g0, inst[i].dst_type),
+ retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+ brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
+
+ if (devinfo.is_cherryview || devinfo.gen >= 9)
+ EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
+ else
+ EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* A packed (hstride=1) fp16 destination combined with accumulator usage —
+ * implicit via MAC or explicit via an acc0 source — requires hstride=2.
+ * Without acc involvement, CHV/Gen9+ allow the packed destination.
+ */
+TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
+{
+ static const struct {
+ unsigned exec_size;
+ unsigned opcode;
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src0_type;
+ enum brw_reg_type src1_type;
+ unsigned dst_stride;
+ bool read_acc;
+ bool expected_result_bdw;
+ bool expected_result_chv_skl;
+ } inst[] = {
+#define INST(exec_size, opcode, dst_type, src0_type, src1_type, \
+ dst_stride, read_acc,expected_result_bdw, \
+ expected_result_chv_skl) \
+ { \
+ BRW_EXECUTE_##exec_size, \
+ BRW_OPCODE_##opcode, \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src0_type, \
+ BRW_REGISTER_TYPE_##src1_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ read_acc, \
+ expected_result_bdw, \
+ expected_result_chv_skl, \
+ }
+
+ /* Packed fp16 dest with implicit acc needs hstride=2 */
+ INST(8, MAC, HF, HF, F, 1, false, false, false),
+ INST(8, MAC, HF, HF, F, 2, false, true, true),
+ INST(8, MAC, HF, F, HF, 1, false, false, false),
+ INST(8, MAC, HF, F, HF, 2, false, true, true),
+
+ /* Packed fp16 dest with explicit acc needs hstride=2 */
+ INST(8, ADD, HF, HF, F, 1, true, false, false),
+ INST(8, ADD, HF, HF, F, 2, true, true, true),
+ INST(8, ADD, HF, F, HF, 1, true, false, false),
+ INST(8, ADD, HF, F, HF, 2, true, true, true),
+
+ /* If destination is not fp16, restriction doesn't apply */
+ INST(8, MAC, F, HF, F, 1, false, true, true),
+ INST(8, MAC, F, HF, F, 2, false, true, true),
+
+ /* If there is no implicit/explicit acc, restriction doesn't apply */
+ INST(8, ADD, HF, HF, F, 1, false, false, true),
+ INST(8, ADD, HF, HF, F, 2, false, true, true),
+ INST(8, ADD, HF, F, HF, 1, false, false, true),
+ INST(8, ADD, HF, F, HF, 2, false, true, true),
+ INST(8, ADD, F, HF, F, 1, false, true, true),
+ INST(8, ADD, F, HF, F, 2, false, true, true),
+
+#undef INST
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ /* MAC reads the accumulator implicitly; ADD only touches it when the
+ * row asks for an explicit acc0 source.
+ */
+ if (inst[i].opcode == BRW_OPCODE_MAC) {
+ brw_MAC(p, retype(g0, inst[i].dst_type),
+ retype(g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+ } else {
+ assert(inst[i].opcode == BRW_OPCODE_ADD);
+ brw_ADD(p, retype(g0, inst[i].dst_type),
+ retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+ }
+
+ brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+ if (devinfo.is_cherryview || devinfo.gen >= 9)
+ EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
+ else
+ EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* Math (extended-math pipe) instructions in mixed-float mode require HF
+ * operands to be strided (stride 2); packed HF inputs are invalid. Uses
+ * POW so both sources are exercised. Gen8 has no HF math at all.
+ */
+TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
+{
+ static const struct {
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src0_type;
+ enum brw_reg_type src1_type;
+ unsigned dst_stride;
+ unsigned src0_stride;
+ unsigned src1_stride;
+ bool expected_result;
+ } inst[] = {
+#define INST(dst_type, src0_type, src1_type, \
+ dst_stride, src0_stride, src1_stride, expected_result) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src0_type, \
+ BRW_REGISTER_TYPE_##src1_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ BRW_HORIZONTAL_STRIDE_##src0_stride, \
+ BRW_HORIZONTAL_STRIDE_##src1_stride, \
+ expected_result, \
+ }
+
+ INST(HF, HF, F, 2, 2, 1, true),
+ INST(HF, F, HF, 2, 1, 2, true),
+ INST(HF, F, HF, 1, 1, 2, true),
+ INST(HF, F, HF, 2, 1, 1, false),
+ INST(HF, HF, F, 2, 1, 1, false),
+ INST(HF, HF, F, 1, 1, 1, false),
+ INST(HF, HF, F, 2, 1, 1, false),
+ INST( F, HF, F, 1, 1, 1, false),
+ INST( F, F, HF, 1, 1, 2, true),
+ INST( F, HF, HF, 1, 2, 1, false),
+ INST( F, HF, HF, 1, 2, 2, true),
+
+#undef INST
+ };
+
+ /* No half-float math in gen8 */
+ if (devinfo.gen < 9)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ gen6_math(p, retype(g0, inst[i].dst_type),
+ BRW_MATH_FUNCTION_POW,
+ retype(g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
+
+ brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
+
+ EXPECT_EQ(inst[i].expected_result, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+/* A packed fp16 destination in mixed-float mode must not cross oword
+ * boundaries: on CHV/SKL+ that means SIMD8 with oword-aligned subnr (0 or
+ * 16) only; SIMD16 always crosses. BDW rejects packed-HF destinations in
+ * mixed mode outright (all bdw expectations false for stride 1).
+ */
+TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
+{
+ static const struct {
+ unsigned exec_size;
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src0_type;
+ enum brw_reg_type src1_type;
+ unsigned dst_stride;
+ unsigned dst_subnr;
+ bool expected_result_bdw;
+ bool expected_result_chv_skl;
+ } inst[] = {
+#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
+ expected_result_bdw, expected_result_chv_skl) \
+ { \
+ BRW_EXECUTE_##exec_size, \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src0_type, \
+ BRW_REGISTER_TYPE_##src1_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ dst_subnr, \
+ expected_result_bdw, \
+ expected_result_chv_skl \
+ }
+
+ /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
+ * oword-aligned
+ */
+ INST( 8, HF, HF, F, 1, 0, false, true),
+ INST( 8, HF, HF, F, 1, 2, false, false),
+ INST( 8, HF, HF, F, 1, 4, false, false),
+ INST( 8, HF, HF, F, 1, 8, false, false),
+ INST( 8, HF, HF, F, 1, 16, false, true),
+
+ /* SIMD16 packed fp16 always crosses oword boundaries */
+ INST(16, HF, HF, F, 1, 0, false, false),
+ INST(16, HF, HF, F, 1, 2, false, false),
+ INST(16, HF, HF, F, 1, 4, false, false),
+ INST(16, HF, HF, F, 1, 8, false, false),
+ INST(16, HF, HF, F, 1, 16, false, false),
+
+ /* If destination is not packed (or not fp16) we can cross oword
+ * boundaries
+ */
+ INST( 8, HF, HF, F, 2, 0, true, true),
+ INST( 8, F, HF, F, 1, 0, true, true),
+
+#undef INST
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ brw_ADD(p, retype(g0, inst[i].dst_type),
+ retype(g0, inst[i].src0_type),
+ retype(g0, inst[i].src1_type));
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+ brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
+
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+
+ brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+
+ brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+ if (devinfo.is_cherryview || devinfo.gen >= 9)
+ EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
+ else
+ EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
+TEST_P(validation_test, mixed_float_align16_packed_data)
+{
+   /* Mixed-float Align16 ADD: checks which source vertical strides the
+    * validator accepts when one source is half-float.  Only <4> vstride
+    * on both sources is expected to pass.
+    */
+   static const struct {
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src0_type;
+      enum brw_reg_type src1_type;
+      unsigned src0_vstride;
+      unsigned src1_vstride;
+      bool expected_result;
+   } inst[] = {
+#define INST(dst_type, src0_type, src1_type,                                   \
+             src0_vstride, src1_vstride, expected_result)                      \
+      {                                                                        \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_REGISTER_TYPE_##src0_type,                                        \
+         BRW_REGISTER_TYPE_##src1_type,                                        \
+         BRW_VERTICAL_STRIDE_##src0_vstride,                                   \
+         BRW_VERTICAL_STRIDE_##src1_vstride,                                   \
+         expected_result,                                                      \
+      }
+
+      /* We only test with F destination because there is a restriction
+       * by which F->HF conversions need to be DWord aligned but Align16 also
+       * requires that destination horizontal stride is 1.
+       */
+      INST(F,  F, HF, 4, 4, true),
+      INST(F,  F, HF, 2, 4, false),
+      INST(F,  F, HF, 4, 2, false),
+      INST(F,  F, HF, 0, 4, false),
+      INST(F,  F, HF, 4, 0, false),
+      INST(F, HF,  F, 4, 4, true),
+      INST(F, HF,  F, 4, 2, false),
+      INST(F, HF,  F, 2, 4, false),
+      INST(F, HF,  F, 0, 4, false),
+      INST(F, HF,  F, 4, 0, false),
+
+#undef INST
+   };
+
+   /* Mixed float needs gen8+, and Align16 does not exist on gen11+. */
+   if (devinfo.gen < 8 || devinfo.gen >= 11)
+      return;
+
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      brw_ADD(p, retype(g0, inst[i].dst_type),
+                 retype(g0, inst[i].src0_type),
+                 retype(g0, inst[i].src1_type));
+
+      /* Source vstrides are the values under test. */
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
+      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, mixed_float_align16_no_simd16)
+{
+   /* Mixed-float Align16: SIMD16 execution size is rejected while the same
+    * type combinations pass at SIMD8.
+    */
+   static const struct {
+      unsigned exec_size;
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src0_type;
+      enum brw_reg_type src1_type;
+      bool expected_result;
+   } inst[] = {
+#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)       \
+      {                                                                        \
+         BRW_EXECUTE_##exec_size,                                              \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_REGISTER_TYPE_##src0_type,                                        \
+         BRW_REGISTER_TYPE_##src1_type,                                        \
+         expected_result,                                                      \
+      }
+
+      /* We only test with F destination because there is a restriction
+       * by which F->HF conversions need to be DWord aligned but Align16 also
+       * requires that destination horizontal stride is 1.
+       *
+       * NOTE(review): rows 1/3 and 4/6 are byte-identical (F, F, HF); one of
+       * each pair may have been meant to use HF on both sources — confirm
+       * against the intended coverage.
+       */
+      INST( 8, F,  F, HF, true),
+      INST( 8, F, HF,  F, true),
+      INST( 8, F,  F, HF, true),
+      INST(16, F,  F, HF, false),
+      INST(16, F, HF,  F, false),
+      INST(16, F,  F, HF, false),
+
+#undef INST
+   };
+
+   /* Mixed float needs gen8+, and Align16 does not exist on gen11+. */
+   if (devinfo.gen < 8 || devinfo.gen >= 11)
+      return;
+
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      brw_ADD(p, retype(g0, inst[i].dst_type),
+                 retype(g0, inst[i].src0_type),
+                 retype(g0, inst[i].src1_type));
+
+      /* Execution size is the value under test. */
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      /* Well-formed <4> vstrides so only the exec size can fail. */
+      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, mixed_float_align16_no_acc_read)
+{
+   /* Mixed-float Align16: reading the accumulator as src0 is rejected;
+    * the identical instruction reading g0 instead passes.
+    */
+   static const struct {
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src0_type;
+      enum brw_reg_type src1_type;
+      bool read_acc;
+      bool expected_result;
+   } inst[] = {
+#define INST(dst_type, src0_type, src1_type, read_acc, expected_result)        \
+      {                                                                        \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_REGISTER_TYPE_##src0_type,                                        \
+         BRW_REGISTER_TYPE_##src1_type,                                        \
+         read_acc,                                                             \
+         expected_result,                                                      \
+      }
+
+      /* We only test with F destination because there is a restriction
+       * by which F->HF conversions need to be DWord aligned but Align16 also
+       * requires that destination horizontal stride is 1.
+       */
+      INST( F,  F, HF, false, true),
+      INST( F,  F, HF, true,  false),
+      INST( F, HF,  F, false, true),
+      INST( F, HF,  F, true,  false),
+
+#undef INST
+   };
+
+   /* Mixed float needs gen8+, and Align16 does not exist on gen11+. */
+   if (devinfo.gen < 8 || devinfo.gen >= 11)
+      return;
+
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      /* src0 reads the accumulator when read_acc is set, otherwise g0. */
+      brw_ADD(p, retype(g0, inst[i].dst_type),
+                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
+                 retype(g0, inst[i].src1_type));
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, mixed_float_align16_math_packed_format)
+{
+   /* Mixed-float Align16 math (POW via gen6_math): only fully packed
+    * operand formats — <4> vstride on both sources — are accepted.
+    */
+   static const struct {
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src0_type;
+      enum brw_reg_type src1_type;
+      unsigned src0_vstride;
+      unsigned src1_vstride;
+      bool expected_result;
+   } inst[] = {
+#define INST(dst_type, src0_type, src1_type,                                   \
+             src0_vstride, src1_vstride, expected_result)                      \
+      {                                                                        \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_REGISTER_TYPE_##src0_type,                                        \
+         BRW_REGISTER_TYPE_##src1_type,                                        \
+         BRW_VERTICAL_STRIDE_##src0_vstride,                                   \
+         BRW_VERTICAL_STRIDE_##src1_vstride,                                   \
+         expected_result,                                                      \
+      }
+
+      /* We only test with F destination because there is a restriction
+       * by which F->HF conversions need to be DWord aligned but Align16 also
+       * requires that destination horizontal stride is 1.
+       */
+      INST( F, HF,  F, 4, 0, false),
+      INST( F, HF, HF, 4, 4, true),
+      INST( F,  F, HF, 4, 0, false),
+      INST( F,  F, HF, 2, 4, false),
+      INST( F,  F, HF, 4, 2, false),
+      INST( F, HF, HF, 0, 4, false),
+
+#undef INST
+   };
+
+   /* Align16 Math for mixed float mode is not supported in gen8, and
+    * Align16 does not exist on gen11+.
+    */
+   if (devinfo.gen < 9 || devinfo.gen >= 11)
+      return;
+
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      gen6_math(p, retype(g0, inst[i].dst_type),
+                   BRW_MATH_FUNCTION_POW,
+                   retype(g0, inst[i].src0_type),
+                   retype(g0, inst[i].src1_type));
+
+      /* Source vstrides are the values under test. */
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
+      brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
TEST_P(validation_test, vector_immediate_destination_alignment)
{
static const struct {
{ BRW_REGISTER_TYPE_W, BRW_REGISTER_TYPE_UV, 1, BRW_EXECUTE_8, false },
};
- for (unsigned i = 0; i < sizeof(move) / sizeof(move[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
/* UV type is Gen6+ */
if (devinfo.gen < 6 &&
move[i].src_type == BRW_REGISTER_TYPE_UV)
{ BRW_REGISTER_TYPE_B, BRW_REGISTER_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, true },
};
- for (unsigned i = 0; i < sizeof(move) / sizeof(move[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
/* UV type is Gen6+ */
if (devinfo.gen < 6 &&
move[i].src_type == BRW_REGISTER_TYPE_UV)
if (devinfo.gen < 8)
return;
- for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+ /* NoDDChk/NoDDClr does not exist on Gen12+ */
+ if (devinfo.gen >= 12)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ if (!devinfo.has_64bit_float &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+ inst[i].src_type == BRW_REGISTER_TYPE_DF))
+ continue;
+
+ if (!devinfo.has_64bit_int &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+ inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].src_type == BRW_REGISTER_TYPE_UQ))
+ continue;
+
if (inst[i].opcode == BRW_OPCODE_MOV) {
brw_MOV(p, retype(g0, inst[i].dst_type),
retype(g0, inst[i].src_type));
if (devinfo.gen < 8)
return;
- for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ if (!devinfo.has_64bit_float &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+ inst[i].src_type == BRW_REGISTER_TYPE_DF))
+ continue;
+
+ if (!devinfo.has_64bit_int &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+ inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].src_type == BRW_REGISTER_TYPE_UQ))
+ continue;
+
if (inst[i].opcode == BRW_OPCODE_MOV) {
brw_MOV(p, retype(g0, inst[i].dst_type),
retype(g0, inst[i].src_type));
if (devinfo.gen < 8)
return;
- for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ if (!devinfo.has_64bit_float &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+ inst[i].src_type == BRW_REGISTER_TYPE_DF))
+ continue;
+
+ if (!devinfo.has_64bit_int &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+ inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].src_type == BRW_REGISTER_TYPE_UQ))
+ continue;
+
if (inst[i].opcode == BRW_OPCODE_MOV) {
brw_MOV(p, retype(inst[i].dst, inst[i].dst_type),
retype(inst[i].src, inst[i].src_type));
clear_instructions(p);
}
+ if (!devinfo.has_64bit_float)
+ return;
+
/* MAC implicitly reads the accumulator */
brw_MAC(p, retype(g0, BRW_REGISTER_TYPE_DF),
retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF),
if (devinfo.gen < 8)
return;
+ /* Align16 does not exist on Gen11+ */
+ if (devinfo.gen >= 11)
+ return;
+
brw_set_default_access_mode(p, BRW_ALIGN_16);
- for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
if (inst[i].opcode == BRW_OPCODE_MOV) {
brw_MOV(p, retype(g0, inst[i].dst_type),
retype(g0, inst[i].src_type));
if (devinfo.gen < 8)
return;
- for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+ /* NoDDChk/NoDDClr does not exist on Gen12+ */
+ if (devinfo.gen >= 12)
+ return;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+ if (!devinfo.has_64bit_float &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+ inst[i].src_type == BRW_REGISTER_TYPE_DF))
+ continue;
+
+ if (!devinfo.has_64bit_int &&
+ (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+ inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+ inst[i].src_type == BRW_REGISTER_TYPE_UQ))
+ continue;
+
if (inst[i].opcode == BRW_OPCODE_MOV) {
brw_MOV(p, retype(g0, inst[i].dst_type),
retype(g0, inst[i].src_type));
clear_instructions(p);
}
}
+
+TEST_P(validation_test, gen11_no_byte_src_1_2)
+{
+   /* Gen11+ restricts byte-typed operands in the src1 slot of 2-source
+    * instructions and in src1/src2 of 3-source instructions.  Each row
+    * builds one MOV/ADD/MAD with explicit operand regions and checks
+    * whether the validator accepts it on the row's target gen.
+    */
+   static const struct {
+      enum opcode opcode;
+      unsigned access_mode;
+
+      enum brw_reg_type dst_type;
+      struct {
+         enum brw_reg_type type;
+         unsigned vstride;
+         unsigned width;
+         unsigned hstride;
+      } srcs[3];
+
+      int gen;
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, access_mode, dst_type,                                    \
+             src0_type, src0_vstride, src0_width, src0_hstride,                \
+             src1_type, src1_vstride, src1_width, src1_hstride,                \
+             src2_type,                                                        \
+             gen, expected_result)                                             \
+      {                                                                        \
+         BRW_OPCODE_##opcode,                                                  \
+         BRW_ALIGN_##access_mode,                                              \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         {                                                                     \
+            {                                                                  \
+               BRW_REGISTER_TYPE_##src0_type,                                  \
+               BRW_VERTICAL_STRIDE_##src0_vstride,                             \
+               BRW_WIDTH_##src0_width,                                         \
+               BRW_HORIZONTAL_STRIDE_##src0_hstride,                           \
+            },                                                                 \
+            {                                                                  \
+               BRW_REGISTER_TYPE_##src1_type,                                  \
+               BRW_VERTICAL_STRIDE_##src1_vstride,                             \
+               BRW_WIDTH_##src1_width,                                         \
+               BRW_HORIZONTAL_STRIDE_##src1_hstride,                           \
+            },                                                                 \
+            {                                                                  \
+               BRW_REGISTER_TYPE_##src2_type,                                  \
+            },                                                                 \
+         },                                                                    \
+         gen,                                                                  \
+         expected_result,                                                      \
+      }
+
+      /* Passes on < 11 */
+      INST(MOV, 16,  F,  B, 2, 4, 0, UD, 0, 4, 0, D,  8, true ),
+      INST(ADD, 16, UD,  F, 0, 4, 0, UB, 0, 1, 0, D,  7, true ),
+      INST(MAD, 16,  D,  B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ),
+
+      /* Fails on 11+ */
+      INST(MAD,  1, UB,  W, 1, 1, 0,  D, 0, 4, 0, B, 11, false ),
+      INST(MAD,  1, UB,  W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ),
+      INST(ADD,  1,  W,  W, 1, 4, 1,  B, 1, 1, 0, D, 11, false ),
+
+      /* Passes on 11+ */
+      INST(MOV,  1,  W,  B, 8, 8, 1,  D, 8, 8, 1, D, 11, true ),
+      INST(ADD,  1, UD,  B, 8, 8, 1,  W, 8, 8, 1, D, 11, true ),
+      INST(MAD,  1,  B,  B, 0, 1, 0,  D, 0, 4, 0, W, 11, true ),
+
+#undef INST
+   };
+
+   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
+      /* Skip instruction not meant for this gen. */
+      if (devinfo.gen != inst[i].gen)
+         continue;
+
+      brw_push_insn_state(p);
+
+      brw_set_default_exec_size(p, BRW_EXECUTE_8);
+      brw_set_default_access_mode(p, inst[i].access_mode);
+
+      switch (inst[i].opcode) {
+      case BRW_OPCODE_MOV:
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type));
+         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         break;
+      case BRW_OPCODE_ADD:
+         brw_ADD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type),
+                    retype(g0, inst[i].srcs[1].type));
+         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
+         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
+         brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
+         brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
+         break;
+      case BRW_OPCODE_MAD:
+         brw_MAD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].srcs[0].type),
+                    retype(g0, inst[i].srcs[1].type),
+                    retype(g0, inst[i].srcs[2].type));
+         brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
+         brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
+         /* Fix: src1's region must come from srcs[1]; the previous code
+          * copy-pasted srcs[0] here, so the src1 strides in the table were
+          * never actually encoded or tested.
+          */
+         brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
+         brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
+         break;
+      default:
+         unreachable("invalid opcode");
+      }
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+
+      /* NOTE(review): these 1/2-source width accessors are also applied to
+       * the 3-source MAD path — confirm that is the intended encoding for
+       * 3-src instructions.
+       */
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
+      brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
+
+      brw_pop_insn_state(p);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}