From: Matt Turner
Date: Wed, 30 Aug 2017 01:29:29 +0000 (-0700)
Subject: i965: Validate "Special Requirements for Handling Double Precision Data Types"
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2572c2771d0cab0b9bc489d354ede44dfc88547b;p=mesa.git

i965: Validate "Special Requirements for Handling Double Precision Data Types"

I did not implement:

   CNL's restriction on 64-bit int + align16, because I don't think
   we'll ever use this combination regardless of hardware generation.

   The restriction on immediate DF -> F conversions, because there's no
   reason to ever generate that, and I don't even know how DF -> F
   conversions are supposed to work in Align16 since (1) the dst stride
   must be 1, but (2) the dst stride would have to be 2 for src and dst
   strides to be aligned.
---

diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index 8fcc5293666..8568f712d77 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -1085,6 +1085,191 @@ vector_immediate_restrictions(const struct gen_device_info *devinfo,
    return error_msg;
 }
 
+/**
+ * Checks one- and two-source instructions against the "Special Requirements
+ * for Handling Double Precision Data Types" rules quoted from the PRMs below.
+ *
+ * Nothing is checked unless brw_reg_type_to_size() reports 8 for the
+ * destination or execution type, or the instruction is a Gen8+ MUL whose
+ * sources are both D/UD.  The regioning, indirect addressing, ARF and DepCtrl
+ * rules are applied on CHV and Gen9LP only; the Align16 rule on all Gen8+.
+ *
+ * Returns error_msg, which stays empty unless an ERROR_IF check fires
+ * (NOTE: ERROR_IF is defined outside this patch -- confirm its semantics).
+ */
+static struct string
+special_requirements_for_handling_double_precision_data_types(
+                                       const struct gen_device_info *devinfo,
+                                       const brw_inst *inst)
+{
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (num_sources == 3 || num_sources == 0)
+      return (struct string){};
+
+   enum brw_reg_type exec_type = execution_type(devinfo, inst);
+   unsigned exec_type_size = brw_reg_type_to_size(exec_type);
+
+   enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
+   enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+   unsigned dst_type_size = brw_reg_type_to_size(dst_type);
+   unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
+   unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
+   unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+   unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
+
+   bool is_integer_dword_multiply =
+      devinfo->gen >= 8 &&
+      brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL &&
+      (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
+       brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) &&
+      (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D ||
+       brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD);
+
+   if (dst_type_size != 8 && exec_type_size != 8 && !is_integer_dword_multiply)
+      return (struct string){};
+
+   for (unsigned i = 0; i < num_sources; i++) {
+      unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
+      bool is_scalar_region;
+      enum brw_reg_file file;
+      enum brw_reg_type type;
+
+#define DO_SRC(n)                                                              \
+      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
+          BRW_IMMEDIATE_VALUE)                                                 \
+         continue;                                                             \
+                                                                               \
+      is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
+      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
+      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
+      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
+      file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
+      type = brw_inst_src ## n ## _type(devinfo, inst);                        \
+      type_size = brw_reg_type_to_size(type);                                  \
+      reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
+      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
+      address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)
+
+      if (i == 0) {
+         DO_SRC(0);
+      } else {
+         DO_SRC(1);
+      }
+#undef DO_SRC
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, regioning in Align1 must follow these rules:
+       *
+       *    1. Source and Destination horizontal stride must be aligned to the
+       *       same qword.
+       *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
+       *    3. Source and Destination offset must be the same, except the case
+       *       of scalar source.
+       *
+       * We assume that the restriction applies to GLK as well.
+       */
+      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
+          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+         unsigned src_stride = hstride * type_size;
+         unsigned dst_stride = dst_hstride * dst_type_size;
+
+         ERROR_IF(!is_scalar_region &&
+                  (src_stride % 8 != 0 ||
+                   dst_stride % 8 != 0 ||
+                   src_stride != dst_stride),
+                  "Source and destination horizontal stride must equal and a "
+                  "multiple of a qword when the execution type is 64-bit");
+
+         ERROR_IF(vstride != width * hstride,
+                  "Vstride must be Width * Hstride when the execution type is "
+                  "64-bit");
+
+         ERROR_IF(!is_scalar_region && dst_subreg != subreg,
+                  "Source and destination offset must be the same when the "
+                  "execution type is 64-bit");
+      }
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, indirect addressing must not be used.
+       *
+       * We assume that the restriction applies to GLK as well.
+       */
+      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+         ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
+                  BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
+                  "Indirect addressing is not allowed when the execution type "
+                  "is 64-bit");
+      }
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    ARF registers must never be used with 64b datatype or when
+       *    operation is integer DWord multiply.
+       *
+       * We assume that the restriction applies to GLK as well.
+       *
+       * We assume that the restriction does not apply to the null register.
+       */
+      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+         ERROR_IF(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MAC ||
+                  brw_inst_acc_wr_control(devinfo, inst) ||
+                  (BRW_ARCHITECTURE_REGISTER_FILE == file &&
+                   reg != BRW_ARF_NULL) ||
+                  (BRW_ARCHITECTURE_REGISTER_FILE == dst_file &&
+                   dst_reg != BRW_ARF_NULL),
+                  "Architecture registers cannot be used when the execution "
+                  "type is 64-bit");
+      }
+   }
+
+   /* The PRMs say that for BDW, SKL:
+    *
+    *    If Align16 is required for an operation with QW destination and non-QW
+    *    source datatypes, the execution size cannot exceed 2.
+    *
+    * We assume that the restriction applies to all Gen8+ parts.
+    */
+   if (devinfo->gen >= 8) {
+      enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+      /* Single-source instructions have no src1, so their src1 type field is
+       * meaningless; treat it as src0's type instead of decoding stale bits
+       * that could trigger a bogus failure for a QWord-to-QWord operation.
+       */
+      enum brw_reg_type src1_type =
+         num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
+      unsigned src0_type_size = brw_reg_type_to_size(src0_type);
+      unsigned src1_type_size = brw_reg_type_to_size(src1_type);
+
+      ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
+               dst_type_size == 8 &&
+               (src0_type_size != 8 || src1_type_size != 8) &&
+               brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
+               "In Align16 exec size cannot exceed 2 with a QWord destination "
+               "and a non-QWord source");
+   }
+
+   /* The PRMs say that for CHV, BXT:
+    *
+    *    When source or destination datatype is 64b or operation is integer
+    *    DWord multiply, DepCtrl must not be used.
+    *
+    * We assume that the restriction applies to GLK as well.
+    */
+   if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
+      ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
+               brw_inst_no_dd_clear(devinfo, inst),
+               "DepCtrl is not allowed when the execution type is 64-bit");
+   }
+
+   return error_msg;
+}
+
 bool
 brw_validate_instructions(const struct gen_device_info *devinfo,
                           void *assembly, int start_offset, int end_offset,
@@ -1113,6 +1298,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo,
          CHECK(general_restrictions_on_region_parameters);
          CHECK(region_alignment_rules);
          CHECK(vector_immediate_restrictions);
+         CHECK(special_requirements_for_handling_double_precision_data_types);
       }
 
       if (error_msg.str && annotation) {
diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
index 4e0317ec74d..e8b175beea7 100644
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -138,6 +138,7 @@ validate(struct brw_codegen *p)
 
 #define last_inst    (&p->store[p->nr_insn - 1])
 #define g0           brw_vec8_grf(0, 0)
+#define acc0         brw_acc_reg(8)
 #define null         brw_null_reg()
 #define zero         brw_imm_f(0.0f)
 
@@ -935,3 +936,625 @@ TEST_P(validation_test, vector_immediate_destination_stride)
       clear_instructions(p);
    }
 }
+
+TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      unsigned dst_subreg;
+      unsigned dst_stride;
+
+      enum brw_reg_type src_type;
+      unsigned src_subreg;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type,    \
+             src_subreg, src_vstride, src_width, src_hstride, expected_result) \
+      {                                                                        \
+         BRW_OPCODE_##opcode,                                                  \
+         BRW_EXECUTE_##exec_size,                                              \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         dst_subreg,                                                           \
+         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
+         BRW_REGISTER_TYPE_##src_type,                                         \
+         src_subreg,                                                           \
+         BRW_VERTICAL_STRIDE_##src_vstride,                                    \
+         BRW_WIDTH_##src_width,                                                \
+         BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
+         expected_result,                                                      \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
+      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
+      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
+
+      INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
+      INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
+      INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
+
+      INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
+      INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
+      INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
+
+      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
+
+      /* Something with subreg nrs */
+      INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
+      INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
+      INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
+
+      INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
+      INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, regioning in Align1 must follow these rules:
+       *
+       *    1. Source and Destination horizontal stride must be aligned to the
+       *       same qword.
+       */
+      INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
+      INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
+
+      INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
+      INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
+      INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
+
+      INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
+      INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
+
+      INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
+      INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
+
+      INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
+      INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
+
+      /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
+      INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
+      INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
+      INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
+
+      INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
+      INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
+      INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
+
+      INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
+      INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
+      INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
+
+      INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
+
+      INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
+
+      /*    3. Source and Destination offset must be the same, except the case
+       *       of scalar source.
+       */
+      INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
+      INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
+      INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
+
+      INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
+      INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
+      INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
+
+      INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
+      INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
+
+      INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
+      INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
+
+      INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
+      INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
+      INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
+
+      INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
+      INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
+      INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
+
+      INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
+      INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
+
+      INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
+      INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
+      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, qword_low_power_no_indirect_addressing)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      bool dst_is_indirect;
+      unsigned dst_stride;
+
+      enum brw_reg_type src_type;
+      bool src_is_indirect;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride,         \
+             src_type, src_is_indirect, src_vstride, src_width, src_hstride,   \
+             expected_result)                                                  \
+      {                                                                        \
+         BRW_OPCODE_##opcode,                                                  \
+         BRW_EXECUTE_##exec_size,                                              \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         dst_is_indirect,                                                      \
+         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
+         BRW_REGISTER_TYPE_##src_type,                                         \
+         src_is_indirect,                                                      \
+         BRW_VERTICAL_STRIDE_##src_vstride,                                    \
+         BRW_WIDTH_##src_width,                                                \
+         BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
+         expected_result,                                                      \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
+      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
+      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
+
+      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
+      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
+
+      INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
+      INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
+      INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, indirect addressing must not be used.
+       */
+      INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
+      INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
+
+      INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
+      INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
+      INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
+
+      INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
+      INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
+      INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
+
+      INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
+      INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
+      INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
+
+      INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
+      INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
+      INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
+
+      INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
+      INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
+      INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
+
+      INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
+      INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
+
+      INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
+      INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
+      brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, qword_low_power_no_64bit_arf)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      struct brw_reg dst;
+      enum brw_reg_type dst_type;
+      unsigned dst_stride;
+
+      struct brw_reg src;
+      enum brw_reg_type src_type;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool acc_wr;
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst, dst_type, dst_stride,                     \
+             src, src_type, src_vstride, src_width, src_hstride,               \
+             acc_wr, expected_result)                                          \
+      {                                                                        \
+         BRW_OPCODE_##opcode,                                                  \
+         BRW_EXECUTE_##exec_size,                                              \
+         dst,                                                                  \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
+         src,                                                                  \
+         BRW_REGISTER_TYPE_##src_type,                                         \
+         BRW_VERTICAL_STRIDE_##src_vstride,                                    \
+         BRW_WIDTH_##src_width,                                                \
+         BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
+         acc_wr,                                                               \
+         expected_result,                                                      \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
+      INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
+      INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),
+
+      INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
+      INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),
+
+      INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
+      INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
+      INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),
+
+      INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
+      INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
+      INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),
+
+      INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
+      INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
+      INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),
+
+      INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
+      INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    ARF registers must never be used with 64b datatype or when
+       *    operation is integer DWord multiply.
+       */
+      INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
+      INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),
+
+      INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
+      INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),
+
+      INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
+      INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),
+
+      INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
+      INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),
+
+      INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
+      INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),
+
+      INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
+      INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),
+
+      INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
+      INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
+      /* MUL cannot have integer accumulator sources, so don't test that */
+
+      /* We assume that the restriction does not apply to the null register */
+      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
+      INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
+      INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
+
+      /* Check implicit accumulator write control */
+      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
+      INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(inst[i].dst, inst[i].dst_type),
+                    retype(inst[i].src, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(inst[i].dst, inst[i].dst_type),
+                    retype(inst[i].src, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+         brw_inst_set_opcode(&devinfo, last_inst, inst[i].opcode);
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+      brw_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+
+   /* MAC implicitly reads the accumulator */
+   brw_MAC(p, retype(g0, BRW_REGISTER_TYPE_DF),
+              retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF),
+              retype(stride(g0, 4, 4, 1), BRW_REGISTER_TYPE_DF));
+   if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+      EXPECT_FALSE(validate(p));
+   } else {
+      EXPECT_TRUE(validate(p));
+   }
+}
+
+TEST_P(validation_test, align16_64_bit_integer)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      enum brw_reg_type src_type;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, src_type, expected_result)           \
+      {                                                                        \
+         BRW_OPCODE_##opcode,                                                  \
+         BRW_EXECUTE_##exec_size,                                              \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_REGISTER_TYPE_##src_type,                                         \
+         expected_result,                                                      \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 2, Q,  D,  true ),
+      INST(MOV, 2, UQ, UD, true ),
+      INST(MOV, 2, DF, F,  true ),
+
+      INST(ADD, 2, Q,  D,  true ),
+      INST(ADD, 2, UQ, UD, true ),
+      INST(ADD, 2, DF, F,  true ),
+
+      /* The PRMs say that for BDW, SKL:
+       *
+       *    If Align16 is required for an operation with QW destination and non-QW
+       *    source datatypes, the execution size cannot exceed 2.
+       */
+
+      INST(MOV, 4, Q,  D,  false),
+      INST(MOV, 4, UQ, UD, false),
+      INST(MOV, 4, DF, F,  false),
+
+      INST(ADD, 4, Q,  D,  false),
+      INST(ADD, 4, UQ, UD, false),
+      INST(ADD, 4, DF, F,  false),
+
+#undef INST
+   };
+
+   /* 64-bit integer types exist on Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   brw_set_default_access_mode(p, BRW_ALIGN_16);
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_ADD);
+         brw_ADD(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(g0, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      EXPECT_EQ(inst[i].expected_result, validate(p));
+
+      clear_instructions(p);
+   }
+}
+
+TEST_P(validation_test, qword_low_power_no_depctrl)
+{
+   static const struct {
+      enum opcode opcode;
+      unsigned exec_size;
+
+      enum brw_reg_type dst_type;
+      unsigned dst_stride;
+
+      enum brw_reg_type src_type;
+      unsigned src_vstride;
+      unsigned src_width;
+      unsigned src_hstride;
+
+      bool no_dd_check;
+      bool no_dd_clear;
+
+      bool expected_result;
+   } inst[] = {
+#define INST(opcode, exec_size, dst_type, dst_stride,                          \
+             src_type, src_vstride, src_width, src_hstride,                    \
+             no_dd_check, no_dd_clear, expected_result)                        \
+      {                                                                        \
+         BRW_OPCODE_##opcode,                                                  \
+         BRW_EXECUTE_##exec_size,                                              \
+         BRW_REGISTER_TYPE_##dst_type,                                         \
+         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
+         BRW_REGISTER_TYPE_##src_type,                                         \
+         BRW_VERTICAL_STRIDE_##src_vstride,                                    \
+         BRW_WIDTH_##src_width,                                                \
+         BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
+         no_dd_check,                                                          \
+         no_dd_clear,                                                          \
+         expected_result,                                                      \
+      }
+
+      /* Some instructions that violate no restrictions, as a control */
+      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
+      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
+      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
+
+      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
+      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
+      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
+
+      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
+      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
+
+      INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),
+
+      /* The PRMs say that for CHV, BXT:
+       *
+       *    When source or destination datatype is 64b or operation is integer
+       *    DWord multiply, DepCtrl must not be used.
+       */
+      INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
+      INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
+      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
+
+      INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
+      INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
+      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
+
+      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
+      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
+      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
+
+      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
+      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
+      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
+
+      INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
+      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
+
+      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
+      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
+
+#undef INST
+   };
+
+   /* These restrictions only apply to Gen8+ */
+   if (devinfo.gen < 8)
+      return;
+
+   for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+      if (inst[i].opcode == BRW_OPCODE_MOV) {
+         brw_MOV(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type));
+      } else {
+         assert(inst[i].opcode == BRW_OPCODE_MUL);
+         brw_MUL(p, retype(g0, inst[i].dst_type),
+                    retype(g0, inst[i].src_type),
+                    retype(zero, inst[i].src_type));
+      }
+      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
+
+      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+
+      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
+      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
+      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
+
+      brw_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
+      brw_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
+
+      if (devinfo.is_cherryview || gen_device_info_is_9lp(&devinfo)) {
+         EXPECT_EQ(inst[i].expected_result, validate(p));
+      } else {
+         EXPECT_TRUE(validate(p));
+      }
+
+      clear_instructions(p);
+   }
+}