From 7376d57a9c6ae69bc47bbbfe5d3b1a0ed0639227 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Fri, 1 Feb 2019 11:41:33 +0100 Subject: [PATCH] intel/compiler: validate region restrictions for half-float conversions v2: - Consider implicit conversions in 2-src instructions too (Curro) - For restrictions that involve destination stride requirements only validate them for Align1, since Align16 always requires packed data. - Skip general rule for the dst/execution type size ratio for mixed float instructions on CHV and SKL+, these have their own set of rules that we'll be validated separately. v3 (Curro): - Do not check src1 type in single-source instructions. - Check restriction on src1. - Remove invalid test. Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_eu_validate.c | 155 +++++++++++++++++++++++- src/intel/compiler/test_eu_validate.cpp | 116 ++++++++++++++++++ 2 files changed, 270 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index bd0e48a5e5c..54bffb3af03 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -469,6 +469,66 @@ is_packed(unsigned vstride, unsigned width, unsigned hstride) return false; } +/** + * Returns whether an instruction is an explicit or implicit conversion + * to/from half-float. + */ +static bool +is_half_float_conversion(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + + unsigned num_sources = num_sources_from_inst(devinfo, inst); + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + + if (dst_type != src0_type && + (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) { + return true; + } else if (num_sources > 1) { + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); + return dst_type != src1_type && + (dst_type == BRW_REGISTER_TYPE_HF || + src1_type == BRW_REGISTER_TYPE_HF); + } + + return false; +} + +/* + * Returns whether an instruction is using mixed float operation mode + */ +static bool +is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + if (devinfo->gen < 8) + return false; + + if (inst_is_send(devinfo, inst)) + return false; + + unsigned opcode = brw_inst_opcode(devinfo, inst); + const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode); + if (desc->ndst == 0) + return false; + + /* FIXME: support 3-src instructions */ + unsigned num_sources = num_sources_from_inst(devinfo, inst); + assert(num_sources < 3); + + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + + if (num_sources == 1) + return types_are_mixed_float(src0_type, dst_type); + + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); + + return types_are_mixed_float(src0_type, src1_type) || + types_are_mixed_float(src0_type, dst_type) || + types_are_mixed_float(src1_type, dst_type); +} + /** * Checks restrictions listed in "General Restrictions Based on Operand Types" * in the "Register Region Restrictions" section. @@ -539,7 +599,100 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf exec_type_size == 8 && dst_type_size == 4) dst_type_size = 8; - if (exec_type_size > dst_type_size) { + if (is_half_float_conversion(devinfo, inst)) { + /** + * A helper to validate used in the validation of the following restriction + * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: + * + * "There is no direct conversion from HF to DF or DF to HF. + * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." + * + * Even if these restrictions are listed for the MOV instruction, we + * validate this more generally, since there is the possibility + * of implicit conversions from other instructions, such us implicit + * conversion from integer to HF with the ADD instruction in SKL+. + */ + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + enum brw_reg_type src1_type = num_sources > 1 ? + brw_inst_src1_type(devinfo, inst) : 0; + ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && + (type_sz(src0_type) == 8 || + (num_sources > 1 && type_sz(src1_type) == 8)), + "There are no direct conversions between 64-bit types and HF"); + + ERROR_IF(type_sz(dst_type) == 8 && + (src0_type == BRW_REGISTER_TYPE_HF || + (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), + "There are no direct conversions between 64-bit types and HF"); + + /* From the BDW+ PRM: + * + * "Conversion between Integer and HF (Half Float) must be + * DWord-aligned and strided by a DWord on the destination." + * + * Also, the above restrictions seems to be expanded on CHV and SKL+ by: + * + * "There is a relaxed alignment rule for word destinations. When + * the destination type is word (UW, W, HF), destination data types + * can be aligned to either the lowest word or the second lowest + * word of the execution channel. This means the destination data + * words can be either all in the even word locations or all in the + * odd word locations." + * + * We do not implement the second rule as is though, since empirical + * testing shows inconsistencies: + * - It suggests that packed 16-bit is not allowed, which is not true. + * - It suggests that conversions from Q/DF to W (which need to be + * 64-bit aligned on the destination) are not possible, which is + * not true. + * + * So from this rule we only validate the implication that conversions + * from F to HF need to be DWord strided (except in Align1 mixed + * float mode where packed fp16 destination is allowed so long as the + * destination is oword-aligned). + * + * Finally, we only validate this for Align1 because Align16 always + * requires packed destinations, so these restrictions can't possibly + * apply to Align16 mode. + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if ((dst_type == BRW_REGISTER_TYPE_HF && + (brw_reg_type_is_integer(src0_type) || + (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || + (brw_reg_type_is_integer(dst_type) && + (src0_type == BRW_REGISTER_TYPE_HF || + (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { + ERROR_IF(dst_stride * dst_type_size != 4, + "Conversions between integer and half-float must be " + "strided by a DWord on the destination"); + + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); + ERROR_IF(subreg % 4 != 0, + "Conversions between integer and half-float must be " + "aligned to a DWord on the destination"); + } else if ((devinfo->is_cherryview || devinfo->gen >= 9) && + dst_type == BRW_REGISTER_TYPE_HF) { + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); + ERROR_IF(dst_stride != 2 && + !(is_mixed_float(devinfo, inst) && + dst_stride == 1 && subreg % 16 == 0), + "Conversions to HF must have either all words in even " + "word locations or all words in odd word locations or " + "be mixed-float with Oword-aligned packed destination"); + } + } + } + + /* There are special regioning rules for mixed-float mode in CHV and SKL that + * override the general rule for the ratio of sizes of the destination type + * and the execution type. We will add validation for those in a later patch. + */ + bool validate_dst_size_and_exec_size_ratio = + !is_mixed_float(devinfo, inst) || + !(devinfo->is_cherryview || devinfo->gen >= 9); + + if (validate_dst_size_and_exec_size_ratio && + exec_type_size > dst_type_size) { if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) { ERROR_IF(dst_stride * dst_type_size != exec_type_size, "Destination stride must be equal to the ratio of the sizes " diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 73300b23122..3fdbecb003b 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -848,6 +848,122 @@ TEST_P(validation_test, byte_destination_relaxed_alignment) } } +TEST_P(validation_test, half_float_conversion) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src_type; + unsigned dst_stride; + unsigned dst_subnr; + bool expected_result_bdw; + bool expected_result_chv_gen9; + } inst[] = { +#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + dst_subnr, \ + expected_result, \ + expected_result, \ + } +#define INST_S(dst_type, src_type, dst_stride, dst_subnr, \ + expected_result_bdw, expected_result_chv_gen9) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + dst_subnr, \ + expected_result_bdw, \ + expected_result_chv_gen9, \ + } + + /* MOV to half-float destination */ + INST_C(HF, B, 1, 0, false), + INST_C(HF, W, 1, 0, false), + INST_C(HF, HF, 1, 0, true), + INST_C(HF, HF, 1, 2, true), + INST_C(HF, D, 1, 0, false), + INST_S(HF, F, 1, 0, false, true), + INST_C(HF, Q, 1, 0, false), + INST_C(HF, B, 2, 0, true), + INST_C(HF, B, 2, 2, false), + INST_C(HF, W, 2, 0, true), + INST_C(HF, W, 2, 2, false), + INST_C(HF, HF, 2, 0, true), + INST_C(HF, HF, 2, 2, true), + INST_C(HF, D, 2, 0, true), + INST_C(HF, D, 2, 2, false), + INST_C(HF, F, 2, 0, true), + INST_S(HF, F, 2, 2, false, true), + INST_C(HF, Q, 2, 0, false), + INST_C(HF, DF, 2, 0, false), + INST_C(HF, B, 4, 0, false), + INST_C(HF, W, 4, 0, false), + INST_C(HF, HF, 4, 0, true), + INST_C(HF, HF, 4, 2, true), + INST_C(HF, D, 4, 0, false), + INST_C(HF, F, 4, 0, false), + INST_C(HF, Q, 4, 0, false), + INST_C(HF, DF, 4, 0, false), + + /* MOV from half-float source */ + INST_C( B, HF, 1, 0, false), + INST_C( W, HF, 1, 0, false), + INST_C( D, HF, 1, 0, true), + INST_C( D, HF, 1, 4, true), + INST_C( F, HF, 1, 0, true), + INST_C( F, HF, 1, 4, true), + INST_C( Q, HF, 1, 0, false), + INST_C(DF, HF, 1, 0, false), + INST_C( B, HF, 2, 0, false), + INST_C( W, HF, 2, 0, true), + INST_C( W, HF, 2, 2, false), + INST_C( D, HF, 2, 0, false), + INST_C( F, HF, 2, 0, true), + INST_C( B, HF, 4, 0, true), + INST_C( B, HF, 4, 1, false), + INST_C( W, HF, 4, 0, false), + +#undef INST_C +#undef INST_S + }; + + if (devinfo.gen < 8) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + if (!devinfo.has_64bit_types && + (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) { + continue; + } + + brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type)); + + brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4); + + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr); + + if (inst[i].src_type == BRW_REGISTER_TYPE_B) { + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2); + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); + } else { + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + } + + if (devinfo.is_cherryview || devinfo.gen >= 9) + EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p)); + else + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); + + clear_instructions(p); + } +} + TEST_P(validation_test, vector_immediate_destination_alignment) { static const struct { -- 2.30.2