X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_eu_validate.c;h=d87c550a190e182d6e8a7f12d9d1a8afe277403c;hb=2e1df6a17ff82c4a456caa8be4bfae1fac009b6a;hp=f359599c38d35078143935ddd6c119ddfcbddf03;hpb=e91c3540fc620b39a16d5bce9fd75aa0ddd7ed7e;p=mesa.git diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index f359599c38d..d87c550a190 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -1,5 +1,5 @@ /* - * Copyright © 2015 Intel Corporation + * Copyright © 2015-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -24,8 +24,21 @@ /** @file brw_eu_validate.c * * This file implements a pass that validates shader assembly. + * + * The restrictions implemented herein are intended to verify that instructions + * in shader assembly do not violate restrictions documented in the graphics + * programming reference manuals. + * + * The restrictions are difficult for humans to quickly verify due to their + * complexity and abundance. + * + * It is critical that this code is thoroughly unit tested because false + * results will lead developers astray, which is worse than having no validator + * at all. Functional changes to this file without corresponding unit tests (in + * test_eu_validate.cpp) will be rejected. */ +#include #include "brw_eu.h" /* We're going to do lots of string concatenation, so this should help. */ @@ -90,6 +103,22 @@ inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst) } } +static bool +inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + if (devinfo->gen >= 12) { + return inst_is_send(devinfo, inst); + } else { + switch (brw_inst_opcode(devinfo, inst)) { + case BRW_OPCODE_SENDS: + case BRW_OPCODE_SENDSC: + return true; + default: + return false; + } + } +} + static unsigned signed_type(unsigned type) { @@ -102,10 +131,17 @@ signed_type(unsigned type) } } +static enum brw_reg_type +inst_dst_type(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + return (devinfo->gen < 12 || !inst_is_send(devinfo, inst)) ? + brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D; +} + static bool inst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst) { - unsigned dst_type = signed_type(brw_inst_dst_type(devinfo, inst)); + unsigned dst_type = signed_type(inst_dst_type(devinfo, inst)); unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { @@ -135,7 +171,8 @@ dst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) static bool src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) { - return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT && + brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; } @@ -147,9 +184,17 @@ src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) } static bool -src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst) +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; +} + +static bool +src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) { - return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE; + return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; } static bool @@ -223,6 +268,76 @@ num_sources_from_inst(const struct gen_device_info *devinfo, } } +static struct string +invalid_values(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + unsigned num_sources = num_sources_from_inst(devinfo, inst); + struct string error_msg = { .str = NULL, .len = 0 }; + + switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) { + case BRW_EXECUTE_1: + case BRW_EXECUTE_2: + case BRW_EXECUTE_4: + case BRW_EXECUTE_8: + case BRW_EXECUTE_16: + case BRW_EXECUTE_32: + break; + default: + ERROR("invalid execution size"); + break; + } + + if (inst_is_send(devinfo, inst)) + return error_msg; + + if (num_sources == 3) { + /* Nothing to test: + * No 3-src instructions on Gen4-5 + * No reg file bits on Gen6-10 (align16) + * No invalid encodings on Gen10-12 (align1) + */ + } else { + if (devinfo->gen > 6) { + ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF || + (num_sources > 0 && + brw_inst_src0_reg_file(devinfo, inst) == MRF) || + (num_sources > 1 && + brw_inst_src1_reg_file(devinfo, inst) == MRF), + "invalid register file encoding"); + } + } + + if (error_msg.str) + return error_msg; + + if (num_sources == 3) { + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if (devinfo->gen >= 10) { + ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE || + brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE || + brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE || + brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE, + "invalid register type encoding"); + } else { + ERROR("Align1 mode not allowed on Gen < 10"); + } + } else { + ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE || + brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE, + "invalid register type encoding"); + } + } else { + ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE || + (num_sources > 0 && + brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) || + (num_sources > 1 && + brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE), + "invalid register type encoding"); + } + + return error_msg; +} + static struct string sources_not_null(const struct gen_device_info *devinfo, const brw_inst *inst) @@ -236,7 +351,13 @@ sources_not_null(const struct gen_device_info *devinfo, if (num_sources == 3) return (struct string){}; - if (num_sources >= 1) + /* Nothing to test. Split sends can only encode a file in sources that are + * allowed to be NULL. + */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + + if (num_sources >= 1 && brw_inst_opcode(devinfo, inst) != BRW_OPCODE_SYNC) ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); if (num_sources == 2) @@ -245,22 +366,100 @@ sources_not_null(const struct gen_device_info *devinfo, return error_msg; } +static struct string +alignment_supported(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, + "Align16 not supported"); + + return error_msg; +} + +static bool +inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + /* Check instructions that use implicit accumulator sources */ + switch (brw_inst_opcode(devinfo, inst)) { + case BRW_OPCODE_MAC: + case BRW_OPCODE_MACH: + case BRW_OPCODE_SADA2: + return true; + default: + break; + } + + /* FIXME: support 3-src instructions */ + unsigned num_sources = num_sources_from_inst(devinfo, inst); + assert(num_sources < 3); + + return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst)); +} + static struct string send_restrictions(const struct gen_device_info *devinfo, const brw_inst *inst) { struct string error_msg = { .str = NULL, .len = 0 }; - if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) { + if (inst_is_split_send(devinfo, inst)) { + ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, + "src1 of split send must be a GRF or NULL"); + + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_src0_da_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + ERROR_IF(brw_inst_eot(devinfo, inst) && + brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && + brw_inst_send_src1_reg_nr(devinfo, inst) < 112, + "send with EOT must use g112-g127"); + + if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { + /* Assume minimums if we don't know */ + unsigned mlen = 1; + if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { + const uint32_t desc = brw_inst_send_desc(devinfo, inst); + mlen = brw_message_desc_mlen(devinfo, desc); + } + + unsigned ex_mlen = 1; + if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { + const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst); + ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); + } + const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); + const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); + ERROR_IF((src0_reg_nr <= src1_reg_nr && + src1_reg_nr < src0_reg_nr + mlen) || + (src1_reg_nr <= src0_reg_nr && + src0_reg_nr < src1_reg_nr + ex_mlen), + "split send payloads must not overlap"); + } + } else if (inst_is_send(devinfo, inst)) { ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, "send must use direct addressing"); if (devinfo->gen >= 7) { - ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF"); + ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE, + "send from non-GRF"); ERROR_IF(brw_inst_eot(devinfo, inst) && brw_inst_src0_da_reg_nr(devinfo, inst) < 112, "send with EOT must use g112-g127"); } + + if (devinfo->gen >= 8) { + ERROR_IF(!dst_is_null(devinfo, inst) && + (brw_inst_dst_da_reg_nr(devinfo, inst) + + brw_inst_rlen(devinfo, inst) > 127) && + (brw_inst_src0_da_reg_nr(devinfo, inst) + + brw_inst_mlen(devinfo, inst) > + brw_inst_dst_da_reg_nr(devinfo, inst)), + "r127 must not be used for return address when there is " + "a src and dest overlap"); + } } return error_msg; @@ -270,13 +469,25 @@ static bool is_unsupported_inst(const struct gen_device_info *devinfo, const brw_inst *inst) { - return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL; + return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_ILLEGAL; +} + +/** + * Returns whether a combination of two types would qualify as mixed float + * operation mode + */ +static inline bool +types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) +{ + return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) || + (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF); } static enum brw_reg_type execution_type_for_type(enum brw_reg_type type) { switch (type) { + case BRW_REGISTER_TYPE_NF: case BRW_REGISTER_TYPE_DF: case BRW_REGISTER_TYPE_F: case BRW_REGISTER_TYPE_HF: @@ -314,23 +525,31 @@ execution_type(const struct gen_device_info *devinfo, const brw_inst *inst) enum brw_reg_type src0_exec_type, src1_exec_type; /* Execution data type is independent of destination data type, except in - * mixed F/HF instructions on CHV and SKL+. + * mixed F/HF instructions. */ - enum brw_reg_type dst_exec_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type dst_exec_type = inst_dst_type(devinfo, inst); src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst)); if (num_sources == 1) { - if ((devinfo->gen >= 9 || devinfo->is_cherryview) && - src0_exec_type == BRW_REGISTER_TYPE_HF) { + if (src0_exec_type == BRW_REGISTER_TYPE_HF) return dst_exec_type; - } return src0_exec_type; } src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst)); + if (types_are_mixed_float(src0_exec_type, src1_exec_type) || + types_are_mixed_float(src0_exec_type, dst_exec_type) || + types_are_mixed_float(src1_exec_type, dst_exec_type)) { + return BRW_REGISTER_TYPE_F; + } + if (src0_exec_type == src1_exec_type) return src0_exec_type; + if (src0_exec_type == BRW_REGISTER_TYPE_NF || + src1_exec_type == BRW_REGISTER_TYPE_NF) + return BRW_REGISTER_TYPE_NF; + /* Mixed operand types where one is float is float on Gen < 6 * (and not allowed on later platforms) */ @@ -355,18 +574,7 @@ execution_type(const struct gen_device_info *devinfo, const brw_inst *inst) src1_exec_type == BRW_REGISTER_TYPE_DF) return BRW_REGISTER_TYPE_DF; - if (devinfo->gen >= 9 || devinfo->is_cherryview) { - if (dst_exec_type == BRW_REGISTER_TYPE_F || - src0_exec_type == BRW_REGISTER_TYPE_F || - src1_exec_type == BRW_REGISTER_TYPE_F) { - return BRW_REGISTER_TYPE_F; - } else { - return BRW_REGISTER_TYPE_HF; - } - } - - assert(src0_exec_type == BRW_REGISTER_TYPE_F); - return BRW_REGISTER_TYPE_F; + unreachable("not reached"); } /** @@ -389,6 +597,91 @@ is_packed(unsigned vstride, unsigned width, unsigned hstride) return false; } +/** + * Returns whether an instruction is an explicit or implicit conversion + * to/from half-float. + */ +static bool +is_half_float_conversion(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + + unsigned num_sources = num_sources_from_inst(devinfo, inst); + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + + if (dst_type != src0_type && + (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) { + return true; + } else if (num_sources > 1) { + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); + return dst_type != src1_type && + (dst_type == BRW_REGISTER_TYPE_HF || + src1_type == BRW_REGISTER_TYPE_HF); + } + + return false; +} + +/* + * Returns whether an instruction is using mixed float operation mode + */ +static bool +is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + if (devinfo->gen < 8) + return false; + + if (inst_is_send(devinfo, inst)) + return false; + + unsigned opcode = brw_inst_opcode(devinfo, inst); + const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode); + if (desc->ndst == 0) + return false; + + /* FIXME: support 3-src instructions */ + unsigned num_sources = num_sources_from_inst(devinfo, inst); + assert(num_sources < 3); + + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + + if (num_sources == 1) + return types_are_mixed_float(src0_type, dst_type); + + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); + + return types_are_mixed_float(src0_type, src1_type) || + types_are_mixed_float(src0_type, dst_type) || + types_are_mixed_float(src1_type, dst_type); +} + +/** + * Returns whether an instruction is an explicit or implicit conversion + * to/from byte. + */ +static bool +is_byte_conversion(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + + unsigned num_sources = num_sources_from_inst(devinfo, inst); + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + + if (dst_type != src0_type && + (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) { + return true; + } else if (num_sources > 1) { + enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); + return dst_type != src1_type && + (type_sz(dst_type) == 1 || type_sz(src1_type) == 1); + } + + return false; +} + /** * Checks restrictions listed in "General Restrictions Based on Operand Types" * in the "Register Region Restrictions" section. @@ -403,17 +696,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); struct string error_msg = { .str = NULL, .len = 0 }; - if (num_sources == 3) - return (struct string){}; - if (inst_is_send(devinfo, inst)) - return (struct string){}; + return error_msg; + + if (devinfo->gen >= 11) { + if (num_sources == 3) { + ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || + brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1/2 register regioning. This includes " + "byte broadcast as well."); + } + if (num_sources == 2) { + ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1 register regioning. This includes " + "byte broadcast as well."); + } + } + + if (num_sources == 3) + return error_msg; if (exec_size == 1) - return (struct string){}; + return error_msg; if (desc->ndst == 0) - return (struct string){}; + return error_msg; /* The PRMs say: * @@ -431,19 +738,16 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf */ unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); - enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); bool dst_type_is_byte = - brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || - brw_inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; + inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_B || + inst_dst_type(devinfo, inst) == BRW_REGISTER_TYPE_UB; if (dst_type_is_byte) { if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { - if (!inst_is_raw_move(devinfo, inst)) { + if (!inst_is_raw_move(devinfo, inst)) ERROR("Only raw MOV supports a packed-byte destination"); - return error_msg; - } else { - return (struct string){}; - } + return error_msg; } } @@ -459,10 +763,130 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf exec_type_size == 8 && dst_type_size == 4) dst_type_size = 8; - if (exec_type_size > dst_type_size) { - ERROR_IF(dst_stride * dst_type_size != exec_type_size, - "Destination stride must be equal to the ratio of the sizes of " - "the execution data type to the destination type"); + if (is_byte_conversion(devinfo, inst)) { + /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: + * + * "There is no direct conversion from B/UB to DF or DF to B/UB. + * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." + * + * Even if these restrictions are listed for the MOV instruction, we + * validate this more generally, since there is the possibility + * of implicit conversions from other instructions. + */ + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + enum brw_reg_type src1_type = num_sources > 1 ? + brw_inst_src1_type(devinfo, inst) : 0; + + ERROR_IF(type_sz(dst_type) == 1 && + (type_sz(src0_type) == 8 || + (num_sources > 1 && type_sz(src1_type) == 8)), + "There are no direct conversions between 64-bit types and B/UB"); + + ERROR_IF(type_sz(dst_type) == 8 && + (type_sz(src0_type) == 1 || + (num_sources > 1 && type_sz(src1_type) == 1)), + "There are no direct conversions between 64-bit types and B/UB"); + } + + if (is_half_float_conversion(devinfo, inst)) { + /** + * A helper to validate used in the validation of the following restriction + * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: + * + * "There is no direct conversion from HF to DF or DF to HF. + * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." + * + * Even if these restrictions are listed for the MOV instruction, we + * validate this more generally, since there is the possibility + * of implicit conversions from other instructions, such us implicit + * conversion from integer to HF with the ADD instruction in SKL+. + */ + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + enum brw_reg_type src1_type = num_sources > 1 ? + brw_inst_src1_type(devinfo, inst) : 0; + ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && + (type_sz(src0_type) == 8 || + (num_sources > 1 && type_sz(src1_type) == 8)), + "There are no direct conversions between 64-bit types and HF"); + + ERROR_IF(type_sz(dst_type) == 8 && + (src0_type == BRW_REGISTER_TYPE_HF || + (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), + "There are no direct conversions between 64-bit types and HF"); + + /* From the BDW+ PRM: + * + * "Conversion between Integer and HF (Half Float) must be + * DWord-aligned and strided by a DWord on the destination." + * + * Also, the above restrictions seems to be expanded on CHV and SKL+ by: + * + * "There is a relaxed alignment rule for word destinations. When + * the destination type is word (UW, W, HF), destination data types + * can be aligned to either the lowest word or the second lowest + * word of the execution channel. This means the destination data + * words can be either all in the even word locations or all in the + * odd word locations." + * + * We do not implement the second rule as is though, since empirical + * testing shows inconsistencies: + * - It suggests that packed 16-bit is not allowed, which is not true. + * - It suggests that conversions from Q/DF to W (which need to be + * 64-bit aligned on the destination) are not possible, which is + * not true. + * + * So from this rule we only validate the implication that conversions + * from F to HF need to be DWord strided (except in Align1 mixed + * float mode where packed fp16 destination is allowed so long as the + * destination is oword-aligned). + * + * Finally, we only validate this for Align1 because Align16 always + * requires packed destinations, so these restrictions can't possibly + * apply to Align16 mode. + */ + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { + if ((dst_type == BRW_REGISTER_TYPE_HF && + (brw_reg_type_is_integer(src0_type) || + (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || + (brw_reg_type_is_integer(dst_type) && + (src0_type == BRW_REGISTER_TYPE_HF || + (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { + ERROR_IF(dst_stride * dst_type_size != 4, + "Conversions between integer and half-float must be " + "strided by a DWord on the destination"); + + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); + ERROR_IF(subreg % 4 != 0, + "Conversions between integer and half-float must be " + "aligned to a DWord on the destination"); + } else if ((devinfo->is_cherryview || devinfo->gen >= 9) && + dst_type == BRW_REGISTER_TYPE_HF) { + unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); + ERROR_IF(dst_stride != 2 && + !(is_mixed_float(devinfo, inst) && + dst_stride == 1 && subreg % 16 == 0), + "Conversions to HF must have either all words in even " + "word locations or all words in odd word locations or " + "be mixed-float with Oword-aligned packed destination"); + } + } + } + + /* There are special regioning rules for mixed-float mode in CHV and SKL that + * override the general rule for the ratio of sizes of the destination type + * and the execution type. We will add validation for those in a later patch. + */ + bool validate_dst_size_and_exec_size_ratio = + !is_mixed_float(devinfo, inst) || + !(devinfo->is_cherryview || devinfo->gen >= 9); + + if (validate_dst_size_and_exec_size_ratio && + exec_type_size > dst_type_size) { + if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) { + ERROR_IF(dst_stride * dst_type_size != exec_type_size, + "Destination stride must be equal to the ratio of the sizes " + "of the execution data type to the destination type"); + } unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); @@ -507,6 +931,12 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, if (num_sources == 3) return (struct string){}; + /* Split sends don't have the bits in the instruction to encode regions so + * there's nothing to check. + */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, @@ -625,7 +1055,7 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, unsigned offset = rowbase; for (int x = 0; x < width; x++) { - access_mask |= mask << offset; + access_mask |= mask << (offset % 64); offset += hstride * element_size; } @@ -647,6 +1077,223 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, return error_msg; } +static struct string +special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + const unsigned opcode = brw_inst_opcode(devinfo, inst); + const unsigned num_sources = num_sources_from_inst(devinfo, inst); + if (num_sources >= 3) + return error_msg; + + if (!is_mixed_float(devinfo, inst)) + return error_msg; + + unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); + bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; + + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + enum brw_reg_type src1_type = num_sources > 1 ? + brw_inst_src1_type(devinfo, inst) : 0; + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + + unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); + bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "Indirect addressing on source is not supported when source and + * destination data types are mixed float." + */ + ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || + (num_sources > 1 && + brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), + "Indirect addressing on source is not supported when source and " + "destination data types are mixed float"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No SIMD16 in mixed mode when destination is f32. Instruction + * execution size must be no more than 8." + */ + ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, + "Mixed float mode with 32-bit float destination is limited " + "to SIMD8"); + + if (is_align16) { + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "In Align16 mode, when half float and float data types are mixed + * between source operands OR between source and destination operands, + * the register content are assumed to be packed." + * + * Since Align16 doesn't have a concept of horizontal stride (or width), + * it means that vertical stride must always be 4, since 0 and 2 would + * lead to replicated data, and any other value is disallowed in Align16. + */ + ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, + "Align16 mixed float mode assumes packed data (vstride must be 4"); + + ERROR_IF(num_sources >= 2 && + brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, + "Align16 mixed float mode assumes packed data (vstride must be 4"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "For Align16 mixed mode, both input and output packed f16 data + * must be oword aligned, no oword crossing in packed f16." + * + * The previous rule requires that Align16 operands are always packed, + * and since there is only one bit for Align16 subnr, which represents + * offsets 0B and 16B, this rule is always enforced and we don't need to + * validate it. + */ + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No SIMD16 in mixed mode when destination is packed f16 for both + * Align1 and Align16." + * + * And: + * + * "In Align16 mode, when half float and float data types are mixed + * between source operands OR between source and destination operands, + * the register content are assumed to be packed." + * + * Which implies that SIMD16 is not available in Align16. This is further + * confirmed by: + * + * "For Align16 mixed mode, both input and output packed f16 data + * must be oword aligned, no oword crossing in packed f16" + * + * Since oword-aligned packed f16 data would cross oword boundaries when + * the execution size is larger than 8. + */ + ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No accumulator read access for Align16 mixed float." + */ + ERROR_IF(inst_uses_src_acc(devinfo, inst), + "No accumulator read access for Align16 mixed float"); + } else { + assert(!is_align16); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No SIMD16 in mixed mode when destination is packed f16 for both + * Align1 and Align16." + */ + ERROR_IF(exec_size > 8 && dst_is_packed && + dst_type == BRW_REGISTER_TYPE_HF, + "Align1 mixed float mode is limited to SIMD8 when destination " + "is packed half-float"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "Math operations for mixed mode: + * - In Align1, f16 inputs need to be strided" + */ + if (opcode == BRW_OPCODE_MATH) { + if (src0_type == BRW_REGISTER_TYPE_HF) { + ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, + "Align1 mixed mode math needs strided half-float inputs"); + } + + if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { + ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, + "Align1 mixed mode math needs strided half-float inputs"); + } + } + + if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "In Align1, destination stride can be smaller than execution + * type. When destination is stride of 1, 16 bit packed data is + * updated on the destination. However, output packed f16 data + * must be oword aligned, no oword crossing in packed f16." + * + * The requirement of not crossing oword boundaries for 16-bit oword + * aligned data means that execution size is limited to 8. + */ + unsigned subreg; + if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) + subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); + else + subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); + ERROR_IF(subreg % 16 != 0, + "Align1 mixed mode packed half-float output must be " + "oword aligned"); + ERROR_IF(exec_size > 8, + "Align1 mixed mode packed half-float output must not " + "cross oword boundaries (max exec size is 8)"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "When source is float or half float from accumulator register and + * destination is half float with a stride of 1, the source must + * register aligned. i.e., source must have offset zero." + * + * Align16 mixed float mode doesn't allow accumulator access on sources, + * so we only need to check this for Align1. + */ + if (src0_is_acc(devinfo, inst) && + (src0_type == BRW_REGISTER_TYPE_F || + src0_type == BRW_REGISTER_TYPE_HF)) { + ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, + "Mixed float mode requires register-aligned accumulator " + "source reads when destination is packed half-float"); + + } + + if (num_sources > 1 && + src1_is_acc(devinfo, inst) && + (src1_type == BRW_REGISTER_TYPE_F || + src1_type == BRW_REGISTER_TYPE_HF)) { + ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, + "Mixed float mode requires register-aligned accumulator " + "source reads when destination is packed half-float"); + } + } + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No swizzle is allowed when an accumulator is used as an implicit + * source or an explicit source in an instruction. i.e. when + * destination is half float with an implicit accumulator source, + * destination stride needs to be 2." + * + * FIXME: it is not quite clear what the first sentence actually means + * or its link to the implication described after it, so we only + * validate the explicit implication, which is clearly described. + */ + if (dst_type == BRW_REGISTER_TYPE_HF && + inst_uses_src_acc(devinfo, inst)) { + ERROR_IF(dst_stride != 2, + "Mixed float mode with implicit/explicit accumulator " + "source and half-float destination requires a stride " + "of 2 on the destination"); + } + } + + return error_msg; +} + /** * Creates an \p access_mask for an \p exec_size, \p element_size, and a region * @@ -678,7 +1325,7 @@ align1_access_mask(uint64_t access_mask[static 32], unsigned offset = rowbase; for (int x = 0; x < width; x++) { - access_mask[element++] = mask << offset; + access_mask[element++] = mask << (offset % 64); offset += hstride * element_size; } @@ -783,7 +1430,7 @@ region_alignment_rules(const struct gen_device_info *devinfo, return error_msg; unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); - enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); unsigned element_size = brw_reg_type_to_size(dst_type); unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); unsigned offset = ((exec_size - 1) * stride * element_size) + subreg; @@ -990,7 +1637,7 @@ region_alignment_rules(const struct gen_device_info *devinfo, * is that the size of the destination type is 4 bytes. */ if (devinfo->gen <= 7 && dst_regs == 2) { - enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); bool dst_is_packed_dword = is_packed(exec_size * stride, exec_size, stride) && brw_reg_type_to_size(dst_type) == 4; @@ -1041,7 +1688,7 @@ vector_immediate_restrictions(const struct gen_device_info *devinfo, if (file != BRW_IMMEDIATE_VALUE) return (struct string){}; - enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); unsigned dst_type_size = brw_reg_type_to_size(dst_type); unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ? brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0; @@ -1097,11 +1744,15 @@ special_requirements_for_handling_double_precision_data_types( if (num_sources == 3 || num_sources == 0) return (struct string){}; + /* Split sends don't have types so there's no doubles there. */ + if (inst_is_split_send(devinfo, inst)) + return (struct string){}; + enum brw_reg_type exec_type = execution_type(devinfo, inst); unsigned exec_type_size = brw_reg_type_to_size(exec_type); enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); - enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + enum brw_reg_type dst_type = inst_dst_type(devinfo, inst); unsigned dst_type_size = brw_reg_type_to_size(dst_type); unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); @@ -1255,17 +1906,84 @@ special_requirements_for_handling_double_precision_data_types( return error_msg; } +static struct string +instruction_restrictions(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + /* From GEN:BUG:1604601757: + * + * "When multiplying a DW and any lower precision integer, source modifier + * is not supported." + */ + if (devinfo->gen >= 12 && + brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MUL) { + enum brw_reg_type exec_type = execution_type(devinfo, inst); + const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 || + brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || + !(brw_inst_src0_negate(devinfo, inst) || + brw_inst_src0_abs(devinfo, inst)); + const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 || + brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || + !(brw_inst_src1_negate(devinfo, inst) || + brw_inst_src1_abs(devinfo, inst)); + + ERROR_IF(!brw_reg_type_is_floating_point(exec_type) && + type_sz(exec_type) == 4 && !(src0_valid && src1_valid), + "When multiplying a DW and any lower precision integer, source " + "modifier is not supported."); + } + + return error_msg; +} + +bool +brw_validate_instruction(const struct gen_device_info *devinfo, + const brw_inst *inst, int offset, + struct disasm_info *disasm) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + if (is_unsupported_inst(devinfo, inst)) { + ERROR("Instruction not supported on this Gen"); + } else { + CHECK(invalid_values); + + if (error_msg.str == NULL) { + CHECK(sources_not_null); + CHECK(send_restrictions); + CHECK(alignment_supported); + CHECK(general_restrictions_based_on_operand_types); + CHECK(general_restrictions_on_region_parameters); + CHECK(special_restrictions_for_mixed_float_mode); + CHECK(region_alignment_rules); + CHECK(vector_immediate_restrictions); + CHECK(special_requirements_for_handling_double_precision_data_types); + CHECK(instruction_restrictions); + } + } + + if (error_msg.str && disasm) { + disasm_insert_error(disasm, offset, error_msg.str); + } + free(error_msg.str); + + return error_msg.len == 0; +} + bool brw_validate_instructions(const struct gen_device_info *devinfo, - void *assembly, int start_offset, int end_offset, - struct annotation_info *annotation) + const void *assembly, int start_offset, int end_offset, + struct disasm_info *disasm) { bool valid = true; for (int src_offset = start_offset; src_offset < end_offset;) { - struct string error_msg = { .str = NULL, .len = 0 }; const brw_inst *inst = assembly + src_offset; bool is_compact = brw_inst_cmpt_control(devinfo, inst); + unsigned inst_size = is_compact ? sizeof(brw_compact_inst) + : sizeof(brw_inst); brw_inst uncompacted; if (is_compact) { @@ -1274,29 +1992,10 @@ brw_validate_instructions(const struct gen_device_info *devinfo, inst = &uncompacted; } - if (is_unsupported_inst(devinfo, inst)) { - ERROR("Instruction not supported on this Gen"); - } else { - CHECK(sources_not_null); - CHECK(send_restrictions); - CHECK(general_restrictions_based_on_operand_types); - CHECK(general_restrictions_on_region_parameters); - CHECK(region_alignment_rules); - CHECK(vector_immediate_restrictions); - CHECK(special_requirements_for_handling_double_precision_data_types); - } + bool v = brw_validate_instruction(devinfo, inst, src_offset, disasm); + valid = valid && v; - if (error_msg.str && annotation) { - annotation_insert_error(annotation, src_offset, error_msg.str); - } - valid = valid && error_msg.len == 0; - free(error_msg.str); - - if (is_compact) { - src_offset += sizeof(brw_compact_inst); - } else { - src_offset += sizeof(brw_inst); - } + src_offset += inst_size; } return valid;