From: Lionel Landwerlin Date: Wed, 19 Jun 2019 12:09:35 +0000 (-0700) Subject: intel/compiler: don't use byte operands for src1 on ICL X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5847de6e9afe12bd29ad694a76860a0575ab4747;p=mesa.git intel/compiler: don't use byte operands for src1 on ICL The simulator complains about using byte operands, we also have documentation telling us. Note that add operations on bytes seems to work fine on HW (like ADD). Using dwords operands with CMP & SEL fixes the following tests : dEQP-VK.spirv_assembly.type.vec*.i8.* v2: Drop the GLK changes (Matt) Add validator tests (Matt) v3: Drop GLK ref (Matt) Don't mix float/integer in MAD (Matt) Signed-off-by: Lionel Landwerlin Reviewed-by: Rafael Antognolli (v1) Reviewed-by: Matt Turner BSpec: 3017 Cc: --- diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index 943f724e60f..203280570aa 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -289,6 +289,18 @@ sources_not_null(const struct gen_device_info *devinfo, return error_msg; } +static struct string +alignment_supported(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + ERROR_IF(devinfo->gen >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, + "Align16 not supported"); + + return error_msg; +} + static bool inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) { @@ -600,17 +612,31 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); struct string error_msg = { .str = NULL, .len = 0 }; + if (devinfo->gen >= 11) { + if (num_sources == 3) { + ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || + brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1/2 register regioning. This includes " + "byte broadcast as well."); + } + if (num_sources == 2) { + ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, + "Byte data type is not supported for src1 register regioning. This includes " + "byte broadcast as well."); + } + } + if (num_sources == 3) - return (struct string){}; + return error_msg; if (inst_is_send(devinfo, inst)) - return (struct string){}; + return error_msg; if (exec_size == 1) - return (struct string){}; + return error_msg; if (desc->ndst == 0) - return (struct string){}; + return error_msg; /* The PRMs say: * @@ -635,12 +661,9 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf if (dst_type_is_byte) { if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { - if (!inst_is_raw_move(devinfo, inst)) { + if (!inst_is_raw_move(devinfo, inst)) ERROR("Only raw MOV supports a packed-byte destination"); - return error_msg; - } else { - return (struct string){}; - } + return error_msg; } } @@ -1823,6 +1846,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo, } else { CHECK(sources_not_null); CHECK(send_restrictions); + CHECK(alignment_supported); CHECK(general_restrictions_based_on_operand_types); CHECK(general_restrictions_on_region_parameters); CHECK(special_restrictions_for_mixed_float_mode); diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 9655c2ef554..b781f1257ba 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -322,10 +322,11 @@ namespace brw { case SHADER_OPCODE_INT_REMAINDER: return emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0), - fix_math_operand(src1))); + fix_math_operand(fix_byte_src(src1)))); default: - return emit(instruction(opcode, dispatch_width(), dst, src0, src1)); + return emit(instruction(opcode, dispatch_width(), dst, + src0, fix_byte_src(src1))); } } @@ -344,12 +345,12 @@ namespace brw { case BRW_OPCODE_LRP: return emit(instruction(opcode, dispatch_width(), dst, fix_3src_operand(src0), - fix_3src_operand(src1), - fix_3src_operand(src2))); + fix_3src_operand(fix_byte_src(src1)), + fix_3src_operand(fix_byte_src(src2)))); default: return emit(instruction(opcode, dispatch_width(), dst, - src0, src1, src2)); + src0, fix_byte_src(src1), fix_byte_src(src2))); } } @@ -399,8 +400,11 @@ namespace brw { { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); + /* In some cases we can't have bytes as operand for src1, so use the + * same type for both operand. + */ + return set_condmod(mod, SEL(dst, fix_unsigned_negate(fix_byte_src(src0)), + fix_unsigned_negate(fix_byte_src(src1)))); } /** @@ -657,8 +661,8 @@ namespace brw { emit(BRW_OPCODE_CSEL, retype(dst, BRW_REGISTER_TYPE_F), retype(src0, BRW_REGISTER_TYPE_F), - retype(src1, BRW_REGISTER_TYPE_F), - src2)); + retype(fix_byte_src(src1), BRW_REGISTER_TYPE_F), + fix_byte_src(src2))); } /** @@ -719,6 +723,22 @@ namespace brw { backend_shader *shader; + /** + * Byte sized operands are not supported for src1 on Gen11+. + */ + src_reg + fix_byte_src(const src_reg &src) const + { + if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) || + type_sz(src.type) != 1) + return src; + + dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ? + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_D); + MOV(temp, src); + return src_reg(temp); + } + private: /** * Workaround for negation of UD registers. See comment in diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index e0818caae13..b9d42b6e26e 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1340,9 +1340,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, case nir_op_ine32: { fs_reg dest = result; + /* On Gen11 we have an additional issue being that src1 cannot be a byte + * type. So we convert both operands for the comparison. + */ + fs_reg temp_op[2]; + temp_op[0] = bld.fix_byte_src(op[0]); + temp_op[1] = bld.fix_byte_src(op[1]); + const uint32_t bit_size = nir_src_bit_size(instr->src[0].src); if (bit_size != 32) - dest = bld.vgrf(op[0].type, 1); + dest = bld.vgrf(temp_op[0].type, 1); brw_conditional_mod cond; switch (instr->op) { @@ -1363,7 +1370,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, default: unreachable("bad opcode"); } - bld.CMP(dest, op[0], op[1], cond); + bld.CMP(dest, temp_op[0], temp_op[1], cond); if (bit_size > 32) { bld.MOV(result, subscript(dest, BRW_REGISTER_TYPE_UD, 0)); diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 65326416064..efdae4fd79b 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -2372,3 +2372,124 @@ TEST_P(validation_test, qword_low_power_no_depctrl) clear_instructions(p); } } + +TEST_P(validation_test, gen11_no_byte_src_1_2) +{ + static const struct { + enum opcode opcode; + unsigned access_mode; + + enum brw_reg_type dst_type; + struct { + enum brw_reg_type type; + unsigned vstride; + unsigned width; + unsigned hstride; + } srcs[3]; + + int gen; + bool expected_result; + } inst[] = { +#define INST(opcode, access_mode, dst_type, \ + src0_type, src0_vstride, src0_width, src0_hstride, \ + src1_type, src1_vstride, src1_width, src1_hstride, \ + src2_type, \ + gen, expected_result) \ + { \ + BRW_OPCODE_##opcode, \ + BRW_ALIGN_##access_mode, \ + BRW_REGISTER_TYPE_##dst_type, \ + { \ + { \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_VERTICAL_STRIDE_##src0_vstride, \ + BRW_WIDTH_##src0_width, \ + BRW_HORIZONTAL_STRIDE_##src0_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_VERTICAL_STRIDE_##src1_vstride, \ + BRW_WIDTH_##src1_width, \ + BRW_HORIZONTAL_STRIDE_##src1_hstride, \ + }, \ + { \ + BRW_REGISTER_TYPE_##src2_type, \ + }, \ + }, \ + gen, \ + expected_result, \ + } + + /* Passes on < 11 */ + INST(MOV, 16, F, B, 2, 4, 0, UD, 0, 4, 0, D, 8, true ), + INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0, D, 7, true ), + INST(MAD, 16, D, B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ), + + /* Fails on 11+ */ + INST(MAD, 1, UB, W, 1, 1, 0, D, 0, 4, 0, B, 11, false ), + INST(MAD, 1, UB, W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ), + INST(ADD, 1, W, W, 1, 4, 1, B, 1, 1, 0, D, 11, false ), + + /* Passes on 11+ */ + INST(MOV, 1, W, B, 8, 8, 1, D, 8, 8, 1, D, 11, true ), + INST(ADD, 1, UD, B, 8, 8, 1, W, 8, 8, 1, D, 11, true ), + INST(MAD, 1, B, B, 0, 1, 0, D, 0, 4, 0, W, 11, true ), + +#undef INST + }; + + + for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { + /* Skip instruction not meant for this gen. */ + if (devinfo.gen != inst[i].gen) + continue; + + brw_push_insn_state(p); + + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_access_mode(p, inst[i].access_mode); + + switch (inst[i].opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type)); + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride); + break; + case BRW_OPCODE_MAD: + brw_MAD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].srcs[0].type), + retype(g0, inst[i].srcs[1].type), + retype(g0, inst[i].srcs[2].type)); + brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); + brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); + break; + default: + unreachable("invalid opcode"); + } + + brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + + brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); + brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); + + brw_pop_insn_state(p); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +}