From: Matt Turner Date: Mon, 13 Jan 2020 19:17:27 +0000 (-0800) Subject: intel/compiler: Split has_64bit_types into float/int X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=49c21802cbca8240b272318759b1e472142929e6;p=mesa.git intel/compiler: Split has_64bit_types into float/int Gen7 has 64-bit floats but not 64-bit ints. Acked-by: Caio Marcelo de Oliveira Filho Part-of: --- diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 0a4140eda43..a3b8ddb4bd7 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -139,7 +139,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) nir_lower_dsub | nir_lower_ddiv; - if (!devinfo->has_64bit_types || (INTEL_DEBUG & DEBUG_SOFT64)) { + if (!devinfo->has_64bit_float || (INTEL_DEBUG & DEBUG_SOFT64)) { int64_options |= nir_lower_mov64 | nir_lower_icmp64 | nir_lower_iadd64 | diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b5f3bdd7698..d9afe425e31 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2545,7 +2545,8 @@ fs_visitor::opt_algebraic() foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_MOV: - if (!devinfo->has_64bit_types && + if (!devinfo->has_64bit_float && + !devinfo->has_64bit_int && (inst->dst.type == BRW_REGISTER_TYPE_DF || inst->dst.type == BRW_REGISTER_TYPE_UQ || inst->dst.type == BRW_REGISTER_TYPE_Q)) { @@ -2678,7 +2679,8 @@ fs_visitor::opt_algebraic() } break; case BRW_OPCODE_SEL: - if (!devinfo->has_64bit_types && + if (!devinfo->has_64bit_float && + !devinfo->has_64bit_int && (inst->dst.type == BRW_REGISTER_TYPE_DF || inst->dst.type == BRW_REGISTER_TYPE_UQ || inst->dst.type == BRW_REGISTER_TYPE_Q)) { @@ -7366,7 +7368,7 @@ fs_visitor::optimize() split_virtual_grfs(); /* Lower 64 bit MOVs generated by payload lowering. */ - if (!devinfo->has_64bit_types) + if (!devinfo->has_64bit_float && !devinfo->has_64bit_int) OPT(opt_algebraic); OPT(register_coalesce); diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index e8bbce5b290..fc19a3283fc 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -459,7 +459,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst, if (type_sz(reg.type) > 4 && ((devinfo->gen == 7 && !devinfo->is_haswell) || devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) || - !devinfo->has_64bit_types)) { + !devinfo->has_64bit_float)) { /* IVB has an issue (which we found empirically) where it reads two * address register components per channel for indirectly addressed * 64-bit sources. diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c index 6cf69051ea2..e3fff5d2d1a 100644 --- a/src/intel/compiler/brw_reg_type.c +++ b/src/intel/compiler/brw_reg_type.c @@ -243,8 +243,10 @@ brw_reg_type_to_hw_type(const struct gen_device_info *devinfo, table = gen4_hw_type; } - assert(devinfo->has_64bit_types || brw_reg_type_to_size(type) < 8 || - type == BRW_REGISTER_TYPE_NF); + assert(devinfo->gen == 11 || type != BRW_REGISTER_TYPE_NF); + assert(devinfo->has_64bit_float || type != BRW_REGISTER_TYPE_DF); + assert(devinfo->has_64bit_int || + (type != BRW_REGISTER_TYPE_Q && type != BRW_REGISTER_TYPE_UQ)); if (file == BRW_IMMEDIATE_VALUE) { assert(table[type].imm_type != (enum hw_imm_type)INVALID); diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 7d7daa4581a..d794af4259d 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -893,7 +893,13 @@ TEST_P(validation_test, byte_64bit_conversion) return; for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { - if (!devinfo.has_64bit_types && type_sz(inst[i].src_type) == 8) + if (!devinfo.has_64bit_float && + inst[i].src_type == BRW_REGISTER_TYPE_DF) + continue; + + if (!devinfo.has_64bit_int && + (inst[i].src_type == BRW_REGISTER_TYPE_Q || + inst[i].src_type == BRW_REGISTER_TYPE_UQ)) continue; brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type)); @@ -989,10 +995,17 @@ TEST_P(validation_test, half_float_conversion) return; for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { - if (!devinfo.has_64bit_types && - (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) { + if (!devinfo.has_64bit_float && + (inst[i].dst_type == BRW_REGISTER_TYPE_DF || + inst[i].src_type == BRW_REGISTER_TYPE_DF)) + continue; + + if (!devinfo.has_64bit_int && + (inst[i].dst_type == BRW_REGISTER_TYPE_Q || + inst[i].dst_type == BRW_REGISTER_TYPE_UQ || + inst[i].src_type == BRW_REGISTER_TYPE_Q || + inst[i].src_type == BRW_REGISTER_TYPE_UQ)) continue; - } brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type)); @@ -1875,8 +1888,16 @@ TEST_P(validation_test, qword_low_power_align1_regioning_restrictions) return; for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { - if (!devinfo.has_64bit_types && - (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8)) + if (!devinfo.has_64bit_float && + (inst[i].dst_type == BRW_REGISTER_TYPE_DF || + inst[i].src_type == BRW_REGISTER_TYPE_DF)) + continue; + + if (!devinfo.has_64bit_int && + (inst[i].dst_type == BRW_REGISTER_TYPE_Q || + inst[i].dst_type == BRW_REGISTER_TYPE_UQ || + inst[i].src_type == BRW_REGISTER_TYPE_Q || + inst[i].src_type == BRW_REGISTER_TYPE_UQ)) continue; if (inst[i].opcode == BRW_OPCODE_MOV) { @@ -1999,8 +2020,16 @@ TEST_P(validation_test, qword_low_power_no_indirect_addressing) return; for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { - if (!devinfo.has_64bit_types && - (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8)) + if (!devinfo.has_64bit_float && + (inst[i].dst_type == BRW_REGISTER_TYPE_DF || + inst[i].src_type == BRW_REGISTER_TYPE_DF)) + continue; + + if (!devinfo.has_64bit_int && + (inst[i].dst_type == BRW_REGISTER_TYPE_Q || + inst[i].dst_type == BRW_REGISTER_TYPE_UQ || + inst[i].src_type == BRW_REGISTER_TYPE_Q || + inst[i].src_type == BRW_REGISTER_TYPE_UQ)) continue; if (inst[i].opcode == BRW_OPCODE_MOV) { @@ -2139,8 +2168,16 @@ TEST_P(validation_test, qword_low_power_no_64bit_arf) return; for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { - if (!devinfo.has_64bit_types && - (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8)) + if (!devinfo.has_64bit_float && + (inst[i].dst_type == BRW_REGISTER_TYPE_DF || + inst[i].src_type == BRW_REGISTER_TYPE_DF)) + continue; + + if (!devinfo.has_64bit_int && + (inst[i].dst_type == BRW_REGISTER_TYPE_Q || + inst[i].dst_type == BRW_REGISTER_TYPE_UQ || + inst[i].src_type == BRW_REGISTER_TYPE_Q || + inst[i].src_type == BRW_REGISTER_TYPE_UQ)) continue; if (inst[i].opcode == BRW_OPCODE_MOV) { @@ -2171,7 +2208,7 @@ TEST_P(validation_test, qword_low_power_no_64bit_arf) clear_instructions(p); } - if (!devinfo.has_64bit_types) + if (!devinfo.has_64bit_float) return; /* MAC implicitly reads the accumulator */ @@ -2348,8 +2385,16 @@ TEST_P(validation_test, qword_low_power_no_depctrl) return; for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { - if (!devinfo.has_64bit_types && - (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8)) + if (!devinfo.has_64bit_float && + (inst[i].dst_type == BRW_REGISTER_TYPE_DF || + inst[i].src_type == BRW_REGISTER_TYPE_DF)) + continue; + + if (!devinfo.has_64bit_int && + (inst[i].dst_type == BRW_REGISTER_TYPE_Q || + inst[i].dst_type == BRW_REGISTER_TYPE_UQ || + inst[i].src_type == BRW_REGISTER_TYPE_Q || + inst[i].src_type == BRW_REGISTER_TYPE_UQ)) continue; if (inst[i].opcode == BRW_OPCODE_MOV) { diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index 2b43ce0010b..0649c0705ea 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -219,7 +219,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 = { .must_use_separate_stencil = true, \ .has_llc = true, \ .has_pln = true, \ - .has_64bit_types = true, \ + .has_64bit_float = true, \ .has_surface_tile_offset = true, \ .timestamp_frequency = 12500000 @@ -417,7 +417,8 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = { .has_sample_with_hiz = false, \ .has_pln = true, \ .has_integer_dword_mul = true, \ - .has_64bit_types = true, \ + .has_64bit_float = true, \ + .has_64bit_int = true, \ .supports_simd16_3src = true, \ .has_surface_tile_offset = true, \ .num_thread_per_eu = 7, \ @@ -894,7 +895,8 @@ static const struct gen_device_info gen_device_info_cnl_gt2 = { #define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \ GEN8_FEATURES, \ GEN11_HW_INFO, \ - .has_64bit_types = false, \ + .has_64bit_float = false, \ + .has_64bit_int = false, \ .has_integer_dword_mul = false, \ .has_sample_with_hiz = false, \ .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ @@ -1061,7 +1063,8 @@ static const struct gen_device_info gen_device_info_ehl_4 = { #define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3) \ GEN8_FEATURES, \ GEN12_HW_INFO, \ - .has_64bit_types = false, \ + .has_64bit_float = false, \ + .has_64bit_int = false, \ .has_integer_dword_mul = false, \ .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ .simulator_id = 22, \ diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h index 65bef8f158e..73538160b54 100644 --- a/src/intel/dev/gen_device_info.h +++ b/src/intel/dev/gen_device_info.h @@ -70,7 +70,8 @@ struct gen_device_info bool has_llc; bool has_pln; - bool has_64bit_types; + bool has_64bit_float; + bool has_64bit_int; bool has_integer_dword_mul; bool has_compr4; bool has_surface_tile_offset; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 5599f35304e..1bfc14ccd83 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -949,9 +949,9 @@ void anv_GetPhysicalDeviceFeatures( .shaderClipDistance = true, .shaderCullDistance = true, .shaderFloat64 = pdevice->info.gen >= 8 && - pdevice->info.has_64bit_types, + pdevice->info.has_64bit_float, .shaderInt64 = pdevice->info.gen >= 8 && - pdevice->info.has_64bit_types, + pdevice->info.has_64bit_int, .shaderInt16 = pdevice->info.gen >= 8, .shaderResourceMinLod = pdevice->info.gen >= 9, .variableMultisampleRate = true,