intel/compiler: Split has_64bit_types into float/int
author Matt Turner <mattst88@gmail.com>
Mon, 13 Jan 2020 19:17:27 +0000 (11:17 -0800)
committer Marge Bot <eric+marge@anholt.net>
Wed, 22 Jan 2020 00:19:20 +0000 (00:19 +0000)
Gen7 has 64-bit floats but not 64-bit ints.

Acked-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2635>

src/intel/compiler/brw_compiler.c
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_reg_type.c
src/intel/compiler/test_eu_validate.cpp
src/intel/dev/gen_device_info.c
src/intel/dev/gen_device_info.h
src/intel/vulkan/anv_device.c

index 0a4140eda43f3306cfba7ac87fd2e87da659287c..a3b8ddb4bd7fe4a8de63090659d7466d6353dd37 100644 (file)
@@ -139,7 +139,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
       nir_lower_dsub |
       nir_lower_ddiv;
 
-   if (!devinfo->has_64bit_types || (INTEL_DEBUG & DEBUG_SOFT64)) {
+   if (!devinfo->has_64bit_float || (INTEL_DEBUG & DEBUG_SOFT64)) {
       int64_options |= nir_lower_mov64 |
                        nir_lower_icmp64 |
                        nir_lower_iadd64 |
index b5f3bdd7698d52072a18db075b4f622fce93df19..d9afe425e3192b07dcf3aec2c5be2001fe3c341b 100644 (file)
@@ -2545,7 +2545,8 @@ fs_visitor::opt_algebraic()
    foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
       switch (inst->opcode) {
       case BRW_OPCODE_MOV:
-         if (!devinfo->has_64bit_types &&
+         if (!devinfo->has_64bit_float &&
+             !devinfo->has_64bit_int &&
              (inst->dst.type == BRW_REGISTER_TYPE_DF ||
               inst->dst.type == BRW_REGISTER_TYPE_UQ ||
               inst->dst.type == BRW_REGISTER_TYPE_Q)) {
@@ -2678,7 +2679,8 @@ fs_visitor::opt_algebraic()
          }
          break;
       case BRW_OPCODE_SEL:
-         if (!devinfo->has_64bit_types &&
+         if (!devinfo->has_64bit_float &&
+             !devinfo->has_64bit_int &&
              (inst->dst.type == BRW_REGISTER_TYPE_DF ||
               inst->dst.type == BRW_REGISTER_TYPE_UQ ||
               inst->dst.type == BRW_REGISTER_TYPE_Q)) {
@@ -7366,7 +7368,7 @@ fs_visitor::optimize()
       split_virtual_grfs();
 
       /* Lower 64 bit MOVs generated by payload lowering. */
-      if (!devinfo->has_64bit_types)
+      if (!devinfo->has_64bit_float && !devinfo->has_64bit_int)
          OPT(opt_algebraic);
 
       OPT(register_coalesce);
index e8bbce5b2903263c8eae368a1a36bed411e4f238..fc19a3283fccc23c4672f24a77402204902cada2 100644 (file)
@@ -459,7 +459,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
       if (type_sz(reg.type) > 4 &&
           ((devinfo->gen == 7 && !devinfo->is_haswell) ||
            devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) ||
-           !devinfo->has_64bit_types)) {
+           !devinfo->has_64bit_float)) {
          /* IVB has an issue (which we found empirically) where it reads two
           * address register components per channel for indirectly addressed
           * 64-bit sources.
index 6cf69051ea25edc77e095a79149aea9ec7c5f045..e3fff5d2d1a3d7ffdb11fd9efabeb630741b5c6d 100644 (file)
@@ -243,8 +243,10 @@ brw_reg_type_to_hw_type(const struct gen_device_info *devinfo,
       table = gen4_hw_type;
    }
 
-   assert(devinfo->has_64bit_types || brw_reg_type_to_size(type) < 8 ||
-          type == BRW_REGISTER_TYPE_NF);
+   assert(devinfo->gen == 11 || type != BRW_REGISTER_TYPE_NF); 
+   assert(devinfo->has_64bit_float || type != BRW_REGISTER_TYPE_DF);
+   assert(devinfo->has_64bit_int ||
+          (type != BRW_REGISTER_TYPE_Q && type != BRW_REGISTER_TYPE_UQ));
 
    if (file == BRW_IMMEDIATE_VALUE) {
       assert(table[type].imm_type != (enum hw_imm_type)INVALID);
index 7d7daa4581a106267da9860f9878bf5685674b99..d794af4259da18552862b03ae10e6ec34c6d8ef7 100644 (file)
@@ -893,7 +893,13 @@ TEST_P(validation_test, byte_64bit_conversion)
       return;
 
    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
-      if (!devinfo.has_64bit_types && type_sz(inst[i].src_type) == 8)
+      if (!devinfo.has_64bit_float &&
+          inst[i].src_type == BRW_REGISTER_TYPE_DF)
+         continue;
+
+      if (!devinfo.has_64bit_int &&
+          (inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].src_type == BRW_REGISTER_TYPE_UQ))
          continue;
 
       brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
@@ -989,10 +995,17 @@ TEST_P(validation_test, half_float_conversion)
       return;
 
    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
-      if (!devinfo.has_64bit_types &&
-          (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) {
+      if (!devinfo.has_64bit_float &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+           inst[i].src_type == BRW_REGISTER_TYPE_DF))
+         continue;
+
+      if (!devinfo.has_64bit_int &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+           inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].src_type == BRW_REGISTER_TYPE_UQ))
          continue;
-      }
 
       brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
 
@@ -1875,8 +1888,16 @@ TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
       return;
 
    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
-      if (!devinfo.has_64bit_types &&
-          (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8))
+      if (!devinfo.has_64bit_float &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+           inst[i].src_type == BRW_REGISTER_TYPE_DF))
+         continue;
+
+      if (!devinfo.has_64bit_int &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+           inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].src_type == BRW_REGISTER_TYPE_UQ))
          continue;
 
       if (inst[i].opcode == BRW_OPCODE_MOV) {
@@ -1999,8 +2020,16 @@ TEST_P(validation_test, qword_low_power_no_indirect_addressing)
       return;
 
    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
-      if (!devinfo.has_64bit_types &&
-          (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8))
+      if (!devinfo.has_64bit_float &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+           inst[i].src_type == BRW_REGISTER_TYPE_DF))
+         continue;
+
+      if (!devinfo.has_64bit_int &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+           inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].src_type == BRW_REGISTER_TYPE_UQ))
          continue;
 
       if (inst[i].opcode == BRW_OPCODE_MOV) {
@@ -2139,8 +2168,16 @@ TEST_P(validation_test, qword_low_power_no_64bit_arf)
       return;
 
    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
-      if (!devinfo.has_64bit_types &&
-          (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8))
+      if (!devinfo.has_64bit_float &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+           inst[i].src_type == BRW_REGISTER_TYPE_DF))
+         continue;
+
+      if (!devinfo.has_64bit_int &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+           inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].src_type == BRW_REGISTER_TYPE_UQ))
          continue;
 
       if (inst[i].opcode == BRW_OPCODE_MOV) {
@@ -2171,7 +2208,7 @@ TEST_P(validation_test, qword_low_power_no_64bit_arf)
       clear_instructions(p);
    }
 
-   if (!devinfo.has_64bit_types)
+   if (!devinfo.has_64bit_float)
       return;
 
    /* MAC implicitly reads the accumulator */
@@ -2348,8 +2385,16 @@ TEST_P(validation_test, qword_low_power_no_depctrl)
       return;
 
    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
-      if (!devinfo.has_64bit_types &&
-          (type_sz(inst[i].dst_type) == 8 || type_sz(inst[i].src_type) == 8))
+      if (!devinfo.has_64bit_float &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_DF ||
+           inst[i].src_type == BRW_REGISTER_TYPE_DF))
+         continue;
+
+      if (!devinfo.has_64bit_int &&
+          (inst[i].dst_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].dst_type == BRW_REGISTER_TYPE_UQ ||
+           inst[i].src_type == BRW_REGISTER_TYPE_Q ||
+           inst[i].src_type == BRW_REGISTER_TYPE_UQ))
          continue;
 
       if (inst[i].opcode == BRW_OPCODE_MOV) {
index 2b43ce0010b5d31df94338df7cf404b85583d4e3..0649c0705ead3ce6288487c1d0976dc345c2b1fc 100644 (file)
@@ -219,7 +219,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 = {
    .must_use_separate_stencil = true,               \
    .has_llc = true,                                 \
    .has_pln = true,                                 \
-   .has_64bit_types = true,                         \
+   .has_64bit_float = true,                         \
    .has_surface_tile_offset = true,                 \
    .timestamp_frequency = 12500000
 
@@ -417,7 +417,8 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = {
    .has_sample_with_hiz = false,                    \
    .has_pln = true,                                 \
    .has_integer_dword_mul = true,                   \
-   .has_64bit_types = true,                         \
+   .has_64bit_float = true,                         \
+   .has_64bit_int = true,                           \
    .supports_simd16_3src = true,                    \
    .has_surface_tile_offset = true,                 \
    .num_thread_per_eu = 7,                          \
@@ -894,7 +895,8 @@ static const struct gen_device_info gen_device_info_cnl_gt2 = {
 #define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \
    GEN8_FEATURES,                                     \
    GEN11_HW_INFO,                                     \
-   .has_64bit_types = false,                          \
+   .has_64bit_float = false,                          \
+   .has_64bit_int = false,                            \
    .has_integer_dword_mul = false,                    \
    .has_sample_with_hiz = false,                      \
    .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
@@ -1061,7 +1063,8 @@ static const struct gen_device_info gen_device_info_ehl_4 = {
 #define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3)      \
    GEN8_FEATURES,                                               \
    GEN12_HW_INFO,                                               \
-   .has_64bit_types = false,                                    \
+   .has_64bit_float = false,                                    \
+   .has_64bit_int = false,                                      \
    .has_integer_dword_mul = false,                              \
    .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
    .simulator_id = 22,                                          \
index 65bef8f158e3885484c3b166fc756c901c69769d..73538160b54773fb25854697974ce91693485c5b 100644 (file)
@@ -70,7 +70,8 @@ struct gen_device_info
    bool has_llc;
 
    bool has_pln;
-   bool has_64bit_types;
+   bool has_64bit_float;
+   bool has_64bit_int;
    bool has_integer_dword_mul;
    bool has_compr4;
    bool has_surface_tile_offset;
index 5599f35304e4d2d408a2aa1db0d3ed36a785f0ba..1bfc14ccd838b5b20d6c64c7caa4b3581ba60ea9 100644 (file)
@@ -949,9 +949,9 @@ void anv_GetPhysicalDeviceFeatures(
       .shaderClipDistance                       = true,
       .shaderCullDistance                       = true,
       .shaderFloat64                            = pdevice->info.gen >= 8 &&
-                                                  pdevice->info.has_64bit_types,
+                                                  pdevice->info.has_64bit_float,
       .shaderInt64                              = pdevice->info.gen >= 8 &&
-                                                  pdevice->info.has_64bit_types,
+                                                  pdevice->info.has_64bit_int,
       .shaderInt16                              = pdevice->info.gen >= 8,
       .shaderResourceMinLod                     = pdevice->info.gen >= 9,
       .variableMultisampleRate                  = true,