radv/aco: Setup alternate path in RADV to support the experimental ACO compiler
author Daniel Schürmann <daniel@schuermann.dev>
Tue, 17 Sep 2019 12:35:22 +0000 (14:35 +0200)
committer Daniel Schürmann <daniel@schuermann.dev>
Thu, 19 Sep 2019 10:10:00 +0000 (12:10 +0200)
LLVM remains default and ACO can be enabled with RADV_PERFTEST=aco.

Co-authored-by: Daniel Schürmann <daniel@schuermann.dev>
Co-authored-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_llvm_util.c
src/amd/meson.build
src/amd/vulkan/meson.build
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_debug.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_extensions.py
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

index 64942670b6c09599f4ab2f40313a25851a1bd7d7..ddc8fee839be9a8934fa3ccbc9a302736e259d81 100644 (file)
@@ -49,6 +49,9 @@ static void ac_init_llvm_target()
        /* For inline assembly. */
        LLVMInitializeAMDGPUAsmParser();
 
+       /* For ACO disassembly. */
+       LLVMInitializeAMDGPUDisassembler();
+
        /* Workaround for bug in llvm 4.0 that causes image intrinsics
         * to disappear.
         * https://reviews.llvm.org/D26348
index f96a9aac09523e0a6bc51090b96c192d748dd08c..1e459b26c1ab47e1088a2d052875005923d25bb6 100644 (file)
@@ -22,6 +22,7 @@ inc_amd = include_directories('.')
 
 subdir('addrlib')
 subdir('common')
+subdir('compiler')
 if with_amd_vk
   subdir('vulkan')
 endif
index 9278f1144d914aee0ecef28e65417f9416a9d90b..72cb64c58478043e566a31485c1fe915a05a398e 100644 (file)
@@ -156,7 +156,7 @@ libvulkan_radeon = shared_library(
   ],
   dependencies : [
     dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
-    dep_valgrind, radv_deps,
+    dep_valgrind, radv_deps, idep_aco,
     idep_mesautil, idep_nir, idep_vulkan_util, idep_amdgfxregs_h, idep_xmlconfig,
   ],
   c_args : [c_vis_args, no_override_init_args, radv_flags],
index 6937eeacc49b567463dabc3782b66154c6483276..f35053b869506457fbc5910c414bedfe816321dd 100644 (file)
@@ -2844,6 +2844,10 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
                        break;
                case VK_ACCESS_SHADER_READ_BIT:
                        flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+                       /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
+                        * invalidate the scalar cache. */
+                       if (cmd_buffer->device->physical_device->use_aco)
+                               flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
 
                        if (!image_is_coherent)
                                flush_bits |= RADV_CMD_FLAG_INV_L2;
index 6276589d02565dfb837687da4e15212276c6faf0..ca71d535f2adbe016e31199a02abd3cd7574a708 100644 (file)
@@ -58,18 +58,19 @@ enum {
 };
 
 enum {
-       RADV_PERFTEST_NO_BATCHCHAIN  =   0x1,
-       RADV_PERFTEST_SISCHED        =   0x2,
-       RADV_PERFTEST_LOCAL_BOS      =   0x4,
-       RADV_PERFTEST_OUT_OF_ORDER   =   0x8,
-       RADV_PERFTEST_DCC_MSAA       =  0x10,
-       RADV_PERFTEST_BO_LIST        =  0x20,
-       RADV_PERFTEST_SHADER_BALLOT  =  0x40,
-       RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
-       RADV_PERFTEST_CS_WAVE_32     = 0x100,
-       RADV_PERFTEST_PS_WAVE_32     = 0x200,
-       RADV_PERFTEST_GE_WAVE_32     = 0x400,
-       RADV_PERFTEST_DFSM           = 0x800,
+       RADV_PERFTEST_NO_BATCHCHAIN   =    0x1,
+       RADV_PERFTEST_SISCHED         =    0x2,
+       RADV_PERFTEST_LOCAL_BOS       =    0x4,
+       RADV_PERFTEST_OUT_OF_ORDER    =    0x8,
+       RADV_PERFTEST_DCC_MSAA        =   0x10,
+       RADV_PERFTEST_BO_LIST         =   0x20,
+       RADV_PERFTEST_SHADER_BALLOT   =   0x40,
+       RADV_PERFTEST_TC_COMPAT_CMASK =   0x80,
+       RADV_PERFTEST_CS_WAVE_32      =  0x100,
+       RADV_PERFTEST_PS_WAVE_32      =  0x200,
+       RADV_PERFTEST_GE_WAVE_32      =  0x400,
+       RADV_PERFTEST_DFSM            =  0x800,
+       RADV_PERFTEST_ACO             = 0x1000,
 };
 
 bool
index af8607c1559318159144964d0c21a805896e094a..26de979b64f9dc557bd3d482d291b2ad4a01d16c 100644 (file)
@@ -86,41 +86,41 @@ radv_get_device_uuid(struct radeon_info *info, void *uuid)
 }
 
 static void
-radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len, bool aco)
 {
        const char *chip_string;
 
        switch (family) {
-       case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
-       case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
-       case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
-       case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
-       case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
-       case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
-       case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
-       case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
-       case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
-       case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
-       case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
-       case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
-       case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
-       case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
-       case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
-       case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
-       case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
-       case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
-       case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
-       case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
-       case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
-       case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
-       case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
-       case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
-       case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
-       case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
-       default: chip_string = "AMD RADV unknown"; break;
-       }
-
-       snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
+       case CHIP_TAHITI: chip_string = "TAHITI"; break;
+       case CHIP_PITCAIRN: chip_string = "PITCAIRN"; break;
+       case CHIP_VERDE: chip_string = "CAPE VERDE"; break;
+       case CHIP_OLAND: chip_string = "OLAND"; break;
+       case CHIP_HAINAN: chip_string = "HAINAN"; break;
+       case CHIP_BONAIRE: chip_string = "BONAIRE"; break;
+       case CHIP_KAVERI: chip_string = "KAVERI"; break;
+       case CHIP_KABINI: chip_string = "KABINI"; break;
+       case CHIP_HAWAII: chip_string = "HAWAII"; break;
+       case CHIP_TONGA: chip_string = "TONGA"; break;
+       case CHIP_ICELAND: chip_string = "ICELAND"; break;
+       case CHIP_CARRIZO: chip_string = "CARRIZO"; break;
+       case CHIP_FIJI: chip_string = "FIJI"; break;
+       case CHIP_POLARIS10: chip_string = "POLARIS10"; break;
+       case CHIP_POLARIS11: chip_string = "POLARIS11"; break;
+       case CHIP_POLARIS12: chip_string = "POLARIS12"; break;
+       case CHIP_STONEY: chip_string = "STONEY"; break;
+       case CHIP_VEGAM: chip_string = "VEGA M"; break;
+       case CHIP_VEGA10: chip_string = "VEGA10"; break;
+       case CHIP_VEGA12: chip_string = "VEGA12"; break;
+       case CHIP_VEGA20: chip_string = "VEGA20"; break;
+       case CHIP_RAVEN: chip_string = "RAVEN"; break;
+       case CHIP_RAVEN2: chip_string = "RAVEN2"; break;
+       case CHIP_NAVI10: chip_string = "NAVI10"; break;
+       case CHIP_NAVI12: chip_string = "NAVI12"; break;
+       case CHIP_NAVI14: chip_string = "NAVI14"; break;
+       default: chip_string = "unknown"; break;
+       }
+
+       snprintf(name, name_len, "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING ")", aco ? "/ACO" : "", chip_string);
 }
 
 static uint64_t
@@ -327,7 +327,14 @@ radv_physical_device_init(struct radv_physical_device *device,
 
        radv_handle_env_var_force_family(device);
 
-       radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
+       device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO;
+       if ((device->rad_info.chip_class < GFX8 ||
+            device->rad_info.chip_class > GFX9) && device->use_aco) {
+               fprintf(stderr, "WARNING: disabling ACO on unsupported GPUs.\n");
+               device->use_aco = false;
+       }
+
+       radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name), device->use_aco);
 
        if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
                device->ws->destroy(device->ws);
@@ -339,7 +346,8 @@ radv_physical_device_init(struct radv_physical_device *device,
        /* These flags affect shader compilation. */
        uint64_t shader_env_flags =
                (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
-               (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
+               (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0) |
+               (device->use_aco ? 0x4 : 0);
 
        /* The gpu id is already embedded in the uuid so we just pass "radv"
         * when creating the cache.
@@ -362,9 +370,10 @@ radv_physical_device_init(struct radv_physical_device *device,
                (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
 
        device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
-                                   device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+                                   (device->use_aco || device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT);
 
        device->use_ngg_streamout = false;
+       /* Don't re-derive use_aco here: it would undo the unsupported-GPU fallback. */
 
        /* Determine the number of threads per wave for all stages. */
        device->cs_wave_size = 64;
@@ -500,6 +509,7 @@ static const struct debug_control radv_perftest_options[] = {
        {"pswave32", RADV_PERFTEST_PS_WAVE_32},
        {"gewave32", RADV_PERFTEST_GE_WAVE_32},
        {"dfsm", RADV_PERFTEST_DFSM},
+       {"aco", RADV_PERFTEST_ACO},
        {NULL, 0}
 };
 
@@ -622,6 +632,8 @@ VkResult radv_CreateInstance(
        instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
                                                   radv_perftest_options);
 
+       if (instance->perftest_flags & RADV_PERFTEST_ACO)
+               fprintf(stderr, "WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely\n");
 
        if (instance->debug_flags & RADV_DEBUG_STARTUP)
                radv_logi("Created an instance");
@@ -832,7 +844,7 @@ void radv_GetPhysicalDeviceFeatures(
                .shaderCullDistance                       = true,
                .shaderFloat64                            = true,
                .shaderInt64                              = true,
-               .shaderInt16                              = pdevice->rad_info.chip_class >= GFX9,
+               .shaderInt16                              = pdevice->rad_info.chip_class >= GFX9 && !pdevice->use_aco,
                .sparseBinding                            = true,
                .variableMultisampleRate                  = true,
                .inheritedQueries                         = true,
@@ -874,7 +886,7 @@ void radv_GetPhysicalDeviceFeatures2(
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
                        VkPhysicalDevice16BitStorageFeatures *features =
                            (VkPhysicalDevice16BitStorageFeatures*)ext;
-                       bool enabled = pdevice->rad_info.chip_class >= GFX8;
+                       bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
                        features->storageBuffer16BitAccess = enabled;
                        features->uniformAndStorageBuffer16BitAccess = enabled;
                        features->storagePushConstant16 = enabled;
@@ -968,7 +980,7 @@ void radv_GetPhysicalDeviceFeatures2(
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
                        VkPhysicalDevice8BitStorageFeaturesKHR *features =
                            (VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
-                       bool enabled = pdevice->rad_info.chip_class >= GFX8;
+                       bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
                        features->storageBuffer8BitAccess = enabled;
                        features->uniformAndStorageBuffer8BitAccess = enabled;
                        features->storagePushConstant8 = enabled;
@@ -977,8 +989,8 @@ void radv_GetPhysicalDeviceFeatures2(
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
                        VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
                                (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
-                       features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8;
-                       features->shaderInt8 = true;
+                       features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+                       features->shaderInt8 = !pdevice->use_aco;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
index 4bb4c4dfc23feb4087f240796f9f0d0775eef966..eb2505ba76535a6a8c010119ebafe333dca8a301 100644 (file)
@@ -51,7 +51,7 @@ class Extension:
 # and dEQP-VK.api.info.device fail due to the duplicated strings.
 EXTENSIONS = [
     Extension('VK_ANDROID_native_buffer',                 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
-    Extension('VK_KHR_16bit_storage',                     1, True),
+    Extension('VK_KHR_16bit_storage',                     1, '!device->use_aco'),
     Extension('VK_KHR_bind_memory2',                      1, True),
     Extension('VK_KHR_create_renderpass2',                1, True),
     Extension('VK_KHR_dedicated_allocation',              1, True),
@@ -87,7 +87,7 @@ EXTENSIONS = [
     Extension('VK_KHR_sampler_ycbcr_conversion',          1, True),
     Extension('VK_KHR_shader_atomic_int64',               1, 'LLVM_VERSION_MAJOR >= 9'),
     Extension('VK_KHR_shader_draw_parameters',            1, True),
-    Extension('VK_KHR_shader_float16_int8',               1, True),
+    Extension('VK_KHR_shader_float16_int8',               1, '!device->use_aco'),
     Extension('VK_KHR_storage_buffer_storage_class',      1, True),
     Extension('VK_KHR_surface',                          25, 'RADV_HAS_SURFACE'),
     Extension('VK_KHR_surface_protected_capabilities',    1, 'RADV_HAS_SURFACE'),
@@ -99,7 +99,7 @@ EXTENSIONS = [
     Extension('VK_KHR_xlib_surface',                      6, 'VK_USE_PLATFORM_XLIB_KHR'),
     Extension('VK_KHR_multiview',                         1, True),
     Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
-    Extension('VK_KHR_8bit_storage',                      1, 'device->rad_info.chip_class >= GFX8'),
+    Extension('VK_KHR_8bit_storage',                      1, 'device->rad_info.chip_class >= GFX8 && !device->use_aco'),
     Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
     Extension('VK_EXT_buffer_device_address',             1, True),
@@ -138,8 +138,8 @@ EXTENSIONS = [
     Extension('VK_AMD_buffer_marker',                     1, True),
     Extension('VK_AMD_draw_indirect_count',               1, True),
     Extension('VK_AMD_gcn_shader',                        1, True),
-    Extension('VK_AMD_gpu_shader_half_float',             1, 'device->rad_info.chip_class >= GFX9'),
-    Extension('VK_AMD_gpu_shader_int16',                  1, 'device->rad_info.chip_class >= GFX9'),
+    Extension('VK_AMD_gpu_shader_half_float',             1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
+    Extension('VK_AMD_gpu_shader_int16',                  1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
     Extension('VK_AMD_rasterization_order',               1, 'device->rad_info.has_out_of_order_rast'),
     Extension('VK_AMD_shader_ballot',                     1, 'device->use_shader_ballot'),
     Extension('VK_AMD_shader_core_properties',            1, True),
index 5036fa69d2084e269ecb4f0f44f0c6ac5f42c15c..70ffc2412b35481fa4be1963aeae1992c3d42c80 100644 (file)
@@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
                hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
        if (device->physical_device->ge_wave_size == 32)
                hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+       if (device->physical_device->use_aco)
+               hash_flags |= RADV_HASH_SHADER_ACO;
        return hash_flags;
 }
 
@@ -2551,6 +2553,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
                           (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
 }
 
+/* Stages routed to ACO: FS and CS always, VS only without GS/tessellation. */
+static bool
+radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+{
+       return stage == MESA_SHADER_VERTEX ? (!has_gs && !has_ts)
+              : (stage == MESA_SHADER_FRAGMENT || stage == MESA_SHADER_COMPUTE);
+}
+
 static
 void radv_create_shaders(struct radv_pipeline *pipeline,
                          struct radv_device *device,
@@ -2613,6 +2623,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                modules[MESA_SHADER_FRAGMENT] = &fs_m;
        }
 
+       bool has_gs = modules[MESA_SHADER_GEOMETRY];
+       bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+       bool use_aco = device->physical_device->use_aco;
+
        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                const VkPipelineShaderStageCreateInfo *stage = pStages[i];
 
@@ -2621,10 +2635,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
                radv_start_feedback(stage_feedbacks[i]);
 
+               bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                nir[i] = radv_shader_compile_to_nir(device, modules[i],
                                                    stage ? stage->pName : "main", i,
                                                    stage ? stage->pSpecializationInfo : NULL,
-                                                   flags, pipeline->layout);
+                                                   flags, pipeline->layout, aco);
 
                /* We don't want to alter meta shaders IR directly so clone it
                 * first.
@@ -2651,7 +2666,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                                           nir_lower_non_uniform_ssbo_access |
                                           nir_lower_non_uniform_texture_access |
                                           nir_lower_non_uniform_image_access);
-                       NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+                       if (!aco)
+                               NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
                }
 
                if (radv_can_dump_shader(device, modules[i], false))
@@ -2690,11 +2708,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
                        radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
 
+                       bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
                        pipeline->shaders[MESA_SHADER_FRAGMENT] =
                               radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
                                                          pipeline->layout, keys + MESA_SHADER_FRAGMENT,
                                                          infos + MESA_SHADER_FRAGMENT,
-                                                         keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+                                                         keep_executable_info, aco,
+                                                         &binaries[MESA_SHADER_FRAGMENT]);
 
                        radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
                }
@@ -2725,7 +2745,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                        pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
                                                                                              pipeline->layout,
                                                                                              &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
-                                                                                             &binaries[MESA_SHADER_TESS_CTRL]);
+                                                                                             false, &binaries[MESA_SHADER_TESS_CTRL]);
 
                        radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
                }
@@ -2744,7 +2764,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                        pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
                                                                                             pipeline->layout,
                                                                                             &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
-                                                                                            &binaries[MESA_SHADER_GEOMETRY]);
+                                                                                            false, &binaries[MESA_SHADER_GEOMETRY]);
 
                        radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
                }
@@ -2763,10 +2783,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
                        radv_start_feedback(stage_feedbacks[i]);
 
+                       bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
                        pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
                                                                          pipeline->layout,
                                                                          keys + i, infos + i,keep_executable_info,
-                                                                         &binaries[i]);
+                                                                         aco, &binaries[i]);
 
                        radv_stop_feedback(stage_feedbacks[i], false);
                }
index 03dc9e02145f1f947c040c1d6674b669f593e0c0..0a3e7ca9d881765c577a9e429a30c39fa10e0784 100644 (file)
@@ -296,6 +296,9 @@ struct radv_physical_device {
        uint8_t cs_wave_size;
        uint8_t ge_wave_size;
 
+       /* Whether to use the experimental compiler backend */
+       bool use_aco;
+
        /* This is the drivers on-disk cache used as a fallback as opposed to
         * the pipeline cache defined by apps.
         */
@@ -1421,6 +1424,7 @@ struct radv_shader_module;
 #define RADV_HASH_SHADER_CS_WAVE32           (1 << 4)
 #define RADV_HASH_SHADER_PS_WAVE32           (1 << 5)
 #define RADV_HASH_SHADER_GE_WAVE32           (1 << 6)
+#define RADV_HASH_SHADER_ACO                 (1 << 7)
 
 void
 radv_hash_shaders(unsigned char *hash,
index 98abe8cd4373230bdf72a67d4cd3deeeb785c639..2bd4c351745393bb5e29e02371e00015fe9e1896 100644 (file)
 #include "util/debug.h"
 #include "ac_exp_param.h"
 
+#include "aco_interface.h"
+
 #include "util/string_buffer.h"
 
-static const struct nir_shader_compiler_options nir_options = {
+static const struct nir_shader_compiler_options nir_options_llvm = {
        .vertex_id_zero_based = true,
        .lower_scmp = true,
        .lower_flrp16 = true,
@@ -80,6 +82,36 @@ static const struct nir_shader_compiler_options nir_options = {
        .use_interpolated_input_intrinsics = true,
 };
 
+static const struct nir_shader_compiler_options nir_options_aco = {
+       .vertex_id_zero_based = true,
+       .lower_scmp = true,
+       .lower_flrp16 = true,
+       .lower_flrp32 = true,
+       .lower_flrp64 = true,
+       .lower_device_index_to_zero = true,
+       .lower_fdiv = true,
+       .lower_bitfield_insert_to_bitfield_select = true,
+       .lower_bitfield_extract = true,
+       .lower_sub = true, /* TODO: set this to false once !1236 is merged */
+       .lower_pack_snorm_2x16 = true,
+       .lower_pack_snorm_4x8 = true,
+       .lower_pack_unorm_2x16 = true,
+       .lower_pack_unorm_4x8 = true,
+       .lower_unpack_snorm_2x16 = true,
+       .lower_unpack_snorm_4x8 = true,
+       .lower_unpack_unorm_2x16 = true,
+       .lower_unpack_unorm_4x8 = true,
+       .lower_unpack_half_2x16 = true,
+       .lower_extract_byte = true,
+       .lower_extract_word = true,
+       .lower_ffma = true,
+       .lower_fpow = true,
+       .lower_mul_2x32_64 = true,
+       .lower_rotate = true,
+       .max_unroll_iterations = 32,
+       .use_interpolated_input_intrinsics = true,
+};
+
 bool
 radv_can_dump_shader(struct radv_device *device,
                     struct radv_shader_module *module,
@@ -257,15 +289,18 @@ radv_shader_compile_to_nir(struct radv_device *device,
                           gl_shader_stage stage,
                           const VkSpecializationInfo *spec_info,
                           const VkPipelineCreateFlags flags,
-                          const struct radv_pipeline_layout *layout)
+                          const struct radv_pipeline_layout *layout,
+                          bool use_aco)
 {
        nir_shader *nir;
+       const nir_shader_compiler_options *nir_options = use_aco ? &nir_options_aco :
+                                                                  &nir_options_llvm;
        if (module->nir) {
                /* Some things such as our meta clear/blit code will give us a NIR
                 * shader directly.  In that case, we just ignore the SPIR-V entirely
                 * and just use the NIR shader */
                nir = module->nir;
-               nir->options = &nir_options;
+               nir->options = nir_options;
                nir_validate_shader(nir, "in internal shader");
 
                assert(exec_list_length(&nir->functions) == 1);
@@ -305,13 +340,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                .descriptor_indexing = true,
                                .device_group = true,
                                .draw_parameters = true,
-                               .float16 = true,
+                               .float16 = !device->physical_device->use_aco,
                                .float64 = true,
                                .geometry_streams = true,
                                .image_read_without_format = true,
                                .image_write_without_format = true,
-                               .int8 = true,
-                               .int16 = true,
+                               .int8 = !device->physical_device->use_aco,
+                               .int16 = !device->physical_device->use_aco,
                                .int64 = true,
                                .int64_atomics = true,
                                .multiview = true,
@@ -320,8 +355,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                                .runtime_descriptor_array = true,
                                .shader_viewport_index_layer = true,
                                .stencil_export = true,
-                               .storage_8bit = true,
-                               .storage_16bit = true,
+                               .storage_8bit = !device->physical_device->use_aco,
+                               .storage_16bit = !device->physical_device->use_aco,
                                .storage_image_ms = true,
                                .subgroup_arithmetic = true,
                                .subgroup_ballot = true,
@@ -343,7 +378,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
                nir = spirv_to_nir(spirv, module->size / 4,
                                   spec_entries, num_spec_entries,
                                   stage, entrypoint_name,
-                                  &spirv_options, &nir_options);
+                                  &spirv_options, nir_options);
                assert(nir->info.stage == stage);
                nir_validate_shader(nir, "after spirv_to_nir");
 
@@ -383,6 +418,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                NIR_PASS_V(nir, nir_split_var_copies);
                NIR_PASS_V(nir, nir_split_per_member_structs);
 
+               if (nir->info.stage == MESA_SHADER_FRAGMENT && use_aco)
+                        NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
                if (nir->info.stage == MESA_SHADER_FRAGMENT)
                        NIR_PASS_V(nir, nir_lower_input_attachments, true);
 
@@ -961,7 +998,7 @@ radv_shader_variant_create(struct radv_device *device,
                assert(binary->type == RADV_BINARY_TYPE_LEGACY);
                config = ((struct radv_shader_binary_legacy *)binary)->config;
                variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
-               variant->exec_size = variant->code_size;
+               variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
        }
 
        variant->info = binary->info;
@@ -1049,13 +1086,12 @@ shader_variant_compile(struct radv_device *device,
                       struct radv_nir_compiler_options *options,
                       bool gs_copy_shader,
                       bool keep_shader_info,
+                      bool use_aco,
                       struct radv_shader_binary **binary_out)
 {
        enum radeon_family chip_family = device->physical_device->rad_info.family;
-       enum ac_target_machine_options tm_options = 0;
-       struct ac_llvm_compiler ac_llvm;
        struct radv_shader_binary *binary = NULL;
-       bool thread_compiler;
+       bool init_llvm;
 
        options->family = chip_family;
        options->chip_class = device->physical_device->rad_info.chip_class;
@@ -1079,32 +1115,48 @@ shader_variant_compile(struct radv_device *device,
        else
                options->wave_size = device->physical_device->ge_wave_size;
 
-       if (options->supports_spill)
-               tm_options |= AC_TM_SUPPORTS_SPILL;
-       if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
-               tm_options |= AC_TM_SISCHED;
-       if (options->check_ir)
-               tm_options |= AC_TM_CHECK_IR;
-       if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
-               tm_options |= AC_TM_NO_LOAD_STORE_OPT;
-
-       thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
-       ac_init_llvm_once();
-       radv_init_llvm_compiler(&ac_llvm,
-                               thread_compiler,
-                               chip_family, tm_options,
-                               options->wave_size);
-       if (gs_copy_shader) {
-               assert(shader_count == 1);
-               radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
-                                           info, options);
+       init_llvm = !use_aco || options->dump_shader;
+#ifndef NDEBUG
+       init_llvm |= options->record_llvm_ir;
+#endif
+       if (init_llvm)
+               ac_init_llvm_once();
+
+       if (use_aco) {
+               aco_compile_shader(shader_count, shaders, &binary, info, options);
+               binary->info = *info;
        } else {
-               radv_compile_nir_shader(&ac_llvm, &binary, info,
-                                       shaders, shader_count, options);
-       }
-       binary->info = *info;
+               enum ac_target_machine_options tm_options = 0;
+               struct ac_llvm_compiler ac_llvm;
+               bool thread_compiler;
+
+               if (options->supports_spill)
+                       tm_options |= AC_TM_SUPPORTS_SPILL;
+               if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
+                       tm_options |= AC_TM_SISCHED;
+               if (options->check_ir)
+                       tm_options |= AC_TM_CHECK_IR;
+               if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
+                       tm_options |= AC_TM_NO_LOAD_STORE_OPT;
+
+               thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+               radv_init_llvm_compiler(&ac_llvm,
+                                       thread_compiler,
+                                       chip_family, tm_options,
+                                       options->wave_size);
+
+               if (gs_copy_shader) {
+                       assert(shader_count == 1);
+                       radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
+                                                   info, options);
+               } else {
+                       radv_compile_nir_shader(&ac_llvm, &binary, info,
+                                               shaders, shader_count, options);
+               }
 
-       radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+               binary->info = *info;
+               radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+       }
 
        struct radv_shader_variant *variant = radv_shader_variant_create(device, binary,
                                                                         keep_shader_info);
@@ -1143,6 +1195,7 @@ radv_shader_variant_compile(struct radv_device *device,
                           const struct radv_shader_variant_key *key,
                           struct radv_shader_info *info,
                           bool keep_shader_info,
+                          bool use_aco,
                           struct radv_shader_binary **binary_out)
 {
        struct radv_nir_compiler_options options = {0};
@@ -1156,7 +1209,7 @@ radv_shader_variant_compile(struct radv_device *device,
        options.robust_buffer_access = device->robust_buffer_access;
 
        return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
-                                    &options, false, keep_shader_info, binary_out);
+                                    &options, false, keep_shader_info, use_aco, binary_out);
 }
 
 struct radv_shader_variant *
@@ -1172,7 +1225,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
        options.key.has_multiview_view_index = multiview;
 
        return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
-                                     info, &options, true, keep_shader_info, binary_out);
+                                     info, &options, true, keep_shader_info, false, binary_out);
 }
 
 void
index 57f9d160ed0032623412ea0800e51ef9ab5d27c8..459ff863a91274207f97e420143000cc8e536195 100644 (file)
@@ -333,6 +333,7 @@ struct radv_shader_binary_legacy {
        struct radv_shader_binary base;
        struct ac_shader_config config;
        unsigned code_size;
+       unsigned exec_size;
        unsigned llvm_ir_size;
        unsigned disasm_size;
        
@@ -390,7 +391,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
                           gl_shader_stage stage,
                           const VkSpecializationInfo *spec_info,
                           const VkPipelineCreateFlags flags,
-                          const struct radv_pipeline_layout *layout);
+                          const struct radv_pipeline_layout *layout,
+                          bool use_aco);
 
 void *
 radv_alloc_shader_memory(struct radv_device *device,
@@ -412,6 +414,7 @@ radv_shader_variant_compile(struct radv_device *device,
                            const struct radv_shader_variant_key *key,
                            struct radv_shader_info *info,
                            bool keep_shader_info,
+                           bool use_aco,
                            struct radv_shader_binary **binary_out);
 
 struct radv_shader_variant *