From a70a9987181a09258406cc0d8ff5e34acc000371 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Tue, 17 Sep 2019 14:35:22 +0200 Subject: [PATCH] radv/aco: Setup alternate path in RADV to support the experimental ACO compiler MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit LLVM remains default and ACO can be enabled with RADV_PERFTEST=aco. Co-authored-by: Daniel Schürmann Co-authored-by: Rhys Perry Reviewed-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/common/ac_llvm_util.c | 3 + src/amd/meson.build | 1 + src/amd/vulkan/meson.build | 2 +- src/amd/vulkan/radv_cmd_buffer.c | 4 + src/amd/vulkan/radv_debug.h | 25 +++--- src/amd/vulkan/radv_device.c | 90 +++++++++++--------- src/amd/vulkan/radv_extensions.py | 10 +-- src/amd/vulkan/radv_pipeline.c | 33 ++++++-- src/amd/vulkan/radv_private.h | 4 + src/amd/vulkan/radv_shader.c | 131 +++++++++++++++++++++--------- src/amd/vulkan/radv_shader.h | 5 +- 11 files changed, 205 insertions(+), 103 deletions(-) diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 64942670b6c..ddc8fee839b 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -49,6 +49,9 @@ static void ac_init_llvm_target() /* For inline assembly. */ LLVMInitializeAMDGPUAsmParser(); + /* For ACO disassembly. */ + LLVMInitializeAMDGPUDisassembler(); + /* Workaround for bug in llvm 4.0 that causes image intrinsics * to disappear. 
* https://reviews.llvm.org/D26348 diff --git a/src/amd/meson.build b/src/amd/meson.build index f96a9aac095..1e459b26c1a 100644 --- a/src/amd/meson.build +++ b/src/amd/meson.build @@ -22,6 +22,7 @@ inc_amd = include_directories('.') subdir('addrlib') subdir('common') +subdir('compiler') if with_amd_vk subdir('vulkan') endif diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 9278f1144d9..72cb64c5847 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -156,7 +156,7 @@ libvulkan_radeon = shared_library( ], dependencies : [ dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m, - dep_valgrind, radv_deps, + dep_valgrind, radv_deps, idep_aco, idep_mesautil, idep_nir, idep_vulkan_util, idep_amdgfxregs_h, idep_xmlconfig, ], c_args : [c_vis_args, no_override_init_args, radv_flags], diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 6937eeacc49..f35053b8695 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2844,6 +2844,10 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, break; case VK_ACCESS_SHADER_READ_BIT: flush_bits |= RADV_CMD_FLAG_INV_VCACHE; + /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to + * invalidate the scalar cache. 
*/ + if (cmd_buffer->device->physical_device->use_aco) + flush_bits |= RADV_CMD_FLAG_INV_SCACHE; if (!image_is_coherent) flush_bits |= RADV_CMD_FLAG_INV_L2; diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 6276589d025..ca71d535f2a 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -58,18 +58,19 @@ enum { }; enum { - RADV_PERFTEST_NO_BATCHCHAIN = 0x1, - RADV_PERFTEST_SISCHED = 0x2, - RADV_PERFTEST_LOCAL_BOS = 0x4, - RADV_PERFTEST_OUT_OF_ORDER = 0x8, - RADV_PERFTEST_DCC_MSAA = 0x10, - RADV_PERFTEST_BO_LIST = 0x20, - RADV_PERFTEST_SHADER_BALLOT = 0x40, - RADV_PERFTEST_TC_COMPAT_CMASK = 0x80, - RADV_PERFTEST_CS_WAVE_32 = 0x100, - RADV_PERFTEST_PS_WAVE_32 = 0x200, - RADV_PERFTEST_GE_WAVE_32 = 0x400, - RADV_PERFTEST_DFSM = 0x800, + RADV_PERFTEST_NO_BATCHCHAIN = 0x1, + RADV_PERFTEST_SISCHED = 0x2, + RADV_PERFTEST_LOCAL_BOS = 0x4, + RADV_PERFTEST_OUT_OF_ORDER = 0x8, + RADV_PERFTEST_DCC_MSAA = 0x10, + RADV_PERFTEST_BO_LIST = 0x20, + RADV_PERFTEST_SHADER_BALLOT = 0x40, + RADV_PERFTEST_TC_COMPAT_CMASK = 0x80, + RADV_PERFTEST_CS_WAVE_32 = 0x100, + RADV_PERFTEST_PS_WAVE_32 = 0x200, + RADV_PERFTEST_GE_WAVE_32 = 0x400, + RADV_PERFTEST_DFSM = 0x800, + RADV_PERFTEST_ACO = 0x1000, }; bool diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index af8607c1559..26de979b64f 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -86,41 +86,41 @@ radv_get_device_uuid(struct radeon_info *info, void *uuid) } static void -radv_get_device_name(enum radeon_family family, char *name, size_t name_len) +radv_get_device_name(enum radeon_family family, char *name, size_t name_len, bool aco) { const char *chip_string; switch (family) { - case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break; - case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break; - case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break; - case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break; - case CHIP_HAINAN: 
chip_string = "AMD RADV HAINAN"; break; - case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break; - case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break; - case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break; - case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break; - case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break; - case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break; - case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break; - case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break; - case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break; - case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break; - case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break; - case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break; - case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break; - case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break; - case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break; - case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break; - case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break; - case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break; - case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break; - case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break; - case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break; - default: chip_string = "AMD RADV unknown"; break; - } - - snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string); + case CHIP_TAHITI: chip_string = "TAHITI"; break; + case CHIP_PITCAIRN: chip_string = "PITCAIRN"; break; + case CHIP_VERDE: chip_string = "CAPE VERDE"; break; + case CHIP_OLAND: chip_string = "OLAND"; break; + case CHIP_HAINAN: chip_string = "HAINAN"; break; + case CHIP_BONAIRE: chip_string = "BONAIRE"; break; + case CHIP_KAVERI: chip_string = "KAVERI"; break; + case CHIP_KABINI: chip_string = "KABINI"; break; + case CHIP_HAWAII: chip_string = "HAWAII"; break; + case CHIP_TONGA: chip_string = "TONGA"; break; + case CHIP_ICELAND: 
chip_string = "ICELAND"; break; + case CHIP_CARRIZO: chip_string = "CARRIZO"; break; + case CHIP_FIJI: chip_string = "FIJI"; break; + case CHIP_POLARIS10: chip_string = "POLARIS10"; break; + case CHIP_POLARIS11: chip_string = "POLARIS11"; break; + case CHIP_POLARIS12: chip_string = "POLARIS12"; break; + case CHIP_STONEY: chip_string = "STONEY"; break; + case CHIP_VEGAM: chip_string = "VEGA M"; break; + case CHIP_VEGA10: chip_string = "VEGA10"; break; + case CHIP_VEGA12: chip_string = "VEGA12"; break; + case CHIP_VEGA20: chip_string = "VEGA20"; break; + case CHIP_RAVEN: chip_string = "RAVEN"; break; + case CHIP_RAVEN2: chip_string = "RAVEN2"; break; + case CHIP_NAVI10: chip_string = "NAVI10"; break; + case CHIP_NAVI12: chip_string = "NAVI12"; break; + case CHIP_NAVI14: chip_string = "NAVI14"; break; + default: chip_string = "unknown"; break; + } + + snprintf(name, name_len, "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING ")", aco ? "/ACO" : "", chip_string); } static uint64_t @@ -327,7 +327,14 @@ radv_physical_device_init(struct radv_physical_device *device, radv_handle_env_var_force_family(device); - radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name)); + device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO; + if ((device->rad_info.chip_class < GFX8 || + device->rad_info.chip_class > GFX9) && device->use_aco) { + fprintf(stderr, "WARNING: disabling ACO on unsupported GPUs.\n"); + device->use_aco = false; + } + + radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name), device->use_aco); if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) { device->ws->destroy(device->ws); @@ -339,7 +346,8 @@ radv_physical_device_init(struct radv_physical_device *device, /* These flags affect shader compilation. */ uint64_t shader_env_flags = (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) | - (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 
0x2 : 0); + (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0) | + (device->use_aco ? 0x4 : 0); /* The gpu id is already embedded in the uuid so we just pass "radv" * when creating the cache. @@ -362,9 +370,9 @@ radv_physical_device_init(struct radv_physical_device *device, (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); device->use_shader_ballot = device->rad_info.chip_class >= GFX8 && - device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT; + (device->use_aco || device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT); device->use_ngg_streamout = false; /* Determine the number of threads per wave for all stages. */ device->cs_wave_size = 64; @@ -500,6 +509,7 @@ static const struct debug_control radv_perftest_options[] = { {"pswave32", RADV_PERFTEST_PS_WAVE_32}, {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM}, + {"aco", RADV_PERFTEST_ACO}, {NULL, 0} }; @@ -622,6 +632,8 @@ VkResult radv_CreateInstance( instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options); + if (instance->perftest_flags & RADV_PERFTEST_ACO) + fprintf(stderr, "WARNING: Experimental compiler backend enabled. Here be dragons! 
Incorrect rendering, GPU hangs and/or resets are likely\n"); if (instance->debug_flags & RADV_DEBUG_STARTUP) radv_logi("Created an instance"); @@ -832,7 +844,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, - .shaderInt16 = pdevice->rad_info.chip_class >= GFX9, + .shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && !pdevice->use_aco, .sparseBinding = true, .variableMultisampleRate = true, .inheritedQueries = true, @@ -874,7 +886,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - bool enabled = pdevice->rad_info.chip_class >= GFX8; + bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco; features->storageBuffer16BitAccess = enabled; features->uniformAndStorageBuffer16BitAccess = enabled; features->storagePushConstant16 = enabled; @@ -968,7 +980,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { VkPhysicalDevice8BitStorageFeaturesKHR *features = (VkPhysicalDevice8BitStorageFeaturesKHR*)ext; - bool enabled = pdevice->rad_info.chip_class >= GFX8; + bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco; features->storageBuffer8BitAccess = enabled; features->uniformAndStorageBuffer8BitAccess = enabled; features->storagePushConstant8 = enabled; @@ -977,8 +989,8 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: { VkPhysicalDeviceFloat16Int8FeaturesKHR *features = (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext; - features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8; - features->shaderInt8 = true; + features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco; + features->shaderInt8 = !pdevice->use_aco; break; } case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: { diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 4bb4c4dfc23..eb2505ba765 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -51,7 +51,7 @@ class Extension: # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'), - Extension('VK_KHR_16bit_storage', 1, True), + Extension('VK_KHR_16bit_storage', 1, '!device->use_aco'), Extension('VK_KHR_bind_memory2', 1, True), Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), @@ -87,7 +87,7 @@ EXTENSIONS = [ Extension('VK_KHR_sampler_ycbcr_conversion', 1, True), Extension('VK_KHR_shader_atomic_int64', 1, 'LLVM_VERSION_MAJOR >= 9'), Extension('VK_KHR_shader_draw_parameters', 1, True), - Extension('VK_KHR_shader_float16_int8', 1, True), + Extension('VK_KHR_shader_float16_int8', 1, '!device->use_aco'), Extension('VK_KHR_storage_buffer_storage_class', 1, True), Extension('VK_KHR_surface', 25, 'RADV_HAS_SURFACE'), Extension('VK_KHR_surface_protected_capabilities', 1, 'RADV_HAS_SURFACE'), @@ -99,7 +99,7 @@ EXTENSIONS = [ Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), Extension('VK_KHR_multiview', 1, True), Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), - Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8'), + Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8 && !device->use_aco'), Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), Extension('VK_EXT_buffer_device_address', 1, True), @@ -138,8 +138,8 @@ EXTENSIONS = [ Extension('VK_AMD_buffer_marker', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 
1, True), - Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX9'), - Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX9'), + Extension('VK_AMD_gpu_shader_half_float', 1, '!device->use_aco && device->rad_info.chip_class >= GFX9'), + Extension('VK_AMD_gpu_shader_int16', 1, '!device->use_aco && device->rad_info.chip_class >= GFX9'), Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.has_out_of_order_rast'), Extension('VK_AMD_shader_ballot', 1, 'device->use_shader_ballot'), Extension('VK_AMD_shader_core_properties', 1, True), diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 5036fa69d20..70ffc2412b3 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device) hash_flags |= RADV_HASH_SHADER_PS_WAVE32; if (device->physical_device->ge_wave_size == 32) hash_flags |= RADV_HASH_SHADER_GE_WAVE32; + if (device->physical_device->use_aco) + hash_flags |= RADV_HASH_SHADER_ACO; return hash_flags; } @@ -2551,6 +2553,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit) (cache_hit ? 
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0); } +static +bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts) +{ + return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) || + stage == MESA_SHADER_FRAGMENT || + stage == MESA_SHADER_COMPUTE; +} + static void radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device, @@ -2613,6 +2623,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline, modules[MESA_SHADER_FRAGMENT] = &fs_m; } + bool has_gs = modules[MESA_SHADER_GEOMETRY]; + bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL]; + bool use_aco = device->physical_device->use_aco; + for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { const VkPipelineShaderStageCreateInfo *stage = pStages[i]; @@ -2621,10 +2635,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[i]); + bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts); nir[i] = radv_shader_compile_to_nir(device, modules[i], stage ? stage->pName : "main", i, stage ? stage->pSpecializationInfo : NULL, - flags, pipeline->layout); + flags, pipeline->layout, aco); /* We don't want to alter meta shaders IR directly so clone it * first. 
@@ -2651,7 +2666,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline, nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access); - NIR_PASS_V(nir[i], nir_lower_bool_to_int32); + + bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts); + if (!aco) + NIR_PASS_V(nir[i], nir_lower_bool_to_int32); } if (radv_can_dump_shader(device, modules[i], false)) @@ -2690,11 +2708,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline, if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) { radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]); + bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts); pipeline->shaders[MESA_SHADER_FRAGMENT] = radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, pipeline->layout, keys + MESA_SHADER_FRAGMENT, infos + MESA_SHADER_FRAGMENT, - keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]); + keep_executable_info, aco, + &binaries[MESA_SHADER_FRAGMENT]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false); } @@ -2725,7 +2745,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline->layout, &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info, - &binaries[MESA_SHADER_TESS_CTRL]); + false, &binaries[MESA_SHADER_TESS_CTRL]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false); } @@ -2744,7 +2764,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline->layout, &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info, - &binaries[MESA_SHADER_GEOMETRY]); + false, &binaries[MESA_SHADER_GEOMETRY]); radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false); } @@ -2763,10 
+2783,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline, radv_start_feedback(stage_feedbacks[i]); + bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts); pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1, pipeline->layout, keys + i, infos + i,keep_executable_info, - &binaries[i]); + aco, &binaries[i]); radv_stop_feedback(stage_feedbacks[i], false); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 03dc9e02145..0a3e7ca9d88 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -296,6 +296,9 @@ struct radv_physical_device { uint8_t cs_wave_size; uint8_t ge_wave_size; + /* Whether to use the experimental compiler backend */ + bool use_aco; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -1421,6 +1424,7 @@ struct radv_shader_module; #define RADV_HASH_SHADER_CS_WAVE32 (1 << 4) #define RADV_HASH_SHADER_PS_WAVE32 (1 << 5) #define RADV_HASH_SHADER_GE_WAVE32 (1 << 6) +#define RADV_HASH_SHADER_ACO (1 << 7) void radv_hash_shaders(unsigned char *hash, diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 98abe8cd437..2bd4c351745 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -48,9 +48,11 @@ #include "util/debug.h" #include "ac_exp_param.h" +#include "aco_interface.h" + #include "util/string_buffer.h" -static const struct nir_shader_compiler_options nir_options = { +static const struct nir_shader_compiler_options nir_options_llvm = { .vertex_id_zero_based = true, .lower_scmp = true, .lower_flrp16 = true, @@ -80,6 +82,36 @@ static const struct nir_shader_compiler_options nir_options = { .use_interpolated_input_intrinsics = true, }; +static const struct nir_shader_compiler_options nir_options_aco = { + .vertex_id_zero_based = true, + .lower_scmp = true, + .lower_flrp16 = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + 
.lower_device_index_to_zero = true, + .lower_fdiv = true, + .lower_bitfield_insert_to_bitfield_select = true, + .lower_bitfield_extract = true, + .lower_sub = true, /* TODO: set this to false once !1236 is merged */ + .lower_pack_snorm_2x16 = true, + .lower_pack_snorm_4x8 = true, + .lower_pack_unorm_2x16 = true, + .lower_pack_unorm_4x8 = true, + .lower_unpack_snorm_2x16 = true, + .lower_unpack_snorm_4x8 = true, + .lower_unpack_unorm_2x16 = true, + .lower_unpack_unorm_4x8 = true, + .lower_unpack_half_2x16 = true, + .lower_extract_byte = true, + .lower_extract_word = true, + .lower_ffma = true, + .lower_fpow = true, + .lower_mul_2x32_64 = true, + .lower_rotate = true, + .max_unroll_iterations = 32, + .use_interpolated_input_intrinsics = true, +}; + bool radv_can_dump_shader(struct radv_device *device, struct radv_shader_module *module, @@ -257,15 +289,18 @@ radv_shader_compile_to_nir(struct radv_device *device, gl_shader_stage stage, const VkSpecializationInfo *spec_info, const VkPipelineCreateFlags flags, - const struct radv_pipeline_layout *layout) + const struct radv_pipeline_layout *layout, + bool use_aco) { nir_shader *nir; + const nir_shader_compiler_options *nir_options = use_aco ? &nir_options_aco : + &nir_options_llvm; if (module->nir) { /* Some things such as our meta clear/blit code will give us a NIR * shader directly. 
In that case, we just ignore the SPIR-V entirely * and just use the NIR shader */ nir = module->nir; - nir->options = &nir_options; + nir->options = nir_options; nir_validate_shader(nir, "in internal shader"); assert(exec_list_length(&nir->functions) == 1); @@ -305,13 +340,13 @@ radv_shader_compile_to_nir(struct radv_device *device, .descriptor_indexing = true, .device_group = true, .draw_parameters = true, - .float16 = true, + .float16 = !device->physical_device->use_aco, .float64 = true, .geometry_streams = true, .image_read_without_format = true, .image_write_without_format = true, - .int8 = true, - .int16 = true, + .int8 = !device->physical_device->use_aco, + .int16 = !device->physical_device->use_aco, .int64 = true, .int64_atomics = true, .multiview = true, @@ -320,8 +355,8 @@ radv_shader_compile_to_nir(struct radv_device *device, .runtime_descriptor_array = true, .shader_viewport_index_layer = true, .stencil_export = true, - .storage_8bit = true, - .storage_16bit = true, + .storage_8bit = !device->physical_device->use_aco, + .storage_16bit = !device->physical_device->use_aco, .storage_image_ms = true, .subgroup_arithmetic = true, .subgroup_ballot = true, @@ -343,7 +378,7 @@ radv_shader_compile_to_nir(struct radv_device *device, nir = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, stage, entrypoint_name, - &spirv_options, &nir_options); + &spirv_options, nir_options); assert(nir->info.stage == stage); nir_validate_shader(nir, "after spirv_to_nir"); @@ -383,6 +418,8 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_split_per_member_structs); + if (nir->info.stage == MESA_SHADER_FRAGMENT && use_aco) + NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out); if (nir->info.stage == MESA_SHADER_FRAGMENT) NIR_PASS_V(nir, nir_lower_input_attachments, true); @@ -961,7 +998,7 @@ radv_shader_variant_create(struct radv_device *device, assert(binary->type == 
RADV_BINARY_TYPE_LEGACY); config = ((struct radv_shader_binary_legacy *)binary)->config; variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size); - variant->exec_size = variant->code_size; + variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size; } variant->info = binary->info; @@ -1049,13 +1086,12 @@ shader_variant_compile(struct radv_device *device, struct radv_nir_compiler_options *options, bool gs_copy_shader, bool keep_shader_info, + bool use_aco, struct radv_shader_binary **binary_out) { enum radeon_family chip_family = device->physical_device->rad_info.family; - enum ac_target_machine_options tm_options = 0; - struct ac_llvm_compiler ac_llvm; struct radv_shader_binary *binary = NULL; - bool thread_compiler; + bool init_llvm; options->family = chip_family; options->chip_class = device->physical_device->rad_info.chip_class; @@ -1079,32 +1115,48 @@ shader_variant_compile(struct radv_device *device, else options->wave_size = device->physical_device->ge_wave_size; - if (options->supports_spill) - tm_options |= AC_TM_SUPPORTS_SPILL; - if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED) - tm_options |= AC_TM_SISCHED; - if (options->check_ir) - tm_options |= AC_TM_CHECK_IR; - if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) - tm_options |= AC_TM_NO_LOAD_STORE_OPT; - - thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); - ac_init_llvm_once(); - radv_init_llvm_compiler(&ac_llvm, - thread_compiler, - chip_family, tm_options, - options->wave_size); - if (gs_copy_shader) { - assert(shader_count == 1); - radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, - info, options); + init_llvm = !use_aco || options->dump_shader; +#ifndef NDEBUG + init_llvm |= options->record_llvm_ir; +#endif + if (init_llvm) + ac_init_llvm_once(); + + if (use_aco) { + aco_compile_shader(shader_count, shaders, &binary, info, options); + binary->info = *info; } else { - 
radv_compile_nir_shader(&ac_llvm, &binary, info, - shaders, shader_count, options); - } - binary->info = *info; + enum ac_target_machine_options tm_options = 0; + struct ac_llvm_compiler ac_llvm; + bool thread_compiler; + + if (options->supports_spill) + tm_options |= AC_TM_SUPPORTS_SPILL; + if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED) + tm_options |= AC_TM_SISCHED; + if (options->check_ir) + tm_options |= AC_TM_CHECK_IR; + if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) + tm_options |= AC_TM_NO_LOAD_STORE_OPT; + + thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); + radv_init_llvm_compiler(&ac_llvm, + thread_compiler, + chip_family, tm_options, + options->wave_size); + + if (gs_copy_shader) { + assert(shader_count == 1); + radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, + info, options); + } else { + radv_compile_nir_shader(&ac_llvm, &binary, info, + shaders, shader_count, options); + } - radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); + binary->info = *info; + radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); + } struct radv_shader_variant *variant = radv_shader_variant_create(device, binary, keep_shader_info); @@ -1143,6 +1195,7 @@ radv_shader_variant_compile(struct radv_device *device, const struct radv_shader_variant_key *key, struct radv_shader_info *info, bool keep_shader_info, + bool use_aco, struct radv_shader_binary **binary_out) { struct radv_nir_compiler_options options = {0}; @@ -1156,7 +1209,7 @@ radv_shader_variant_compile(struct radv_device *device, options.robust_buffer_access = device->robust_buffer_access; return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info, - &options, false, keep_shader_info, binary_out); + &options, false, keep_shader_info, use_aco, binary_out); } struct radv_shader_variant * @@ -1172,7 +1225,7 @@ radv_create_gs_copy_shader(struct radv_device *device, 
options.key.has_multiview_view_index = multiview; return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX, - info, &options, true, keep_shader_info, binary_out); + info, &options, true, keep_shader_info, false, binary_out); } void diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 57f9d160ed0..459ff863a91 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -333,6 +333,7 @@ struct radv_shader_binary_legacy { struct radv_shader_binary base; struct ac_shader_config config; unsigned code_size; + unsigned exec_size; unsigned llvm_ir_size; unsigned disasm_size; @@ -390,7 +391,8 @@ radv_shader_compile_to_nir(struct radv_device *device, gl_shader_stage stage, const VkSpecializationInfo *spec_info, const VkPipelineCreateFlags flags, - const struct radv_pipeline_layout *layout); + const struct radv_pipeline_layout *layout, + bool use_aco); void * radv_alloc_shader_memory(struct radv_device *device, @@ -412,6 +414,7 @@ radv_shader_variant_compile(struct radv_device *device, const struct radv_shader_variant_key *key, struct radv_shader_info *info, bool keep_shader_info, + bool use_aco, struct radv_shader_binary **binary_out); struct radv_shader_variant * -- 2.30.2