From 96a54455592646f34a2c80656bd75aef1cb00305 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 1 Aug 2019 10:43:44 +0200 Subject: [PATCH] radv/gfx10: use the correct target machine for Wave32 Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_llvm_helper.cpp | 30 +++++++++++++++++++++-------- src/amd/vulkan/radv_shader.c | 3 ++- src/amd/vulkan/radv_shader_helper.h | 3 ++- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp index 2b14ddcf184..612548e4219 100644 --- a/src/amd/vulkan/radv_llvm_helper.cpp +++ b/src/amd/vulkan/radv_llvm_helper.cpp @@ -28,8 +28,10 @@ class radv_llvm_per_thread_info { public: radv_llvm_per_thread_info(enum radeon_family arg_family, - enum ac_target_machine_options arg_tm_options) - : family(arg_family), tm_options(arg_tm_options), passes(NULL) {} + enum ac_target_machine_options arg_tm_options, + unsigned arg_wave_size) + : family(arg_family), tm_options(arg_tm_options), + wave_size(arg_wave_size), passes(NULL), passes_wave32(NULL) {} ~radv_llvm_per_thread_info() { @@ -47,19 +49,28 @@ public: if (!passes) return false; + if (llvm_info.tm_wave32) { + passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32); + if (!passes_wave32) + return false; + } + return true; } bool compile_to_memory_buffer(LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size) { - return ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size); + struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes; + return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size); } bool is_same(enum radeon_family arg_family, - enum ac_target_machine_options arg_tm_options) { + enum ac_target_machine_options arg_tm_options, + unsigned arg_wave_size) { if (arg_family == family && - arg_tm_options == tm_options) + arg_tm_options == tm_options && + arg_wave_size == wave_size) return true; return false; } @@ -67,7 +78,9 @@ public: private: enum radeon_family family; enum ac_target_machine_options tm_options; + unsigned wave_size; struct ac_compiler_passes *passes; + struct ac_compiler_passes *passes_wave32; }; /* we have to store a linked list per thread due to the possiblity of multiple gpus being required */ @@ -99,17 +112,18 @@ bool radv_compile_to_elf(struct ac_llvm_compiler *info, bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler, enum radeon_family family, - enum ac_target_machine_options tm_options) + enum ac_target_machine_options tm_options, + unsigned wave_size) { if (thread_compiler) { for (auto &I : radv_llvm_per_thread_list) { - if (I.is_same(family, tm_options)) { + if (I.is_same(family, tm_options, wave_size)) { *info = I.llvm_info; return true; } } - radv_llvm_per_thread_list.emplace_back(family, tm_options); + radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size); radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back(); if (!tinfo.init()) { diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 97fa80b348c..d0f6895f148 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1159,7 +1159,8 @@ shader_variant_compile(struct radv_device *device, radv_init_llvm_once(); radv_init_llvm_compiler(&ac_llvm, thread_compiler, - chip_family, tm_options); + chip_family, tm_options, + radv_get_shader_wave_size(device->physical_device, stage)); if (gs_copy_shader) { assert(shader_count == 1); radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, diff --git a/src/amd/vulkan/radv_shader_helper.h b/src/amd/vulkan/radv_shader_helper.h index d9dace0b495..c64d2df676b 100644 --- a/src/amd/vulkan/radv_shader_helper.h +++ b/src/amd/vulkan/radv_shader_helper.h @@ -29,7 +29,8 @@ extern "C" { bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler, enum radeon_family family, - enum ac_target_machine_options tm_options); + enum ac_target_machine_options tm_options, + unsigned wave_size); void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler); -- 2.30.2