From 0f2c7341e8fc0ea5bb219a24a7120bd4c79bd3d6 Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Mon, 5 Mar 2018 11:13:11 +1100 Subject: [PATCH] ac/radv: move lower_indirect_derefs() to ac_nir_to_llvm.c Until llvm handles indirects better we will need to use these workarounds in the radeonsi backend also. Reviewed-by: Bas Nieuwenhuizen --- src/amd/common/ac_nir_to_llvm.c | 37 +++++++++++++++++++++++++++++ src/amd/common/ac_nir_to_llvm.h | 2 ++ src/amd/vulkan/radv_pipeline.c | 8 +++---- src/amd/vulkan/radv_shader.c | 41 +-------------------------------- src/amd/vulkan/radv_shader.h | 4 ---- 5 files changed, 44 insertions(+), 48 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 0ffcd75c3ac..40ddf289742 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -7312,3 +7312,40 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, MESA_SHADER_VERTEX, dump_shader, options->supports_spill); } + +void +ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) +{ + /* While it would be nice not to have this flag, we are constrained + * by the reality that LLVM 5.0 doesn't have working VGPR indexing + * on GFX9. + */ + bool llvm_has_working_vgpr_indexing = chip_class <= VI; + + /* TODO: Indirect indexing of GS inputs is unimplemented. + * + * TCS and TES load inputs directly from LDS or offchip memory, so + * indirect indexing is trivial. + */ + nir_variable_mode indirect_mask = 0; + if (nir->info.stage == MESA_SHADER_GEOMETRY || + (nir->info.stage != MESA_SHADER_TESS_CTRL && + nir->info.stage != MESA_SHADER_TESS_EVAL && + !llvm_has_working_vgpr_indexing)) { + indirect_mask |= nir_var_shader_in; + } + if (!llvm_has_working_vgpr_indexing && + nir->info.stage != MESA_SHADER_TESS_CTRL) + indirect_mask |= nir_var_shader_out; + + /* TODO: We shouldn't need to do this, however LLVM isn't currently + * smart enough to handle indirects without causing excess spilling + * causing the gpu to hang. + * + * See the following thread for more details of the problem: + * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html + */ + indirect_mask |= nir_var_local; + + nir_lower_indirect_derefs(nir, indirect_mask); +} diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index 766acec6ed3..eea393a9c24 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -229,6 +229,8 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, const struct ac_nir_compiler_options *options, bool dump_shader); +void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); + void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, struct nir_shader *nir); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 6ad0b486f10..f8f09a7e166 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1527,14 +1527,14 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders) if (progress) { if (nir_lower_global_vars_to_local(ordered_shaders[i])) { - radv_lower_indirect_derefs(ordered_shaders[i], - pipeline->device->physical_device); + ac_lower_indirect_derefs(ordered_shaders[i], + pipeline->device->physical_device->rad_info.chip_class); } radv_optimize_nir(ordered_shaders[i]); if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) { - radv_lower_indirect_derefs(ordered_shaders[i - 1], - pipeline->device->physical_device); + ac_lower_indirect_derefs(ordered_shaders[i - 1], + pipeline->device->physical_device->rad_info.chip_class); } radv_optimize_nir(ordered_shaders[i - 1]); } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 171802eede7..e1de01f5511 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -115,45 +115,6 @@ void radv_DestroyShaderModule( vk_free2(&device->alloc, pAllocator, module); } -bool -radv_lower_indirect_derefs(struct nir_shader *nir, - struct radv_physical_device *device) -{ - /* While it would be nice not to have this flag, we are constrained - * by the reality that LLVM 5.0 doesn't have working VGPR indexing - * on GFX9. - */ - bool llvm_has_working_vgpr_indexing = - device->rad_info.chip_class <= VI; - - /* TODO: Indirect indexing of GS inputs is unimplemented. - * - * TCS and TES load inputs directly from LDS or offchip memory, so - * indirect indexing is trivial. - */ - nir_variable_mode indirect_mask = 0; - if (nir->info.stage == MESA_SHADER_GEOMETRY || - (nir->info.stage != MESA_SHADER_TESS_CTRL && - nir->info.stage != MESA_SHADER_TESS_EVAL && - !llvm_has_working_vgpr_indexing)) { - indirect_mask |= nir_var_shader_in; - } - if (!llvm_has_working_vgpr_indexing && - nir->info.stage != MESA_SHADER_TESS_CTRL) - indirect_mask |= nir_var_shader_out; - - /* TODO: We shouldn't need to do this, however LLVM isn't currently - * smart enough to handle indirects without causing excess spilling - * causing the gpu to hang. - * - * See the following thread for more details of the problem: - * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html - */ - indirect_mask |= nir_var_local; - - return nir_lower_indirect_derefs(nir, indirect_mask); -} - void radv_optimize_nir(struct nir_shader *shader) { @@ -304,7 +265,7 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_lower_var_copies(nir); nir_lower_global_vars_to_local(nir); nir_remove_dead_variables(nir, nir_var_local); - radv_lower_indirect_derefs(nir, device->physical_device); + ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class); radv_optimize_nir(nir); return nir; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 31fe7397711..b07f8a89e74 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -104,10 +104,6 @@ void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant); -bool -radv_lower_indirect_derefs(struct nir_shader *nir, - struct radv_physical_device *device); - const char * radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage); -- 2.30.2