From 76544f632d98c8d7462c863b783fbe9e9a2d885c Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 15 Oct 2019 20:43:39 +0100 Subject: [PATCH] radv: adjust loop unrolling heuristics for int64 In particular, increase the cost of 64-bit integer division. Fixes huge shaders with dEQP-VK.spirv_assembly.type.scalar.i64.mod_geom. With ACO used for GS, this creates shaders requiring a branch with a >32767 dword offset. Signed-off-by: Rhys Perry Reviewed-by: Bas Nieuwenhuizen --- .../compiler/aco_instruction_selection_setup.cpp | 8 +------- src/amd/vulkan/radv_shader.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 8a04e719f0b..2c349635799 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -1337,13 +1337,7 @@ setup_isel_context(Program* program, /* lower ALU operations */ // TODO: implement logic64 in aco, it's more effective for sgprs - nir_lower_int64(nir, (nir_lower_int64_options) (nir_lower_imul64 | - nir_lower_imul_high64 | - nir_lower_imul_2x32_64 | - nir_lower_divmod64 | - nir_lower_logic64 | - nir_lower_minmax64 | - nir_lower_iabs64)); + nir_lower_int64(nir, nir->options->lower_int64_options); nir_opt_idiv_const(nir, 32); nir_lower_idiv(nir, nir_lower_idiv_precise); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 4b48fd3dd46..ad022b24ffe 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -81,6 +81,14 @@ static const struct nir_shader_compiler_options nir_options_llvm = { .lower_rotate = true, .max_unroll_iterations = 32, .use_interpolated_input_intrinsics = true, + /* nir_lower_int64() isn't actually called for the LLVM backend, but + * this helps the loop unrolling heuristics. 
*/ + .lower_int64_options = nir_lower_imul64 | + nir_lower_imul_high64 | + nir_lower_imul_2x32_64 | + nir_lower_divmod64 | + nir_lower_minmax64 | + nir_lower_iabs64, }; static const struct nir_shader_compiler_options nir_options_aco = { @@ -111,6 +119,13 @@ static const struct nir_shader_compiler_options nir_options_aco = { .lower_rotate = true, .max_unroll_iterations = 32, .use_interpolated_input_intrinsics = true, + .lower_int64_options = nir_lower_imul64 | + nir_lower_imul_high64 | + nir_lower_imul_2x32_64 | + nir_lower_divmod64 | + nir_lower_logic64 | + nir_lower_minmax64 | + nir_lower_iabs64, }; bool -- 2.30.2