radv: adjust loop unrolling heuristics for int64
author Rhys Perry <pendingchaos02@gmail.com>
Tue, 15 Oct 2019 19:43:39 +0000 (20:43 +0100)
committer Rhys Perry <pendingchaos02@gmail.com>
Thu, 7 Nov 2019 23:29:12 +0000 (23:29 +0000)
In particular, increase the cost of 64-bit integer division.

Fixes huge shaders with dEQP-VK.spirv_assembly.type.scalar.i64.mod_geom.
With ACO used for GS, this test creates shaders requiring a branch with a
>32767 dword offset.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/vulkan/radv_shader.c

index 8a04e719f0bf310ab0f716b4a86c411e2c278c75..2c349635799dc8e91bd658cc3cd7361a1f07af74 100644 (file)
@@ -1337,13 +1337,7 @@ setup_isel_context(Program* program,
 
       /* lower ALU operations */
       // TODO: implement logic64 in aco, it's more effective for sgprs
-      nir_lower_int64(nir, (nir_lower_int64_options) (nir_lower_imul64 |
-                                                      nir_lower_imul_high64 |
-                                                      nir_lower_imul_2x32_64 |
-                                                      nir_lower_divmod64 |
-                                                      nir_lower_logic64 |
-                                                      nir_lower_minmax64 |
-                                                      nir_lower_iabs64));
+      nir_lower_int64(nir, nir->options->lower_int64_options);
 
       nir_opt_idiv_const(nir, 32);
       nir_lower_idiv(nir, nir_lower_idiv_precise);
index 4b48fd3dd46d0e92e9b7833355fe1df283c4ba79..ad022b24ffe0e296db98195c23a236df8c3d8687 100644 (file)
@@ -81,6 +81,14 @@ static const struct nir_shader_compiler_options nir_options_llvm = {
        .lower_rotate = true,
        .max_unroll_iterations = 32,
        .use_interpolated_input_intrinsics = true,
+       /* nir_lower_int64() isn't actually called for the LLVM backend, but
+        * this helps the loop unrolling heuristics. */
+       .lower_int64_options = nir_lower_imul64 |
+                               nir_lower_imul_high64 |
+                               nir_lower_imul_2x32_64 |
+                               nir_lower_divmod64 |
+                               nir_lower_minmax64 |
+                               nir_lower_iabs64,
 };
 
 static const struct nir_shader_compiler_options nir_options_aco = {
@@ -111,6 +119,13 @@ static const struct nir_shader_compiler_options nir_options_aco = {
        .lower_rotate = true,
        .max_unroll_iterations = 32,
        .use_interpolated_input_intrinsics = true,
+       .lower_int64_options = nir_lower_imul64 |
+                               nir_lower_imul_high64 |
+                               nir_lower_imul_2x32_64 |
+                               nir_lower_divmod64 |
+                               nir_lower_logic64 |
+                               nir_lower_minmax64 |
+                               nir_lower_iabs64,
 };
 
 bool