radv: use nir_opt_find_array_copies()
author	Timothy Arceri <tarceri@itsqueeze.com>
Wed, 17 Oct 2018 22:42:17 +0000 (09:42 +1100)
committer	Timothy Arceri <tarceri@itsqueeze.com>
Thu, 18 Oct 2018 04:04:09 +0000 (15:04 +1100)
Totals from affected shaders:
SGPRS: 1112 -> 1112 (0.00 %)
VGPRS: 1492 -> 1196 (-19.84 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 112172 -> 101316 (-9.68 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 93 -> 98 (5.38 %)
Wait states: 0 -> 0 (0.00 %)

All affected shaders are from "Batman: Arkham City" over DXVK.

The pass detects that the temporary array created by DXVK for
storing TCS inputs is just a copy of the input arrays. This lets
us avoid copying all of the input data and then lowering the
indirect accesses on the copy into if-ladders; instead we simply
keep the indirect indexing on the inputs themselves.
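
For reference, here is a minimal sketch (not part of the patch) of
the pass ordering this change sets up in radv_shader_compile_to_nir(),
using only functions that appear in the diff below; it is simplified
and assumes the surrounding radv/NIR code rather than being standalone:

    /* First optimization round: let nir_opt_find_array_copies()
     * (re)introduce copy_deref instructions so the element-by-element
     * copy of the TCS inputs becomes a whole-array copy.
     */
    radv_optimize_nir(nir, false, true);

    /* Lower the copies it introduced; later calls to
     * radv_optimize_nir() assume copy_derefs are gone.
     */
    nir_lower_var_copies(nir);

    /* Indirect lowering and the remaining optimization loop run with
     * allow_copies = false.
     */
    ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
    radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);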

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index e1d665d0ac7a89731b29992bd709a58a789d934a..8d15a048bbf231e9bcfd84ce4cf584bd1ea4f99c 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1808,13 +1808,13 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
                                ac_lower_indirect_derefs(ordered_shaders[i],
                                                         pipeline->device->physical_device->rad_info.chip_class);
                        }
-                       radv_optimize_nir(ordered_shaders[i], false);
+                       radv_optimize_nir(ordered_shaders[i], false, false);
 
                        if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
                                ac_lower_indirect_derefs(ordered_shaders[i - 1],
                                                         pipeline->device->physical_device->rad_info.chip_class);
                        }
-                       radv_optimize_nir(ordered_shaders[i - 1], false);
+                       radv_optimize_nir(ordered_shaders[i - 1], false, false);
                }
        }
 }
@@ -2073,7 +2073,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
                        if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) {
                                nir_lower_io_to_scalar_early(nir[i], mask);
-                               radv_optimize_nir(nir[i], false);
+                               radv_optimize_nir(nir[i], false, false);
                        }
                }
        }
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3b3422c8da6cf67c31a1c98d035dae8a85692f5b..52aa83d4a5a39c712349db4e70e4c1c8643f1820 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -118,7 +118,8 @@ void radv_DestroyShaderModule(
 }
 
 void
-radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
+                  bool allow_copies)
 {
         bool progress;
 
@@ -128,6 +129,15 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively)
                 NIR_PASS_V(shader, nir_lower_vars_to_ssa);
                NIR_PASS_V(shader, nir_lower_pack);
 
+               if (allow_copies) {
+                       /* Only run this pass in the first call to
+                        * radv_optimize_nir.  Later calls assume that we've
+                        * lowered away any copy_deref instructions and we
+                        * lowered away any copy_deref instructions and we
+                        * don't want to introduce any more.
+                        */
+               }
+
                NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
                NIR_PASS(progress, shader, nir_opt_dead_write_vars);
 
@@ -306,7 +316,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
        }
 
        nir_split_var_copies(nir);
-       nir_lower_var_copies(nir);
 
        nir_lower_global_vars_to_local(nir);
        nir_remove_dead_variables(nir, nir_var_local);
@@ -323,7 +332,12 @@ radv_shader_compile_to_nir(struct radv_device *device,
        nir_lower_load_const_to_scalar(nir);
 
        if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
-               radv_optimize_nir(nir, false);
+               radv_optimize_nir(nir, false, true);
+
+       /* We call nir_lower_var_copies() after the first radv_optimize_nir()
+        * to remove any copies introduced by nir_opt_find_array_copies().
+        */
+       nir_lower_var_copies(nir);
 
        /* Indirect lowering must be called after the radv_optimize_nir() loop
         * has been called at least once. Otherwise indirect lowering can
@@ -331,7 +345,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
         * considered too large for unrolling.
         */
        ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
-       radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT);
+       radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);
 
        return nir;
 }
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index c490b69f52b1d146d90ed098b21609a2bf4945a2..22423e5f99a9ec5abba689e3419d7e02e9581049 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -298,7 +298,8 @@ struct radv_shader_slab {
 };
 
 void
-radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively);
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
+                 bool allow_copies);
 
 nir_shader *
 radv_shader_compile_to_nir(struct radv_device *device,