radv/gfx10: do not always execute a barrier before the second shader
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 17 Jul 2019 13:43:53 +0000 (15:43 +0200)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 18 Jul 2019 08:06:34 +0000 (10:06 +0200)
With NGG, empty waves may still be required to export data.

This fixes dEQP-VK.ycbcr.format.*_unorm.geometry_*.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_nir_to_llvm.c

index 3e18303879ec230520cda3f9d1619f4de24317ed..7e623414adc73ba951b9c57bafb17b3f32bd490c 100644 (file)
@@ -4448,8 +4448,37 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                        declare_esgs_ring(&ctx);
                }
 
-               if (i)
+               bool nested_barrier = false;
+
+               if (i) {
+                       if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY &&
+                           ctx.options->key.vs_common_out.as_ngg) {
+                               nested_barrier = false;
+                       } else {
+                               nested_barrier = true;
+                       }
+               }
+
+               if (nested_barrier) {
+                       /* Execute a barrier before the second shader in
+                        * a merged shader.
+                        *
+                        * Execute the barrier inside the conditional block,
+                        * so that empty waves can jump directly to s_endpgm,
+                        * which will also signal the barrier.
+                        *
+                        * This is possible in gfx9, because an empty wave
+                        * for the second shader does not participate in
+                        * the epilogue. With NGG, empty waves may still
+                        * be required to export data (e.g. GS output vertices),
+                        * so we cannot let them exit early.
+                        *
+                        * If the shader is TCS and the TCS epilog is present
+                        * and contains a barrier, it will wait there and then
+                        * reach s_endpgm.
+                       */
                        ac_emit_barrier(&ctx.ac, ctx.stage);
+               }
 
                nir_foreach_variable(variable, &shaders[i]->outputs)
                        scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);