aco: Implement control_barrier for tessellation control shaders.
authorTimur Kristóf <timur.kristof@gmail.com>
Wed, 26 Feb 2020 16:41:04 +0000 (17:41 +0100)
committerMarge Bot <eric+marge@anholt.net>
Wed, 11 Mar 2020 08:34:10 +0000 (08:34 +0000)
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3964>

src/amd/compiler/aco_instruction_selection.cpp

index 0be69aa47e81bd7fefc9797d2dd5a5cbcbb7018d..069f3d24f62576d10196349ca80c5ec30cca1d40 100644 (file)
@@ -6315,10 +6315,31 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       visit_get_buffer_size(ctx, instr);
       break;
    case nir_intrinsic_control_barrier: {
-      unsigned* bsize = ctx->program->info->cs.block_size;
-      unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];
-      if (workgroup_size > ctx->program->wave_size)
+      if (ctx->program->chip_class == GFX6 && ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+         /* GFX6 only (thanks to a hw bug workaround):
+          * The real barrier instruction isn’t needed, because an entire patch
+          * always fits into a single wave.
+          */
+         break;
+      }
+
+      if (ctx->shader->info.stage == MESA_SHADER_COMPUTE) {
+         unsigned* bsize = ctx->program->info->cs.block_size;
+         unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];
+         if (workgroup_size > ctx->program->wave_size)
+            bld.sopp(aco_opcode::s_barrier);
+      } else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+         /* For each patch provided during rendering, n​ TCS shader invocations will be processed,
+          * where n​ is the number of vertices in the output patch.
+          */
+         unsigned workgroup_size = ctx->tcs_num_patches * ctx->shader->info.tess.tcs_vertices_out;
+         if (workgroup_size > ctx->program->wave_size)
+            bld.sopp(aco_opcode::s_barrier);
+      } else {
+         /* We don't know the workgroup size, so always emit the s_barrier. */
          bld.sopp(aco_opcode::s_barrier);
+      }
+
       break;
    }
    case nir_intrinsic_group_memory_barrier: