visit_get_buffer_size(ctx, instr);
break;
case nir_intrinsic_control_barrier: {
- unsigned* bsize = ctx->program->info->cs.block_size;
- unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];
- if (workgroup_size > ctx->program->wave_size)
+ if (ctx->program->chip_class == GFX6 && ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+ /* GFX6 only (thanks to a hw bug workaround):
+ * The real barrier instruction isn’t needed, because an entire patch
+ * always fits into a single wave.
+ */
+ break;
+ }
+
+ if (ctx->shader->info.stage == MESA_SHADER_COMPUTE) {
+ unsigned* bsize = ctx->program->info->cs.block_size;
+ unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];
+ if (workgroup_size > ctx->program->wave_size)
+ bld.sopp(aco_opcode::s_barrier);
+ } else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+ /* For each patch provided during rendering, n TCS shader invocations will be processed,
+ * where n is the number of vertices in the output patch.
+ */
+ unsigned workgroup_size = ctx->tcs_num_patches * ctx->shader->info.tess.tcs_vertices_out;
+ if (workgroup_size > ctx->program->wave_size)
+ bld.sopp(aco_opcode::s_barrier);
+ } else {
+ /* We don't know the workgroup size, so always emit the s_barrier. */
bld.sopp(aco_opcode::s_barrier);
+ }
+
break;
}
case nir_intrinsic_group_memory_barrier: