From 3d9eb17d5d037d09479f35b8cd919ea158634d48 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 13 May 2020 16:05:46 +0100 Subject: [PATCH] aco: improve workgroup-scope and lower vmem/smem barriers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit No fossil-db changes on Navi. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_insert_waitcnt.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index fa2b5234d2e..466158f81f3 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -448,7 +448,7 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic { wait_imm imm; sync_scope subgroup_scope = ctx.program->workgroup_size <= ctx.program->wave_size ? scope_workgroup : scope_subgroup; - if (sync.semantics & semantics) { + if ((sync.semantics & semantics) && sync.scope > subgroup_scope) { unsigned storage = sync.storage; while (storage) { unsigned idx = u_bit_scan(&storage); @@ -460,6 +460,10 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic if (bar_scope_lds <= subgroup_scope) events &= ~event_lds; + /* in non-WGP, the L1/L0 cache keeps all memory operations in-order for the same workgroup */ + if (ctx.chip_class < GFX10 && sync.scope <= scope_workgroup) + events &= ~(event_vmem | event_vmem_store | event_smem); + if (events) imm.combine(ctx.barrier_imm[idx]); } -- 2.30.2