From: Rhys Perry Date: Wed, 13 May 2020 15:05:46 +0000 (+0100) Subject: aco: improve workgroup-scope and lower vmem/smem barriers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3d9eb17d5d037d09479f35b8cd919ea158634d48;p=mesa.git aco: improve workgroup-scope and lower vmem/smem barriers No fossil-db changes on Navi. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index fa2b5234d2e..466158f81f3 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -448,7 +448,7 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic { wait_imm imm; sync_scope subgroup_scope = ctx.program->workgroup_size <= ctx.program->wave_size ? scope_workgroup : scope_subgroup; - if (sync.semantics & semantics) { + if ((sync.semantics & semantics) && sync.scope > subgroup_scope) { unsigned storage = sync.storage; while (storage) { unsigned idx = u_bit_scan(&storage); @@ -460,6 +460,10 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic if (bar_scope_lds <= subgroup_scope) events &= ~event_lds; + /* in non-WGP, the L1/L0 cache keeps all memory operations in-order for the same workgroup */ + if (ctx.chip_class < GFX10 && sync.scope <= scope_workgroup) + events &= ~(event_vmem | event_vmem_store | event_smem); + if (events) imm.combine(ctx.barrier_imm[idx]); }