aco: improve workgroup-scope and lower vmem/smem barriers
authorRhys Perry <pendingchaos02@gmail.com>
Wed, 13 May 2020 15:05:46 +0000 (16:05 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 28 Jul 2020 16:56:34 +0000 (16:56 +0000)
No fossil-db changes on Navi.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4905>

src/amd/compiler/aco_insert_waitcnt.cpp

index fa2b5234d2e214ad33009914cffaa072a066d111..466158f81f34fd1e141892c70ddd5ffa623cf09a 100644 (file)
@@ -448,7 +448,7 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic
 {
    wait_imm imm;
    sync_scope subgroup_scope = ctx.program->workgroup_size <= ctx.program->wave_size ? scope_workgroup : scope_subgroup;
-   if (sync.semantics & semantics) {
+   if ((sync.semantics & semantics) && sync.scope > subgroup_scope) {
       unsigned storage = sync.storage;
       while (storage) {
          unsigned idx = u_bit_scan(&storage);
@@ -460,6 +460,10 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic
          if (bar_scope_lds <= subgroup_scope)
             events &= ~event_lds;
 
+         /* in non-WGP, the L1/L0 cache keeps all memory operations in-order for the same workgroup */
+         if (ctx.chip_class < GFX10 && sync.scope <= scope_workgroup)
+            events &= ~(event_vmem | event_vmem_store | event_smem);
+
          if (events)
             imm.combine(ctx.barrier_imm[idx]);
       }