radv/aco,aco: use scoped barriers
author Rhys Perry <pendingchaos02@gmail.com>
Wed, 13 May 2020 15:12:39 +0000 (16:12 +0100)
committer Marge Bot <eric+marge@anholt.net>
Tue, 28 Jul 2020 16:56:34 +0000 (16:56 +0000)
fossil-db (Navi):
Totals from 109 (0.08% of 132058) affected shaders:
SGPRs: 5416 -> 5424 (+0.15%)
CodeSize: 460500 -> 460508 (+0.00%); split: -0.07%, +0.07%
Instrs: 87278 -> 87272 (-0.01%); split: -0.09%, +0.09%
Cycles: 2241996 -> 2241852 (-0.01%); split: -0.04%, +0.04%
VMEM: 33868 -> 35539 (+4.93%); split: +5.14%, -0.20%
SMEM: 7183 -> 7184 (+0.01%); split: +0.36%, -0.35%
VClause: 1857 -> 1882 (+1.35%)
SClause: 2052 -> 2055 (+0.15%); split: -0.05%, +0.19%
Copies: 6377 -> 6380 (+0.05%); split: -0.02%, +0.06%
PreSGPRs: 3391 -> 3392 (+0.03%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4905>
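
A note for readers skimming the diff: the heart of the change is a direct
mapping from NIR barrier scopes to ACO sync scopes, plus a new case that
folds the execution scope, memory scope, semantics and modes of
nir_intrinsic_scoped_barrier into a single p_barrier. Below is a minimal
standalone sketch of the scope mapping; the enums are abbreviated stand-ins
for the real definitions in nir.h and aco_ir.h (values illustrative), and
only the mapping logic mirrors the diff.

/* sketch.c -- illustrative only, not part of the commit */
#include <assert.h>

enum nir_scope {
   NIR_SCOPE_NONE, NIR_SCOPE_INVOCATION, NIR_SCOPE_SUBGROUP,
   NIR_SCOPE_WORKGROUP, NIR_SCOPE_QUEUE_FAMILY, NIR_SCOPE_DEVICE,
};
enum sync_scope {
   scope_invocation, scope_subgroup, scope_workgroup,
   scope_queuefamily, scope_device,
};

/* NIR_SCOPE_NONE is treated like invocation scope, as in the commit. */
static enum sync_scope translate_nir_scope(enum nir_scope scope)
{
   switch (scope) {
   case NIR_SCOPE_NONE:
   case NIR_SCOPE_INVOCATION:
      return scope_invocation;
   case NIR_SCOPE_SUBGROUP:
      return scope_subgroup;
   case NIR_SCOPE_WORKGROUP:
      return scope_workgroup;
   case NIR_SCOPE_QUEUE_FAMILY:
      return scope_queuefamily;
   case NIR_SCOPE_DEVICE:
      return scope_device;
   }
   return scope_device; /* unreachable for valid input */
}

int main(void)
{
   /* e.g. a compute-shader barrier() carries workgroup scope: */
   assert(translate_nir_scope(NIR_SCOPE_WORKGROUP) == scope_workgroup);
   return 0;
}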

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_scheduler.cpp
src/amd/vulkan/radv_shader.c

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 81c50ebbc9c8fc3e6e4409501255428ad6eeb194..5b93d3fcf069bf074e0c42df747654a364389de0 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6700,6 +6700,24 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
    }
 }
 
+sync_scope translate_nir_scope(nir_scope scope)
+{
+   switch (scope) {
+   case NIR_SCOPE_NONE:
+   case NIR_SCOPE_INVOCATION:
+      return scope_invocation;
+   case NIR_SCOPE_SUBGROUP:
+      return scope_subgroup;
+   case NIR_SCOPE_WORKGROUP:
+      return scope_workgroup;
+   case NIR_SCOPE_QUEUE_FAMILY:
+      return scope_queuefamily;
+   case NIR_SCOPE_DEVICE:
+      return scope_device;
+   }
+   unreachable("invalid scope");
+}
+
 void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
    Builder bld(ctx->program, ctx->block);
    storage_class all_mem = (storage_class)(storage_buffer | storage_image | storage_atomic_counter | storage_shared);
@@ -6713,20 +6731,45 @@ void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
                      memory_sync_info(all_mem, semantic_acqrel, scope_device));
          break;
       case nir_intrinsic_memory_barrier_buffer:
+         bld.barrier(aco_opcode::p_barrier,
+                     memory_sync_info((storage_class)storage_buffer, semantic_acqrel, scope_device));
+         break;
       case nir_intrinsic_memory_barrier_image:
-         /* since NIR splits barriers, we have to unify buffer and image barriers
-          * for now so dEQP-VK.memory_model.message_passing.core11.u32.coherent.
-          * fence_fence.atomicwrite.device.payload_nonlocal.buffer.guard_nonlocal.image.comp
-          * passes
-          */
          bld.barrier(aco_opcode::p_barrier,
-                     memory_sync_info((storage_class)(storage_buffer | storage_image), semantic_acqrel, scope_device));
+                     memory_sync_info((storage_class)storage_image, semantic_acqrel, scope_device));
          break;
       case nir_intrinsic_memory_barrier_tcs_patch:
       case nir_intrinsic_memory_barrier_shared:
          bld.barrier(aco_opcode::p_barrier,
                      memory_sync_info(storage_shared, semantic_acqrel, scope_workgroup));
          break;
+      case nir_intrinsic_scoped_barrier: {
+         unsigned semantics = 0;
+         unsigned storage = 0;
+         sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr));
+         sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr));
+
+         unsigned nir_storage = nir_intrinsic_memory_modes(instr);
+         if (nir_storage & (nir_var_mem_ssbo | nir_var_mem_global))
+            storage |= storage_buffer | storage_image; //TODO: split this when NIR gets nir_var_mem_image
+         if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && (nir_storage & nir_var_mem_shared))
+            storage |= storage_shared;
+         if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && (nir_storage & nir_var_shader_out))
+            storage |= storage_shared;
+
+         unsigned nir_semantics = nir_intrinsic_memory_semantics(instr);
+         if (nir_semantics & NIR_MEMORY_ACQUIRE)
+            semantics |= semantic_acquire;
+         if (nir_semantics & NIR_MEMORY_RELEASE)
+            semantics |= semantic_release;
+
+         assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+         bld.barrier(aco_opcode::p_barrier,
+                     memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
+                     exec_scope);
+         break;
+      }
       default:
          unreachable("Unimplemented memory barrier intrinsic");
          break;
@@ -7568,6 +7611,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
    case nir_intrinsic_memory_barrier_buffer:
    case nir_intrinsic_memory_barrier_image:
    case nir_intrinsic_memory_barrier_shared:
+   case nir_intrinsic_scoped_barrier:
       emit_memory_barrier(ctx, instr);
       break;
    case nir_intrinsic_load_num_work_groups: {
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 261d58ceb03e19daee85c2b0069d6eec27da219b..102f0bf3ee6ce7ffe2dc4d7f49c43bfa75e098bd 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -494,11 +494,6 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool
    if (first->bar_classes && second->bar_classes)
       return hazard_fail_barrier;
 
-   /* Don't move memory loads/stores to before control barriers. This is to make
-    * memory barriers followed by control barriers work. */
-   if (first->has_control_barrier && (second->access_atomic | second->access_relaxed))
-      return hazard_fail_barrier;
-
    /* don't move memory loads/stores past potentially aliasing loads/stores */
    unsigned aliasing_storage = instr->format == Format::SMEM ?
                                query->aliasing_storage_smem :
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index adc54c0c589fe79ca4bbf343678bdf04077fa14e..2c5a89b255924db9b7473c5d3dbd336b77b58ec4 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -130,6 +130,7 @@ static const struct nir_shader_compiler_options nir_options_aco = {
                                 nir_lower_dsqrt |
                                 nir_lower_drsq |
                                 nir_lower_ddiv,
+       .use_scoped_barrier = true,
 };
 
 bool
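
For completeness, an illustrative sketch (not from the commit) of the opt-in
from a backend's point of view: any driver requests scoped barriers the same
way radv does above, by setting use_scoped_barrier in its
nir_shader_compiler_options. The flag name comes straight from the diff;
everything else here is assumed, including a Mesa source tree providing
nir.h.

/* illustrative only -- assumes a Mesa build tree for nir.h */
#include "nir.h"

static const struct nir_shader_compiler_options example_backend_options = {
   /* ask NIR frontends for nir_intrinsic_scoped_barrier instead of the
    * split legacy barrier intrinsics */
   .use_scoped_barrier = true,
};

With the flag set, frontends that understand the Vulkan memory model (for
example spirv_to_nir) can emit one scoped barrier per OpControlBarrier or
OpMemoryBarrier, which the new instruction-selection case above lowers to a
single p_barrier carrying precise storage, semantics and scope information.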