From: Rhys Perry
Date: Wed, 13 May 2020 15:12:39 +0000 (+0100)
Subject: radv/aco,aco: use scoped barriers
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cd392a10d056833d915ba2912e4dbe58d86bf28f;p=mesa.git

radv/aco,aco: use scoped barriers

fossil-db (Navi):
Totals from 109 (0.08% of 132058) affected shaders:
SGPRs: 5416 -> 5424 (+0.15%)
CodeSize: 460500 -> 460508 (+0.00%); split: -0.07%, +0.07%
Instrs: 87278 -> 87272 (-0.01%); split: -0.09%, +0.09%
Cycles: 2241996 -> 2241852 (-0.01%); split: -0.04%, +0.04%
VMEM: 33868 -> 35539 (+4.93%); split: +5.14%, -0.20%
SMEM: 7183 -> 7184 (+0.01%); split: +0.36%, -0.35%
VClause: 1857 -> 1882 (+1.35%)
SClause: 2052 -> 2055 (+0.15%); split: -0.05%, +0.19%
Copies: 6377 -> 6380 (+0.05%); split: -0.02%, +0.06%
PreSGPRs: 3391 -> 3392 (+0.03%)

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 81c50ebbc9c..5b93d3fcf06 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6700,6 +6700,24 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
    }
 }
 
+sync_scope translate_nir_scope(nir_scope scope)
+{
+   switch (scope) {
+   case NIR_SCOPE_NONE:
+   case NIR_SCOPE_INVOCATION:
+      return scope_invocation;
+   case NIR_SCOPE_SUBGROUP:
+      return scope_subgroup;
+   case NIR_SCOPE_WORKGROUP:
+      return scope_workgroup;
+   case NIR_SCOPE_QUEUE_FAMILY:
+      return scope_queuefamily;
+   case NIR_SCOPE_DEVICE:
+      return scope_device;
+   }
+   unreachable("invalid scope");
+}
+
 void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
    Builder bld(ctx->program, ctx->block);
    storage_class all_mem = (storage_class)(storage_buffer | storage_image | storage_atomic_counter | storage_shared);
@@ -6713,20 +6731,44 @@ void emit_memory_barrier(isel_context *ctx, nir_intrinsic_instr *instr) {
                   memory_sync_info(all_mem, semantic_acqrel, scope_device));
       break;
    case nir_intrinsic_memory_barrier_buffer:
+      bld.barrier(aco_opcode::p_barrier,
+                  memory_sync_info((storage_class)storage_buffer, semantic_acqrel, scope_device));
    case nir_intrinsic_memory_barrier_image:
-      /* since NIR splits barriers, we have to unify buffer and image barriers
-       * for now so dEQP-VK.memory_model.message_passing.core11.u32.coherent.
-       * fence_fence.atomicwrite.device.payload_nonlocal.buffer.guard_nonlocal.image.comp
-       * passes
-       */
       bld.barrier(aco_opcode::p_barrier,
-                  memory_sync_info((storage_class)(storage_buffer | storage_image), semantic_acqrel, scope_device));
+                  memory_sync_info((storage_class)storage_image, semantic_acqrel, scope_device));
       break;
    case nir_intrinsic_memory_barrier_tcs_patch:
    case nir_intrinsic_memory_barrier_shared:
       bld.barrier(aco_opcode::p_barrier,
                   memory_sync_info(storage_shared, semantic_acqrel, scope_workgroup));
       break;
+   case nir_intrinsic_scoped_barrier: {
+      unsigned semantics = 0;
+      unsigned storage = 0;
+      sync_scope mem_scope = translate_nir_scope(nir_intrinsic_memory_scope(instr));
+      sync_scope exec_scope = translate_nir_scope(nir_intrinsic_execution_scope(instr));
+
+      unsigned nir_storage = nir_intrinsic_memory_modes(instr);
+      if (nir_storage & (nir_var_mem_ssbo | nir_var_mem_global))
+         storage |= storage_buffer | storage_image; //TODO: split this when NIR gets nir_var_mem_image
+      if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && (nir_storage & nir_var_mem_shared))
+         storage |= storage_shared;
+      if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL && (nir_storage & nir_var_shader_out))
+         storage |= storage_shared;
+
+      unsigned nir_semantics = nir_intrinsic_memory_semantics(instr);
+      if (nir_semantics & NIR_MEMORY_ACQUIRE)
+         semantics |= semantic_acquire;
+      if (nir_semantics & NIR_MEMORY_RELEASE)
+         semantics |= semantic_release;
+
+      assert(!(nir_semantics & (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+      bld.barrier(aco_opcode::p_barrier,
+                  memory_sync_info((storage_class)storage, (memory_semantics)semantics, mem_scope),
+                  exec_scope);
+      break;
+   }
    default:
       unreachable("Unimplemented memory barrier intrinsic");
       break;
@@ -7568,6 +7610,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
    case nir_intrinsic_memory_barrier_buffer:
    case nir_intrinsic_memory_barrier_image:
    case nir_intrinsic_memory_barrier_shared:
+   case nir_intrinsic_scoped_barrier:
       emit_memory_barrier(ctx, instr);
       break;
    case nir_intrinsic_load_num_work_groups: {
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 261d58ceb03..102f0bf3ee6 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -494,11 +494,6 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool
    if (first->bar_classes && second->bar_classes)
       return hazard_fail_barrier;
 
-   /* Don't move memory loads/stores to before control barriers. This is to make
-    * memory barriers followed by control barriers work. */
-   if (first->has_control_barrier && (second->access_atomic | second->access_relaxed))
-      return hazard_fail_barrier;
-
    /* don't move memory loads/stores past potentially aliasing loads/stores */
    unsigned aliasing_storage = instr->format == Format::SMEM ?
                                query->aliasing_storage_smem :
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index adc54c0c589..2c5a89b2559 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -130,6 +130,7 @@ static const struct nir_shader_compiler_options nir_options_aco = {
                              nir_lower_dsqrt |
                              nir_lower_drsq |
                              nir_lower_ddiv,
+   .use_scoped_barrier = true,
 };
 
 bool
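
For illustration, the new nir_intrinsic_scoped_barrier path boils down to a
small translation: NIR's scope enum maps one-to-one onto ACO's sync_scope,
NIR variable modes select ACO storage classes, and NIR's acquire/release bits
select ACO memory semantics, all folded into a single p_barrier. The
standalone C++ sketch below mirrors that mapping outside of Mesa; the enums
are simplified stand-ins for the real NIR/ACO definitions (their values and
names here are illustrative assumptions, not the actual headers):

    // Standalone sketch (not Mesa code) of the NIR->ACO mapping this patch adds.
    #include <cassert>
    #include <cstdio>

    enum nir_scope { NIR_SCOPE_NONE, NIR_SCOPE_INVOCATION, NIR_SCOPE_SUBGROUP,
                     NIR_SCOPE_WORKGROUP, NIR_SCOPE_QUEUE_FAMILY, NIR_SCOPE_DEVICE };
    enum sync_scope { scope_invocation, scope_subgroup, scope_workgroup,
                      scope_queuefamily, scope_device };

    // subsets of the NIR semantics/modes and ACO semantics/storage bitfields
    enum { NIR_MEMORY_ACQUIRE = 1 << 0, NIR_MEMORY_RELEASE = 1 << 1 };
    enum { semantic_acquire   = 1 << 0, semantic_release   = 1 << 1 };
    enum { nir_var_mem_ssbo = 1 << 0, nir_var_mem_global = 1 << 1,
           nir_var_mem_shared = 1 << 2 };
    enum { storage_buffer = 1 << 0, storage_image = 1 << 1, storage_shared = 1 << 2 };

    static sync_scope translate_nir_scope(nir_scope scope)
    {
       switch (scope) {
       case NIR_SCOPE_NONE:
       case NIR_SCOPE_INVOCATION:   return scope_invocation;
       case NIR_SCOPE_SUBGROUP:     return scope_subgroup;
       case NIR_SCOPE_WORKGROUP:    return scope_workgroup;
       case NIR_SCOPE_QUEUE_FAMILY: return scope_queuefamily;
       case NIR_SCOPE_DEVICE:       return scope_device;
       }
       assert(!"invalid scope");
       return scope_invocation;
    }

    int main()
    {
       // A GLSL barrier() in a compute shader roughly corresponds to:
       // workgroup execution scope, workgroup memory scope, and
       // acquire+release semantics on shared memory.
       unsigned nir_semantics = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE;
       unsigned nir_modes = nir_var_mem_shared;

       unsigned semantics = 0, storage = 0;
       if (nir_semantics & NIR_MEMORY_ACQUIRE) semantics |= semantic_acquire;
       if (nir_semantics & NIR_MEMORY_RELEASE) semantics |= semantic_release;
       if (nir_modes & (nir_var_mem_ssbo | nir_var_mem_global))
          storage |= storage_buffer | storage_image; // still unified, see TODO above
       if (nir_modes & nir_var_mem_shared)
          storage |= storage_shared;                 // compute-stage case in the patch

       printf("exec_scope=%d mem_scope=%d storage=0x%x semantics=0x%x\n",
              translate_nir_scope(NIR_SCOPE_WORKGROUP),
              translate_nir_scope(NIR_SCOPE_WORKGROUP), storage, semantics);
       return 0;
    }

A plausible reading of the aco_scheduler.cpp hunk, consistent with the above:
once execution scope and memory semantics travel together in one p_barrier,
the scheduler's special case keeping loads/stores from moving above control
barriers appears redundant, since the barrier's own sync info already encodes
the required ordering.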