From 07250a92daf7240f9239504b9f141f28b302e3d6 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 18 Jun 2020 14:45:31 +0100 Subject: [PATCH] aco: implement subgroup shader_clock on GFX10.3 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 17 ++++++++++++----- src/amd/compiler/aco_opt_value_numbering.cpp | 2 ++ src/amd/compiler/aco_scheduler.cpp | 3 ++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 3e27759cc0c..a3b47d6e98a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8155,11 +8155,18 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) break; } case nir_intrinsic_shader_clock: { - aco_opcode opcode = - nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ? - aco_opcode::s_memrealtime : aco_opcode::s_memtime; - bld.smem(opcode, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), memory_sync_info(0, semantic_volatile)); - emit_split_vector(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 2); + Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); + if (nir_intrinsic_memory_scope(instr) == NIR_SCOPE_SUBGROUP && ctx->options->chip_class >= GFX10_3) { + /* "((size - 1) << 11) | register" (SHADER_CYCLES is encoded as register 29) */ + Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand(0u)); + } else { + aco_opcode opcode = + nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ? 
+ aco_opcode::s_memrealtime : aco_opcode::s_memtime; + bld.smem(opcode, Definition(dst), memory_sync_info(0, semantic_volatile)); + } + emit_split_vector(ctx, dst, 2); break; } case nir_intrinsic_load_vertex_id_zero_base: { diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index ede60f4775e..11aad20a0b0 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -219,6 +219,8 @@ struct InstrPred { switch (a->format) { case Format::SOPK: { + if (a->opcode == aco_opcode::s_getreg_b32) + return false; SOPK_instruction* aK = static_cast<SOPK_instruction*>(a); SOPK_instruction* bK = static_cast<SOPK_instruction*>(b); return aK->imm == bK->imm; diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 40941e4c539..52b64f02116 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -459,7 +459,8 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool /* don't move non-reorderable instructions */ if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime || - instr->opcode == aco_opcode::s_setprio) + instr->opcode == aco_opcode::s_setprio || + instr->opcode == aco_opcode::s_getreg_b32) return hazard_fail_unreorderable; memory_event_set instr_set; -- 2.30.2