aco: implement subgroup shader_clock on GFX10.3
author Rhys Perry <pendingchaos02@gmail.com>
Thu, 18 Jun 2020 13:45:31 +0000 (14:45 +0100)
committer Rhys Perry <pendingchaos02@gmail.com>
Tue, 4 Aug 2020 19:39:33 +0000 (20:39 +0100)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5546>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_opt_value_numbering.cpp
src/amd/compiler/aco_scheduler.cpp

index 3e27759cc0c4d2e5c4813dabfc1ba2b396916c11..a3b47d6e98a16b143c9d72d56966dfcc087f5fcc 100644 (file)
@@ -8155,11 +8155,18 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       break;
    }
    case nir_intrinsic_shader_clock: {
-      aco_opcode opcode =
-         nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ?
-            aco_opcode::s_memrealtime : aco_opcode::s_memtime;
-      bld.smem(opcode, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), memory_sync_info(0, semantic_volatile));
-      emit_split_vector(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 2);
+      Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
+      if (nir_intrinsic_memory_scope(instr) == NIR_SCOPE_SUBGROUP && ctx->options->chip_class >= GFX10_3) {
+         /* "((size - 1) << 11) | register" (SHADER_CYCLES is encoded as register 29) */
+         Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29);
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand(0u));
+      } else {
+         aco_opcode opcode =
+            nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ?
+               aco_opcode::s_memrealtime : aco_opcode::s_memtime;
+         bld.smem(opcode, Definition(dst), memory_sync_info(0, semantic_volatile));
+      }
+      emit_split_vector(ctx, dst, 2);
       break;
    }
    case nir_intrinsic_load_vertex_id_zero_base: {
index ede60f4775e777f52c99e3023c676a642e990443..11aad20a0b09b808d5df31df20dbee558ab68945 100644 (file)
@@ -219,6 +219,8 @@ struct InstrPred {
 
       switch (a->format) {
          case Format::SOPK: {
+            if (a->opcode == aco_opcode::s_getreg_b32)
+               return false;
             SOPK_instruction* aK = static_cast<SOPK_instruction*>(a);
             SOPK_instruction* bK = static_cast<SOPK_instruction*>(b);
             return aK->imm == bK->imm;
index 40941e4c539c6bae159bdc4c7e47b52df6756134..52b64f0211620ff7429cf4f8226c1be6619b0860 100644 (file)
@@ -459,7 +459,8 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool
    /* don't move non-reorderable instructions */
    if (instr->opcode == aco_opcode::s_memtime ||
        instr->opcode == aco_opcode::s_memrealtime ||
-       instr->opcode == aco_opcode::s_setprio)
+       instr->opcode == aco_opcode::s_setprio ||
+       instr->opcode == aco_opcode::s_getreg_b32)
       return hazard_fail_unreorderable;
 
    memory_event_set instr_set;