aco: Support subvector loops in aco_assembler.

author Timur Kristóf <timur.kristof@gmail.com>

Tue, 8 Oct 2019 12:43:43 +0000 (14:43 +0200)

committer Timur Kristóf <timur.kristof@gmail.com>

Thu, 10 Oct 2019 07:57:53 +0000 (09:57 +0200)
author Timur Kristóf <timur.kristof@gmail.com>
Tue, 8 Oct 2019 12:43:43 +0000 (14:43 +0200)
committer Timur Kristóf <timur.kristof@gmail.com>
Thu, 10 Oct 2019 07:57:53 +0000 (09:57 +0200)
diff --git a/src/amd/compiler/README b/src/amd/compiler/README

index 990eb62baec20ab43c65f8393128c0e85d47ce08..585c4e60d895577a3f759f0c7390267386b9201c 100644 (file)
--- a/src/amd/compiler/README
+++ b/src/amd/compiler/README
@@ -109,6 +109,13 @@ Stores and atomics always bypass the L1 cache, so they don't support the DLC bit
  and it shouldn't be set in these cases. Setting the DLC for these cases can result
  in graphical glitches.
  
+## RDNA subvector mode
+
+The documentation of S_SUBVECTOR_LOOP_BEGIN and S_SUBVECTOR_LOOP_END does not
+clearly state what kind of addressing these instructions use, but it says that
+each one "is equivalent to an S_CBRANCH with extra math", so ACO computes the
+subvector loop branch offsets according to the S_CBRANCH documentation.
+
  # Hardware Bugs
  
  ## SMEM corrupts VCCZ on SI/CI
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp

index fcad107f34c103be549b21da8bc63b1de7ad6078..73432a790ef9bf13117b3446d93c9c397e36398a 100644 (file)
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -20,6 +20,8 @@ struct asm_context {
        else if (chip_class == GFX10)
           opcode = &instr_info.opcode_gfx10[0];
     }
+
+   int subvector_begin_pos = -1;
  };
  
  void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
@@ -80,6 +82,22 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
        break;
     }
     case Format::SOPK: {
+      SOPK_instruction *sopk = static_cast<SOPK_instruction*>(instr);
+
+      if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
+         assert(ctx.chip_class >= GFX10);
+         assert(ctx.subvector_begin_pos == -1);
+         ctx.subvector_begin_pos = out.size();
+      } else if (instr->opcode == aco_opcode::s_subvector_loop_end) {
+         assert(ctx.chip_class >= GFX10);
+         assert(ctx.subvector_begin_pos != -1);
+         /* Patch the immediate of the already emitted s_subvector_loop_begin so that it targets the address after the loop end */
+         out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
+         /* Set the immediate of s_subvector_loop_end so that it targets the address after the loop beginning */
+         sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
+         ctx.subvector_begin_pos = -1;
+      }
+
        uint32_t encoding = (0b1011 << 28);
        encoding |= opcode << 23;
        encoding |=
@@ -87,7 +105,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
           instr->definitions[0].physReg() << 16 :
           !instr->operands.empty() && !(instr->operands[0].physReg() == scc) ?
           instr->operands[0].physReg() << 16 : 0;
-      encoding |= static_cast<SOPK_instruction*>(instr)->imm;
+      encoding |= sopk->imm;
        out.push_back(encoding);
        break;
     }
author	Timur Kristóf <timur.kristof@gmail.com>
	Tue, 8 Oct 2019 12:43:43 +0000 (14:43 +0200)
committer	Timur Kristóf <timur.kristof@gmail.com>
	Thu, 10 Oct 2019 07:57:53 +0000 (09:57 +0200)
src/amd/compiler/README		patch \| blob \| history
src/amd/compiler/aco_assembler.cpp		patch \| blob \| history