From 7cf1dcf22db7a0b23a02f5ed42f917ba19d0013f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Tue, 8 Oct 2019 14:43:43 +0200 Subject: [PATCH] aco: Support subvector loops in aco_assembler. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit These are currently not used, but could be useful later. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 7 +++++++ src/amd/compiler/aco_assembler.cpp | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 990eb62baec..585c4e60d89 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -109,6 +109,13 @@ Stores and atomics always bypass the L1 cache, so they don't support the DLC bit and it shouldn't be set in these cases. Setting the DLC for these cases can result in graphical glitches. +## RDNA subvector mode + +The documentation of S_SUBVECTOR_LOOP_BEGIN and S_SUBVECTOR_LOOP_END is not clear +on what sort of addressing should be used, but it states that each instruction +"is equivalent to an S_CBRANCH with extra math", so ACO handles the subvector +loop according to the S_CBRANCH documentation. 
+ # Hardware Bugs ## SMEM corrupts VCCZ on SI/CI diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index fcad107f34c..73432a790ef 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -20,6 +20,8 @@ struct asm_context { else if (chip_class == GFX10) opcode = &instr_info.opcode_gfx10[0]; } + + int subvector_begin_pos = -1; }; void emit_instruction(asm_context& ctx, std::vector& out, Instruction* instr) @@ -80,6 +82,22 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::SOPK: { + SOPK_instruction *sopk = static_cast(instr); + + if (instr->opcode == aco_opcode::s_subvector_loop_begin) { + assert(ctx.chip_class >= GFX10); + assert(ctx.subvector_begin_pos == -1); + ctx.subvector_begin_pos = out.size(); + } else if (instr->opcode == aco_opcode::s_subvector_loop_end) { + assert(ctx.chip_class >= GFX10); + assert(ctx.subvector_begin_pos != -1); + /* Adjust s_subvector_loop_begin instruction to the address after the end */ + out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos); + /* Adjust s_subvector_loop_end instruction to the address after the beginning */ + sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size()); + ctx.subvector_begin_pos = -1; + } + uint32_t encoding = (0b1011 << 28); encoding |= opcode << 23; encoding |= @@ -87,7 +105,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* instr->definitions[0].physReg() << 16 : !instr->operands.empty() && !(instr->operands[0].physReg() == scc) ? instr->operands[0].physReg() << 16 : 0; - encoding |= static_cast(instr)->imm; + encoding |= sopk->imm; out.push_back(encoding); break; } -- 2.30.2