From 83fdb1ed3dd13228bcb761a4a4532b67a24a682b Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 10 Apr 2020 17:28:33 +0100 Subject: [PATCH] aco: add VOP3P_instruction MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The optimizer isn't yet updated to handle this, since lower_to_hw_instr will be the only user for now. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_assembler.cpp | 28 ++++++++++++++++++ src/amd/compiler/aco_ir.h | 46 +++++++++++++++++++++--------- src/amd/compiler/aco_opcodes.py | 8 +++--- src/amd/compiler/aco_print_ir.cpp | 22 +++++++++++++- 4 files changed, 85 insertions(+), 19 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index aa6402c93b4..ed2e3982976 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -554,6 +554,34 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= vop3->neg[i] << (29+i); out.push_back(encoding); + } else if (instr->format == Format::VOP3P) { + VOP3P_instruction* vop3 = static_cast<VOP3P_instruction*>(instr); + + uint32_t encoding; + if (ctx.chip_class == GFX9) { + encoding = (0b110100111 << 23); + } else if (ctx.chip_class == GFX10) { + encoding = (0b110011 << 26); + } else { + unreachable("Unknown chip_class."); + } + + encoding |= opcode << 16; + encoding |= (vop3->clamp ? 1 : 0) << 15; + encoding |= vop3->opsel_lo << 11; + encoding |= ((vop3->opsel_hi & 0x4) ?
1 : 0) << 14; /* opsel_hi bit 2 -> bit 14 of the first dword */ + for (unsigned i = 0; i < 3; i++) + encoding |= vop3->neg_hi[i] << (8+i); + encoding |= (0xFF & instr->definitions[0].physReg()); + out.push_back(encoding); + encoding = 0; + for (unsigned i = 0; i < instr->operands.size(); i++) + encoding |= instr->operands[i].physReg() << (i * 9); + encoding |= (vop3->opsel_hi & 0x3) << 27; /* opsel_hi bits 0-1 -> bits 27-28 of the second dword */ + for (unsigned i = 0; i < 3; i++) + encoding |= vop3->neg_lo[i] << (29+i); + out.push_back(encoding); + } else if (instr->isDPP()){ assert(ctx.chip_class >= GFX8); /* first emit the instruction without the DPP operand */ diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index fb0c9beb208..2d5387c1849 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -90,17 +90,17 @@ enum class Format : std::uint16_t { PSEUDO_REDUCTION = 18, /* Vector ALU Formats */ + VOP3P = 19, VOP1 = 1 << 8, VOP2 = 1 << 9, VOPC = 1 << 10, VOP3 = 1 << 11, VOP3A = 1 << 11, VOP3B = 1 << 11, - VOP3P = 1 << 12, /* Vector Parameter Interpolation Format */ - VINTRP = 1 << 13, - DPP = 1 << 14, - SDWA = 1 << 15, + VINTRP = 1 << 12, + DPP = 1 << 13, + SDWA = 1 << 14, }; enum barrier_interaction : uint8_t { @@ -755,7 +755,7 @@ struct Instruction { || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B - || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P; + || format == Format::VOP3P; } constexpr bool isSALU() const noexcept @@ -782,8 +782,7 @@ struct Instruction { constexpr bool isVOP3() const noexcept { return ((uint16_t) format & (uint16_t) Format::VOP3A) || - ((uint16_t) format & (uint16_t) Format::VOP3B) || - format == Format::VOP3P; + ((uint16_t) format & (uint16_t) Format::VOP3B); } constexpr bool isSDWA() const noexcept @@ -877,6 +876,16 @@ struct VOP3A_instruction : public Instruction { };
static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8); +struct VOP3P_instruction : public Instruction { + bool neg_lo[3]; + bool neg_hi[3]; + uint8_t opsel_lo : 3; + uint8_t opsel_hi : 3; + bool clamp : 1; + uint32_t padding : 9; +}; +static_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8); + /** * Data Parallel Primitives Format: * This format can be used for VOP1, VOP2 or VOPC instructions. @@ -1172,14 +1181,23 @@ constexpr bool Instruction::usesModifiers() const noexcept { if (isDPP() || isSDWA()) return true; - if (!isVOP3()) - return false; - const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this); - for (unsigned i = 0; i < operands.size(); i++) { - if (vop3->abs[i] || vop3->neg[i]) - return true; + + if (format == Format::VOP3P) { + const VOP3P_instruction *vop3p = static_cast<const VOP3P_instruction*>(this); + for (unsigned i = 0; i < operands.size(); i++) { + if (vop3p->neg_lo[i] || vop3p->neg_hi[i]) + return true; + } + return vop3p->opsel_lo || vop3p->opsel_hi || vop3p->clamp; + } else if (isVOP3()) { + const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this); + for (unsigned i = 0; i < operands.size(); i++) { + if (vop3->abs[i] || vop3->neg[i]) + return true; + } + return vop3->opsel || vop3->clamp || vop3->omod; } - return vop3->opsel || vop3->clamp || vop3->omod; + return false; } constexpr bool is_phi(Instruction* instr) diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index a0bc601192c..3fb755f0c7c 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -49,15 +49,15 @@ class Format(Enum): PSEUDO_BRANCH = 16 PSEUDO_BARRIER = 17 PSEUDO_REDUCTION = 18 + VOP3P = 19 VOP1 = 1 << 8 VOP2 = 1 << 9 VOPC = 1 << 10 VOP3A = 1 << 11 VOP3B = 1 << 11 - VOP3P = 1 << 12 - VINTRP = 1 << 13 - DPP = 1 << 14 - SDWA = 1 << 15 + VINTRP = 1 << 12 + DPP = 1 << 13 + SDWA = 1 << 14 def get_builder_fields(self): if self == Format.SOPK: diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index
fb771aafa37..b3fcb74cd58 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -480,6 +480,11 @@ static void print_instr_format_specific(struct Instruction *instr, FILE *output) print_barrier_reorder(mtbuf->can_reorder, mtbuf->barrier, output); break; } + case Format::VOP3P: { + if (static_cast<VOP3P_instruction*>(instr)->clamp) + fprintf(output, " clamp"); + break; + } default: { break; } @@ -652,7 +657,22 @@ void aco_print_instr(struct Instruction *instr, FILE *output) } if (abs[i]) fprintf(output, "|"); - } + + if (instr->format == Format::VOP3P) { + VOP3P_instruction* vop3 = static_cast<VOP3P_instruction*>(instr); + if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) { + fprintf(output, ".%c%c", + vop3->opsel_lo & (1 << i) ? 'y' : 'x', + vop3->opsel_hi & (1 << i) ? 'y' : 'x'); + } + if (vop3->neg_lo[i] && vop3->neg_hi[i]) + fprintf(output, "*[-1,-1]"); + else if (vop3->neg_lo[i]) + fprintf(output, "*[-1,1]"); + else if (vop3->neg_hi[i]) + fprintf(output, "*[1,-1]"); + } + } } print_instr_format_specific(instr, output); } -- 2.30.2