X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_assembler.cpp;h=c36e8c9b51ec3c1761ecbc21ce1ab9499f095e69;hb=23631ddd4db192033cba1a2e3f3024f18651867f;hp=9544fc35bd422d02393c5b862486e280954fb0e3;hpb=e0dff5fd86179b4d265060d5fc6138bb6a50b54d;p=mesa.git diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 9544fc35bd4..c36e8c9b51e 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -21,17 +21,27 @@ struct asm_context { opcode = &instr_info.opcode_gfx7[0]; else if (chip_class <= GFX9) opcode = &instr_info.opcode_gfx9[0]; - else if (chip_class == GFX10) + else if (chip_class >= GFX10) opcode = &instr_info.opcode_gfx10[0]; } int subvector_begin_pos = -1; }; -void emit_instruction(asm_context& ctx, std::vector& out, Instruction* instr) +static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg) { - uint32_t instr_offset = out.size() * 4u; + if (sel & sdwa_isra) { + unsigned size = sdwa_rasize & sel; + if (size == 1) + return reg.byte(); + else /* size == 2 */ + return sdwa_isword | (reg.byte() >> 1); + } + return sel & sdwa_asuint; +} +void emit_instruction(asm_context& ctx, std::vector& out, Instruction* instr) +{ /* lower remaining pseudo-instructions */ if (instr->opcode == aco_opcode::p_constaddr) { unsigned dest = instr->definitions[0].physReg(); @@ -56,7 +66,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= 255 << 8; out.push_back(encoding); ctx.constaddrs.push_back(out.size()); - out.push_back(-(instr_offset + 4) + offset); + out.push_back(offset); /* s_addc_u32 dest[1], dest[1], 0 */ encoding = (0b10 << 30); @@ -156,7 +166,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0; if (instr->operands.size() >= 2) { if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) { - encoding |= instr->operands[1].physReg().reg; + encoding |= instr->operands[1].physReg().reg(); } else { encoding |= instr->operands[1].constantValue() >> 2; encoding |= 1 << 8; @@ -262,22 +272,50 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* Interp_instruction* interp = static_cast(instr); uint32_t encoding = 0; - if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { - encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */ + if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || + instr->opcode == aco_opcode::v_interp_p1lv_f16 || + instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || + instr->opcode == aco_opcode::v_interp_p2_f16) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { + encoding = (0b110100 << 26); + } else if (ctx.chip_class >= GFX10) { + encoding = (0b110101 << 26); + } else { + unreachable("Unknown chip_class."); + } + + encoding |= opcode << 16; + encoding |= (0xFF & instr->definitions[0].physReg()); + out.push_back(encoding); + + encoding = 0; + encoding |= interp->attribute; + encoding |= interp->component << 6; + encoding |= instr->operands[0].physReg() << 9; + if (instr->opcode == aco_opcode::v_interp_p2_f16 || + instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || + instr->opcode == aco_opcode::v_interp_p1lv_f16) { + encoding |= instr->operands[2].physReg() << 18; + } + out.push_back(encoding); } else { - encoding = (0b110010 << 26); - } + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { + encoding = (0b110101 << 26); /* Vega ISA doc says 110010 but it's wrong */ + } else { + encoding = (0b110010 << 26); + } - assert(encoding); - encoding |= (0xFF & instr->definitions[0].physReg()) << 18; - encoding |= opcode << 16; - encoding |= interp->attribute << 10; - encoding |= interp->component << 8; - if (instr->opcode == aco_opcode::v_interp_mov_f32) - encoding |= (0x3 & instr->operands[0].constantValue()); - else - encoding |= (0xFF & instr->operands[0].physReg()); - out.push_back(encoding); + assert(encoding); + encoding |= (0xFF & instr->definitions[0].physReg()) << 18; + encoding |= opcode << 16; + encoding |= interp->attribute << 10; + encoding |= interp->component << 8; + if (instr->opcode == aco_opcode::v_interp_mov_f32) + encoding |= (0x3 & instr->operands[0].constantValue()); + else + encoding |= (0xFF & instr->operands[0].physReg()); + out.push_back(encoding); + } break; } case Format::DS: { @@ -368,7 +406,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= (0xFF & instr->operands[1].physReg()); if (ctx.chip_class >= GFX10) { - encoding |= (((opcode & 0x08) >> 4) << 21); /* MSB of 4-bit OPCODE */ + encoding |= (((opcode & 0x08) >> 3) << 21); /* MSB of 4-bit OPCODE */ } out.push_back(encoding); @@ -510,7 +548,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* uint32_t encoding; if (ctx.chip_class <= GFX9) { encoding = (0b110100 << 26); - } else if (ctx.chip_class == GFX10) { + } else if (ctx.chip_class >= GFX10) { encoding = (0b110101 << 26); } else { unreachable("Unknown chip_class."); @@ -542,12 +580,40 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= vop3->neg[i] << (29+i); out.push_back(encoding); + } else if (instr->format == Format::VOP3P) { + VOP3P_instruction* vop3 = static_cast(instr); + + uint32_t encoding; + if (ctx.chip_class == GFX9) { + encoding = (0b110100111 << 23); + } else if (ctx.chip_class >= GFX10) { + encoding = (0b110011 << 26); + } else { + unreachable("Unknown chip_class."); + } + + encoding |= opcode << 16; + encoding |= (vop3->clamp ? 1 : 0) << 15; + encoding |= vop3->opsel_lo << 11; + encoding |= (vop3->opsel_hi & 0x4) ? 1 : 0 << 14; + for (unsigned i = 0; i < 3; i++) + encoding |= vop3->neg_hi[i] << (8+i); + encoding |= (0xFF & instr->definitions[0].physReg()); + out.push_back(encoding); + encoding = 0; + for (unsigned i = 0; i < instr->operands.size(); i++) + encoding |= instr->operands[i].physReg() << (i * 9); + encoding |= vop3->opsel_hi & 0x3 << 27; + for (unsigned i = 0; i < 3; i++) + encoding |= vop3->neg_lo[i] << (29+i); + out.push_back(encoding); + } else if (instr->isDPP()){ assert(ctx.chip_class >= GFX8); /* first emit the instruction without the DPP operand */ Operand dpp_op = instr->operands[0]; instr->operands[0] = Operand(PhysReg{250}, v1); - instr->format = (Format) ((uint32_t) instr->format & ~(1 << 14)); + instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::DPP); emit_instruction(ctx, out, instr); DPP_instruction* dpp = static_cast(instr); uint32_t encoding = (0xF & dpp->row_mask) << 28; @@ -561,6 +627,49 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= (0xFF) & dpp_op.physReg(); out.push_back(encoding); return; + } else if (instr->isSDWA()) { + /* first emit the instruction without the SDWA operand */ + Operand sdwa_op = instr->operands[0]; + instr->operands[0] = Operand(PhysReg{249}, v1); + instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::SDWA); + emit_instruction(ctx, out, instr); + + SDWA_instruction* sdwa = static_cast(instr); + uint32_t encoding = 0; + + if ((uint16_t)instr->format & (uint16_t)Format::VOPC) { + if (instr->definitions[0].physReg() != vcc) { + encoding |= instr->definitions[0].physReg() << 8; + encoding |= 1 << 15; + } + encoding |= (sdwa->clamp ? 1 : 0) << 13; + } else { + encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8; + uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0; + if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra)) + dst_u = 2; + encoding |= dst_u << 11; + encoding |= (sdwa->clamp ? 1 : 0) << 13; + encoding |= sdwa->omod << 14; + } + + encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16; + encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0; + encoding |= sdwa->abs[0] << 21; + encoding |= sdwa->neg[0] << 20; + + if (instr->operands.size() >= 2) { + encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24; + encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0; + encoding |= sdwa->abs[1] << 29; + encoding |= sdwa->neg[1] << 28; + } + + encoding |= 0xFF & sdwa_op.physReg(); + encoding |= (sdwa_op.physReg() < 256) << 23; + if (instr->operands.size() >= 2) + encoding |= (instr->operands[1].physReg() < 256) << 31; + out.push_back(encoding); } else { unreachable("unimplemented instruction format"); } @@ -604,7 +713,7 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program) { if ((*it)->format == Format::EXP) { Export_instruction* exp = static_cast((*it).get()); - if (program->stage & hw_vs) { + if (program->stage & (hw_vs | hw_ngg_gs)) { if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) { exp->done = true; exported = true; @@ -686,7 +795,7 @@ void fix_branches(asm_context& ctx, std::vector& out) void fix_constaddrs(asm_context& ctx, std::vector& out) { for (unsigned addr : ctx.constaddrs) - out[addr] += out.size() * 4u; + out[addr] += (out.size() - addr + 1u) * 4u; } unsigned emit_program(Program* program, @@ -694,7 +803,7 @@ unsigned emit_program(Program* program, { asm_context ctx(program); - if (program->stage & (hw_vs | hw_fs)) + if (program->stage & (hw_vs | hw_fs | hw_ngg_gs)) fix_exports(ctx, code, program); for (Block& block : program->blocks) {