X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_assembler.cpp;h=207c40acf49d56831f463c446f16ba1ee7a65b60;hb=ffb4790279ca779572ec393ba84d71ef1036b437;hp=648031651a89228f927cff233e3f6b6698a6cdb6;hpb=aa75be05af5cd261c96eddb8a42efa85bbb1ba89;p=mesa.git diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 648031651a8..207c40acf49 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -17,7 +17,9 @@ struct asm_context { // TODO: keep track of branch instructions referring blocks // and, when emitting the block, correct the offset in instr asm_context(Program* program) : program(program), chip_class(program->chip_class) { - if (chip_class <= GFX9) + if (chip_class <= GFX7) + opcode = &instr_info.opcode_gfx7[0]; + else if (chip_class <= GFX9) opcode = &instr_info.opcode_gfx9[0]; else if (chip_class == GFX10) opcode = &instr_info.opcode_gfx10[0]; @@ -105,7 +107,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= !instr->definitions.empty() && !(instr->definitions[0].physReg() == scc) ? instr->definitions[0].physReg() << 16 : - !instr->operands.empty() && !(instr->operands[0].physReg() == scc) ? + !instr->operands.empty() && instr->operands[0].physReg() <= 127 ? instr->operands[0].physReg() << 16 : 0; encoding |= sopk->imm; out.push_back(encoding); @@ -145,9 +147,26 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* SMEM_instruction* smem = static_cast(instr); bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); bool is_load = !instr->definitions.empty(); - uint32_t encoding = 0; + if (ctx.chip_class <= GFX7) { + encoding = (0b11000 << 27); + encoding |= opcode << 22; + encoding |= instr->definitions.size() ? instr->definitions[0].physReg() << 15 : 0; + encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0; + if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) { + encoding |= instr->operands[1].physReg().reg; + } else { + encoding |= instr->operands[1].constantValue() >> 2; + encoding |= 1 << 8; + } + out.push_back(encoding); + /* SMRD instructions can take a literal on GFX6 & GFX7 */ + if (instr->operands[1].isConstant() && instr->operands[1].constantValue() >= 1024) + out.push_back(instr->operands[1].constantValue() >> 2); + return; + } + if (ctx.chip_class <= GFX9) { encoding = (0b110000 << 26); assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */ @@ -291,7 +310,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= (mubuf->glc ? 1 : 0) << 14; encoding |= (mubuf->idxen ? 1 : 0) << 13; encoding |= (mubuf->offen ? 1 : 0) << 12; - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */ encoding |= (mubuf->slc ? 1 : 0) << 17; } else if (ctx.chip_class >= GFX10) { @@ -317,6 +336,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt); uint32_t encoding = (0b111010 << 26); + assert(img_format <= 0x7F); assert(!mtbuf->dlc || ctx.chip_class >= GFX10); encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */ encoding |= (mtbuf->glc ? 1 : 0) << 14; @@ -325,7 +345,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= 0x0FFF & mtbuf->offset; encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */ - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding |= opcode << 15; } else { encoding |= (opcode & 0x07) << 16; /* 3 LSBs of 4-bit OPCODE */ @@ -398,9 +418,14 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* if (ctx.chip_class <= GFX9) { assert(flat->offset <= 0x1fff); encoding |= flat->offset & 0x1fff; + } else if (instr->format == Format::FLAT) { + /* GFX10 has a 12-bit immediate OFFSET field, + * but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug + */ + assert(flat->offset == 0); } else { - assert(flat->offset <= 0x0fff); - encoding |= flat->offset & 0x0fff; + assert(flat->offset <= 0xfff); + encoding |= flat->offset & 0xfff; } if (instr->format == Format::SCRATCH) encoding |= 1 << 14; @@ -419,13 +444,13 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding = (0xFF & instr->operands[0].physReg()); if (!instr->definitions.empty()) encoding |= (0xFF & instr->definitions[0].physReg()) << 24; - else + if (instr->operands.size() >= 3) encoding |= (0xFF & instr->operands[2].physReg()) << 8; if (!instr->operands[1].isUndefined()) { assert(ctx.chip_class >= GFX10 || instr->operands[1].physReg() != 0x7F); assert(instr->format != Format::FLAT); encoding |= instr->operands[1].physReg() << 16; - } else if (instr->format != Format::FLAT) { + } else if (instr->format != Format::FLAT || ctx.chip_class >= GFX10) { /* SADDR is actually used with FLAT on GFX10 */ if (ctx.chip_class <= GFX9) encoding |= 0x7F << 16; else @@ -438,9 +463,9 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* case Format::EXP: { Export_instruction* exp = static_cast(instr); uint32_t encoding; - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding = (0b110001 << 26); - } else if (ctx.chip_class >= GFX10) { + } else { encoding = (0b111110 << 26); } @@ -467,12 +492,10 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* if ((uint16_t) instr->format & (uint16_t) Format::VOP2) { opcode = opcode + 0x100; } else if ((uint16_t) instr->format & (uint16_t) Format::VOP1) { - if (ctx.chip_class <= GFX9) { + if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) opcode = opcode + 0x140; - } else { - /* RDNA ISA doc says this is 0x140, but that doesn't work */ + else opcode = opcode + 0x180; - } } else if ((uint16_t) instr->format & (uint16_t) Format::VOPC) { opcode = opcode + 0x0; } else if ((uint16_t) instr->format & (uint16_t) Format::VINTRP) { @@ -486,12 +509,16 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding = (0b110101 << 26); } - encoding |= opcode << 16; - encoding |= (vop3->clamp ? 1 : 0) << 15; + if (ctx.chip_class <= GFX7) { + encoding |= opcode << 17; + encoding |= (vop3->clamp ? 1 : 0) << 11; + } else { + encoding |= opcode << 16; + encoding |= (vop3->clamp ? 1 : 0) << 15; + } + encoding |= vop3->opsel << 11; for (unsigned i = 0; i < 3; i++) encoding |= vop3->abs[i] << (8+i); - for (unsigned i = 0; i < 4; i++) - encoding |= vop3->opsel[i] << (11+i); if (instr->definitions.size() == 2) encoding |= instr->definitions[1].physReg() << 8; encoding |= (0xFF & instr->definitions[0].physReg()); @@ -509,6 +536,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* out.push_back(encoding); } else if (instr->isDPP()){ + assert(ctx.chip_class >= GFX8); /* first emit the instruction without the DPP operand */ Operand dpp_op = instr->operands[0]; instr->operands[0] = Operand(PhysReg{250}, v1); @@ -624,7 +652,8 @@ static void fix_branches_gfx10(asm_context& ctx, std::vector& out) if (gfx10_3f_bug) { /* Insert an s_nop after the branch */ constexpr uint32_t s_nop_0 = 0xbf800000u; - auto out_pos = std::next(out.begin(), buggy_branch_it->first + 1); + int s_nop_pos = buggy_branch_it->first + 1; + auto out_pos = std::next(out.begin(), s_nop_pos); out.insert(out_pos, s_nop_0); /* Update the offset of each affected block */ @@ -636,6 +665,16 @@ static void fix_branches_gfx10(asm_context& ctx, std::vector& out) /* Update the branches following the current one */ for (auto branch_it = std::next(buggy_branch_it); branch_it != ctx.branches.end(); ++branch_it) branch_it->first++; + + /* Find first constant address after the inserted instruction */ + auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [s_nop_pos](const int &caddr_pos) -> bool { + return caddr_pos >= s_nop_pos; + }); + + /* Update the locations of constant addresses */ + for (; caddr_it != ctx.constaddrs.end(); ++caddr_it) + (*caddr_it)++; + } } while (gfx10_3f_bug); }