From 09d676d81ab6e604f73f65ad696a9996312f93a4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 24 Oct 2019 11:45:27 +0200 Subject: [PATCH] aco/gfx10: Mitigate SMEMtoVectorWriteHazard. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is a hazard that happens when an SMEM instruction reads an SGPR and then a VALU instruction writes that same SGPR. This commit adds a workaround that avoids the problem. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 9 ++++ src/amd/compiler/aco_insert_NOPs.cpp | 61 ++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 79674ebe0db..620b4bcf63c 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -150,6 +150,15 @@ Then, a SALU/SMEM instruction writes the same SGPR. Mitigated by: A VALU instruction or an `s_waitcnt vmcnt(0)` between the two instructions. +### SMEMtoVectorWriteHazard + +Triggered by: +An SMEM instruction reads an SGPR. Then, a VALU instruction writes that same SGPR. +Despite LLVM + +Mitigated by: +Any non-SOPP SALU instruction (except `s_setvskip`, `s_version`, and any non-lgkmcnt `s_waitcnt`). + ### Offset3fBug Any branch that is located at offset 0x3f will be buggy. Just insert some NOPs to make sure no branch diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 034e9e3949e..6924ea152a2 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -43,12 +43,38 @@ struct NOP_ctx { int last_VMEM_since_scalar_write = -1; bool has_VOPC = false; bool has_nonVALU_exec_read = false; + std::bitset<128> sgprs_read_by_SMEM; NOP_ctx(Program* program) : chip_class(program->chip_class) { vcc_physical = program->config->num_sgprs - 2; } }; +template +bool check_written_regs(const aco_ptr &instr, const std::bitset &check_regs) +{ + return std::any_of(instr->definitions.begin(), instr->definitions.end(), [&check_regs](const Definition &def) -> bool { + bool writes_any = false; + for (unsigned i = 0; i < def.size(); i++) { + unsigned def_reg = def.physReg() + i; + writes_any |= def_reg < check_regs.size() && check_regs[def_reg]; + } + return writes_any; + }); +} + +template +void mark_read_regs(const aco_ptr &instr, std::bitset ®_reads) +{ + for (const Operand &op : instr->operands) { + for (unsigned i = 0; i < op.size(); i++) { + unsigned reg = op.physReg() + i; + if (reg < reg_reads.size()) + reg_reads.set(reg); + } + } +} + bool VALU_writes_sgpr(aco_ptr& instr) { if ((uint32_t) instr->format & (uint32_t) Format::VOPC) @@ -352,6 +378,41 @@ std::pair handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr& ctx.has_nonVALU_exec_read = false; } + /* SMEMtoVectorWriteHazard + * Handle any VALU instruction writing an SGPR after an SMEM reads it. + */ + if (instr->format == Format::SMEM) { + /* Remember all SGPRs that are read by the SMEM instruction */ + mark_read_regs(instr, ctx.sgprs_read_by_SMEM); + } else if (VALU_writes_sgpr(instr)) { + /* Check if VALU writes an SGPR that was previously read by SMEM */ + if (check_written_regs(instr, ctx.sgprs_read_by_SMEM)) { + ctx.sgprs_read_by_SMEM.reset(); + + /* Insert s_mov to mitigate the problem */ + aco_ptr s_mov{create_instruction(aco_opcode::s_mov_b32, Format::SOP1, 1, 1)}; + s_mov->definitions[0] = Definition(sgpr_null, s1); + s_mov->operands[0] = Operand(0u); + new_instructions.emplace_back(std::move(s_mov)); + } + } else if (instr->isSALU()) { + if (instr->format != Format::SOPP) { + /* SALU can mitigate the hazard */ + ctx.sgprs_read_by_SMEM.reset(); + } else { + /* Reducing lgkmcnt count to 0 always mitigates the hazard. */ + const SOPP_instruction *sopp = static_cast(instr.get()); + if (sopp->opcode == aco_opcode::s_waitcnt_lgkmcnt) { + if (sopp->imm == 0 && sopp->definitions[0].physReg() == sgpr_null) + ctx.sgprs_read_by_SMEM.reset(); + } else if (sopp->opcode == aco_opcode::s_waitcnt) { + unsigned lgkm = (sopp->imm >> 8) & 0x3f; + if (lgkm == 0) + ctx.sgprs_read_by_SMEM.reset(); + } + } + } + return std::make_pair(sNOPs, vNOPs); } -- 2.30.2