From b01847bd94dc15913911b8badf312a3689314d86 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Wed, 23 Oct 2019 21:51:14 +0200 Subject: [PATCH] aco/gfx10: Fix mitigation of VMEMtoScalarWriteHazard. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit refines the VMEMtoScalarWriteHazard mitigation, based upon a closer look at what LLVM does. Also changes the code to match the structure of the other hazard mitigations. * The hazard is not only triggered by VMEM, FLAT and GLOBAL but also SCRATCH and DS instructions. * The SMEM/SALU instructions only cause a hazard when they write a register that the VMEM/etc. are reading. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/aco_insert_NOPs.cpp | 30 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 2442f60f48e..05af344c9c6 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -40,7 +40,6 @@ struct NOP_ctx { int VALU_wrsgpr = -10; /* GFX10 */ - int last_VMEM_since_scalar_write = -1; bool has_VOPC = false; bool has_nonVALU_exec_read = false; bool has_VMEM = false; @@ -48,6 +47,7 @@ struct NOP_ctx { bool has_DS = false; bool has_branch_after_DS = false; std::bitset<128> sgprs_read_by_SMEM; + std::bitset<128> sgprs_read_by_VMEM; NOP_ctx(Program* program) : chip_class(program->chip_class) { vcc_physical = program->config->num_sgprs - 2; @@ -342,21 +342,31 @@ std::pair handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr& if (instr->format == Format::SMEM) sNOPs = std::max(sNOPs, handle_SMEM_clause(instr, new_idx, new_instructions)); - /* handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between */ - if (instr->isSALU() || instr->format == Format::SMEM) { - if (!instr->definitions.empty() && ctx.last_VMEM_since_scalar_write != -1) 
{ - ctx.last_VMEM_since_scalar_write = -1; - vNOPs = 1; + /* VMEMtoScalarWriteHazard + * Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between. + */ + if (instr->isVMEM() || instr->format == Format::FLAT || instr->format == Format::GLOBAL || + instr->format == Format::SCRATCH || instr->format == Format::DS) { + /* Remember all SGPRs that are read by the VMEM instruction */ + mark_read_regs(instr, ctx.sgprs_read_by_VMEM); + } else if (instr->isSALU() || instr->format == Format::SMEM) { + /* Check if SALU/SMEM writes an SGPR that was previously read by the VMEM */ + if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) { + ctx.sgprs_read_by_VMEM.reset(); + + /* Insert v_nop to mitigate the problem */ + aco_ptr nop{create_instruction(aco_opcode::v_nop, Format::VOP1, 0, 0)}; + new_instructions.emplace_back(std::move(nop)); } - } else if (instr->isVMEM() || instr->isFlatOrGlobal()) { - ctx.last_VMEM_since_scalar_write = new_idx; } else if (instr->opcode == aco_opcode::s_waitcnt) { + /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */ uint16_t imm = static_cast(instr.get())->imm; unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10); if (vmcnt == 0) - ctx.last_VMEM_since_scalar_write = -1; + ctx.sgprs_read_by_VMEM.reset(); } else if (instr->isVALU()) { - ctx.last_VMEM_since_scalar_write = -1; + /* Hazard is mitigated by any VALU instruction */ + ctx.sgprs_read_by_VMEM.reset(); } /* VcmpxPermlaneHazard -- 2.30.2