From d6dfce02d074d615a3b88a3fccd8ee8c7e13c010 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Wed, 23 Oct 2019 21:43:50 +0200 Subject: [PATCH] aco/gfx10: Mitigate VcmpxExecWARHazard. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is a hazard when a non-VALU instruction reads the EXEC mask and then a VALU instruction writes the EXEC mask. This commit adds a workaround that avoids the problem. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 10 ++++++ src/amd/compiler/aco_insert_NOPs.cpp | 49 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index d3ecc896bc4..79674ebe0db 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -181,3 +181,13 @@ Any permlane instruction that follows any VOPC instruction. Confirmed by AMD devs that despite the name, this doesn't only affect v_cmpx. Mitigated by: any VALU instruction except `v_nop`. + +### VcmpxExecWARHazard + +Triggered by: +Any non-VALU instruction reads the EXEC mask. Then, any VALU instruction writes the EXEC mask. + +Mitigated by: +A VALU instruction that writes an SGPR (or has a valid SDST operand), or `s_waitcnt_depctr 0xfffe`. +Note: `s_waitcnt_depctr` is an internal instruction, so there is no further information +about what it does or what its operand means. 
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index a80dd0c04bc..034e9e3949e 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -22,6 +22,8 @@
  *
  */
 
+#include <algorithm>
+
 #include "aco_ir.h"
 
 namespace aco {
@@ -40,6 +42,7 @@ struct NOP_ctx {
    /* GFX10 */
    int last_VMEM_since_scalar_write = -1;
    bool has_VOPC = false;
+   bool has_nonVALU_exec_read = false;
 
    NOP_ctx(Program* program) : chip_class(program->chip_class) {
       vcc_physical = program->config->num_sgprs - 2;
@@ -57,6 +60,27 @@ bool VALU_writes_sgpr(aco_ptr<Instruction>& instr)
    return false;
 }
 
+/* Returns true if the physReg() of any operand of the instruction is exec_lo or exec_hi. */
+bool instr_reads_exec(const aco_ptr<Instruction>& instr)
+{
+   auto is_exec = [](const Operand &op) { return op.physReg() == exec_lo || op.physReg() == exec_hi; };
+   return std::any_of(instr->operands.begin(), instr->operands.end(), is_exec);
+}
+
+/* Returns true if the physReg() of any definition of the instruction is exec_lo or exec_hi. */
+bool instr_writes_exec(const aco_ptr<Instruction>& instr)
+{
+   auto is_exec = [](const Definition &def) { return def.physReg() == exec_lo || def.physReg() == exec_hi; };
+   return std::any_of(instr->definitions.begin(), instr->definitions.end(), is_exec);
+}
+
+/* Returns true if the temporary of any definition of the instruction has type RegType::sgpr. */
+bool instr_writes_sgpr(const aco_ptr<Instruction>& instr)
+{
+   auto is_sgpr = [](const Definition &def) { return def.getTemp().type() == RegType::sgpr; };
+   return std::any_of(instr->definitions.begin(), instr->definitions.end(), is_sgpr);
+}
+
 bool regs_intersect(PhysReg a_reg, unsigned a_size, PhysReg b_reg, unsigned b_size)
 {
    return a_reg > b_reg ?
@@ -303,6 +327,31 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>&
       ctx.has_VOPC = false;
    }
 
+   /* VcmpxExecWARHazard
+    * Handle any VALU instruction writing the exec mask after it was read by a non-VALU instruction.
+    */
+   if (!instr->isVALU() && instr_reads_exec(instr)) {
+      ctx.has_nonVALU_exec_read = true;
+   } else if (instr->isVALU()) {
+      if (ctx.has_nonVALU_exec_read && instr_writes_exec(instr)) {
+         /* Only when a non-VALU exec read is still pending: insert s_waitcnt_depctr with magic imm to mitigate */
+         aco_ptr<SOPP_instruction> depctr{create_instruction<SOPP_instruction>(aco_opcode::s_waitcnt_depctr, Format::SOPP, 0, 1)};
+         depctr->imm = 0xfffe;
+         depctr->definitions[0] = Definition(sgpr_null, s1);
+         new_instructions.emplace_back(std::move(depctr));
+         /* The inserted instruction mitigates the problem, so no exec read is pending anymore */
+         ctx.has_nonVALU_exec_read = false;
+      } else if (instr_writes_sgpr(instr)) {
+         /* Any VALU instruction that writes an SGPR mitigates the problem */
+         ctx.has_nonVALU_exec_read = false;
+      }
+   } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
+      /* s_waitcnt_depctr can mitigate the problem if it has a magic imm */
+      const SOPP_instruction *sopp = static_cast<const SOPP_instruction *>(instr.get());
+      if ((sopp->imm & 0xfffe) == 0xfffe)
+         ctx.has_nonVALU_exec_read = false;
+   }
+
    return std::make_pair(sNOPs, vNOPs);
 }
 
-- 
2.30.2