From e5a8616973d45909a75a708f88dfb9c3cbb5ded8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Wed, 23 Oct 2019 11:24:53 +0200 Subject: [PATCH] aco/gfx10: Mitigate VcmpxPermlaneHazard. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Any permlane instruction that follows any VOPC instruction can cause a hazard. This commit implements a workaround that prevents the hazard. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 8 ++++++++ src/amd/compiler/aco_insert_NOPs.cpp | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 630f1fcd42a..d3ecc896bc4 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -173,3 +173,11 @@ The 12-bit immediate OFFSET field of FLAT instructions must always be 0. GLOBAL and SCRATCH are unaffected. ACO doesn't use FLAT load/store on GFX10, so is unaffected. + +### VcmpxPermlaneHazard + +Triggered by: +Any permlane instruction that follows any VOPC instruction. +Confirmed by AMD devs that despite the name, this doesn't only affect v_cmpx. + +Mitigated by: any VALU instruction except `v_nop`. diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index eb325c940ba..a80dd0c04bc 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -39,6 +39,7 @@ struct NOP_ctx { /* GFX10 */ int last_VMEM_since_scalar_write = -1; + bool has_VOPC = false; NOP_ctx(Program* program) : chip_class(program->chip_class) { vcc_physical = program->config->num_sgprs - 2; @@ -283,6 +284,25 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>& ctx.last_VMEM_since_scalar_write = -1; } + /* VcmpxPermlaneHazard + * Handle any permlane following a VOPC instruction, insert v_mov between them. 
+ */ + if (instr->format == Format::VOPC) { + ctx.has_VOPC = true; + } else if (ctx.has_VOPC && + (instr->opcode == aco_opcode::v_permlane16_b32 || + instr->opcode == aco_opcode::v_permlanex16_b32)) { + ctx.has_VOPC = false; + + /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */ + aco_ptr<VOP1_instruction> v_mov{create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1)}; + v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1); + v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1); + new_instructions.emplace_back(std::move(v_mov)); + } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) { + ctx.has_VOPC = false; + } + return std::make_pair(sNOPs, vNOPs); } -- 2.30.2