aco/gfx10: Mitigate VcmpxPermlaneHazard.
authorTimur Kristóf <timur.kristof@gmail.com>
Wed, 23 Oct 2019 09:24:53 +0000 (11:24 +0200)
committerTimur Kristóf <timur.kristof@gmail.com>
Fri, 25 Oct 2019 08:10:42 +0000 (10:10 +0200)
Any permlane instruction that follows any VOPC instruction can cause a hazard.
This commit implements a workaround that prevents the hazard from occurring.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
src/amd/compiler/README
src/amd/compiler/aco_insert_NOPs.cpp

index 630f1fcd42a0138cdbaabe7435f05caec9602021..d3ecc896bc4aafd7f28ca795b37d5854fd5f2577 100644 (file)
@@ -173,3 +173,11 @@ The 12-bit immediate OFFSET field of FLAT instructions must always be 0.
 GLOBAL and SCRATCH are unaffected.
 
 ACO doesn't use FLAT load/store on GFX10, so is unaffected.
+
+### VcmpxPermlaneHazard
+
+Triggered by:
+Any permlane instruction that follows any VOPC instruction.
+AMD devs confirmed that, despite the name, this affects all VOPC instructions, not only v_cmpx.
+
+Mitigated by: any VALU instruction except `v_nop`.
index eb325c940bad8cb2a2a5aff8a05ea46cfb2da32e..a80dd0c04bc54d89ae1196f474913c9930d68a55 100644 (file)
@@ -39,6 +39,7 @@ struct NOP_ctx {
 
    /* GFX10 */
    int last_VMEM_since_scalar_write = -1;
+   bool has_VOPC = false;
 
    NOP_ctx(Program* program) : chip_class(program->chip_class) {
       vcc_physical = program->config->num_sgprs - 2;
@@ -283,6 +284,25 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>&
       ctx.last_VMEM_since_scalar_write = -1;
    }
 
+   /* VcmpxPermlaneHazard
+    * Handle any permlane following a VOPC instruction, insert v_mov between them.
+    */
+   if (instr->format == Format::VOPC) {
+      ctx.has_VOPC = true;
+   } else if (ctx.has_VOPC &&
+              (instr->opcode == aco_opcode::v_permlane16_b32 ||
+               instr->opcode == aco_opcode::v_permlanex16_b32)) {
+      ctx.has_VOPC = false;
+
+      /* v_nop would be discarded by SQ, so use v_mov with the first operand of the permlane */
+      aco_ptr<VOP1_instruction> v_mov{create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32, Format::VOP1, 1, 1)};
+      v_mov->definitions[0] = Definition(instr->operands[0].physReg(), v1);
+      v_mov->operands[0] = Operand(instr->operands[0].physReg(), v1);
+      new_instructions.emplace_back(std::move(v_mov));
+   } else if (instr->isVALU() && instr->opcode != aco_opcode::v_nop) {
+      ctx.has_VOPC = false;
+   }
+
    return std::make_pair(sNOPs, vNOPs);
 }