From a9fc81b098ca36d063dbdb6f69ffde1ab215d34b Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 27 Nov 2019 17:20:15 +0000
Subject: [PATCH] aco: add v_nop inbetween exec write and VMEM/DS/FLAT
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

LLVM and the proprietary compiler seem to do this

Fixes: b01847bd9 ("aco/gfx10: Fix mitigation of VMEMtoScalarWriteHazard.")
Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
---
 src/amd/compiler/aco_insert_NOPs.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index b0404300e0e..4871bea4ba7 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -385,7 +385,7 @@ void insert_NOPs_gfx8_9(Program* program)
    }
 }
 
-void handle_instruction_gfx10(NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
+void handle_instruction_gfx10(Program *program, NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
                               std::vector<aco_ptr<Instruction>>& old_instructions,
                               std::vector<aco_ptr<Instruction>>& new_instructions)
 {
@@ -396,6 +396,9 @@ void handle_instruction_gfx10(NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
        instr->format == Format::SCRATCH || instr->format == Format::DS) {
       /* Remember all SGPRs that are read by the VMEM instruction */
       mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
+      ctx.sgprs_read_by_VMEM.set(exec);
+      if (program->wave_size == 64)
+         ctx.sgprs_read_by_VMEM.set(exec_hi);
    } else if (instr->isSALU() || instr->format == Format::SMEM) {
       /* Check if SALU writes an SGPR that was previously read by the VALU */
       if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) {
@@ -528,7 +531,7 @@ void handle_instruction_gfx10(NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
    }
 }
 
-void handle_block_gfx10(NOP_ctx_gfx10& ctx, Block& block)
+void handle_block_gfx10(Program *program, NOP_ctx_gfx10& ctx, Block& block)
 {
    if (block.instructions.empty())
       return;
@@ -537,7 +540,7 @@ void handle_block_gfx10(NOP_ctx_gfx10& ctx, Block& block)
    instructions.reserve(block.instructions.size());
 
    for (aco_ptr<Instruction>& instr : block.instructions) {
-      handle_instruction_gfx10(ctx, instr, block.instructions, instructions);
+      handle_instruction_gfx10(program, ctx, instr, block.instructions, instructions);
       instructions.emplace_back(std::move(instr));
    }
 
@@ -562,7 +565,7 @@ void mitigate_hazards_gfx10(Program *program)
          for (unsigned b : program->blocks[idx].linear_preds)
             loop_block_ctx.join(all_ctx[b]);
 
-         handle_block_gfx10(loop_block_ctx, program->blocks[idx]);
+         handle_block_gfx10(program, loop_block_ctx, program->blocks[idx]);
 
          /* We only need to continue if the loop header context changed */
          if (idx == loop_header_indices.top() && loop_block_ctx == all_ctx[idx])
@@ -577,7 +580,7 @@ void mitigate_hazards_gfx10(Program *program)
       for (unsigned b : block.linear_preds)
          ctx.join(all_ctx[b]);
 
-      handle_block_gfx10(ctx, block);
+      handle_block_gfx10(program, ctx, block);
    }
 }
 
-- 
2.30.2