From 5986e0019472498e060a56c3d967ce0934914ce3 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 27 Nov 2019 17:23:02 +0000 Subject: [PATCH] aco: improve WAR hazard workaround with >64bit stores MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann --- src/amd/compiler/aco_insert_NOPs.cpp | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 4871bea4ba7..24325b03826 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -287,17 +287,23 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr& instr, /* Write VGPRs holding writedata > 64 bit from MIMG/MUBUF instructions */ // FIXME: handle case if the last instruction of a block without branch is such store - // TODO: confirm that DS instructions cannot cause WAR hazards here if (new_idx > 0) { aco_ptr& pred = new_instructions.back(); - if (pred->isVMEM() && - pred->operands.size() == 4 && - pred->operands[3].size() > 2 && - pred->operands[1].size() != 8 && - (pred->format != Format::MUBUF || pred->operands[2].physReg() >= 102)) { - /* Ops that use a 256-bit T# do not need a wait state. - * BUFFER_STORE_* operations that use an SGPR for "offset" - * do not require any wait states. */ + /* >64-bit MUBUF/MTBUF store with a constant in SOFFSET */ + bool consider_buf = (pred->format == Format::MUBUF || pred->format == Format::MTBUF) && + pred->operands.size() == 4 && + pred->operands[3].size() > 2 && + pred->operands[2].physReg() >= 128; + /* MIMG store with a 128-bit T# with more than two bits set in dmask (making it a >64-bit store) */ + bool consider_mimg = pred->format == Format::MIMG && + pred->operands.size() == 4 && + pred->operands[3].size() > 2 && + pred->operands[1].size() != 8; + /* FLAT/GLOBAL/SCRATCH store with >64-bit data */ + bool consider_flat = (pred->isFlatOrGlobal() || pred->format == Format::SCRATCH) && + pred->operands.size() == 3 && + pred->operands[2].size() > 2; + if (consider_buf || consider_mimg || consider_flat) { PhysReg wrdata = pred->operands[3].physReg(); unsigned size = pred->operands[3].size(); assert(wrdata >= 256); -- 2.30.2