From 34fd894e42ae1ec9d35bf9c4f05364b03dd4a223 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 7 Feb 2020 16:33:35 +0100 Subject: [PATCH] aco: fix waiting for scalar stores before "writing back" data on GFX8-GFX9 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Seems required also on GFX8-GFX9 to achieve correct behaviour. This is an undocumented behaviour but it makes real sense to me. pipeline-db on GFX9: Totals from affected shaders: SGPRS: 1018 -> 1018 (0.00 %) VGPRS: 516 -> 516 (0.00 %) Code Size: 40516 -> 40636 (0.30 %) bytes Max Waves: 280 -> 280 (0.00 %) This fixes some sort of sun flickering with Assassins Creed Origins. Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2488 Cc: Signed-off-by: Samuel Pitoiset Reviewed-by: Daniel Schürmann Tested-by: Marge Bot Part-of: --- src/amd/compiler/aco_insert_waitcnt.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 5ec9636752d..8d8024f5aa2 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -374,13 +374,16 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx) imm.combine(parse_wait_instr(ctx, instr)); - if (ctx.chip_class >= GFX10) { - /* Seems to be required on GFX10 to achieve correct behaviour. - * It shouldn't cost anything anyways since we're about to do s_endpgm. - */ - if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) - imm.lgkm = 0; + /* It's required to wait for scalar stores before "writing back" data. + * It shouldn't cost anything anyways since we're about to do s_endpgm. + */ + if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) { + assert(ctx.chip_class >= GFX8); + imm.lgkm = 0; + } + + if (ctx.chip_class >= GFX10) { /* GFX10: A store followed by a load at the same address causes a problem because * the load doesn't load the correct values unless we wait for the store first. * This is NOT mitigated by an s_nop. -- 2.30.2