From a89153d03815dfe56d5521276eaad8cd9087ee0d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Tue, 17 Sep 2019 19:59:17 +0200 Subject: [PATCH] aco: Fix s_dcache_wb on GFX10. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/README | 5 +++++ src/amd/compiler/aco_insert_waitcnt.cpp | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 585c4e60d89..cd5861b1f9a 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -109,6 +109,11 @@ Stores and atomics always bypass the L1 cache, so they don't support the DLC bit and it shouldn't be set in these cases. Setting the DLC for these cases can result in graphical glitches. +## RDNA S_DCACHE_WB + +The S_DCACHE_WB is not mentioned in the RDNA ISA doc, but it is needed in order +to achieve correct behavior in some SSBO CTS tests. + ## RDNA subvector mode The documentation of S_SUBVECTOR_LOOP_BEGIN and S_SUBVECTOR_LOOP_END is not clear diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 8cfb4f3f695..76fa54f2c17 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -323,6 +323,14 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx) if (ctx.exp_cnt || ctx.vm_cnt || ctx.lgkm_cnt) imm.combine(check_instr(instr, ctx)); + if (ctx.chip_class >= GFX10) { + /* Seems to be required on GFX10 to achieve correct behaviour. + * It shouldn't cost anything anyways since we're about to do s_endpgm. + */ + if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) + imm.lgkm = 0; + } + if (instr->format == Format::PSEUDO_BARRIER) { unsigned* bsize = ctx.program->info->cs.block_size; unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2]; -- 2.30.2