From: Timur Kristóf Date: Tue, 17 Sep 2019 17:59:17 +0000 (+0200) Subject: aco: Fix s_dcache_wb on GFX10. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a89153d03815dfe56d5521276eaad8cd9087ee0d;p=mesa.git aco: Fix s_dcache_wb on GFX10. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- diff --git a/src/amd/compiler/README b/src/amd/compiler/README index 585c4e60d89..cd5861b1f9a 100644 --- a/src/amd/compiler/README +++ b/src/amd/compiler/README @@ -109,6 +109,11 @@ Stores and atomics always bypass the L1 cache, so they don't support the DLC bit and it shouldn't be set in these cases. Setting the DLC for these cases can result in graphical glitches. +## RDNA S_DCACHE_WB + +The S_DCACHE_WB is not mentioned in the RDNA ISA doc, but it is needed in order +to achieve correct behavior in some SSBO CTS tests. + ## RDNA subvector mode The documentation of S_SUBVECTOR_LOOP_BEGIN and S_SUBVECTOR_LOOP_END is not clear diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 8cfb4f3f695..76fa54f2c17 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -323,6 +323,14 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx) if (ctx.exp_cnt || ctx.vm_cnt || ctx.lgkm_cnt) imm.combine(check_instr(instr, ctx)); + if (ctx.chip_class >= GFX10) { + /* Seems to be required on GFX10 to achieve correct behaviour. + * It shouldn't cost anything anyways since we're about to do s_endpgm. + */ + if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) + imm.lgkm = 0; + } + if (instr->format == Format::PSEUDO_BARRIER) { unsigned* bsize = ctx.program->info->cs.block_size; unsigned workgroup_size = bsize[0] * bsize[1] * bsize[2];