From b5e748b49b3fb9ef7a5e3af01e2ddbac59f90796 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Thu, 19 Dec 2019 19:09:54 +0100 Subject: [PATCH] radeonsi: fix fmask expand compute shader MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit 'coord' variable was using TGSI_WRITEMASK_XYZ so subsequent uses of TGSI_WRITEMASK_W were dropped. The result for a 2 samples program was: 0: UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xxxx, SV[0].xyyy 1: STORE IMAGE[0], TEMP[0], TEMP[1], RESTRICT, 2D_MSAA 2: STORE IMAGE[0], TEMP[0], TEMP[2], RESTRICT, 2D_MSAA 3: END instead of the expected: 0: UMAD TEMP[0].xy, SV[1].xyyy, IMM[0].xxxx, SV[0].xyyy 1: MOV TEMP[0].w, IMM[0].yyyy 2: LOAD TEMP[1], IMAGE[0], TEMP[0], RESTRICT, 2D_MSAA 3: MOV TEMP[0].w, IMM[0].zzzz 4: LOAD TEMP[2], IMAGE[0], TEMP[0], RESTRICT, 2D_MSAA 5: MOV TEMP[0].w, IMM[0].yyyy 6: STORE IMAGE[0], TEMP[0], TEMP[1], RESTRICT, 2D_MSAA 7: MOV TEMP[0].w, IMM[0].zzzz 8: STORE IMAGE[0], TEMP[0], TEMP[2], RESTRICT, 2D_MSAA 9: END This fixes half of https://gitlab.freedesktop.org/mesa/mesa/issues/2248 Fixes: 095a58204d9 ("radeonsi: expand FMASK before MSAA image stores are used") Reviewed-by: Marek Olšák Part-of: --- src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index 90eb39e3506..30cca361ac4 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -722,7 +722,7 @@ void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0); struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0); struct ureg_dst coord = ureg_writemask(ureg_DECL_temporary(ureg), - TGSI_WRITEMASK_XYZ); + TGSI_WRITEMASK_XYZW); ureg_UMAD(ureg, ureg_writemask(coord, TGSI_WRITEMASK_XY), ureg_swizzle(blk, 0, 1, 1, 1), ureg_imm2u(ureg, 8, 8), ureg_swizzle(tid, 0, 1, 1, 1)); -- 2.30.2