From 0a8ef756d3a94ce6c4738a77f22cda945e6e23af Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Thu, 11 Jul 2019 08:44:19 +0200 Subject: [PATCH] radv/gfx10: Fix NGG GS output mask handling for LDS indexing. In emit_vertex we optimize storage if the output mask does not have all bits set. Do the same in the epilogue so the indices actually match up. Fixes dEQP-VK.geometry.input.basic_primitive.points because it outputs PSIZE with an output mask of 1, which causes the generic attribute for the color to be loaded from the wrong indices. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_nir_to_llvm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index b5469677d2b..11498bc27aa 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3627,14 +3627,18 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1; outputs[noutput].usage_mask = ctx->shader_info->info.gs.output_usage_mask[i]; + int length = util_last_bit(outputs[noutput].usage_mask); - for (unsigned j = 0; j < 4; j++, out_idx++) { + for (unsigned j = 0; j < length; j++, out_idx++) { gep_idx[2] = LLVMConstInt(ctx->ac.i32, out_idx, false); tmp = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, ""); tmp = LLVMBuildLoad(builder, tmp, ""); outputs[noutput].values[j] = ac_to_float(&ctx->ac, tmp); } + for (unsigned j = length; j < 4; j++) + outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32); + noutput++; } -- 2.30.2