radv/gfx10: fix storing/loading NGG stream outputs for GS
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 17 Sep 2019 09:01:01 +0000 (11:01 +0200)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 2 Oct 2019 16:09:32 +0000 (18:09 +0200)
The GS outputs are stored differently in the LDS storage, they
are indexed by out_idx which is incremented for each stored DWORD.
Thus, we need a different path for exporting the stream outputs.

This fixes a bunch of CTS failures when NGG GS is force enabled.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_nir_to_llvm.c

index 69a5e0bb9f9b66fb7ba81d6d26617f6c09fe2f2f..46d9ae970491e989f76d384cbd46d7ff79c81e7f 100644 (file)
@@ -3175,6 +3175,17 @@ static void build_export_prim(struct radv_shader_context *ctx,
        ac_build_export(&ctx->ac, &args);
 }
 
+static struct radv_stream_output *
+radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location)
+{
+       for (unsigned i = 0; i < so->num_outputs; ++i) {
+               if (so->outputs[i].location == location)
+                       return &so->outputs[i];
+       }
+
+       return NULL;
+}
+
 static void build_streamout_vertex(struct radv_shader_context *ctx,
                                   LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw,
                                   unsigned stream, LLVMValueRef offset_vtx,
@@ -3195,25 +3206,79 @@ static void build_streamout_vertex(struct radv_shader_context *ctx,
                offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
        }
 
-       for (unsigned i = 0; i < so->num_outputs; ++i) {
-               struct radv_stream_output *output =
-                       &ctx->shader_info->so.outputs[i];
+       if (ctx->stage == MESA_SHADER_GEOMETRY) {
+               struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS];
+               unsigned noutput = 0;
+               unsigned out_idx = 0;
 
-               if (stream != output->stream)
-                       continue;
+               for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+                       unsigned output_usage_mask =
+                               ctx->shader_info->gs.output_usage_mask[i];
+                       uint8_t output_stream =
+                               output_stream = ctx->shader_info->gs.output_streams[i];
 
-               struct radv_shader_output_values out = {};
+                       if (!(ctx->output_mask & (1ull << i)) ||
+                           output_stream != stream)
+                               continue;
 
-               for (unsigned comp = 0; comp < 4; comp++) {
-                       if (!(output->component_mask & (1 << comp)))
+                       outputs[noutput].slot_name = i;
+                       outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+                       outputs[noutput].usage_mask = output_usage_mask;
+
+                       int length = util_last_bit(output_usage_mask);
+
+                       for (unsigned j = 0; j < length; j++, out_idx++) {
+                               if (!(output_usage_mask & (1 << j)))
+                                       continue;
+
+                               tmp = ac_build_gep0(&ctx->ac, vertexptr,
+                                                   LLVMConstInt(ctx->ac.i32, out_idx, false));
+                               outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, "");
+                       }
+
+                       for (unsigned j = length; j < 4; j++)
+                               outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
+
+                       noutput++;
+               }
+
+               for (unsigned i = 0; i < noutput; i++) {
+                       struct radv_stream_output *output =
+                               radv_get_stream_output_by_loc(so, outputs[i].slot_name);
+
+                       if (!output ||
+                           output->stream != stream)
                                continue;
 
-                       tmp = ac_build_gep0(&ctx->ac, vertexptr,
-                                           LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
-                       out.values[comp] = LLVMBuildLoad(builder, tmp, "");
+                       struct radv_shader_output_values out = {};
+
+                       for (unsigned j = 0; j < 4; j++) {
+                               out.values[j] = outputs[i].values[j];
+                       }
+
+                       radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
                }
+       } else {
+               for (unsigned i = 0; i < so->num_outputs; ++i) {
+                       struct radv_stream_output *output =
+                               &ctx->shader_info->so.outputs[i];
+
+                       if (stream != output->stream)
+                               continue;
+
+                       struct radv_shader_output_values out = {};
+
+                       for (unsigned comp = 0; comp < 4; comp++) {
+                               if (!(output->component_mask & (1 << comp)))
+                                       continue;
 
-               radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+                               tmp = ac_build_gep0(&ctx->ac, vertexptr,
+                                                   LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
+                               out.values[comp] = LLVMBuildLoad(builder, tmp, "");
+                       }
+
+                       radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+               }
        }
 }