radv/gfx10: adjust the GS NGG scratch size for streamout
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 9 Sep 2019 08:29:22 +0000 (10:29 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Sep 2019 10:08:22 +0000 (12:08 +0200)
It needs more space for multiple streams.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_shader.c

index d9c91f0591b8b08f98f2bd3b8b5f58a68dda16f7..5019fa301c1903660e29a20da11e4507c200de8c 100644 (file)
@@ -4208,9 +4208,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                                ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
                                }
 
-                               /* TODO: streamout */
+                               unsigned scratch_size = 8;
+                               if (ctx.shader_info->so.num_outputs)
+                                       scratch_size = 44;
 
-                               LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, 8);
+                               LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
                                ctx.gs_ngg_scratch =
                                        LLVMAddGlobalInAddressSpace(ctx.ac.module,
                                                                    ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
index c8dd54fae5338dc38f5f2c1a782170473406bd25..02a8712a972c90b1b9a3c9226a2434361fc08c61 100644 (file)
@@ -900,9 +900,23 @@ radv_shader_variant_create(struct radv_device *device,
                         * size randomly hangs with CTS. Just use the maximum
                         * possible LDS size for now.
                         */
+                       unsigned ngg_scratch_size = 8 * 4;
+                       if (binary->info.so.num_outputs) {
+                               /* Memory layout of NGG streamout scratch:
+                                * [0-3]: number of generated primitives
+                                * [4-7]: number of emitted primitives
+                                * [8-11]: streamout offsets
+                                * [12:19]: primitive offsets for stream 0
+                                * [20:27]: primitive offsets for stream 1
+                                * [28:35]: primitive offsets for stream 2
+                                * [36:43]: primitive offsets for stream 3
+                                */
+                               ngg_scratch_size = 44 * 4;
+                       }
+
                        struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
                        sym->name = "esgs_ring";
-                       sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - 32; /* 32 is NGG scratch */
+                       sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - ngg_scratch_size;
                        sym->align = 64 * 1024;
                }