From 5ebc76471c48b1831114db2558bdc33a7dc0be05 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 9 Sep 2019 10:29:22 +0200 Subject: [PATCH] radv/gfx10: adjust the GS NGG scratch size for streamout It needs more space for multiple streams. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_nir_to_llvm.c | 6 ++++-- src/amd/vulkan/radv_shader.c | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index d9c91f0591b..5019fa301c1 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -4208,9 +4208,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); } - /* TODO: streamout */ + unsigned scratch_size = 8; + if (ctx.shader_info->so.num_outputs) + scratch_size = 44; - LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, 8); + LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size); ctx.gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx.ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c8dd54fae53..02a8712a972 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -900,9 +900,23 @@ radv_shader_variant_create(struct radv_device *device, * size randomly hangs with CTS. Just use the maximum * possible LDS size for now. 
*/ + unsigned ngg_scratch_size = 8 * 4; + if (binary->info.so.num_outputs) { + /* Memory layout of NGG streamout scratch (dword indices): + * [0-3]: number of generated primitives + * [4-7]: number of emitted primitives + * [8-11]: streamout offsets + * [12-19]: primitive offsets for stream 0 + * [20-27]: primitive offsets for stream 1 + * [28-35]: primitive offsets for stream 2 + * [36-43]: primitive offsets for stream 3 + */ + ngg_scratch_size = 44 * 4; + } + struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; sym->name = "esgs_ring"; - sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - 32; /* 32 is NGG scratch */ + sym->size = (32 * 1024) - (binary->info.ngg_info.ngg_emit_size * 4) - ngg_scratch_size; sym->align = 64 * 1024; } -- 2.30.2