radv/gfx10: unconditionally declare scratch space for NGG streamout without GS
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 9 Sep 2019 08:32:10 +0000 (10:32 +0200)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Sep 2019 10:08:22 +0000 (12:08 +0200)
Streamout outputs are stored in the ESGS ring.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_nir_to_llvm.c

index 5019fa301c1903660e29a20da11e4507c200de8c..4454502cc31cc3f0c77948d848c22c78da575308 100644 (file)
@@ -4190,6 +4190,28 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
            shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
                ac_nir_fixup_ls_hs_input_vgprs(&ctx);
 
+       if (shaders[shader_count - 1]->info.stage != MESA_SHADER_GEOMETRY &&
+           (ctx.options->key.vs_common_out.as_ngg &&
+            !ctx.options->key.vs_common_out.as_es)) {
+               /* Unconditionally declare scratch space base for streamout and
+                * vertex compaction. Whether space is actually allocated is
+                * determined during linking / PM4 creation.
+                *
+                * Add an extra dword per vertex to ensure an odd stride, which
+                * avoids bank conflicts for SoA accesses.
+                */
+               declare_esgs_ring(&ctx);
+
+               /* This is really only needed when streamout and / or vertex
+                * compaction is enabled.
+                */
+               LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
+               ctx.gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx.ac.module,
+                       asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+               LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
+               LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+       }
+
        for(int i = 0; i < shader_count; ++i) {
                ctx.stage = shaders[i]->info.stage;
                ctx.shader = shaders[i];