radeonsi: add a common function for getting the size of gs_ngg_scratch
authorMarek Olšák <marek.olsak@amd.com>
Thu, 30 Jul 2020 12:31:22 +0000 (08:31 -0400)
committerMarek Olšák <marek.olsak@amd.com>
Fri, 7 Aug 2020 15:22:22 +0000 (11:22 -0400)
The next commit will use it.

Fixes: a23802bcb9a - ac,radeonsi: start adding support for gfx10.3
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6137>

src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_internal.h

index efeb9e8838c2f0aea814bce11bfa12aff8029b02..b484ebae5225b4ba4d1cb58284e68a802c5fe837 100644 (file)
@@ -1887,6 +1887,16 @@ static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts
    *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
 }
 
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
+{
+   const struct si_shader_selector *sel = shader->selector;
+
+   if (sel->type == PIPE_SHADER_GEOMETRY && sel->so.num_outputs)
+      return 44;
+
+   return 8;
+}
+
 /**
  * Determine subgroup information like maximum number of vertices and prims.
  *
index d4a60e4653699ae304fff4ce7a1ffec53a9e59a0..b6b11292d7a3434127816ff625c63750468cf6c3 100644 (file)
@@ -1392,12 +1392,8 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
             ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
          }
 
-         unsigned scratch_size = 8;
-         if (sel->so.num_outputs)
-            scratch_size = 44;
-
          assert(!ctx->gs_ngg_scratch);
-         LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, scratch_size);
+         LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
          ctx->gs_ngg_scratch =
             LLVMAddGlobalInAddressSpace(ctx->ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
          LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(ai32));
@@ -1425,7 +1421,7 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
        * compaction is enabled.
        */
       if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) {
-         LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, 8);
+         LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
          ctx->gs_ngg_scratch =
             LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
          LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(asi32));
index 4386e07caccbeaad073a89ee7a51120bd22bb6b5..b31a9f4fde15e0ff90b030059098cc21393a1cf7 100644 (file)
@@ -219,6 +219,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LL
 void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs);
 void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx);
 void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx);
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
 bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
 
 /* si_shader_llvm.c */