From 97456e847e090577b67df7ea0a49183fc5e77462 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 30 Jul 2020 08:31:22 -0400
Subject: [PATCH] radeonsi: add a common function for getting the size of gs_ngg_scratch

The next commit will use it.

Fixes: a23802bcb9a - ac,radeonsi: start adding support for gfx10.3

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c   | 10 ++++++++++
 src/gallium/drivers/radeonsi/si_shader.c          |  8 ++------
 src/gallium/drivers/radeonsi/si_shader_internal.h |  1 +
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index efeb9e8838c..b484ebae522 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1887,6 +1887,16 @@ static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts
    *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
 }
 
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
+{
+   const struct si_shader_selector *sel = shader->selector;
+
+   if (sel->type == PIPE_SHADER_GEOMETRY && sel->so.num_outputs)
+      return 44;
+
+   return 8;
+}
+
 /**
  * Determine subgroup information like maximum number of vertices and prims.
  *
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index d4a60e46536..b6b11292d7a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1392,12 +1392,8 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
             ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
          }
 
-         unsigned scratch_size = 8;
-         if (sel->so.num_outputs)
-            scratch_size = 44;
-
          assert(!ctx->gs_ngg_scratch);
-         LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, scratch_size);
+         LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
          ctx->gs_ngg_scratch =
             LLVMAddGlobalInAddressSpace(ctx->ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
          LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(ai32));
@@ -1425,7 +1421,7 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
        * compaction is enabled.
        */
       if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) {
-         LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, 8);
+         LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
          ctx->gs_ngg_scratch =
             LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
          LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(asi32));
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 4386e07cacc..b31a9f4fde1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -219,6 +219,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LL
 void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs);
 void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx);
 void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx);
+unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
 bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
 
 /* si_shader_llvm.c */
-- 
2.30.2