From 8799eaed993995e93714cf42c80409b5d53a9537 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 8 Feb 2018 17:26:16 +0100 Subject: [PATCH] radeonsi: remove 2 unused user SGPRs from merged TES-GS with 32-bit pointers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The effect of the last 13 commits on user SGPR counts: Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_descriptors.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 30 ++++++++++++++----- src/gallium/drivers/radeonsi/si_shader.h | 9 +++++- .../drivers/radeonsi/si_state_shaders.c | 5 ++-- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index c497c2ff147..4a0cc290911 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2136,7 +2136,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, if (sctx->tes_shader.cso) sh_dw_offset = GFX9_TCS_NUM_USER_SGPR; else if (sctx->gs_shader.cso) - sh_dw_offset = GFX9_GS_NUM_USER_SGPR; + sh_dw_offset = GFX9_VSGS_NUM_USER_SGPR; } unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1f5af71653a..b69c1bbb24b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3430,7 +3430,12 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); #endif - unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; + unsigned vgpr; + if (ctx->type == PIPE_SHADER_VERTEX) + vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR; + else + vgpr = 8 + GFX9_TESGS_NUM_USER_SGPR; + for (unsigned i = 0; i < 5; i++) { unsigned param = ctx->param_gs_vtx01_offset + i; ret = si_insert_input_ret_float(ctx, ret, param, vgpr++); @@ -4789,12 +4794,13 @@ static void create_function(struct si_shader_context *ctx) if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_specific_input_sgprs(ctx, &fninfo); } else { - /* TESS_EVAL (and also GEOMETRY): - * Declare as many input SGPRs as the VS has. */ ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + if (!HAVE_32BIT_POINTERS) { + /* Declare as many input SGPRs as the VS has. */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + } } if (!HAVE_32BIT_POINTERS) { @@ -4822,8 +4828,15 @@ static void create_function(struct si_shader_context *ctx) if (ctx->type == PIPE_SHADER_VERTEX || ctx->type == PIPE_SHADER_TESS_EVAL) { + unsigned num_user_sgprs; + + if (ctx->type == PIPE_SHADER_VERTEX) + num_user_sgprs = GFX9_VSGS_NUM_USER_SGPR; + else + num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR; + /* ES return values are inputs to GS. */ - for (i = 0; i < 8 + GFX9_GS_NUM_USER_SGPR; i++) + for (i = 0; i < 8 + num_user_sgprs; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 5; i++) returns[num_returns++] = ctx->f32; /* VGPRs */ @@ -6342,7 +6355,10 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx, si_init_function_info(&fninfo); if (ctx->screen->info.chip_class >= GFX9) { - num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR; + if (key->gs_prolog.states.gfx9_prev_is_vs) + num_sgprs = 8 + GFX9_VSGS_NUM_USER_SGPR; + else + num_sgprs = 8 + GFX9_TESGS_NUM_USER_SGPR; num_vgprs = 5; /* ES inputs are not needed by GS */ } else { num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 471f2e9f589..f58978989d4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -212,7 +212,13 @@ enum { /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, - GFX9_GS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, +#if HAVE_32BIT_POINTERS + GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, + GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR, +#else + GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, + GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, +#endif SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2), /* PS only */ @@ -425,6 +431,7 @@ struct si_tcs_epilog_bits { struct si_gs_prolog_bits { unsigned tri_strip_adj_fix:1; + unsigned gfx9_prev_is_vs:1; }; /* Common PS bits between the shader key and the prolog key. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 3643ba500b6..b28352951a5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -785,9 +785,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) unsigned num_user_sgprs; if (es_type == PIPE_SHADER_VERTEX) - num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR); + num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_VSGS_NUM_USER_SGPR); else - num_user_sgprs = GFX9_GS_NUM_USER_SGPR; + num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR; gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info); @@ -1317,6 +1317,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, si_shader_selector_key_vs(sctx, sctx->vs_shader.cso, key, &key->part.gs.vs_prolog); key->part.gs.es = sctx->vs_shader.cso; + key->part.gs.prolog.gfx9_prev_is_vs = 1; } /* Merged ES-GS can have unbalanced wave usage. -- 2.30.2