radeonsi: remove 2 unused user SGPRs from merged TES-GS with 32-bit pointers
author: Marek Olšák <marek.olsak@amd.com>
Thu, 8 Feb 2018 16:26:16 +0000 (17:26 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Mon, 26 Feb 2018 11:01:19 +0000 (12:01 +0100)
The effect of the last 13 commits on user SGPR counts:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index c497c2ff1476d7701d3fb273c979e24074d7444c..4a0cc29091144005508ff00fb13b23b0cabb3cf4 100644 (file)
@@ -2136,7 +2136,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
                        if (sctx->tes_shader.cso)
                                sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
                        else if (sctx->gs_shader.cso)
-                               sh_dw_offset = GFX9_GS_NUM_USER_SGPR;
+                               sh_dw_offset = GFX9_VSGS_NUM_USER_SGPR;
                }
 
                unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;
index 1f5af71653a8c4336ff9a4b0482c10b1f9e0d74d..b69c1bbb24b331bdff0e8f228121891b100315ae 100644 (file)
@@ -3430,7 +3430,12 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
                                  8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
 #endif
 
-       unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
+       unsigned vgpr;
+       if (ctx->type == PIPE_SHADER_VERTEX)
+               vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
+       else
+               vgpr = 8 + GFX9_TESGS_NUM_USER_SGPR;
+
        for (unsigned i = 0; i < 5; i++) {
                unsigned param = ctx->param_gs_vtx01_offset + i;
                ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
@@ -4789,12 +4794,13 @@ static void create_function(struct si_shader_context *ctx)
                if (ctx->type == PIPE_SHADER_VERTEX) {
                        declare_vs_specific_input_sgprs(ctx, &fninfo);
                } else {
-                       /* TESS_EVAL (and also GEOMETRY):
-                        * Declare as many input SGPRs as the VS has. */
                        ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                        ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
-                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-                       ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+                       if (!HAVE_32BIT_POINTERS) {
+                               /* Declare as many input SGPRs as the VS has. */
+                               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+                               ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+                       }
                }
 
                if (!HAVE_32BIT_POINTERS) {
@@ -4822,8 +4828,15 @@ static void create_function(struct si_shader_context *ctx)
 
                if (ctx->type == PIPE_SHADER_VERTEX ||
                    ctx->type == PIPE_SHADER_TESS_EVAL) {
+                       unsigned num_user_sgprs;
+
+                       if (ctx->type == PIPE_SHADER_VERTEX)
+                               num_user_sgprs = GFX9_VSGS_NUM_USER_SGPR;
+                       else
+                               num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
+
                        /* ES return values are inputs to GS. */
-                       for (i = 0; i < 8 + GFX9_GS_NUM_USER_SGPR; i++)
+                       for (i = 0; i < 8 + num_user_sgprs; i++)
                                returns[num_returns++] = ctx->i32; /* SGPRs */
                        for (i = 0; i < 5; i++)
                                returns[num_returns++] = ctx->f32; /* VGPRs */
@@ -6342,7 +6355,10 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
        si_init_function_info(&fninfo);
 
        if (ctx->screen->info.chip_class >= GFX9) {
-               num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR;
+               if (key->gs_prolog.states.gfx9_prev_is_vs)
+                       num_sgprs = 8 + GFX9_VSGS_NUM_USER_SGPR;
+               else
+                       num_sgprs = 8 + GFX9_TESGS_NUM_USER_SGPR;
                num_vgprs = 5; /* ES inputs are not needed by GS */
        } else {
                num_sgprs = GFX6_GS_NUM_USER_SGPR + 2;
index 471f2e9f5893344cb3e1ec42a22a31735fddc983..f58978989d4dac84ac081495ed8701973e8d910f 100644 (file)
@@ -212,7 +212,13 @@ enum {
 
        /* GS limits */
        GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
-       GFX9_GS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
+#if HAVE_32BIT_POINTERS
+       GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
+       GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR,
+#else
+       GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
+       GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
+#endif
        SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2),
 
        /* PS only */
@@ -425,6 +431,7 @@ struct si_tcs_epilog_bits {
 
 struct si_gs_prolog_bits {
        unsigned        tri_strip_adj_fix:1;
+       unsigned        gfx9_prev_is_vs:1;
 };
 
 /* Common PS bits between the shader key and the prolog key. */
index 3643ba500b60b82f02e5d8723f12201e79f945a6..b28352951a50d5f5e43bf328c5fa6a9e3b05e3e6 100644 (file)
@@ -785,9 +785,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
 
                unsigned num_user_sgprs;
                if (es_type == PIPE_SHADER_VERTEX)
-                       num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR);
+                       num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_VSGS_NUM_USER_SGPR);
                else
-                       num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
+                       num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
 
                gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
 
@@ -1317,6 +1317,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                                si_shader_selector_key_vs(sctx, sctx->vs_shader.cso,
                                                          key, &key->part.gs.vs_prolog);
                                key->part.gs.es = sctx->vs_shader.cso;
+                               key->part.gs.prolog.gfx9_prev_is_vs = 1;
                        }
 
                        /* Merged ES-GS can have unbalanced wave usage.