radeonsi: make SI_SGPR_VERTEX_BUFFERS the last user SGPR input
author Marek Olšák <marek.olsak@amd.com>
Sun, 8 Oct 2017 17:38:37 +0000 (19:38 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 26 Feb 2018 11:01:08 +0000 (12:01 +0100)
so that it can be removed and replaced with inline VBO descriptors,
and the pointer can be packed in unused bits of VBO descriptors.
This also removes the pointer from merged TES-GS where it's useless.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index d1864d0cf82a4bc580da471424de36f1d9522449..c497c2ff1476d7701d3fb273c979e24074d7444c 100644 (file)
@@ -2127,9 +2127,19 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
 
        if (sctx->vertex_buffer_pointer_dirty) {
                struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-               unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] +
-                                    SI_SGPR_VERTEX_BUFFERS * 4;
 
+               /* Find the location of the VB descriptor pointer. */
+               /* TODO: In the future, the pointer will be packed in unused
+                *       bits of the first 2 VB descriptors. */
+               unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
+               if (sctx->b.chip_class >= GFX9) {
+                       if (sctx->tes_shader.cso)
+                               sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
+                       else if (sctx->gs_shader.cso)
+                               sh_dw_offset = GFX9_GS_NUM_USER_SGPR;
+               }
+
+               unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4;
                si_emit_shader_pointer_head(cs, sh_offset, 1);
                si_emit_shader_pointer_body(sctx->screen, cs,
                                            sctx->vb_descriptors_buffer->gpu_address +
index 54f90fcac7276492f742149500e746a8800e7ed4..1f5af71653a8c4336ff9a4b0482c10b1f9e0d74d 100644 (file)
@@ -4558,8 +4558,6 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx,
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
                                            struct si_function_info *fninfo)
 {
-       ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
-               ac_array_in_const32_addr_space(ctx->v4i32));
        add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
        add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
        add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
@@ -4661,6 +4659,8 @@ static void create_function(struct si_shader_context *ctx)
 
                declare_per_stage_desc_pointers(ctx, &fninfo, true);
                declare_vs_specific_input_sgprs(ctx, &fninfo);
+               ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+                       ac_array_in_const32_addr_space(ctx->v4i32));
 
                if (shader->key.as_es) {
                        ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
@@ -4733,6 +4733,10 @@ static void create_function(struct si_shader_context *ctx)
                ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
+               if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
+                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
+               ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+                       ac_array_in_const32_addr_space(ctx->v4i32));
 
                /* VGPRs (first TCS, then VS) */
                add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id);
@@ -4790,9 +4794,6 @@ static void create_function(struct si_shader_context *ctx)
                        ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                        ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                        add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-                       if (!HAVE_32BIT_POINTERS)
-                               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                        ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
                }
 
@@ -4800,6 +4801,10 @@ static void create_function(struct si_shader_context *ctx)
                        declare_samplers_and_images(ctx, &fninfo,
                                                    ctx->type == PIPE_SHADER_GEOMETRY);
                }
+               if (ctx->type == PIPE_SHADER_VERTEX) {
+                       ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
+                               ac_array_in_const32_addr_space(ctx->v4i32));
+               }
 
                /* VGPRs (first GS, then VS/TES) */
                ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
@@ -7324,7 +7329,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
-               add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
index 1b1f650f8690b1c4459e24788e8576bdca3128a4..471f2e9f5893344cb3e1ec42a22a31735fddc983 100644 (file)
@@ -169,11 +169,7 @@ enum {
        SI_NUM_RESOURCE_SGPRS,
 
        /* all VS variants */
-       SI_SGPR_VERTEX_BUFFERS  = SI_NUM_RESOURCE_SGPRS,
-#if !HAVE_32BIT_POINTERS
-       SI_SGPR_VERTEX_BUFFERS_HI,
-#endif
-       SI_SGPR_BASE_VERTEX,
+       SI_SGPR_BASE_VERTEX = SI_NUM_RESOURCE_SGPRS,
        SI_SGPR_START_INSTANCE,
        SI_SGPR_DRAWID,
        SI_SGPR_VS_STATE_BITS,
@@ -209,6 +205,9 @@ enum {
        GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
        GFX9_SGPR_TCS_OUT_OFFSETS,
        GFX9_SGPR_TCS_OUT_LAYOUT,
+#if !HAVE_32BIT_POINTERS
+       GFX9_SGPR_align_for_vb_pointer,
+#endif
        GFX9_TCS_NUM_USER_SGPR,
 
        /* GS limits */
index ad38a0f31b6e8aa6ad7623800d455c5d31962d6a..3643ba500b60b82f02e5d8723f12201e79f945a6 100644 (file)
@@ -459,6 +459,17 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader)
        return shader->pm4;
 }
 
+static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
+{
+       /* Add the pointer to VBO descriptors. */
+       if (HAVE_32BIT_POINTERS) {
+               return num_always_on_user_sgprs + 1;
+       } else {
+               assert(num_always_on_user_sgprs % 2 == 0);
+               return num_always_on_user_sgprs + 2;
+       }
+}
+
 static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
 {
        struct si_pm4_state *pm4;
@@ -488,7 +499,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
                           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
                           S_00B528_DX10_CLAMP(1) |
                           S_00B528_FLOAT_MODE(shader->config.float_mode);
-       shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) |
+       shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) |
                           S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
 }
 
@@ -515,9 +526,12 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
                 */
                ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1;
 
+               unsigned num_user_sgprs =
+                       si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR);
+
                shader->config.rsrc2 =
-                       S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) |
-                       S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) |
+                       S_00B42C_USER_SGPR(num_user_sgprs) |
+                       S_00B42C_USER_SGPR_MSB(num_user_sgprs >> 5) |
                        S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
        } else {
                si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
@@ -562,7 +576,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
        if (shader->selector->type == PIPE_SHADER_VERTEX) {
                /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
                vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0;
-               num_user_sgprs = SI_VS_NUM_USER_SGPR;
+               num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
        } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
                vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2;
                num_user_sgprs = SI_TES_NUM_USER_SGPR;
@@ -769,6 +783,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
                else
                        gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
 
+               unsigned num_user_sgprs;
+               if (es_type == PIPE_SHADER_VERTEX)
+                       num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR);
+               else
+                       num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
+
                gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info);
 
                si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
@@ -781,8 +801,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
                               S_00B228_FLOAT_MODE(shader->config.float_mode) |
                               S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
                si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
-                              S_00B22C_USER_SGPR(GFX9_GS_NUM_USER_SGPR) |
-                              S_00B22C_USER_SGPR_MSB(GFX9_GS_NUM_USER_SGPR >> 5) |
+                              S_00B22C_USER_SGPR(num_user_sgprs) |
+                              S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) |
                               S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
                               S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) |
                               S_00B22C_LDS_SIZE(gs_info.lds_size) |
@@ -887,7 +907,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
                        num_user_sgprs = SI_SGPR_VS_BLIT_DATA +
                                         info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS];
                } else {
-                       num_user_sgprs = SI_VS_NUM_USER_SGPR;
+                       num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR);
                }
        } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
                vgpr_comp_cnt = enable_prim_id ? 3 : 2;