From: Marek Olšák Date: Sun, 8 Oct 2017 17:38:37 +0000 (+0200) Subject: radeonsi: make SI_SGPR_VERTEX_BUFFERS the last user SGPR input X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3fa7a59d69b8953843edbef8b52c681cab45dd70;p=mesa.git radeonsi: make SI_SGPR_VERTEX_BUFFERS the last user SGPR input so that it can be removed and replaced with inline VBO descriptors, and the pointer can be packed in unused bits of VBO descriptors. This also removes the pointer from merged TES-GS where it's useless. Reviewed-by: Nicolai Hähnle --- diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d1864d0cf82..c497c2ff147 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2127,9 +2127,19 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, if (sctx->vertex_buffer_pointer_dirty) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + - SI_SGPR_VERTEX_BUFFERS * 4; + /* Find the location of the VB descriptor pointer. */ + /* TODO: In the future, the pointer will be packed in unused + * bits of the first 2 VB descriptors. */ + unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR; + if (sctx->b.chip_class >= GFX9) { + if (sctx->tes_shader.cso) + sh_dw_offset = GFX9_TCS_NUM_USER_SGPR; + else if (sctx->gs_shader.cso) + sh_dw_offset = GFX9_GS_NUM_USER_SGPR; + } + + unsigned sh_offset = sh_base[PIPE_SHADER_VERTEX] + sh_dw_offset * 4; si_emit_shader_pointer_head(cs, sh_offset, 1); si_emit_shader_pointer_body(sctx->screen, cs, sctx->vb_descriptors_buffer->gpu_address + diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 54f90fcac72..1f5af71653a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4558,8 +4558,6 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx, static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { - ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, - ac_array_in_const32_addr_space(ctx->v4i32)); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); @@ -4661,6 +4659,8 @@ static void create_function(struct si_shader_context *ctx) declare_per_stage_desc_pointers(ctx, &fninfo, true); declare_vs_specific_input_sgprs(ctx, &fninfo); + ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->v4i32)); if (shader->key.as_es) { ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4733,6 +4733,10 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); + if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ + ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->v4i32)); /* VGPRs (first TCS, then VS) */ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id); @@ -4790,9 +4794,6 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } @@ -4800,6 +4801,10 @@ static void create_function(struct si_shader_context *ctx) declare_samplers_and_images(ctx, &fninfo, ctx->type == PIPE_SHADER_GEOMETRY); } + if (ctx->type == PIPE_SHADER_VERTEX) { + ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->v4i32)); + } /* VGPRs (first GS, then VS/TES) */ ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32); @@ -7324,7 +7329,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); - add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1b1f650f869..471f2e9f589 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -169,11 +169,7 @@ enum { SI_NUM_RESOURCE_SGPRS, /* all VS variants */ - SI_SGPR_VERTEX_BUFFERS = SI_NUM_RESOURCE_SGPRS, -#if !HAVE_32BIT_POINTERS - SI_SGPR_VERTEX_BUFFERS_HI, -#endif - SI_SGPR_BASE_VERTEX, + SI_SGPR_BASE_VERTEX = SI_NUM_RESOURCE_SGPRS, SI_SGPR_START_INSTANCE, SI_SGPR_DRAWID, SI_SGPR_VS_STATE_BITS, @@ -209,6 +205,9 @@ enum { GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, +#if !HAVE_32BIT_POINTERS + GFX9_SGPR_align_for_vb_pointer, +#endif GFX9_TCS_NUM_USER_SGPR, /* GS limits */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ad38a0f31b6..3643ba500b6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -459,6 +459,17 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader) return shader->pm4; } +static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs) +{ + /* Add the pointer to VBO descriptors. */ + if (HAVE_32BIT_POINTERS) { + return num_always_on_user_sgprs + 1; + } else { + assert(num_always_on_user_sgprs % 2 == 0); + return num_always_on_user_sgprs + 2; + } +} + static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) { struct si_pm4_state *pm4; @@ -488,7 +499,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); - shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_VS_NUM_USER_SGPR) | + shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR)) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } @@ -515,9 +526,12 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) */ ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1; + unsigned num_user_sgprs = + si_get_num_vs_user_sgprs(GFX9_TCS_NUM_USER_SGPR); + shader->config.rsrc2 = - S_00B42C_USER_SGPR(GFX9_TCS_NUM_USER_SGPR) | - S_00B42C_USER_SGPR_MSB(GFX9_TCS_NUM_USER_SGPR >> 5) | + S_00B42C_USER_SGPR(num_user_sgprs) | + S_00B42C_USER_SGPR_MSB(num_user_sgprs >> 5) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } else { si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); @@ -562,7 +576,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) if (shader->selector->type == PIPE_SHADER_VERTEX) { /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0; - num_user_sgprs = SI_VS_NUM_USER_SGPR; + num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR); } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2; num_user_sgprs = SI_TES_NUM_USER_SGPR; @@ -769,6 +783,12 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) else gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ + unsigned num_user_sgprs; + if (es_type == PIPE_SHADER_VERTEX) + num_user_sgprs = si_get_num_vs_user_sgprs(GFX9_GS_NUM_USER_SGPR); + else + num_user_sgprs = GFX9_GS_NUM_USER_SGPR; + gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info); si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); @@ -781,8 +801,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, - S_00B22C_USER_SGPR(GFX9_GS_NUM_USER_SGPR) | - S_00B22C_USER_SGPR_MSB(GFX9_GS_NUM_USER_SGPR >> 5) | + S_00B22C_USER_SGPR(num_user_sgprs) | + S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) | S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) | S_00B22C_LDS_SIZE(gs_info.lds_size) | @@ -887,7 +907,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, num_user_sgprs = SI_SGPR_VS_BLIT_DATA + info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]; } else { - num_user_sgprs = SI_VS_NUM_USER_SGPR; + num_user_sgprs = si_get_num_vs_user_sgprs(SI_VS_NUM_USER_SGPR); } } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = enable_prim_id ? 3 : 2;