From 3b2e93e472d185a5df5ec3c3d23c8744a0c23e42 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 22 Apr 2017 17:27:10 +0200 Subject: [PATCH] radeonsi: get InstanceID from VGPR1 (or VGPR2 for tess) instead of VGPR3 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit VGPR1 = InstanceID / StepRate0; // StepRate0 can be set to 1 Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 20 +++++++++++----- src/gallium/drivers/radeonsi/si_shader.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 1 + .../drivers/radeonsi/si_state_shaders.c | 24 +++++++++++++------ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6c57fa2a75a..59abdb755e1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5846,9 +5846,14 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, struct si_shader *shader = ctx->shader; params[ctx->param_vertex_id = (*num_params)++] = ctx->i32; - params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32; - params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32; - params[ctx->param_instance_id = (*num_params)++] = ctx->i32; + if (shader->key.as_ls) { + params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32; + params[ctx->param_instance_id = (*num_params)++] = ctx->i32; + } else { + params[ctx->param_instance_id = (*num_params)++] = ctx->i32; + params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32; + } + params[(*num_params)++] = ctx->i32; /* unused */ if (!shader->is_gs_copy_shader) { /* Vertex load indices. 
*/ @@ -7368,11 +7373,14 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, key->vs_prolog.states = *prolog_key; key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; + key->vs_prolog.as_ls = shader_out->key.as_ls; - if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) + if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) { + key->vs_prolog.as_ls = 1; key->vs_prolog.num_merged_next_stage_vgprs = 2; - else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) + } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) { key->vs_prolog.num_merged_next_stage_vgprs = 5; + } /* Set the instanceID flag. */ for (unsigned i = 0; i < info->num_inputs; i++) @@ -8379,7 +8387,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0; ctx->param_vertex_id = first_vs_vgpr; - ctx->param_instance_id = first_vs_vgpr + 3; + ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1); /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ params = alloca(num_all_input_regs * sizeof(LLVMTypeRef)); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index fc26c882701..1eb9c0bc29d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -437,6 +437,7 @@ union si_shader_part_key { /* For merged stages such as LS-HS, HS input VGPRs are first. */ unsigned num_merged_next_stage_vgprs:3; unsigned last_input:4; + unsigned as_ls:1; /* Prologs for monolithic shaders shouldn't set EXEC. 
*/ unsigned is_monolithic:1; } vs_prolog; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 39494cc1a17..938e7fb06a9 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4341,6 +4341,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); + si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (sctx->b.chip_class < CIK) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6a6fce6939c..6b910778536 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -458,8 +458,10 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); /* We need at least 2 components for LS. - * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ - vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; + * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). + * StepRate0 is set to 1, so that VGPR3 doesn't have to be loaded. + */ + vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1; si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40); @@ -491,8 +493,10 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, va >> 40); /* We need at least 2 components for LS. - * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ - ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; + * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). 
+ * StepRate0 is set to 1, so that VGPR3 doesn't have to be loaded. + */ + ls_vgpr_comp_cnt = shader->info.uses_instanceid ? 2 : 1; if (shader->config.scratch_bytes_per_wave) { fprintf(stderr, "HS: scratch buffer unsupported"); @@ -544,7 +548,8 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY); if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0; + /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ + vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0; num_user_sgprs = SI_VS_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2; @@ -759,7 +764,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) struct gfx9_gs_info gs_info; if (es_type == PIPE_SHADER_VERTEX) - es_vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0; + /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ + es_vgpr_comp_cnt = shader->info.uses_instanceid ? 1 : 0; else if (es_type == PIPE_SHADER_TESS_EVAL) es_vgpr_comp_cnt = shader->key.part.gs.es->info.uses_primid ? 3 : 2; else @@ -876,7 +882,11 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */ num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); + /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID) + * If PrimID is disabled, InstanceID / StepRate1 is loaded instead. + * StepRate0 is set to 1, so that VGPR3 doesn't have to be loaded. + */ + vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 
1 : 0); num_user_sgprs = SI_VS_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = shader->selector->info.uses_primid ? 3 : 2; -- 2.30.2