From 4dde40908fdb20ca3efce4a138293ddf829e2c0b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 21 Aug 2019 00:18:17 -0400 Subject: [PATCH] radeonsi/gfx10: set PA_CL_VS_OUT_CNTL with CONTEXT_REG_RMW to fix edge flags We need two different values of the register, one for NGG and one for legacy, in order to fix edge flags for the legacy pipeline. Passing the ngg flag to emit_clip_regs would be too complicated, so CONTEXT_REG_RMW is used for partial register updates. Acked-by: Pierre-Eric Pelloux-Prayer --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 5 ++- src/gallium/drivers/radeonsi/si_shader.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 20 +++++++--- src/gallium/drivers/radeonsi/si_state.h | 11 ++++- .../drivers/radeonsi/si_state_shaders.c | 40 ++++++++++++++----- 5 files changed, 59 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index f34286ecaec..5df6e080792 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -464,6 +464,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->index_ring_offset = 0; + STATIC_ASSERT(SI_NUM_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved) * 8); + if (has_clear_state) { ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000; @@ -480,7 +482,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000; - ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__VS] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003; ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1610cddac3c..8b5c70068e4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -709,6 +709,7 @@ struct si_shader { /*For save precompute registers value */ unsigned vgt_tf_param; /* VGT_TF_PARAM */ unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + unsigned pa_cl_vs_out_cntl; unsigned ge_cntl; }; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 8c47002c99f..86ca4bac57a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -801,12 +801,20 @@ static void si_emit_clip_regs(struct si_context *sctx) culldist_mask |= clipdist_mask; unsigned initial_cdw = sctx->gfx_cs->current.cdw; - radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, - SI_TRACKED_PA_CL_VS_OUT_CNTL, - vs_sel->pa_cl_vs_out_cntl | - S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | - clipdist_mask | (culldist_mask << 8)); + unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | + clipdist_mask | (culldist_mask << 8); + + if (sctx->chip_class >= GFX10) { + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, + pa_cl_cntl, + ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + } else { + radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, + vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl); + } radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, rs->pa_cl_clip_cntl | diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 3480c4f7eeb..e3e6cf293e1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -259,6 +259,14 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; +#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \ + (S_02881C_USE_VTX_POINT_SIZE(1) | \ + S_02881C_USE_VTX_EDGE_FLAG(1) | \ + S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | \ + S_02881C_USE_VTX_VIEWPORT_INDX(1) | \ + S_02881C_VS_OUT_MISC_VEC_ENA(1) | \ + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1)) + /* The list of registers whose emitted values are remembered by si_context. */ enum si_tracked_reg { SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */ @@ -283,7 +291,8 @@ enum si_tracked_reg { SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, - SI_TRACKED_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, /* set with SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK*/ + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, /* set with ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */ SI_TRACKED_PA_CL_CLIP_CNTL, SI_TRACKED_PA_SC_BINNER_CNTL_0, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6aba3f39ad4..a1ec9884ddf 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1007,6 +1007,11 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, SI_TRACKED_PA_CL_NGG_CNTL, shader->ctx_reg.ngg.pa_cl_ngg_cntl); + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, + shader->pa_cl_vs_out_cntl, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + if (initial_cdw != sctx->gfx_cs->current.cdw) sctx->context_roll = true; } @@ -1087,6 +1092,19 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs) return PIPE_PRIM_TRIANGLES; /* worst case for all callers */ } +static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg) +{ + bool misc_vec_ena = + sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) || + sel->info.writes_layer || sel->info.writes_viewport_index; + return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) | + S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | + S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | + S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | + S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | + S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena); +} + /** * Prepare the PM4 image for \p shader, which will run as a merged ESGS shader * in NGG mode. @@ -1232,6 +1250,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader */ shader->ctx_reg.ngg.pa_cl_ngg_cntl = S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true); shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) | @@ -1323,6 +1342,13 @@ static void si_emit_shader_vs(struct si_context *sctx) S_028A44_GS_PRIMS_PER_SUBGRP(126) | S_028A44_GS_INST_PRIMS_IN_SUBGRP(126)); } + + if (sctx->chip_class >= GFX10) { + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, + shader->pa_cl_vs_out_cntl, + SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + } } /** @@ -1427,6 +1453,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE); + shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false); oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; @@ -2856,16 +2883,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx, } /* PA_CL_VS_OUT_CNTL */ - bool misc_vec_ena = - sel->info.writes_psize || sel->info.writes_edgeflag || - sel->info.writes_layer || sel->info.writes_viewport_index; - sel->pa_cl_vs_out_cntl = - S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) | - S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag) | - S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | - S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | - S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | - S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena); + if (sctx->chip_class <= GFX9) + sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false); + sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS : sel->info.clipdist_writemask; sel->culldist_mask = sel->info.culldist_writemask << -- 2.30.2