radeonsi/gfx10: set PA_CL_VS_OUT_CNTL with CONTEXT_REG_RMW to fix edge flags
author Marek Olšák <marek.olsak@amd.com>
Wed, 21 Aug 2019 04:18:17 +0000 (00:18 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 27 Aug 2019 20:16:08 +0000 (16:16 -0400)
We need two different values of the register, one for NGG and one for
legacy, in order to fix edge flags for the legacy pipeline.

Passing the ngg flag to emit_clip_regs would be too complicated,
so CONTEXT_REG_RMW is used for partial register updates.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index f34286ecaeccdfb1c45332a2d68e94f241c058e2..5df6e0807922ff18b7a09c5e485878efe9f39c50 100644 (file)
@@ -464,6 +464,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 
         ctx->index_ring_offset = 0;
 
+       STATIC_ASSERT(SI_NUM_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved) * 8);
+
        if (has_clear_state) {
                ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0x00000000;
@@ -480,7 +482,8 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
-               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__VS] = 0x00000000;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
                ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
index 1610cddac3c87cdcce00bd966832090975a8f17d..8b5c70068e41e9d4ec88b0d0b2c94424c41386d7 100644 (file)
@@ -709,6 +709,7 @@ struct si_shader {
        /*For save precompute registers value */
        unsigned vgt_tf_param; /* VGT_TF_PARAM */
        unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+       unsigned pa_cl_vs_out_cntl;
        unsigned ge_cntl;
 };
 
index 8c47002c99f1bcb34b395cce14c68196265fd03b..86ca4bac57a508c1471068748fd1574e899dfb91 100644 (file)
@@ -801,12 +801,20 @@ static void si_emit_clip_regs(struct si_context *sctx)
        culldist_mask |= clipdist_mask;
 
        unsigned initial_cdw = sctx->gfx_cs->current.cdw;
-       radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
-               SI_TRACKED_PA_CL_VS_OUT_CNTL,
-               vs_sel->pa_cl_vs_out_cntl |
-               S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
-               S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
-               clipdist_mask | (culldist_mask << 8));
+       unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
+                             S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+                             clipdist_mask | (culldist_mask << 8);
+
+       if (sctx->chip_class >= GFX10) {
+               radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+                                              SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,
+                                              pa_cl_cntl,
+                                              ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
+       } else {
+               radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+                                          SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,
+                                          vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl);
+       }
        radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL,
                SI_TRACKED_PA_CL_CLIP_CNTL,
                rs->pa_cl_clip_cntl |
index 3480c4f7eeb1b8967e15547a3120dfa2246f43fd..e3e6cf293e1c59fd79293e08b7da80d0c72da02e 100644 (file)
@@ -259,6 +259,14 @@ struct si_shader_data {
        uint32_t                sh_base[SI_NUM_SHADERS];
 };
 
+#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \
+       (S_02881C_USE_VTX_POINT_SIZE(1) | \
+        S_02881C_USE_VTX_EDGE_FLAG(1) | \
+        S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | \
+        S_02881C_USE_VTX_VIEWPORT_INDX(1) | \
+        S_02881C_VS_OUT_MISC_VEC_ENA(1) | \
+        S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1)) /* PA_CL_VS_OUT_CNTL fields filled in by si_get_vs_out_cntl(); passed as the RMW mask for the __VS tracked value, and complemented for the __CL one. */
+
 /* The list of registers whose emitted values are remembered by si_context. */
 enum si_tracked_reg {
        SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */
@@ -283,7 +291,8 @@ enum si_tracked_reg {
        SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
        SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
 
-       SI_TRACKED_PA_CL_VS_OUT_CNTL,
+       SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, /* set with SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */
+       SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, /* set with ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */
        SI_TRACKED_PA_CL_CLIP_CNTL,
 
        SI_TRACKED_PA_SC_BINNER_CNTL_0,
index 6aba3f39ad48ff539a0d2047db927eb8b8ab2207..a1ec9884ddfd063239a681b4ff94b56e8e9f3d4c 100644 (file)
@@ -1007,6 +1007,11 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx,
                                   SI_TRACKED_PA_CL_NGG_CNTL,
                                   shader->ctx_reg.ngg.pa_cl_ngg_cntl);
 
+       radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+                                      SI_TRACKED_PA_CL_VS_OUT_CNTL__VS,
+                                      shader->pa_cl_vs_out_cntl,
+                                      SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
+
        if (initial_cdw != sctx->gfx_cs->current.cdw)
                sctx->context_roll = true;
 }
@@ -1087,6 +1092,19 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
        return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
 }
 
+static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg) /* Build the shader-dependent PA_CL_VS_OUT_CNTL fields from sel->info. */
+{ /* ngg: callers pass true from gfx10_shader_ngg and false from si_shader_vs / si_create_shader_selector. */
+       bool misc_vec_ena = /* true when at least one misc output (psize, edge flag, layer, viewport index) is enabled */
+               sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) || /* the edge flag only counts when !ngg */
+               sel->info.writes_layer || sel->info.writes_viewport_index;
+       return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
+              S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | /* never set for NGG. NOTE(review): presumably NGG handles edge flags via PA_CL_NGG_CNTL instead; confirm. */
+              S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
+              S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
+              S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | /* both MISC enables follow the same condition */
+              S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena);
+}
+
 /**
  * Prepare the PM4 image for \p shader, which will run as a merged ESGS shader
  * in NGG mode.
@@ -1232,6 +1250,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
         */
        shader->ctx_reg.ngg.pa_cl_ngg_cntl =
                S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_type == PIPE_SHADER_VERTEX);
+       shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
 
        shader->ge_cntl =
                S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
@@ -1323,6 +1342,13 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                           S_028A44_GS_PRIMS_PER_SUBGRP(126) |
                                           S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
        }
+
+       if (sctx->chip_class >= GFX10) {
+               radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
+                                              SI_TRACKED_PA_CL_VS_OUT_CNTL__VS,
+                                              shader->pa_cl_vs_out_cntl,
+                                              SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
+       }
 }
 
 /**
@@ -1427,6 +1453,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
                        S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ?
                                                    V_02870C_SPI_SHADER_4COMP :
                                                    V_02870C_SPI_SHADER_NONE);
+       shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false);
 
        oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0;
 
@@ -2856,16 +2883,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        }
 
        /* PA_CL_VS_OUT_CNTL */
-       bool misc_vec_ena =
-               sel->info.writes_psize || sel->info.writes_edgeflag ||
-               sel->info.writes_layer || sel->info.writes_viewport_index;
-       sel->pa_cl_vs_out_cntl =
-               S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
-               S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag) |
-               S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
-               S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
-               S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
-               S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena);
+       if (sctx->chip_class <= GFX9)
+               sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
+
        sel->clipdist_mask = sel->info.writes_clipvertex ?
                                     SIX_BITS : sel->info.clipdist_writemask;
        sel->culldist_mask = sel->info.culldist_writemask <<