ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
        ctx->last_prim = -1;
        ctx->last_multi_vgt_param = -1;
-       ctx->last_rast_prim = -1;
-       ctx->last_flatshade_first = -1;
-       ctx->last_sc_line_stipple = ~0;
        ctx->last_vs_state = ~0;
        ctx->last_ls = NULL;
        ctx->last_tcs = NULL;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_VTX_CNTL] = 0x00000005;
                ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE]     = 0xffff;
+               ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_STIPPLE]      = 0;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL]  = 0x0000001e; /* From GFX8 */
 
-               /* Set all saved registers state to saved. */
+               /* Set all cleared context registers to saved. */
                ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
+               ctx->last_gs_out_prim = 0; /* cleared by CLEAR_STATE */
        } else {
                /* Set all saved registers state to unknown. */
                ctx->tracked_regs.reg_saved = 0;
+               ctx->last_gs_out_prim = -1; /* unknown */
        }
 
        /* 0xffffffff is an impossible value for register SPI_PS_INPUT_CNTL_n */
 
        struct radeon_cmdbuf *cs = sctx->gfx_cs;
        enum pipe_prim_type rast_prim = sctx->current_rast_prim;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-       bool use_ngg = sctx->screen->use_ngg;
+       unsigned initial_cdw = cs->current.cdw;
 
-       if (likely(rast_prim == sctx->last_rast_prim &&
-                  rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
-                  (!use_ngg ||
-                   rs->flatshade_first == sctx->last_flatshade_first)))
-               return;
-
-       if (util_prim_is_lines(rast_prim)) {
+       if (unlikely(si_is_line_stipple_enabled(sctx))) {
                /* For lines, reset the stipple pattern at each primitive. Otherwise,
                 * reset the stipple pattern at each packet (line strips, line loops).
                 */
-               radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
-                       rs->pa_sc_line_stipple |
-                       S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
-               sctx->context_roll = true;
+               unsigned value = rs->pa_sc_line_stipple |
+                                S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2);
+
+               radeon_opt_set_context_reg(sctx, R_028A0C_PA_SC_LINE_STIPPLE,
+                                          SI_TRACKED_PA_SC_LINE_STIPPLE, value);
        }
 
-       unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+       unsigned gs_out_prim = si_conv_prim_to_gs_out(rast_prim);
+       if (unlikely(gs_out_prim != sctx->last_gs_out_prim &&
+                    (sctx->ngg || sctx->gs_shader.cso))) {
+               radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
+               sctx->last_gs_out_prim = gs_out_prim;
+       }
 
-       if (rast_prim != sctx->last_rast_prim &&
-           (sctx->ngg || sctx->gs_shader.cso)) {
-               radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
+       if (initial_cdw != cs->current.cdw)
                sctx->context_roll = true;
 
-               if (use_ngg) {
-                       sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
-                       sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
-               }
-       }
+       if (sctx->ngg) {
+               unsigned vtx_index = rs->flatshade_first ? 0 : gs_out_prim;
 
-       if (use_ngg) {
-               unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
-               sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
-               sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
+               sctx->current_vs_state &= C_VS_STATE_OUTPRIM &
+                                         C_VS_STATE_PROVOKING_VTX_INDEX;
+               sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out_prim) |
+                                         S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
        }
-
-       sctx->last_rast_prim = rast_prim;
-       sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
-       sctx->last_flatshade_first = rs->flatshade_first;
 }
 
 static void si_emit_vs_state(struct si_context *sctx,
 
                        sctx->flags |= SI_CONTEXT_VGT_FLUSH;
 
                sctx->ngg = new_ngg;
-               sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+               sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
                return true;
        }
        return false;
        sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL;
 
        si_update_common_shader_state(sctx);
-       sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+       sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
 
        ngg_changed = si_update_ngg(sctx);
        if (ngg_changed || enable_changed)
        si_update_tess_uses_prim_id(sctx);
 
        si_update_common_shader_state(sctx);
-       sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+       sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
 
        bool ngg_changed = si_update_ngg(sctx);
        if (ngg_changed || enable_changed)