With NGG, the VGT_GS_OUT_PRIM_TYPE can change without a shader change.
The VS_STATE is required for both streamout and culling from a vertex
shader without pre-compiling outprim-specific variants.
We could consider compiling specialized variants in the future. We
could also consider compiling the NGG logic as an epilog.
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0x00000000;
#define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE
#define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1)
#define C_VS_STATE_INDEXED 0xFFFFFFFD
+#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2)
+#define C_VS_STATE_OUTPRIM 0xFFFFFFF3
#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
unsigned vgt_gsvs_ring_offset_1;
unsigned vgt_gsvs_ring_offset_2;
unsigned vgt_gsvs_ring_offset_3;
- unsigned vgt_gs_out_prim_type;
unsigned vgt_gsvs_ring_itemsize;
unsigned vgt_gs_max_vert_out;
unsigned vgt_gs_vert_itemsize;
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
- SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 4 consecutive registers */
+ SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 3 consecutive registers */
SI_TRACKED_VGT_GSVS_RING_OFFSET_2,
SI_TRACKED_VGT_GSVS_RING_OFFSET_3,
- SI_TRACKED_VGT_GS_OUT_PRIM_TYPE,
SI_TRACKED_VGT_GSVS_RING_ITEMSIZE,
SI_TRACKED_VGT_GS_MAX_VERT_OUT,
return ia_multi_vgt_param;
}
+static unsigned si_conv_prim_to_gs_out(unsigned mode)
+{
+ static const int prim_conv[] = {
+ [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
+ [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
+ };
+ assert(mode < ARRAY_SIZE(prim_conv));
+
+ return prim_conv[mode];
+}
+
/* rast_prim is the primitive type after GS. */
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- /* Skip this if not rendering lines. */
- if (!util_prim_is_lines(rast_prim))
+ if (likely(rast_prim == sctx->last_rast_prim &&
+ rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
return;
- if (rast_prim == sctx->last_rast_prim &&
- rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
- return;
+ if (util_prim_is_lines(rast_prim)) {
+ /* For lines, reset the stipple pattern at each primitive. Otherwise,
+ * reset the stipple pattern at each packet (line strips, line loops).
+ */
+ radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+ rs->pa_sc_line_stipple |
+ S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
+ sctx->context_roll = true;
+ }
- /* For lines, reset the stipple pattern at each primitive. Otherwise,
- * reset the stipple pattern at each packet (line strips, line loops).
- */
- radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
- rs->pa_sc_line_stipple |
- S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
+ if (rast_prim != sctx->last_rast_prim &&
+ (sctx->ngg || sctx->gs_shader.cso)) {
+ unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+ radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
+ sctx->context_roll = true;
+
+ if (sctx->chip_class >= GFX10) {
+ sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
+ sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
+ }
+ }
sctx->last_rast_prim = rast_prim;
sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
- sctx->context_roll = true;
}
static void si_emit_vs_state(struct si_context *sctx,
polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
}
-static unsigned si_conv_prim_to_gs_out(unsigned mode)
-{
- static const int prim_conv[] = {
- [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
- [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
- [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
- [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
- };
- assert(mode < ARRAY_SIZE(prim_conv));
-
- return prim_conv[mode];
-}
-
void gfx9_get_gs_info(struct si_shader_selector *es,
struct si_shader_selector *gs,
struct gfx9_gs_info *out)
return;
/* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
- * R_028A68_VGT_GSVS_RING_OFFSET_3, R_028A6C_VGT_GS_OUT_PRIM_TYPE */
- radeon_opt_set_context_reg4(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
+ * R_028A68_VGT_GSVS_RING_OFFSET_3 */
+ radeon_opt_set_context_reg3(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
SI_TRACKED_VGT_GSVS_RING_OFFSET_1,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_1,
shader->ctx_reg.gs.vgt_gsvs_ring_offset_2,
- shader->ctx_reg.gs.vgt_gsvs_ring_offset_3,
- shader->ctx_reg.gs.vgt_gs_out_prim_type);
-
+ shader->ctx_reg.gs.vgt_gsvs_ring_offset_3);
/* R_028AB0_VGT_GSVS_RING_ITEMSIZE */
radeon_opt_set_context_reg(sctx, R_028AB0_VGT_GSVS_RING_ITEMSIZE,
offset += num_components[2] * sel->gs_max_out_vertices;
shader->ctx_reg.gs.vgt_gsvs_ring_offset_3 = offset;
- shader->ctx_reg.gs.vgt_gs_out_prim_type =
- si_conv_prim_to_gs_out(sel->gs_output_prim);
-
if (max_stream >= 3)
offset += num_components[3] * sel->gs_max_out_vertices;
shader->ctx_reg.gs.vgt_gsvs_ring_itemsize = offset;