From 77e715541c0423c4f962e9fcc8862d45bbc2eeb6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Wed, 8 May 2019 01:40:29 +0200 Subject: [PATCH] radeonsi/gfx10: emit VGT_GS_OUT_PRIM_TYPE from draw and add it to VS_STATE With NGG, the VGT_GS_OUT_PRIM_TYPE can change without a shader change. The VS_STATE is required for both streamout and culling from a vertex shader without pre-compiling outprim-specific variants. We could consider compiling specialized variants in the future. We could also consider compiling the NGG logic as an epilog. Acked-by: Bas Nieuwenhuizen --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 - src/gallium/drivers/radeonsi/si_shader.h | 3 +- src/gallium/drivers/radeonsi/si_state.h | 3 +- src/gallium/drivers/radeonsi/si_state_draw.c | 58 +++++++++++++++---- .../drivers/radeonsi/si_state_shaders.c | 35 +---------- 5 files changed, 52 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index fb9286d6b48..bb34b07095a 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -463,7 +463,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0x00000000; - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0x00000000; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 39af557bcae..9dbf08fa95f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -246,6 +246,8 @@ enum { #define C_VS_STATE_CLAMP_VERTEX_COLOR 0xFFFFFFFE #define S_VS_STATE_INDEXED(x) (((unsigned)(x) & 0x1) << 1) #define C_VS_STATE_INDEXED 0xFFFFFFFD +#define S_VS_STATE_OUTPRIM(x) (((unsigned)(x) & 0x3) << 2) +#define C_VS_STATE_OUTPRIM 0xFFFFFFF3 #define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8) #define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) @@ -666,7 +668,6 @@ struct si_shader { unsigned vgt_gsvs_ring_offset_1; unsigned vgt_gsvs_ring_offset_2; unsigned vgt_gsvs_ring_offset_3; - unsigned vgt_gs_out_prim_type; unsigned vgt_gsvs_ring_itemsize; unsigned vgt_gs_max_vert_out; unsigned vgt_gs_vert_itemsize; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 23c7b3245f5..678f87cd73d 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -298,10 +298,9 @@ enum si_tracked_reg { SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, - SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 4 consecutive registers */ + SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 3 consecutive registers */ SI_TRACKED_VGT_GSVS_RING_OFFSET_2, SI_TRACKED_VGT_GSVS_RING_OFFSET_3, - SI_TRACKED_VGT_GS_OUT_PRIM_TYPE, SI_TRACKED_VGT_GSVS_RING_ITEMSIZE, SI_TRACKED_VGT_GS_MAX_VERT_OUT, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 998c21d001e..3d2a4d72891 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -549,6 +549,30 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, return ia_multi_vgt_param; } +static unsigned si_conv_prim_to_gs_out(unsigned mode) +{ + static const int prim_conv[] = { + [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST, + [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, + [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST, + }; + assert(mode < ARRAY_SIZE(prim_conv)); + + return prim_conv[mode]; +} + /* rast_prim is the primitive type after GS. */ static void si_emit_rasterizer_prim_state(struct si_context *sctx) { @@ -556,24 +580,34 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx) enum pipe_prim_type rast_prim = sctx->current_rast_prim; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; - /* Skip this if not rendering lines. */ - if (!util_prim_is_lines(rast_prim)) + if (likely(rast_prim == sctx->last_rast_prim && + rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)) return; - if (rast_prim == sctx->last_rast_prim && - rs->pa_sc_line_stipple == sctx->last_sc_line_stipple) - return; + if (util_prim_is_lines(rast_prim)) { + /* For lines, reset the stipple pattern at each primitive. Otherwise, + * reset the stipple pattern at each packet (line strips, line loops). + */ + radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE, + rs->pa_sc_line_stipple | + S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2)); + sctx->context_roll = true; + } - /* For lines, reset the stipple pattern at each primitive. Otherwise, - * reset the stipple pattern at each packet (line strips, line loops). - */ - radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE, - rs->pa_sc_line_stipple | - S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2)); + if (rast_prim != sctx->last_rast_prim && + (sctx->ngg || sctx->gs_shader.cso)) { + unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim); + radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out); + sctx->context_roll = true; + + if (sctx->chip_class >= GFX10) { + sctx->current_vs_state &= C_VS_STATE_OUTPRIM; + sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out); + } + } sctx->last_rast_prim = rast_prim; sctx->last_sc_line_stipple = rs->pa_sc_line_stipple; - sctx->context_roll = true; } static void si_emit_vs_state(struct si_context *sctx, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index be9ab3bcdd6..53c1e7f44e8 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -622,30 +622,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4); } -static unsigned si_conv_prim_to_gs_out(unsigned mode) -{ - static const int prim_conv[] = { - [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST, - [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, - [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, - [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, - [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, - [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, - [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, - [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST, - }; - assert(mode < ARRAY_SIZE(prim_conv)); - - return prim_conv[mode]; -} - void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs, struct gfx9_gs_info *out) @@ -753,14 +729,12 @@ static void si_emit_shader_gs(struct si_context *sctx) return; /* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2 - * R_028A68_VGT_GSVS_RING_OFFSET_3, R_028A6C_VGT_GS_OUT_PRIM_TYPE */ - radeon_opt_set_context_reg4(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1, + * R_028A68_VGT_GSVS_RING_OFFSET_3 */ + radeon_opt_set_context_reg3(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1, SI_TRACKED_VGT_GSVS_RING_OFFSET_1, shader->ctx_reg.gs.vgt_gsvs_ring_offset_1, shader->ctx_reg.gs.vgt_gsvs_ring_offset_2, - shader->ctx_reg.gs.vgt_gsvs_ring_offset_3, - shader->ctx_reg.gs.vgt_gs_out_prim_type); - + shader->ctx_reg.gs.vgt_gsvs_ring_offset_3); /* R_028AB0_VGT_GSVS_RING_ITEMSIZE */ radeon_opt_set_context_reg(sctx, R_028AB0_VGT_GSVS_RING_ITEMSIZE, @@ -841,9 +815,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) offset += num_components[2] * sel->gs_max_out_vertices; shader->ctx_reg.gs.vgt_gsvs_ring_offset_3 = offset; - shader->ctx_reg.gs.vgt_gs_out_prim_type = - si_conv_prim_to_gs_out(sel->gs_output_prim); - if (max_stream >= 3) offset += num_components[3] * sel->gs_max_out_vertices; shader->ctx_reg.gs.vgt_gsvs_ring_itemsize = offset; -- 2.30.2