radeonsi/gfx10: emit VGT_GS_OUT_PRIM_TYPE from draw and add it to VS_STATE
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 7 May 2019 23:40:29 +0000 (01:40 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:12 +0000 (15:51 -0400)
With NGG, the VGT_GS_OUT_PRIM_TYPE can change without a shader change.

The VS_STATE is required for both streamout and culling from a vertex
shader without pre-compiling outprim-specific variants.

We could consider compiling specialized variants in the future. We
could also consider compiling the NGG logic as an epilog.

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index fb9286d6b48cdb9be946e07d40e68291e4afcb19..bb34b07095a3a1fddb7d07a9364dc134dfa91f2c 100644 (file)
@@ -463,7 +463,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3]  = 0x00000000;
-               ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE]    = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT]  = 0x00000000;
                ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE]  = 0x00000000;
index 39af557bcae711184e8f6ba0bddfae6cc5bb03a2..9dbf08fa95f081c273bf239078154ef866cf5d33 100644 (file)
@@ -246,6 +246,8 @@ enum {
 #define C_VS_STATE_CLAMP_VERTEX_COLOR          0xFFFFFFFE
 #define S_VS_STATE_INDEXED(x)                  (((unsigned)(x) & 0x1) << 1)
 #define C_VS_STATE_INDEXED                     0xFFFFFFFD
+#define S_VS_STATE_OUTPRIM(x)                  (((unsigned)(x) & 0x3) << 2)
+#define C_VS_STATE_OUTPRIM                     0xFFFFFFF3
 #define S_VS_STATE_LS_OUT_PATCH_SIZE(x)                (((unsigned)(x) & 0x1FFF) << 8)
 #define C_VS_STATE_LS_OUT_PATCH_SIZE           0xFFE000FF
 #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x)       (((unsigned)(x) & 0xFF) << 24)
@@ -666,7 +668,6 @@ struct si_shader {
                        unsigned        vgt_gsvs_ring_offset_1;
                        unsigned        vgt_gsvs_ring_offset_2;
                        unsigned        vgt_gsvs_ring_offset_3;
-                       unsigned        vgt_gs_out_prim_type;
                        unsigned        vgt_gsvs_ring_itemsize;
                        unsigned        vgt_gs_max_vert_out;
                        unsigned        vgt_gs_vert_itemsize;
index 23c7b3245f5c62b4b1ef03f589727a11e72def3a..678f87cd73d403f7f00b8ba2116965a9a2f2ed54 100644 (file)
@@ -298,10 +298,9 @@ enum si_tracked_reg {
 
        SI_TRACKED_VGT_ESGS_RING_ITEMSIZE,
 
-       SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 4 consecutive registers */
+       SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* 3 consecutive registers */
        SI_TRACKED_VGT_GSVS_RING_OFFSET_2,
        SI_TRACKED_VGT_GSVS_RING_OFFSET_3,
-       SI_TRACKED_VGT_GS_OUT_PRIM_TYPE,
 
        SI_TRACKED_VGT_GSVS_RING_ITEMSIZE,
        SI_TRACKED_VGT_GS_MAX_VERT_OUT,
index 998c21d001ebb0a2229d168b759fd83d0c0bbe01..3d2a4d72891681c27ab807e97a80353d0851772d 100644 (file)
@@ -549,6 +549,30 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
        return ia_multi_vgt_param;
 }
 
+static unsigned si_conv_prim_to_gs_out(unsigned mode) /* Translates a PIPE_PRIM_* value into the matching V_028A6C_OUTPRIM_TYPE_* value. */
+{
+       static const int prim_conv[] = { /* Lookup table indexed by PIPE_PRIM_*; every entry is POINTLIST, LINESTRIP or TRISTRIP. */
+               [PIPE_PRIM_POINTS]                      = V_028A6C_OUTPRIM_TYPE_POINTLIST,
+               [PIPE_PRIM_LINES]                       = V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* All line variants, including loops and adjacency, map to LINESTRIP. */
+               [PIPE_PRIM_LINE_LOOP]                   = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               [PIPE_PRIM_LINE_STRIP]                  = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               [PIPE_PRIM_TRIANGLES]                   = V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* Triangles, quads and polygons all map to TRISTRIP. */
+               [PIPE_PRIM_TRIANGLE_STRIP]              = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_TRIANGLE_FAN]                = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_QUADS]                       = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_QUAD_STRIP]                  = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_POLYGON]                     = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_LINES_ADJACENCY]             = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               [PIPE_PRIM_LINE_STRIP_ADJACENCY]        = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               [PIPE_PRIM_TRIANGLES_ADJACENCY]         = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]    = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               [PIPE_PRIM_PATCHES]                     = V_028A6C_OUTPRIM_TYPE_POINTLIST, /* Patches use the same entry as points. */
+       };
+       assert(mode < ARRAY_SIZE(prim_conv)); /* Catches a mode beyond the table when assertions are enabled. */
+
+       return prim_conv[mode]; /* The caller in this patch writes the result to R_028A6C_VGT_GS_OUT_PRIM_TYPE. */
+}
+
 /* rast_prim is the primitive type after GS. */
 static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
@@ -556,24 +580,34 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
        enum pipe_prim_type rast_prim = sctx->current_rast_prim;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
-       /* Skip this if not rendering lines. */
-       if (!util_prim_is_lines(rast_prim))
+       if (likely(rast_prim == sctx->last_rast_prim &&
+                  rs->pa_sc_line_stipple == sctx->last_sc_line_stipple))
                return;
 
-       if (rast_prim == sctx->last_rast_prim &&
-           rs->pa_sc_line_stipple == sctx->last_sc_line_stipple)
-               return;
+       if (util_prim_is_lines(rast_prim)) {
+               /* For lines, reset the stipple pattern at each primitive. Otherwise,
+                * reset the stipple pattern at each packet (line strips, line loops).
+                */
+               radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
+                       rs->pa_sc_line_stipple |
+                       S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
+               sctx->context_roll = true;
+       }
 
-       /* For lines, reset the stipple pattern at each primitive. Otherwise,
-        * reset the stipple pattern at each packet (line strips, line loops).
-        */
-       radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE,
-               rs->pa_sc_line_stipple |
-               S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2));
+       if (rast_prim != sctx->last_rast_prim &&
+           (sctx->ngg || sctx->gs_shader.cso)) {
+               unsigned gs_out = si_conv_prim_to_gs_out(sctx->current_rast_prim);
+               radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
+               sctx->context_roll = true;
+
+               if (sctx->chip_class >= GFX10) {
+                       sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
+                       sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
+               }
+       }
 
        sctx->last_rast_prim = rast_prim;
        sctx->last_sc_line_stipple = rs->pa_sc_line_stipple;
-       sctx->context_roll = true;
 }
 
 static void si_emit_vs_state(struct si_context *sctx,
index be9ab3bcdd618ae76b8260acca2d76b333c27899..53c1e7f44e8c6005f2f7ce2ec3cc6b3c424bd2d0 100644 (file)
@@ -622,30 +622,6 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
        polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader, pm4);
 }
 
-static unsigned si_conv_prim_to_gs_out(unsigned mode)
-{
-       static const int prim_conv[] = {
-               [PIPE_PRIM_POINTS]                      = V_028A6C_OUTPRIM_TYPE_POINTLIST,
-               [PIPE_PRIM_LINES]                       = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-               [PIPE_PRIM_LINE_LOOP]                   = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-               [PIPE_PRIM_LINE_STRIP]                  = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-               [PIPE_PRIM_TRIANGLES]                   = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_TRIANGLE_STRIP]              = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_TRIANGLE_FAN]                = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_QUADS]                       = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_QUAD_STRIP]                  = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_POLYGON]                     = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_LINES_ADJACENCY]             = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-               [PIPE_PRIM_LINE_STRIP_ADJACENCY]        = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-               [PIPE_PRIM_TRIANGLES_ADJACENCY]         = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]    = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-               [PIPE_PRIM_PATCHES]                     = V_028A6C_OUTPRIM_TYPE_POINTLIST,
-       };
-       assert(mode < ARRAY_SIZE(prim_conv));
-
-       return prim_conv[mode];
-}
-
 void gfx9_get_gs_info(struct si_shader_selector *es,
                      struct si_shader_selector *gs,
                      struct gfx9_gs_info *out)
@@ -753,14 +729,12 @@ static void si_emit_shader_gs(struct si_context *sctx)
                return;
 
        /* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
-        * R_028A68_VGT_GSVS_RING_OFFSET_3, R_028A6C_VGT_GS_OUT_PRIM_TYPE */
-       radeon_opt_set_context_reg4(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
+        * R_028A68_VGT_GSVS_RING_OFFSET_3 */
+       radeon_opt_set_context_reg3(sctx, R_028A60_VGT_GSVS_RING_OFFSET_1,
                                    SI_TRACKED_VGT_GSVS_RING_OFFSET_1,
                                    shader->ctx_reg.gs.vgt_gsvs_ring_offset_1,
                                    shader->ctx_reg.gs.vgt_gsvs_ring_offset_2,
-                                   shader->ctx_reg.gs.vgt_gsvs_ring_offset_3,
-                                   shader->ctx_reg.gs.vgt_gs_out_prim_type);
-
+                                   shader->ctx_reg.gs.vgt_gsvs_ring_offset_3);
 
        /* R_028AB0_VGT_GSVS_RING_ITEMSIZE */
        radeon_opt_set_context_reg(sctx, R_028AB0_VGT_GSVS_RING_ITEMSIZE,
@@ -841,9 +815,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
                offset += num_components[2] * sel->gs_max_out_vertices;
        shader->ctx_reg.gs.vgt_gsvs_ring_offset_3 = offset;
 
-       shader->ctx_reg.gs.vgt_gs_out_prim_type =
-               si_conv_prim_to_gs_out(sel->gs_output_prim);
-
        if (max_stream >= 3)
                offset += num_components[3] * sel->gs_max_out_vertices;
        shader->ctx_reg.gs.vgt_gsvs_ring_itemsize = offset;