freedreno/a6xx: separate rast stateobj for prim restart
author Rob Clark <robdclark@chromium.org>
Thu, 16 Jan 2020 20:15:37 +0000 (12:15 -0800)
committer Rob Clark <robdclark@chromium.org>
Fri, 17 Jan 2020 23:43:51 +0000 (15:43 -0800)
This lets us move PC_PRIMITIVE_CNTL into the rasterizer stateobj, rather
than unconditionally emitting it directly in the cmdstream on every
draw.

This also starts adding some tracking about previous draw state, so that
following patches can limit some of the register writes we currently
emit on every draw.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3435>

src/gallium/drivers/freedreno/a6xx/fd6_context.h
src/gallium/drivers/freedreno/a6xx/fd6_draw.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.c
src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.h
src/gallium/drivers/freedreno/freedreno_context.h

index a7f786fac618264e377d0218fc3c32ef1461fa4b..cfc2ca7c42eeccfcdc58de5d0e380b75e970fa81 100644 (file)
@@ -101,7 +101,6 @@ struct fd6_context {
                uint32_t RB_CCU_CNTL_gmem;        /* for GMEM rendering */
                uint32_t PC_UNKNOWN_9805;
                uint32_t SP_UNKNOWN_A0F8;
-
        } magic;
 };
 
index 37ff241f2b175fcbe6936c9b401af1f036a3e52d..320cca64cec507c92f842c1b9a54e9a859c5e152 100644 (file)
@@ -118,6 +118,19 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
        }
 }
 
+static void
+fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit)
+{
+       if (ctx->last.dirty ||
+                       (ctx->last.primitive_restart != emit->primitive_restart)) {
+               /* rasterizer state is affected by primitive-restart: */
+               ctx->dirty |= FD_DIRTY_RASTERIZER;
+               ctx->last.primitive_restart = emit->primitive_restart;
+       }
+
+       ctx->last.dirty = false;
+}
+
 static bool
 fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
              unsigned index_offset)
@@ -153,6 +166,7 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
                .rasterflat = ctx->rasterizer->flatshade,
                .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
                .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
+               .primitive_restart = info->primitive_restart && info->index_size,
        };
 
        if (info->mode == PIPE_PRIM_PATCHES) {
@@ -259,6 +273,8 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
                }
        }
 
+       fixup_draw_state(ctx, &emit);
+
        fd6_emit_state(ring, &emit);
 
        OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 2);
index 229a863fd3634f0f0dd1e3890941ea8c0c3a9a1e..7689429df8de15bb6076658f36426ae29db442ff 100644 (file)
@@ -1023,31 +1023,12 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
        }
 
        if (dirty & FD_DIRTY_RASTERIZER) {
-               struct fd6_rasterizer_stateobj *rasterizer =
-                               fd6_rasterizer_stateobj(ctx->rasterizer);
-               fd6_emit_add_group(emit, rasterizer->stateobj,
+               struct fd_ringbuffer *stateobj =
+                       fd6_rasterizer_state(ctx, emit->primitive_restart);
+               fd6_emit_add_group(emit, stateobj,
                                                   FD6_GROUP_RASTERIZER, ENABLE_ALL);
        }
 
-       /* Since the primitive restart state is not part of a tracked object, we
-        * re-emit this register every time.
-        */
-       if (emit->info && ctx->rasterizer) {
-               struct fd6_rasterizer_stateobj *rasterizer =
-                               fd6_rasterizer_stateobj(ctx->rasterizer);
-               OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9806, 1);
-               OUT_RING(ring, 0);
-               OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9990, 1);
-               OUT_RING(ring, 0);
-               OUT_PKT4(ring, REG_A6XX_VFD_UNKNOWN_A008, 1);
-               OUT_RING(ring, 0);
-
-               OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1);
-               OUT_RING(ring, rasterizer->pc_primitive_cntl |
-                                COND(emit->info->primitive_restart && emit->info->index_size,
-                                         A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART));
-       }
-
        if (dirty & (FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                unsigned nr = pfb->nr_cbufs;
 
@@ -1347,6 +1328,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
        WRITE(REG_A6XX_VPC_SO_OVERRIDE, A6XX_VPC_SO_OVERRIDE_SO_DISABLE);
 
        WRITE(REG_A6XX_PC_UNKNOWN_9806, 0);
+       WRITE(REG_A6XX_PC_UNKNOWN_9990, 0);
        WRITE(REG_A6XX_PC_UNKNOWN_9980, 0);
 
        WRITE(REG_A6XX_PC_UNKNOWN_9B07, 0);
index f8193227651593dad476ee6c3e118ae3f0c172b0..da6e4dd5a391120e71f4d57a200ca5952596d9fa 100644 (file)
@@ -90,6 +90,7 @@ struct fd6_emit {
        bool sprite_coord_mode;
        bool rasterflat;
        bool no_decode_srgb;
+       bool primitive_restart;
 
        /* in binning pass, we don't have real frag shader, so we
         * don't know if real draw disqualifies lrz write.  So just
index cc8b52c6a9f9c1ede551133a2873e813e01e5a74..4cc31c5d38b3d30f8ff9ebdef079c32cad1024f1 100644 (file)
 #include "fd6_format.h"
 #include "fd6_pack.h"
 
-static struct fd_ringbuffer *
-setup_rasterizer_stateobj(struct fd_context *ctx,
-               const struct pipe_rasterizer_state *cso)
+struct fd_ringbuffer *
+__fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
+               const struct pipe_rasterizer_state *cso, bool primitive_restart)
 {
-       struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 12 * 4);
+       struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 14 * 4);
        float psize_min, psize_max;
 
        if (cso->point_size_per_vertex) {
@@ -85,6 +85,12 @@ setup_rasterizer_stateobj(struct fd_context *ctx,
                        cso->offset_clamp
                ));
 
+       OUT_REG(ring,
+               A6XX_PC_PRIMITIVE_CNTL_0(
+                       .provoking_vtx_last = !cso->flatshade_first,
+                       .primitive_restart = primitive_restart,
+               ));
+
        return ring;
 }
 
@@ -92,7 +98,6 @@ void *
 fd6_rasterizer_state_create(struct pipe_context *pctx,
                const struct pipe_rasterizer_state *cso)
 {
-       struct fd_context *ctx = fd_context(pctx);
        struct fd6_rasterizer_stateobj *so;
 
        so = CALLOC_STRUCT(fd6_rasterizer_stateobj);
@@ -101,11 +106,6 @@ fd6_rasterizer_state_create(struct pipe_context *pctx,
 
        so->base = *cso;
 
-       if (!cso->flatshade_first)
-               so->pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PROVOKING_VTX_LAST;
-
-       so->stateobj = setup_rasterizer_stateobj(ctx, cso);
-
        return so;
 }
 
@@ -114,7 +114,10 @@ fd6_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso)
 {
        struct fd6_rasterizer_stateobj *so = hwcso;
 
-       fd_ringbuffer_del(so->stateobj);
+       for (unsigned i = 0; i < ARRAY_SIZE(so->stateobjs); i++)
+               if (so->stateobjs[i])
+                       fd_ringbuffer_del(so->stateobjs[i]);
+
        FREE(hwcso);
 }
 
index afce6713b72446a73aaabb5c7e124ebfff38f5e4..22890b8def427bc0ed7a872c0afcfb45a2c9cd67 100644 (file)
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 
+#include "freedreno_context.h"
+
 struct fd6_rasterizer_stateobj {
        struct pipe_rasterizer_state base;
 
-       uint32_t pc_primitive_cntl;
-
-       struct fd_ringbuffer *stateobj;
+       struct fd_ringbuffer *stateobjs[2];
 };
 
 static inline struct fd6_rasterizer_stateobj *
@@ -49,4 +49,21 @@ void * fd6_rasterizer_state_create(struct pipe_context *pctx,
                const struct pipe_rasterizer_state *cso);
 void fd6_rasterizer_state_delete(struct pipe_context *, void *hwcso);
 
+struct fd_ringbuffer * __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
+               const struct pipe_rasterizer_state *cso, bool primitive_restart);
+
+static inline struct fd_ringbuffer *
+fd6_rasterizer_state(struct fd_context *ctx, bool primitive_restart)
+{
+       struct fd6_rasterizer_stateobj *rasterizer = fd6_rasterizer_stateobj(ctx->rasterizer);
+       unsigned variant = primitive_restart;
+
+       if (unlikely(!rasterizer->stateobjs[variant])) {
+               rasterizer->stateobjs[variant] =
+                       __fd6_setup_rasterizer_stateobj(ctx, ctx->rasterizer, primitive_restart);
+       }
+
+       return rasterizer->stateobjs[variant];
+}
+
 #endif /* FD6_RASTERIZER_H_ */
index 12aaba1d4d08ab17f2b5e9ec29249cad4814780d..846a442d315238426e2b7b6b7c5ebd675607a934 100644 (file)
@@ -363,6 +363,17 @@ struct fd_context {
         *    - solid_vbuf / 12 / R32G32B32_FLOAT
         */
        struct fd_vertex_state blit_vbuf_state;
+
+       /*
+        * Info about state of previous draw, for state that comes from
+        * pipe_draw_info (ie. not part of a CSO).  This allows us to
+        * skip some register emit when the state doesn't change from
+        * draw-to-draw
+        */
+       struct {
+               bool dirty;               /* last draw state unknown */
+               bool primitive_restart;
+       } last;
 };
 
 static inline struct fd_context *
@@ -393,6 +404,7 @@ fd_context_unlock(struct fd_context *ctx)
 static inline void
 fd_context_all_dirty(struct fd_context *ctx)
 {
+       ctx->last.dirty = true;
        ctx->dirty = ~0;
        for (unsigned i = 0; i < PIPE_SHADER_TYPES; i++)
                ctx->dirty_shader[i] = ~0;