freedreno/a6xx: move dynamic program state to streaming stateobj
author Rob Clark <robdclark@chromium.org>
Thu, 16 Jan 2020 22:38:41 +0000 (14:38 -0800)
committer Rob Clark <robdclark@chromium.org>
Fri, 17 Jan 2020 23:43:51 +0000 (15:43 -0800)
Move the program state which we can't pre-bake to a streaming state
object, rather than emitting directly in the draw cmdstream.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3435>

src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/a6xx/fd6_emit.h
src/gallium/drivers/freedreno/a6xx/fd6_program.c
src/gallium/drivers/freedreno/a6xx/fd6_program.h

index 7689429df8de15bb6076658f36426ae29db442ff..d6535ea580ced864f6b8be8ac85c1b81afd3316f 100644 (file)
@@ -1014,12 +1014,12 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
                fd6_emit_add_group(emit, prog->binning_stateobj,
                                FD6_GROUP_PROG_BINNING, CP_SET_DRAW_STATE__0_BINNING);
 
-               /* emit remaining non-stateobj program state, ie. what depends
-                * on other emit state, so cannot be pre-baked.  This could
-                * be moved to a separate stateobj which is dynamically
-                * created.
+               /* emit remaining streaming program state, ie. what depends on
+                * other emit state, so cannot be pre-baked.
                 */
-               fd6_program_emit(ring, emit);
+               struct fd_ringbuffer *streaming = fd6_program_interp_state(emit);
+
+               fd6_emit_take_group(emit, streaming, FD6_GROUP_PROG_INTERP, ENABLE_DRAW);
        }
 
        if (dirty & FD_DIRTY_RASTERIZER) {
index da6e4dd5a391120e71f4d57a200ca5952596d9fa..5d03761c7acf1edf6859a5c5377bb8ebad29d0fe 100644 (file)
@@ -46,6 +46,7 @@ enum fd6_state_id {
        FD6_GROUP_PROG_CONFIG,
        FD6_GROUP_PROG,
        FD6_GROUP_PROG_BINNING,
+       FD6_GROUP_PROG_INTERP,
        FD6_GROUP_LRZ,
        FD6_GROUP_LRZ_BINNING,
        FD6_GROUP_VBO,
index 888a4b91d9061fd99479195d774c9f2620f99091..14b57bfb238dd0265eef26ef0fe5e7e4aaf90a0e 100644 (file)
@@ -790,31 +790,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
        OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1);
        OUT_RING(ring, 0);
 
-
-       if (!binning_pass) {
-               /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
-               for (j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
-                       /* NOTE: varyings are packed, so if compmask is 0xb
-                        * then first, third, and fourth component occupy
-                        * three consecutive varying slots:
-                        */
-                       unsigned compmask = fs->inputs[j].compmask;
-
-                       uint32_t inloc = fs->inputs[j].inloc;
-
-                       if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
-                               uint32_t loc = inloc;
-
-                               for (i = 0; i < 4; i++) {
-                                       if (compmask & (1 << i)) {
-                                               state->vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-                                               loc++;
-                                       }
-                               }
-                       }
-               }
-       }
-
        if (fs->instrlen)
                fd6_emit_shader(ring, fs);
 
@@ -846,24 +821,62 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                ir3_emit_immediates(screen, fs, ring);
 }
 
-/* emits the program state which is not part of the stateobj because of
- * dependency on other gl state (rasterflat or sprite-coord-replacement)
+static struct fd_ringbuffer *
+create_interp_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
+{       /* builds and returns a new ringbuffer object holding the VPC_VARYING_INTERP_MODE / VPC_VARYING_PS_REPL_MODE writes for state->fs */
+       const struct ir3_shader_variant *fs = state->fs;
+       struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);  /* NOTE(review): presumably 2 packets x (1 header + 8 regs) dwords, 4 bytes each - confirm */
+       uint32_t vinterp[8] = {0};
+
+       /* figure out VARYING_INTERP register values (VARYING_PS_REPL is written as all zeros below): */
+       for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
+               /* NOTE: varyings are packed, so if compmask is 0xb
+                * then first, second, and fourth component occupy
+                * three consecutive varying slots:
+                */
+               unsigned compmask = fs->inputs[j].compmask;
+
+               uint32_t inloc = fs->inputs[j].inloc;
+
+               if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
+                       uint32_t loc = inloc;
+
+                       for (int i = 0; i < 4; i++) {
+                               if (compmask & (1 << i)) {
+                                       vinterp[loc / 16] |= 1 << ((loc % 16) * 2);  /* 2 bits per slot, 16 slots per dword; sets the low bit of the slot's field */
+                                       loc++;  /* only enabled components consume a slot */
+                               }
+                       }
+               }
+       }
+
+       OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
+       for (int i = 0; i < 8; i++)
+               OUT_RING(ring, vinterp[i]);    /* VPC_VARYING_INTERP[i].MODE */
+
+       OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+       for (int i = 0; i < 8; i++)
+               OUT_RING(ring, 0x00000000);    /* VPC_VARYING_PS_REPL[i]: always zero in this pre-built object */
+
+       return ring;
+}
+
+/* build the program streaming state which is not part of the pre-
+ * baked stateobj because of dependency on other gl state (rasterflat
+ * or sprite-coord-replacement)
  */
-void
-fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
+struct fd_ringbuffer *
+fd6_program_interp_state(struct fd6_emit *emit)
 {
        const struct fd6_program_state *state = fd6_emit_get_prog(emit);
 
        if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
                /* fastpath: */
-               OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
-               for (int i = 0; i < 8; i++)
-                       OUT_RING(ring, state->vinterp[i]);   /* VPC_VARYING_INTERP[i].MODE */
-
-               OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
-               for (int i = 0; i < 8; i++)
-                       OUT_RING(ring, 0x00000000);          /* VPC_VARYING_PS_REPL[i] */
+               return fd_ringbuffer_ref(state->interp_stateobj);
        } else {
+               struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
+                               emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
+
                /* slow-path: */
                struct ir3_shader_variant *fs = state->fs;
                uint32_t vinterp[8], vpsrepl[8];
@@ -938,6 +951,8 @@ fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
                OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
                for (int i = 0; i < 8; i++)
                        OUT_RING(ring, vpsrepl[i]);     /* VPC_VARYING_PS_REPL[i] */
+
+               return ring;
        }
 }
 
@@ -980,6 +995,7 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
        setup_config_stateobj(state->config_stateobj, state);
        setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
        setup_stateobj(state->stateobj, ctx->screen, state, key, false);
+       state->interp_stateobj = create_interp_stateobj(ctx, state);
 
        return &state->base;
 }
@@ -991,6 +1007,7 @@ fd6_program_destroy(void *data, struct ir3_program_state *state)
        fd_ringbuffer_del(so->stateobj);
        fd_ringbuffer_del(so->binning_stateobj);
        fd_ringbuffer_del(so->config_stateobj);
+       fd_ringbuffer_del(so->interp_stateobj);
        free(so);
 }
 
index 39e0468e2a8bc6f9e44b9289de9f3c94ff2c81bd..f1f4838fb3895264d67de32172a9357de41d417a 100644 (file)
@@ -52,13 +52,12 @@ struct fd6_program_state {
        struct ir3_shader_variant *gs;
        struct ir3_shader_variant *fs;
        struct fd_ringbuffer *config_stateobj;
+       struct fd_ringbuffer *interp_stateobj;
        struct fd_ringbuffer *binning_stateobj;
        struct fd_ringbuffer *stateobj;
 
        /* cached state about current emitted shader program (3d): */
        struct fd6_streamout_state tf;
-
-       uint32_t vinterp[8];
 };
 
 static inline struct fd6_program_state *
@@ -80,7 +79,7 @@ fd6_last_shader(const struct fd6_program_state *state)
 
 void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so);
 
-void fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit);
+struct fd_ringbuffer * fd6_program_interp_state(struct fd6_emit *emit);
 
 void fd6_prog_init(struct pipe_context *pctx);