freedreno/a6xx: de-duplicate vinterp/vpsrepl state building
author Rob Clark <robdclark@chromium.org>
Mon, 22 Jun 2020 15:39:12 +0000 (08:39 -0700)
committer Marge Bot <eric+marge@anholt.net>
Wed, 24 Jun 2020 22:29:28 +0000 (22:29 +0000)
When we flip the texcoord patch, we'll set up the PNTC input slot in the
pre-built interp stateobj, rather than this being a draw-time (slow-path)
built stateobj.  But rather than duplicate more of the slow-path logic,
refactor it out into a helper that is reused in both cases.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5595>

src/gallium/drivers/freedreno/a6xx/fd6_program.c

index be561a040c62c47badc8ddec240a99c28be4047d..7b2ec2388c7891e02d94dfd1077bf39bb903896e 100644 (file)
@@ -856,42 +856,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
                fd6_emit_immediates(screen, fs, ring);
 }
 
+static void emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
+               bool rasterflat, bool sprite_coord_mode, uint32_t sprite_coord_enable);
+
 static struct fd_ringbuffer *
 create_interp_stateobj(struct fd_context *ctx, struct fd6_program_state *state)
 {
-       const struct ir3_shader_variant *fs = state->fs;
        struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 18 * 4);
-       uint32_t vinterp[8] = {0};
 
-       /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
-       for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
-               /* NOTE: varyings are packed, so if compmask is 0xb
-                * then first, third, and fourth component occupy
-                * three consecutive varying slots:
-                */
-               unsigned compmask = fs->inputs[j].compmask;
-
-               uint32_t inloc = fs->inputs[j].inloc;
-
-               if (fs->inputs[j].interpolate == INTERP_MODE_FLAT) {
-                       uint32_t loc = inloc;
-
-                       for (int i = 0; i < 4; i++) {
-                               if (compmask & (1 << i)) {
-                                       vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-                                       loc++;
-                               }
-                       }
-               }
-       }
-
-       OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
-       for (int i = 0; i < 8; i++)
-               OUT_RING(ring, vinterp[i]);    /* VPC_VARYING_INTERP[i].MODE */
-
-       OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
-       for (int i = 0; i < 8; i++)
-               OUT_RING(ring, 0x00000000);    /* VPC_VARYING_PS_REPL[i] */
+       emit_interp_state(ring, state->fs, false, false, 0);
 
        return ring;
 }
@@ -912,83 +885,89 @@ fd6_program_interp_state(struct fd6_emit *emit)
                struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
                                emit->ctx->batch->submit, 18 * 4, FD_RINGBUFFER_STREAMING);
 
-               /* slow-path: */
-               struct ir3_shader_variant *fs = state->fs;
-               uint32_t vinterp[8], vpsrepl[8];
+               emit_interp_state(ring, state->fs, emit->rasterflat,
+                               emit->sprite_coord_mode, emit->sprite_coord_enable);
 
-               memset(vinterp, 0, sizeof(vinterp));
-               memset(vpsrepl, 0, sizeof(vpsrepl));
+               return ring;
+       }
+}
 
-               for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
+static void
+emit_interp_state(struct fd_ringbuffer *ring, struct ir3_shader_variant *fs,
+               bool rasterflat, bool sprite_coord_mode, uint32_t sprite_coord_enable)
+{
+       uint32_t vinterp[8], vpsrepl[8];
 
-                       /* NOTE: varyings are packed, so if compmask is 0xb
-                        * then first, third, and fourth component occupy
-                        * three consecutive varying slots:
-                        */
-                       unsigned compmask = fs->inputs[j].compmask;
+       memset(vinterp, 0, sizeof(vinterp));
+       memset(vpsrepl, 0, sizeof(vpsrepl));
 
-                       uint32_t inloc = fs->inputs[j].inloc;
+       for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
 
-                       if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) ||
-                                       (fs->inputs[j].rasterflat && emit->rasterflat)) {
-                               uint32_t loc = inloc;
+               /* NOTE: varyings are packed, so if compmask is 0xb
+                * then first, third, and fourth component occupy
+                * three consecutive varying slots:
+                */
+               unsigned compmask = fs->inputs[j].compmask;
+
+               uint32_t inloc = fs->inputs[j].inloc;
 
-                               for (int i = 0; i < 4; i++) {
-                                       if (compmask & (1 << i)) {
-                                               vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-                                               loc++;
-                                       }
+               if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) ||
+                               (fs->inputs[j].rasterflat && rasterflat)) {
+                       uint32_t loc = inloc;
+
+                       for (int i = 0; i < 4; i++) {
+                               if (compmask & (1 << i)) {
+                                       vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+                                       loc++;
                                }
                        }
+               }
 
-                       gl_varying_slot slot = fs->inputs[j].slot;
+               gl_varying_slot slot = fs->inputs[j].slot;
 
-                       /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
-                       if (slot >= VARYING_SLOT_VAR0) {
-                               unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
-                               /* Replace the .xy coordinates with S/T from the point sprite. Set
-                                * interpolation bits for .zw such that they become .01
+               /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+               if (slot >= VARYING_SLOT_VAR0) {
+                       unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+                       /* Replace the .xy coordinates with S/T from the point sprite. Set
+                        * interpolation bits for .zw such that they become .01
+                        */
+                       if (sprite_coord_enable & texmask) {
+                               /* mask is two 2-bit fields, where:
+                                *   '01' -> S
+                                *   '10' -> T
+                                *   '11' -> 1 - T  (flip mode)
                                 */
-                               if (emit->sprite_coord_enable & texmask) {
-                                       /* mask is two 2-bit fields, where:
-                                        *   '01' -> S
-                                        *   '10' -> T
-                                        *   '11' -> 1 - T  (flip mode)
-                                        */
-                                       unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
-                                       uint32_t loc = inloc;
-                                       if (compmask & 0x1) {
-                                               vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
-                                               loc++;
-                                       }
-                                       if (compmask & 0x2) {
-                                               vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
-                                               loc++;
-                                       }
-                                       if (compmask & 0x4) {
-                                               /* .z <- 0.0f */
-                                               vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
-                                               loc++;
-                                       }
-                                       if (compmask & 0x8) {
-                                               /* .w <- 1.0f */
-                                               vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
-                                               loc++;
-                                       }
+                               unsigned mask = sprite_coord_mode ? 0b1101 : 0b1001;
+                               uint32_t loc = inloc;
+                               if (compmask & 0x1) {
+                                       vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+                                       loc++;
+                               }
+                               if (compmask & 0x2) {
+                                       vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+                                       loc++;
+                               }
+                               if (compmask & 0x4) {
+                                       /* .z <- 0.0f */
+                                       vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+                                       loc++;
+                               }
+                               if (compmask & 0x8) {
+                                       /* .w <- 1.0f */
+                                       vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+                                       loc++;
                                }
                        }
                }
+       }
 
-               OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
-               for (int i = 0; i < 8; i++)
-                       OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
-
-               OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
-               for (int i = 0; i < 8; i++)
-                       OUT_RING(ring, vpsrepl[i]);     /* VPC_VARYING_PS_REPL[i] */
+       OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
+       for (int i = 0; i < 8; i++)
+               OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
 
-               return ring;
-       }
+       OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+       for (int i = 0; i < 8; i++)
+               OUT_RING(ring, vpsrepl[i]);     /* VPC_VARYING_PS_REPL[i] */
 }
 
 static struct ir3_program_state *