freedreno/a6xx: Let the GPU track streamout offsets
author Kristian H. Kristensen <hoegsberg@google.com>
Thu, 5 Sep 2019 22:07:55 +0000 (15:07 -0700)
committer Kristian H. Kristensen <hoegsberg@google.com>
Fri, 6 Sep 2019 16:53:28 +0000 (09:53 -0700)
As it goes, the GPU writes the streamout offsets out to the FLUSH_BASE
pointer.  We load that value with CP_MEM_TO_REG when appending to the
stream, so that the driver doesn't have to track the offsets on the
CPU.  This ensures that streamout continues to work once we enable
geometry and tessellation shader stages, which add geometry.

Reviewed-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/a6xx/fd6_context.h
src/gallium/drivers/freedreno/a6xx/fd6_emit.c
src/gallium/drivers/freedreno/freedreno_context.h
src/gallium/drivers/freedreno/freedreno_state.c

index 81a22c44e3d8d4a7d74d2f6d9ff80a6220afc548..76183cca14a90443faab52f6dc662cab1798d8a6 100644 (file)
@@ -107,13 +107,19 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);
 struct fd6_control {
        uint32_t seqno;          /* seqno for async CP_EVENT_WRITE, etc */
        uint32_t _pad0;
-       uint32_t flush_base;     /* dummy address for VPC_SO[i].FLUSH_BASE_LO/HI */
+       volatile uint32_t vsc_overflow;
        uint32_t _pad1;
        /* flag set from cmdstream when VSC overflow detected: */
-       volatile uint32_t vsc_overflow;
-       uint32_t _pad2;
        uint32_t vsc_scratch;
+       uint32_t _pad2;
        uint32_t _pad3;
+       uint32_t _pad4;
+
+       /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
+       struct {
+               uint32_t offset;
+               uint32_t pad[7];
+       } flush_base[4];
 };
 
 #define control_ptr(fd6_ctx, member)  \
index a1b34aa5f51c8803c1745fb0b321eb104ac749ad..4319b43978f8a8e04756c5defabc77acc4baa338 100644 (file)
@@ -712,21 +712,27 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3
                if (!target)
                        continue;
 
-               unsigned offset = (so->offsets[i] * info->stride[i] * 4) +
-                               target->buffer_offset;
-
                OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(i), 3);
                /* VPC_SO[i].BUFFER_BASE_LO: */
-               OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0);
-               OUT_RING(ring, target->buffer_size + offset);
-
-               OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 3);
-               OUT_RING(ring, offset);
-               /* VPC_SO[i].FLUSH_BASE_LO/HI: */
-               // TODO just give hw a dummy addr for now.. we should
-               // be using this an then CP_MEM_TO_REG to set the
-               // VPC_SO[i].BUFFER_OFFSET for the next draw..
-               OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base));
+               OUT_RELOCW(ring, fd_resource(target->buffer)->bo, target->buffer_offset, 0, 0);
+               OUT_RING(ring, target->buffer_size - target->buffer_offset);
+
+               if (so->reset & (1 << i)) {
+                       unsigned offset = (so->offsets[i] * info->stride[i] * 4);
+                       OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1);
+                       OUT_RING(ring, offset);
+               } else {
+                       OUT_PKT7(ring, CP_MEM_TO_REG, 3);
+                       OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
+                                       CP_MEM_TO_REG_0_64B | CP_MEM_TO_REG_0_ACCUMULATE |
+                                       CP_MEM_TO_REG_0_CNT(1 - 1));
+                       OUT_RELOC(ring, control_ptr(fd6_context(ctx), flush_base[i].offset));
+               }
+
+               OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE_LO(i), 2);
+               OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base[i]));
+
+               so->reset &= ~(1 << i);
 
                emit->streamout_mask |= (1 << i);
        }
index 470e625500f0c10b9d526e6bcf89605262956766..af3506fd685cbb6b416cca704946881c65ed6cfb 100644 (file)
@@ -86,6 +86,9 @@ struct fd_vertex_stateobj {
 
 struct fd_streamout_stateobj {
        struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+       /* Bitmask of streams that should be reset. */
+       unsigned reset;
+
        unsigned num_targets;
        /* Track offset from vtxcnt for streamout data.  This counter
         * is just incremented by # of vertices on each draw until
index e8ea6b0023da9c2c32cafc0fc0f5172db27d75c4..3133d2746b2f6d36372d203c3f6581f2c2a36d99 100644 (file)
@@ -499,12 +499,14 @@ fd_set_stream_output_targets(struct pipe_context *pctx,
 
        for (i = 0; i < num_targets; i++) {
                boolean changed = targets[i] != so->targets[i];
-               boolean append = (offsets[i] == (unsigned)-1);
+               boolean reset = (offsets[i] != (unsigned)-1);
 
-               if (!changed && append)
+               so->reset |= (reset << i);
+
+               if (!changed && !reset)
                        continue;
 
-               if (!append)
+               if (reset)
                        so->offsets[i] = offsets[i];
 
                pipe_so_target_reference(&so->targets[i], targets[i]);