From 1acf8d235494625e27fe0213e1a0771bba2368c3 Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Thu, 5 Sep 2019 15:07:55 -0700 Subject: [PATCH] freedreno/a6xx: Let the GPU track streamout offsets The GPU writes out streamout offsets as it goes to the FLUSH_BASE pointer. We use that value with CP_MEM_TO_REG when appending to the stream so that we don't have to track the offsets with the CPU in the driver. This ensures that streamout continues to work once we enable geometry and tessellation shader stages that add geometry. Reviewed-by: Rob Clark --- .../drivers/freedreno/a6xx/fd6_context.h | 12 +++++-- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 32 +++++++++++-------- .../drivers/freedreno/freedreno_context.h | 3 ++ .../drivers/freedreno/freedreno_state.c | 8 +++-- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 81a22c44e3d..76183cca14a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -107,13 +107,19 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); struct fd6_control { uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */ uint32_t _pad0; - uint32_t flush_base; /* dummy address for VPC_SO[i].FLUSH_BASE_LO/HI */ + volatile uint32_t vsc_overflow; uint32_t _pad1; /* flag set from cmdstream when VSC overflow detected: */ - volatile uint32_t vsc_overflow; - uint32_t _pad2; uint32_t vsc_scratch; + uint32_t _pad2; uint32_t _pad3; + uint32_t _pad4; + + /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. 
*/ + struct { + uint32_t offset; + uint32_t pad[7]; + } flush_base[4]; }; #define control_ptr(fd6_ctx, member) \ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index a1b34aa5f51..4319b43978f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -712,21 +712,27 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3 if (!target) continue; - unsigned offset = (so->offsets[i] * info->stride[i] * 4) + - target->buffer_offset; - OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_BASE_LO(i), 3); /* VPC_SO[i].BUFFER_BASE_LO: */ - OUT_RELOCW(ring, fd_resource(target->buffer)->bo, 0, 0, 0); - OUT_RING(ring, target->buffer_size + offset); - - OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 3); - OUT_RING(ring, offset); - /* VPC_SO[i].FLUSH_BASE_LO/HI: */ - // TODO just give hw a dummy addr for now.. we should - // be using this an then CP_MEM_TO_REG to set the - // VPC_SO[i].BUFFER_OFFSET for the next draw.. 
- OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base)); + OUT_RELOCW(ring, fd_resource(target->buffer)->bo, target->buffer_offset, 0, 0); + OUT_RING(ring, target->buffer_size - target->buffer_offset); + + if (so->reset & (1 << i)) { + unsigned offset = (so->offsets[i] * info->stride[i] * 4); + OUT_PKT4(ring, REG_A6XX_VPC_SO_BUFFER_OFFSET(i), 1); + OUT_RING(ring, offset); + } else { + OUT_PKT7(ring, CP_MEM_TO_REG, 3); + OUT_RING(ring, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) | + CP_MEM_TO_REG_0_64B | CP_MEM_TO_REG_0_ACCUMULATE | + CP_MEM_TO_REG_0_CNT(1 - 1)); + OUT_RELOC(ring, control_ptr(fd6_context(ctx), flush_base[i].offset)); + } + + OUT_PKT4(ring, REG_A6XX_VPC_SO_FLUSH_BASE_LO(i), 2); + OUT_RELOCW(ring, control_ptr(fd6_context(ctx), flush_base[i])); + + so->reset &= ~(1 << i); emit->streamout_mask |= (1 << i); } diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 470e625500f..af3506fd685 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -86,6 +86,9 @@ struct fd_vertex_stateobj { struct fd_streamout_stateobj { struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; + /* Bitmask of streams that should be reset. */ + unsigned reset; + unsigned num_targets; /* Track offset from vtxcnt for streamout data. 
This counter * is just incremented by # of vertices on each draw until diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index e8ea6b0023d..3133d2746b2 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -499,12 +499,14 @@ fd_set_stream_output_targets(struct pipe_context *pctx, for (i = 0; i < num_targets; i++) { boolean changed = targets[i] != so->targets[i]; - boolean append = (offsets[i] == (unsigned)-1); + boolean reset = (offsets[i] != (unsigned)-1); - if (!changed && append) + so->reset |= (reset << i); + + if (!changed && !reset) continue; - if (!append) + if (reset) so->offsets[i] = offsets[i]; pipe_so_target_reference(&so->targets[i], targets[i]); -- 2.30.2