+/* Emit transform-feedback (streamout) state for the currently bound
+ * pipeline into the given command stream.
+ *
+ * For each bound SO buffer this programs the buffer base/size registers
+ * and the current write offset -- either written directly (when the
+ * buffer's reset bit is set, i.e. a fresh begin) or reloaded from the
+ * scratch BO where the hardware last flushed it.  Afterwards it emits
+ * the global enable packets (buffer control, per-stream component
+ * counts, SO program) or the disable packets, depending on
+ * cmd->state.streamout_enabled.
+ */
+static void
+tu6_emit_streamout(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+ struct tu_streamout_state *tf = &cmd->state.pipeline->streamout;
+
+ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
+ struct tu_buffer *buf = cmd->state.streamout_buf.buffers[i];
+ /* Slot not bound: nothing to program for this binding. */
+ if (!buf)
+ continue;
+
+ uint32_t offset;
+ offset = cmd->state.streamout_buf.offsets[i];
+
+ /* Where streamout data for binding i is written. */
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_BASE(i, .bo = buf->bo,
+ .bo_offset = buf->bo_offset));
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_SIZE(i, buf->size));
+
+ if (cmd->state.streamout_reset & (1 << i)) {
+ /* Fresh begin for this buffer: write the offset register with a
+  * CPU-known value.  The stored offset is scaled by the stride
+  * here -- NOTE(review): this implies streamout_buf.offsets[] is
+  * kept in vertex counts rather than bytes; confirm against the
+  * bind/begin code that fills it in.
+  */
+ offset *= tf->stride[i];
+
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_BUFFER_OFFSET(i, offset));
+ /* One-shot: consume the reset bit so subsequent emits resume. */
+ cmd->state.streamout_reset &= ~(1 << i);
+ } else {
+ /* Resuming: have the CP copy the offset the hardware saved to
+  * the scratch BO (see the FLUSH_BASE write below) back into the
+  * offset register.  SHIFT_BY_2 presumably scales the saved
+  * value to bytes, and UNK31 is an undocumented bit -- TODO
+  * confirm both against the a6xx packet definitions.
+  */
+ tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
+ tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A6XX_VPC_SO_BUFFER_OFFSET(i)) |
+ CP_MEM_TO_REG_0_SHIFT_BY_2 | CP_MEM_TO_REG_0_UNK31 |
+ CP_MEM_TO_REG_0_CNT(0));
+ tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_FLUSH * (i + 1));
+ }
+
+ /* Scratch-BO slot where the hardware saves this buffer's write
+  * offset on flush; must match the CP_MEM_TO_REG source address
+  * used in the resume path above.
+  */
+ tu_cs_emit_regs(cs, A6XX_VPC_SO_FLUSH_BASE(i, .bo = &cmd->scratch_bo,
+ .bo_offset = VSC_FLUSH * (i + 1)));
+ }
+
+ if (cmd->state.streamout_enabled) {
+ /* Payload: 6 register/value pairs (12 dwords) plus one pair per
+  * SO program dword -- keep the count in sync with the emits below.
+  */
+ tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 12 + (2 * tf->prog_count));
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_BUF_CNTL);
+ tu_cs_emit(cs, tf->vpc_so_buf_cntl);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_NCOMP(0));
+ tu_cs_emit(cs, tf->ncomp[0]);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_NCOMP(1));
+ tu_cs_emit(cs, tf->ncomp[1]);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_NCOMP(2));
+ tu_cs_emit(cs, tf->ncomp[2]);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_NCOMP(3));
+ tu_cs_emit(cs, tf->ncomp[3]);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
+ tu_cs_emit(cs, A6XX_VPC_SO_CNTL_ENABLE);
+ /* SO_PROG is a write-many register: each write appends one program
+  * dword from the pipeline's precomputed streamout program. */
+ for (unsigned i = 0; i < tf->prog_count; i++) {
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_PROG);
+ tu_cs_emit(cs, tf->prog[i]);
+ }
+ } else {
+ /* Streamout off: zero SO_CNTL and SO_BUF_CNTL (2 pairs, 4 dwords). */
+ tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 4);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
+ tu_cs_emit(cs, 0);
+ tu_cs_emit(cs, REG_A6XX_VPC_SO_BUF_CNTL);
+ tu_cs_emit(cs, 0);
+ }
+}
+