Compared to a5xx and earlier, we just program the address of the first
        stream and hw adds (pipe_num * VSC_*_STRM_PITCH)
+
+       LIMIT is set to PITCH - 64, to make room for a bit of overflow
         -->
        <reg32 offset="0x0c30" name="VSC_PRIM_STRM_ADDRESS_LO"/>
        <reg32 offset="0x0c31" name="VSC_PRIM_STRM_ADDRESS_HI"/>
        <reg64 offset="0x0c30" name="VSC_PRIM_STRM_ADDRESS" type="waddress"/>
        <reg32 offset="0x0c32" name="VSC_PRIM_STRM_PITCH"/>
-       <reg32 offset="0x0c33" name="VSC_PRIM_STRM_ARRAY_PITCH" shr="4" type="uint"/>
+       <reg32 offset="0x0c33" name="VSC_PRIM_STRM_LIMIT"/>
        <reg32 offset="0x0c34" name="VSC_DRAW_STRM_ADDRESS_LO"/>
        <reg32 offset="0x0c35" name="VSC_DRAW_STRM_ADDRESS_HI"/>
        <reg64 offset="0x0c34" name="VSC_DRAW_STRM_ADDRESS" type="waddress"/>
        <reg32 offset="0x0c36" name="VSC_DRAW_STRM_PITCH"/>
-       <reg32 offset="0x0c37" name="VSC_DRAW_STRM_ARRAY_PITCH" shr="4" type="uint"/>
+       <reg32 offset="0x0c37" name="VSC_DRAW_STRM_LIMIT"/>
 
        <array offset="0x0c38" name="VSC_STATE" stride="1" length="32">
                <doc>
 
    tu_cs_emit_regs(cs,
                    A6XX_VSC_PRIM_STRM_ADDRESS(.bo = &cmd->vsc_prim_strm),
                    A6XX_VSC_PRIM_STRM_PITCH(cmd->vsc_prim_strm_pitch),
-                   A6XX_VSC_PRIM_STRM_ARRAY_PITCH(cmd->vsc_prim_strm.size));
+                   A6XX_VSC_PRIM_STRM_LIMIT(cmd->vsc_prim_strm_pitch - 64));
 
    tu_cs_emit_regs(cs,
                    A6XX_VSC_DRAW_STRM_ADDRESS(.bo = &cmd->vsc_draw_strm),
                    A6XX_VSC_DRAW_STRM_PITCH(cmd->vsc_draw_strm_pitch),
-                   A6XX_VSC_DRAW_STRM_ARRAY_PITCH(cmd->vsc_draw_strm.size));
+                   A6XX_VSC_DRAW_STRM_LIMIT(cmd->vsc_draw_strm_pitch - 64));
 }
 
 static void
             CP_COND_WRITE5_0_WRITE_MEMORY);
       tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
       tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
-      tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_draw_strm_pitch));
+      tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_draw_strm_pitch - 64));
       tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
       tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
       tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(1 + cmd->vsc_draw_strm_pitch));
             CP_COND_WRITE5_0_WRITE_MEMORY);
       tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
       tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
-      tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_prim_strm_pitch));
+      tu_cs_emit(cs, CP_COND_WRITE5_3_REF(cmd->vsc_prim_strm_pitch - 64));
       tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
       tu_cs_emit_qw(cs, cmd->scratch_bo.iova + ctrl_offset(vsc_scratch));
       tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(3 + cmd->vsc_prim_strm_pitch));
 
        OUT_REG(ring,
                A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
                A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
-               A6XX_VSC_PRIM_STRM_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_prim_strm)));
+               A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));
 
        OUT_REG(ring,
                A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
                A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
-               A6XX_VSC_DRAW_STRM_ARRAY_PITCH(.dword = fd_bo_size(fd6_ctx->vsc_draw_strm)));
+               A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
 }
 
 /* TODO we probably have more than 8 scratch regs.. although the first
                                CP_COND_WRITE5_0_WRITE_MEMORY);
                OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
                OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
-               OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch));
+               OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
                OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
                OUT_RELOC(ring, control_ptr(fd6_ctx, vsc_scratch));  /* WRITE_ADDR_LO/HI */
                OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));
                                CP_COND_WRITE5_0_WRITE_MEMORY);
                OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
                OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
-               OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch));
+               OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
                OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
                OUT_RELOC(ring, control_ptr(fd6_ctx, vsc_scratch));  /* WRITE_ADDR_LO/HI */
                OUT_RING(ring, CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));