+ max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
+ }
+
+ ncopy = DIV_ROUND_UP(size, max_size);
+ si_need_dma_space(ctx, ncopy * 5, rdst, rsrc);
+
+ for (i = 0; i < ncopy; i++) {
+ count = MIN2(size, max_size);
+ radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
+ count >> shift));
+ radeon_emit(cs, dst_offset);
+ radeon_emit(cs, src_offset);
+ radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
+ radeon_emit(cs, (src_offset >> 32UL) & 0xff);
+ dst_offset += count;
+ src_offset += count;
+ size -= count;
+ }
+}
+
+static void si_dma_clear_buffer(struct pipe_context *ctx,
+ struct pipe_resource *dst,
+ uint64_t offset,
+ uint64_t size,
+ unsigned clear_value)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+ unsigned i, ncopy, csize;
+ struct r600_resource *rdst = r600_resource(dst);
+
+ if (!cs || offset % 4 != 0 || size % 4 != 0 ||
+ dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
+ ctx->clear_buffer(ctx, dst, offset, size, &clear_value, 4);
+ return;