From cc2c1002747405ef23bf6cd697bfb1cb7a1e68e8 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 27 Nov 2013 13:27:54 +0100
Subject: [PATCH] radeonsi: implement accelerated buffer copying
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer
---
 src/gallium/drivers/radeonsi/r600_blit.c      |  7 +--
 src/gallium/drivers/radeonsi/si_descriptors.c | 68 +++++++++++++++++++
 src/gallium/drivers/radeonsi/si_state.h       |  3 +
 3 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c
index e525f7950b0..3adbb81c03a 100644
--- a/src/gallium/drivers/radeonsi/r600_blit.c
+++ b/src/gallium/drivers/radeonsi/r600_blit.c
@@ -496,15 +496,14 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 	const struct pipe_box *psbox = src_box;
 	boolean restore_orig[2];
 
-	memset(orig_info, 0, sizeof(orig_info));
-
 	/* Fallback for buffers. */
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-		util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
-                                          src, src_level, src_box);
+		si_copy_buffer(rctx, dst, src, dstx, src_box->x, src_box->width);
 		return;
 	}
 
+	memset(orig_info, 0, sizeof(orig_info));
+
 	/* The driver doesn't decompress resources automatically while
 	 * u_blitter is rendering. */
 	r600_decompress_subresource(ctx, src, src_level,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index c4915846d94..c591352bf38 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -605,6 +605,74 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 		       offset + size);
 }
 
+/* Copy "size" bytes from "src" at "src_offset" to "dst" at "dst_offset"
+ * with CP DMA packets emitted on the GFX ring. Copies larger than
+ * CP_DMA_MAX_BYTE_COUNT are split into several packets. A zero size is
+ * a no-op. */
+void si_copy_buffer(struct r600_context *rctx,
+		    struct pipe_resource *dst, struct pipe_resource *src,
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size)
+{
+	if (!size)
+		return;
+
+	/* Mark the destination range as valid while dst_offset is still
+	 * buffer-relative and size is still the full byte count; below,
+	 * dst_offset is rebased by r600_resource_va() and the loop consumes
+	 * size down to zero. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+		       dst_offset + size);
+
+	dst_offset += r600_resource_va(&rctx->screen->b.b, dst);
+	src_offset += r600_resource_va(&rctx->screen->b.b, src);
+
+	/* Flush the caches where the resource is bound. */
+	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+			 R600_CONTEXT_INV_CONST_CACHE |
+			 R600_CONTEXT_FLUSH_AND_INV_CB |
+			 R600_CONTEXT_FLUSH_AND_INV_DB |
+			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
+			 R600_CONTEXT_FLUSH_AND_INV_DB_META |
+			 R600_CONTEXT_WAIT_3D_IDLE;
+
+	while (size) {
+		unsigned sync_flags = 0;
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+
+		si_need_cs_space(rctx, 7 + (rctx->b.flags ? rctx->cache_flush.num_dw : 0), FALSE);
+
+		/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
+		if (rctx->b.flags) {
+			si_emit_cache_flush(&rctx->b, NULL);
+			sync_flags |= SI_CP_DMA_RAW_WAIT;
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count) {
+			sync_flags |= R600_CP_DMA_SYNC;
+		}
+
+		/* This must be done after r600_need_cs_space. */
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src, RADEON_USAGE_READ);
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+		si_emit_cp_dma_copy_buffer(rctx, dst_offset, src_offset, byte_count, sync_flags);
+
+		size -= byte_count;
+		src_offset += byte_count;
+		dst_offset += byte_count;
+	}
+
+	/* Request the same cache flushes again, this time without the
+	 * 3D-idle wait, now that the copy packets have been emitted. */
+	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+			 R600_CONTEXT_INV_CONST_CACHE |
+			 R600_CONTEXT_FLUSH_AND_INV_CB |
+			 R600_CONTEXT_FLUSH_AND_INV_DB |
+			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
+			 R600_CONTEXT_FLUSH_AND_INV_DB_META;
+}
+
 /* INIT/DEINIT */
 
 void si_init_all_descriptors(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f3d402348e3..6774e57e21f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -197,6 +197,9 @@ void si_set_sampler_view(struct r600_context *rctx, unsigned shader,
 void si_init_all_descriptors(struct r600_context *rctx);
 void si_release_all_descriptors(struct r600_context *rctx);
 void si_all_descriptors_begin_new_cs(struct r600_context *rctx);
+void si_copy_buffer(struct r600_context *rctx,
+		    struct pipe_resource *dst, struct pipe_resource *src,
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size);
 
 /* si_state.c */
 struct si_pipe_shader_selector;
-- 
2.30.2