From 991328498b9b1fa2937c61546bf1f3f4e5949f93 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 2 Jan 2020 17:02:12 -0500 Subject: [PATCH] radeonsi: move SI and CIK+ SDMA code into 1 common function for cleanups MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-By: Timur Kristóf --- src/gallium/drivers/radeonsi/Makefile.sources | 1 - src/gallium/drivers/radeonsi/cik_sdma.c | 60 +--------- src/gallium/drivers/radeonsi/meson.build | 1 - src/gallium/drivers/radeonsi/si_blit.c | 1 - src/gallium/drivers/radeonsi/si_buffer.c | 6 +- src/gallium/drivers/radeonsi/si_dma.c | 110 ------------------ src/gallium/drivers/radeonsi/si_dma_cs.c | 89 ++++++++++++++ src/gallium/drivers/radeonsi/si_gfx_cs.c | 6 +- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 6 +- .../drivers/radeonsi/si_test_dma_perf.c | 4 +- src/gallium/drivers/radeonsi/si_texture.c | 8 +- 12 files changed, 104 insertions(+), 190 deletions(-) delete mode 100644 src/gallium/drivers/radeonsi/si_dma.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index f25309736c9..886aaf6fa34 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -19,7 +19,6 @@ C_SOURCES := \ si_cp_dma.c \ si_debug.c \ si_descriptors.c \ - si_dma.c \ si_dma_cs.c \ si_fence.c \ si_get.c \ diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index 3b07c964eed..df8a2fcd577 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -26,58 +26,6 @@ #include "sid.h" #include "si_pipe.h" -static void cik_sdma_copy_buffer(struct si_context *ctx, - struct pipe_resource *dst, - struct pipe_resource *src, - uint64_t dst_offset, - uint64_t src_offset, - uint64_t size) -{ - struct radeon_cmdbuf *cs = ctx->sdma_cs; - unsigned i, ncopy, csize; - unsigned align = ~0u; - struct si_resource *sdst = si_resource(dst); - struct si_resource *ssrc = si_resource(src); - - /* Mark the buffer range of destination as valid (initialized), - * so that transfer_map knows it should wait for the GPU when mapping - * that range. */ - util_range_add(dst, &sdst->valid_buffer_range, dst_offset, - dst_offset + size); - - dst_offset += sdst->gpu_address; - src_offset += ssrc->gpu_address; - - ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); - - /* Align copy size to dw if src/dst address are dw aligned */ - if ((src_offset & 0x3) == 0 && - (dst_offset & 0x3) == 0 && - size > 4 && - (size & 3) != 0) { - align = ~0x3u; - ncopy++; - } - - si_need_dma_space(ctx, ncopy * 7, sdst, ssrc); - - for (i = 0; i < ncopy; i++) { - csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size; - radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, - CIK_SDMA_COPY_SUB_OPCODE_LINEAR, - 0)); - radeon_emit(cs, ctx->chip_class >= GFX9 ? csize - 1 : csize); - radeon_emit(cs, 0); /* src/dst endian swap */ - radeon_emit(cs, src_offset); - radeon_emit(cs, src_offset >> 32); - radeon_emit(cs, dst_offset); - radeon_emit(cs, dst_offset >> 32); - dst_offset += csize; - src_offset += csize; - size -= csize; - } -} - static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w) { width = u_minify(width, level); @@ -680,17 +628,13 @@ static void cik_sdma_copy(struct pipe_context *ctx, { struct si_context *sctx = (struct si_context *)ctx; + assert(src->target != PIPE_BUFFER); + if (!sctx->sdma_cs || src->flags & PIPE_RESOURCE_FLAG_SPARSE || dst->flags & PIPE_RESOURCE_FLAG_SPARSE) goto fallback; - /* If src is a buffer and dst is a texture, we are uploading metadata. */ - if (src->target == PIPE_BUFFER) { - cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); - return; - } - /* SDMA causes corruption. See: * https://bugs.freedesktop.org/show_bug.cgi?id=110575 * https://bugs.freedesktop.org/show_bug.cgi?id=110635 diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 3baf70a020e..d2d3dd684b0 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -34,7 +34,6 @@ files_libradeonsi = files( 'si_cp_dma.c', 'si_debug.c', 'si_descriptors.c', - 'si_dma.c', 'si_dma_cs.c', 'si_fence.c', 'si_get.c', diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 9c48bf42a3a..643b15a09b4 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1212,7 +1212,6 @@ static void si_blit(struct pipe_context *ctx, * on failure (recursion). */ if (dst->surface.is_linear && - sctx->dma_copy && util_can_blit_via_copy_region(info, false)) { sctx->dma_copy(ctx, info->dst.resource, info->dst.level, info->dst.box.x, info->dst.box.y, diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c index 220a4cbdcbf..1de431cc937 100644 --- a/src/gallium/drivers/radeonsi/si_buffer.c +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -503,9 +503,9 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx, box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT))); if (staging) { /* Copy the VRAM buffer to the staging buffer. */ - sctx->dma_copy(ctx, &staging->b.b, 0, - box->x % SI_MAP_BUFFER_ALIGNMENT, - 0, 0, resource, 0, box); + si_sdma_copy_buffer(sctx, &staging->b.b, resource, + box->x % SI_MAP_BUFFER_ALIGNMENT, + box->x, box->width); data = si_buffer_map_sync_with_rings(sctx, staging, usage & ~PIPE_TRANSFER_UNSYNCHRONIZED); diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c deleted file mode 100644 index afcc38bbf01..00000000000 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * Copyright 2018 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "sid.h" -#include "si_pipe.h" - -#include "util/format/u_format.h" - -static void si_dma_copy_buffer(struct si_context *ctx, - struct pipe_resource *dst, - struct pipe_resource *src, - uint64_t dst_offset, - uint64_t src_offset, - uint64_t size) -{ - struct radeon_cmdbuf *cs = ctx->sdma_cs; - unsigned i, ncopy, count, max_size, sub_cmd, shift; - struct si_resource *sdst = si_resource(dst); - struct si_resource *ssrc = si_resource(src); - - /* Mark the buffer range of destination as valid (initialized), - * so that transfer_map knows it should wait for the GPU when mapping - * that range. */ - util_range_add(dst, &sdst->valid_buffer_range, dst_offset, - dst_offset + size); - - dst_offset += sdst->gpu_address; - src_offset += ssrc->gpu_address; - - /* see whether we should use the dword-aligned or byte-aligned copy */ - if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { - sub_cmd = SI_DMA_COPY_DWORD_ALIGNED; - shift = 2; - max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE; - } else { - sub_cmd = SI_DMA_COPY_BYTE_ALIGNED; - shift = 0; - max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE; - } - - ncopy = DIV_ROUND_UP(size, max_size); - si_need_dma_space(ctx, ncopy * 5, sdst, ssrc); - - for (i = 0; i < ncopy; i++) { - count = MIN2(size, max_size); - radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd, - count >> shift)); - radeon_emit(cs, dst_offset); - radeon_emit(cs, src_offset); - radeon_emit(cs, (dst_offset >> 32UL) & 0xff); - radeon_emit(cs, (src_offset >> 32UL) & 0xff); - dst_offset += count; - src_offset += count; - size -= count; - } -} - -static void si_dma_copy(struct pipe_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - struct si_context *sctx = (struct si_context *)ctx; - - if (sctx->sdma_cs == NULL || - src->flags & PIPE_RESOURCE_FLAG_SPARSE || - dst->flags & PIPE_RESOURCE_FLAG_SPARSE) { - goto fallback; - } - - if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - si_dma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); - return; - } - - /* SI SDMA image copies are unimplemented. */ -fallback: - si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); -} - -void si_init_dma_functions(struct si_context *sctx) -{ - sctx->dma_copy = si_dma_copy; -} diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index 8671c027c03..c58b2b103be 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -125,6 +125,95 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, } } +void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, + struct pipe_resource *src, uint64_t dst_offset, + uint64_t src_offset, uint64_t size) +{ + struct radeon_cmdbuf *cs = sctx->sdma_cs; + unsigned i, ncopy, csize; + struct si_resource *sdst = si_resource(dst); + struct si_resource *ssrc = si_resource(src); + + if (!cs || + dst->flags & PIPE_RESOURCE_FLAG_SPARSE || + src->flags & PIPE_RESOURCE_FLAG_SPARSE) { + si_copy_buffer(sctx, dst, src, dst_offset, src_offset, size); + return; + } + + /* Mark the buffer range of destination as valid (initialized), + * so that transfer_map knows it should wait for the GPU when mapping + * that range. */ + util_range_add(dst, &sdst->valid_buffer_range, dst_offset, + dst_offset + size); + + dst_offset += sdst->gpu_address; + src_offset += ssrc->gpu_address; + + if (sctx->chip_class == GFX6) { + unsigned max_size, sub_cmd, shift; + + /* see whether we should use the dword-aligned or byte-aligned copy */ + if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { + sub_cmd = SI_DMA_COPY_DWORD_ALIGNED; + shift = 2; + max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE; + } else { + sub_cmd = SI_DMA_COPY_BYTE_ALIGNED; + shift = 0; + max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE; + } + + ncopy = DIV_ROUND_UP(size, max_size); + si_need_dma_space(sctx, ncopy * 5, sdst, ssrc); + + for (i = 0; i < ncopy; i++) { + csize = MIN2(size, max_size); + radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd, + csize >> shift)); + radeon_emit(cs, dst_offset); + radeon_emit(cs, src_offset); + radeon_emit(cs, (dst_offset >> 32UL) & 0xff); + radeon_emit(cs, (src_offset >> 32UL) & 0xff); + dst_offset += csize; + src_offset += csize; + size -= csize; + } + return; + } + + /* The following code is for CI and later. */ + unsigned align = ~0u; + ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); + + /* Align copy size to dw if src/dst address are dw aligned */ + if ((src_offset & 0x3) == 0 && + (dst_offset & 0x3) == 0 && + size > 4 && + (size & 3) != 0) { + align = ~0x3u; + ncopy++; + } + + si_need_dma_space(sctx, ncopy * 7, sdst, ssrc); + + for (i = 0; i < ncopy; i++) { + csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size; + radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, + CIK_SDMA_COPY_SUB_OPCODE_LINEAR, + 0)); + radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize); + radeon_emit(cs, 0); /* src/dst endian swap */ + radeon_emit(cs, src_offset); + radeon_emit(cs, src_offset >> 32); + radeon_emit(cs, dst_offset); + radeon_emit(cs, dst_offset >> 32); + dst_offset += csize; + src_offset += csize; + size -= csize; + } +} + void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct si_resource *dst, struct si_resource *src) { diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index ddaf2af3349..15f3d238ac5 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -119,14 +119,12 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, ctx->sdma_uploads_in_progress = true; for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) { struct si_sdma_upload *up = &ctx->sdma_uploads[i]; - struct pipe_box box; assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 && up->size % 4 == 0); - u_box_1d(up->src_offset, up->size, &box); - ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0, - &up->src->b.b, 0, &box); + si_sdma_copy_buffer(ctx, &up->dst->b.b, &up->src->b.b, + up->dst_offset, up->src_offset, up->size); } ctx->sdma_uploads_in_progress = false; si_unref_sdma_uploads(ctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e5f9b29a9ec..3f84725f8f2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -595,7 +595,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (sctx->chip_class >= GFX7) cik_init_sdma_functions(sctx); else - si_init_dma_functions(sctx); + sctx->dma_copy = si_resource_copy_region; if (sscreen->debug_flags & DBG(FORCE_SDMA)) sctx->b.resource_copy_region = sctx->dma_copy; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e222de1e906..519b0050fd3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1363,14 +1363,14 @@ void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved, enum ring_type ring); bool si_replace_shader(unsigned num, struct si_shader_binary *binary); -/* si_dma.c */ -void si_init_dma_functions(struct si_context *sctx); - /* si_dma_cs.c */ void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset); void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned clear_value); +void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, + struct pipe_resource *src, uint64_t dst_offset, + uint64_t src_offset, uint64_t size); void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct si_resource *dst, struct si_resource *src); void si_flush_dma_cs(struct si_context *ctx, unsigned flags, diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index c796cc164a3..4eec3d12459 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -191,9 +191,7 @@ void si_test_dma_perf(struct si_screen *sscreen) } else if (test_sdma) { /* SDMA */ if (is_copy) { - struct pipe_box box; - u_box_1d(0, size, &box); - sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box); + si_sdma_copy_buffer(sctx, dst, src, 0, 0, size); } else { si_sdma_clear_buffer(sctx, dst, 0, size, clear_value); } diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 9fc669a6abd..183d5bd5294 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1513,14 +1513,12 @@ si_texture_create_object(struct pipe_screen *screen, /* Copy the staging buffer to the buffer backing the texture. */ struct si_context *sctx = (struct si_context*)sscreen->aux_context; - struct pipe_box box; - u_box_1d(0, buf->b.b.width0, &box); assert(tex->surface.dcc_retile_map_offset <= UINT_MAX); simple_mtx_lock(&sscreen->aux_context_lock); - sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0, - tex->surface.dcc_retile_map_offset, 0, 0, - &buf->b.b, 0, &box); + si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b, + tex->surface.dcc_retile_map_offset, + 0, buf->b.b.width0); sscreen->aux_context->flush(sscreen->aux_context, NULL, 0); simple_mtx_unlock(&sscreen->aux_context_lock); -- 2.30.2