si_cp_dma.c \
si_debug.c \
si_descriptors.c \
- si_dma.c \
si_dma_cs.c \
si_fence.c \
si_get.c \
#include "sid.h"
#include "si_pipe.h"
-static void cik_sdma_copy_buffer(struct si_context *ctx,
- struct pipe_resource *dst,
- struct pipe_resource *src,
- uint64_t dst_offset,
- uint64_t src_offset,
- uint64_t size)
-{
- struct radeon_cmdbuf *cs = ctx->sdma_cs;
- unsigned i, ncopy, csize;
- unsigned align = ~0u;
- struct si_resource *sdst = si_resource(dst);
- struct si_resource *ssrc = si_resource(src);
-
- /* Mark the buffer range of destination as valid (initialized),
- * so that transfer_map knows it should wait for the GPU when mapping
- * that range. */
- util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
- dst_offset + size);
-
- dst_offset += sdst->gpu_address;
- src_offset += ssrc->gpu_address;
-
- ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-
- /* Align copy size to dw if src/dst address are dw aligned */
- if ((src_offset & 0x3) == 0 &&
- (dst_offset & 0x3) == 0 &&
- size > 4 &&
- (size & 3) != 0) {
- align = ~0x3u;
- ncopy++;
- }
-
- si_need_dma_space(ctx, ncopy * 7, sdst, ssrc);
-
- for (i = 0; i < ncopy; i++) {
- csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
- radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
- CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
- 0));
- radeon_emit(cs, ctx->chip_class >= GFX9 ? csize - 1 : csize);
- radeon_emit(cs, 0); /* src/dst endian swap */
- radeon_emit(cs, src_offset);
- radeon_emit(cs, src_offset >> 32);
- radeon_emit(cs, dst_offset);
- radeon_emit(cs, dst_offset >> 32);
- dst_offset += csize;
- src_offset += csize;
- size -= csize;
- }
-}
-
static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
{
width = u_minify(width, level);
{
struct si_context *sctx = (struct si_context *)ctx;
+ assert(src->target != PIPE_BUFFER);
+
if (!sctx->sdma_cs ||
src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
goto fallback;
- /* If src is a buffer and dst is a texture, we are uploading metadata. */
- if (src->target == PIPE_BUFFER) {
- cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
- return;
- }
-
/* SDMA causes corruption. See:
* https://bugs.freedesktop.org/show_bug.cgi?id=110575
* https://bugs.freedesktop.org/show_bug.cgi?id=110635
'si_cp_dma.c',
'si_debug.c',
'si_descriptors.c',
- 'si_dma.c',
'si_dma_cs.c',
'si_fence.c',
'si_get.c',
* on failure (recursion).
*/
if (dst->surface.is_linear &&
- sctx->dma_copy &&
util_can_blit_via_copy_region(info, false)) {
sctx->dma_copy(ctx, info->dst.resource, info->dst.level,
info->dst.box.x, info->dst.box.y,
box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT)));
if (staging) {
/* Copy the VRAM buffer to the staging buffer. */
- sctx->dma_copy(ctx, &staging->b.b, 0,
- box->x % SI_MAP_BUFFER_ALIGNMENT,
- 0, 0, resource, 0, box);
+ si_sdma_copy_buffer(sctx, &staging->b.b, resource,
+ box->x % SI_MAP_BUFFER_ALIGNMENT,
+ box->x, box->width);
data = si_buffer_map_sync_with_rings(sctx, staging,
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- * Copyright 2018 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sid.h"
-#include "si_pipe.h"
-
-#include "util/format/u_format.h"
-
-static void si_dma_copy_buffer(struct si_context *ctx,
- struct pipe_resource *dst,
- struct pipe_resource *src,
- uint64_t dst_offset,
- uint64_t src_offset,
- uint64_t size)
-{
- struct radeon_cmdbuf *cs = ctx->sdma_cs;
- unsigned i, ncopy, count, max_size, sub_cmd, shift;
- struct si_resource *sdst = si_resource(dst);
- struct si_resource *ssrc = si_resource(src);
-
- /* Mark the buffer range of destination as valid (initialized),
- * so that transfer_map knows it should wait for the GPU when mapping
- * that range. */
- util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
- dst_offset + size);
-
- dst_offset += sdst->gpu_address;
- src_offset += ssrc->gpu_address;
-
- /* see whether we should use the dword-aligned or byte-aligned copy */
- if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
- sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
- shift = 2;
- max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
- } else {
- sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
- shift = 0;
- max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
- }
-
- ncopy = DIV_ROUND_UP(size, max_size);
- si_need_dma_space(ctx, ncopy * 5, sdst, ssrc);
-
- for (i = 0; i < ncopy; i++) {
- count = MIN2(size, max_size);
- radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
- count >> shift));
- radeon_emit(cs, dst_offset);
- radeon_emit(cs, src_offset);
- radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
- radeon_emit(cs, (src_offset >> 32UL) & 0xff);
- dst_offset += count;
- src_offset += count;
- size -= count;
- }
-}
-
-static void si_dma_copy(struct pipe_context *ctx,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box)
-{
- struct si_context *sctx = (struct si_context *)ctx;
-
- if (sctx->sdma_cs == NULL ||
- src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
- dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
- goto fallback;
- }
-
- if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
- si_dma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
- return;
- }
-
- /* SI SDMA image copies are unimplemented. */
-fallback:
- si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
- src, src_level, src_box);
-}
-
-void si_init_dma_functions(struct si_context *sctx)
-{
- sctx->dma_copy = si_dma_copy;
-}
}
}
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+ struct pipe_resource *src, uint64_t dst_offset,
+ uint64_t src_offset, uint64_t size)
+{
+ struct radeon_cmdbuf *cs = sctx->sdma_cs;
+ unsigned i, ncopy, csize;
+ struct si_resource *sdst = si_resource(dst);
+ struct si_resource *ssrc = si_resource(src);
+
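+ /* Sparse resources and contexts without an SDMA IB fall back to si_copy_buffer (the CP copy path). */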
+ if (!cs ||
+ dst->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+ src->flags & PIPE_RESOURCE_FLAG_SPARSE) {
+ si_copy_buffer(sctx, dst, src, dst_offset, src_offset, size);
+ return;
+ }
+
+ /* Mark the buffer range of destination as valid (initialized),
+ * so that transfer_map knows it should wait for the GPU when mapping
+ * that range. */
+ util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
+ dst_offset + size);
+
+ dst_offset += sdst->gpu_address;
+ src_offset += ssrc->gpu_address;
+
+ if (sctx->chip_class == GFX6) {
+ unsigned max_size, sub_cmd, shift;
+
+ /* See whether we should use the dword-aligned or byte-aligned copy. */
+ if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
+ sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
+ shift = 2;
+ max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
+ } else {
+ sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
+ shift = 0;
+ max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
+ }
+
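+ /* Each copy packet below is 5 dwords and copies at most max_size bytes. */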
+ ncopy = DIV_ROUND_UP(size, max_size);
+ si_need_dma_space(sctx, ncopy * 5, sdst, ssrc);
+
+ for (i = 0; i < ncopy; i++) {
+ csize = MIN2(size, max_size);
+ radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
+ csize >> shift));
+ radeon_emit(cs, dst_offset);
+ radeon_emit(cs, src_offset);
+ radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
+ radeon_emit(cs, (src_offset >> 32UL) & 0xff);
+ dst_offset += csize;
+ src_offset += csize;
+ size -= csize;
+ }
+ return;
+ }
+
+ /* The following code is for GFX7 (CIK) and later. */
+ unsigned align = ~0u;
+ ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+
+ /* Align copy size to dw if src/dst addresses are dw aligned */
+ if ((src_offset & 0x3) == 0 &&
+ (dst_offset & 0x3) == 0 &&
+ size > 4 &&
+ (size & 3) != 0) {
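+ /* Copy whole dwords in the main packets; the 1-3 trailing bytes need one extra packet. */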
+ align = ~0x3u;
+ ncopy++;
+ }
+
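+ /* Each linear copy packet below is 7 dwords. */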
+ si_need_dma_space(sctx, ncopy * 7, sdst, ssrc);
+
+ for (i = 0; i < ncopy; i++) {
+ csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
+ radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+ CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+ 0));
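+ /* GFX9+ encodes the byte count as size - 1. */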
+ radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
+ radeon_emit(cs, 0); /* src/dst endian swap */
+ radeon_emit(cs, src_offset);
+ radeon_emit(cs, src_offset >> 32);
+ radeon_emit(cs, dst_offset);
+ radeon_emit(cs, dst_offset >> 32);
+ dst_offset += csize;
+ src_offset += csize;
+ size -= csize;
+ }
+}
+
void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
struct si_resource *dst, struct si_resource *src)
{
ctx->sdma_uploads_in_progress = true;
for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) {
struct si_sdma_upload *up = &ctx->sdma_uploads[i];
- struct pipe_box box;
assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 &&
up->size % 4 == 0);
- u_box_1d(up->src_offset, up->size, &box);
- ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0,
- &up->src->b.b, 0, &box);
+ si_sdma_copy_buffer(ctx, &up->dst->b.b, &up->src->b.b,
+ up->dst_offset, up->src_offset, up->size);
}
ctx->sdma_uploads_in_progress = false;
si_unref_sdma_uploads(ctx);
if (sctx->chip_class >= GFX7)
cik_init_sdma_functions(sctx);
else
- si_init_dma_functions(sctx);
+ sctx->dma_copy = si_resource_copy_region;
if (sscreen->debug_flags & DBG(FORCE_SDMA))
sctx->b.resource_copy_region = sctx->dma_copy;
struct radeon_saved_cs *saved, enum ring_type ring);
bool si_replace_shader(unsigned num, struct si_shader_binary *binary);
-/* si_dma.c */
-void si_init_dma_functions(struct si_context *sctx);
-
/* si_dma_cs.c */
void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst,
uint64_t offset);
void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned clear_value);
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+ struct pipe_resource *src, uint64_t dst_offset,
+ uint64_t src_offset, uint64_t size);
void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
struct si_resource *dst, struct si_resource *src);
void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
} else if (test_sdma) {
/* SDMA */
if (is_copy) {
- struct pipe_box box;
- u_box_1d(0, size, &box);
- sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box);
+ si_sdma_copy_buffer(sctx, dst, src, 0, 0, size);
} else {
si_sdma_clear_buffer(sctx, dst, 0, size, clear_value);
}
/* Copy the staging buffer to the buffer backing the texture. */
struct si_context *sctx = (struct si_context*)sscreen->aux_context;
- struct pipe_box box;
- u_box_1d(0, buf->b.b.width0, &box);
assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
simple_mtx_lock(&sscreen->aux_context_lock);
- sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0,
- tex->surface.dcc_retile_map_offset, 0, 0,
- &buf->b.b, 0, &box);
+ si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b,
+ tex->surface.dcc_retile_map_offset,
+ 0, buf->b.b.width0);
sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
simple_mtx_unlock(&sscreen->aux_context_lock);