radeonsi: move SI and CIK+ SDMA code into 1 common function for cleanups
author Marek Olšák <marek.olsak@amd.com>
Thu, 2 Jan 2020 22:02:12 +0000 (17:02 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 6 Jan 2020 20:38:35 +0000 (15:38 -0500)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
12 files changed:
src/gallium/drivers/radeonsi/Makefile.sources
src/gallium/drivers/radeonsi/cik_sdma.c
src/gallium/drivers/radeonsi/meson.build
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_buffer.c
src/gallium/drivers/radeonsi/si_dma.c [deleted file]
src/gallium/drivers/radeonsi/si_dma_cs.c
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_test_dma_perf.c
src/gallium/drivers/radeonsi/si_texture.c

index f25309736c9d516977c308817ef0b40c033a8391..886aaf6fa3415e54217a01d66ba972060129e173 100644 (file)
@@ -19,7 +19,6 @@ C_SOURCES := \
        si_cp_dma.c \
        si_debug.c \
        si_descriptors.c \
-       si_dma.c \
        si_dma_cs.c \
        si_fence.c \
        si_get.c \
index 3b07c964eed80f670089d34f356d2ccbabcb8336..df8a2fcd577f9fac92e4ef7d4349645c4e70b501 100644 (file)
 #include "sid.h"
 #include "si_pipe.h"
 
-static void cik_sdma_copy_buffer(struct si_context *ctx,
-                                struct pipe_resource *dst,
-                                struct pipe_resource *src,
-                                uint64_t dst_offset,
-                                uint64_t src_offset,
-                                uint64_t size)
-{
-       struct radeon_cmdbuf *cs = ctx->sdma_cs;
-       unsigned i, ncopy, csize;
-       unsigned align = ~0u;
-       struct si_resource *sdst = si_resource(dst);
-       struct si_resource *ssrc = si_resource(src);
-
-       /* Mark the buffer range of destination as valid (initialized),
-        * so that transfer_map knows it should wait for the GPU when mapping
-        * that range. */
-       util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
-                      dst_offset + size);
-
-       dst_offset += sdst->gpu_address;
-       src_offset += ssrc->gpu_address;
-
-       ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
-
-       /* Align copy size to dw if src/dst address are dw aligned */
-       if ((src_offset & 0x3) == 0 &&
-           (dst_offset & 0x3) == 0 &&
-           size > 4 &&
-           (size & 3) != 0) {
-               align = ~0x3u;
-               ncopy++;
-       }
-
-       si_need_dma_space(ctx, ncopy * 7, sdst, ssrc);
-
-       for (i = 0; i < ncopy; i++) {
-               csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
-               radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
-                                               CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
-                                               0));
-               radeon_emit(cs, ctx->chip_class >= GFX9 ? csize - 1 : csize);
-               radeon_emit(cs, 0); /* src/dst endian swap */
-               radeon_emit(cs, src_offset);
-               radeon_emit(cs, src_offset >> 32);
-               radeon_emit(cs, dst_offset);
-               radeon_emit(cs, dst_offset >> 32);
-               dst_offset += csize;
-               src_offset += csize;
-               size -= csize;
-       }
-}
-
 static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned blk_w)
 {
        width = u_minify(width, level);
@@ -680,17 +628,13 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
+       assert(src->target != PIPE_BUFFER);
+
        if (!sctx->sdma_cs ||
            src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
            dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
                goto fallback;
 
-       /* If src is a buffer and dst is a texture, we are uploading metadata. */
-       if (src->target == PIPE_BUFFER) {
-               cik_sdma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
-               return;
-       }
-
        /* SDMA causes corruption. See:
         *   https://bugs.freedesktop.org/show_bug.cgi?id=110575
         *   https://bugs.freedesktop.org/show_bug.cgi?id=110635
index 3baf70a020e9aaa5e7be1032d79d857d3cb91fdf..d2d3dd684b0cc5516891c0f5781177c4f355b955 100644 (file)
@@ -34,7 +34,6 @@ files_libradeonsi = files(
   'si_cp_dma.c',
   'si_debug.c',
   'si_descriptors.c',
-  'si_dma.c',
   'si_dma_cs.c',
   'si_fence.c',
   'si_get.c',
index 9c48bf42a3aaf09bb1034c3ed71d7fc72b86871c..643b15a09b4513e0453a606e06c39a54735b4009 100644 (file)
@@ -1212,7 +1212,6 @@ static void si_blit(struct pipe_context *ctx,
         * on failure (recursion).
         */
        if (dst->surface.is_linear &&
-           sctx->dma_copy &&
            util_can_blit_via_copy_region(info, false)) {
                sctx->dma_copy(ctx, info->dst.resource, info->dst.level,
                                 info->dst.box.x, info->dst.box.y,
index 220a4cbdcbf2036734b78c254c7dceac164e98ba..1de431cc93769f2551d15974409621e98f6c40ad 100644 (file)
@@ -503,9 +503,9 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
                                box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT)));
                if (staging) {
                        /* Copy the VRAM buffer to the staging buffer. */
-                       sctx->dma_copy(ctx, &staging->b.b, 0,
-                                      box->x % SI_MAP_BUFFER_ALIGNMENT,
-                                      0, 0, resource, 0, box);
+                       si_sdma_copy_buffer(sctx, &staging->b.b, resource,
+                                           box->x % SI_MAP_BUFFER_ALIGNMENT,
+                                           box->x, box->width);
 
                        data = si_buffer_map_sync_with_rings(sctx, staging,
                                                             usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
deleted file mode 100644 (file)
index afcc38b..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- * Copyright 2018 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "sid.h"
-#include "si_pipe.h"
-
-#include "util/format/u_format.h"
-
-static void si_dma_copy_buffer(struct si_context *ctx,
-                               struct pipe_resource *dst,
-                               struct pipe_resource *src,
-                               uint64_t dst_offset,
-                               uint64_t src_offset,
-                               uint64_t size)
-{
-       struct radeon_cmdbuf *cs = ctx->sdma_cs;
-       unsigned i, ncopy, count, max_size, sub_cmd, shift;
-       struct si_resource *sdst = si_resource(dst);
-       struct si_resource *ssrc = si_resource(src);
-
-       /* Mark the buffer range of destination as valid (initialized),
-        * so that transfer_map knows it should wait for the GPU when mapping
-        * that range. */
-       util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
-                      dst_offset + size);
-
-       dst_offset += sdst->gpu_address;
-       src_offset += ssrc->gpu_address;
-
-       /* see whether we should use the dword-aligned or byte-aligned copy */
-       if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
-               sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
-               shift = 2;
-               max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
-       } else {
-               sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
-               shift = 0;
-               max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
-       }
-
-       ncopy = DIV_ROUND_UP(size, max_size);
-       si_need_dma_space(ctx, ncopy * 5, sdst, ssrc);
-
-       for (i = 0; i < ncopy; i++) {
-               count = MIN2(size, max_size);
-               radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
-                                             count >> shift));
-               radeon_emit(cs, dst_offset);
-               radeon_emit(cs, src_offset);
-               radeon_emit(cs, (dst_offset >> 32UL) & 0xff);
-               radeon_emit(cs, (src_offset >> 32UL) & 0xff);
-               dst_offset += count;
-               src_offset += count;
-               size -= count;
-       }
-}
-
-static void si_dma_copy(struct pipe_context *ctx,
-                       struct pipe_resource *dst,
-                       unsigned dst_level,
-                       unsigned dstx, unsigned dsty, unsigned dstz,
-                       struct pipe_resource *src,
-                       unsigned src_level,
-                       const struct pipe_box *src_box)
-{
-       struct si_context *sctx = (struct si_context *)ctx;
-
-       if (sctx->sdma_cs == NULL ||
-           src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
-           dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
-               goto fallback;
-       }
-
-       if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-               si_dma_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
-               return;
-       }
-
-       /* SI SDMA image copies are unimplemented. */
-fallback:
-       si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
-                               src, src_level, src_box);
-}
-
-void si_init_dma_functions(struct si_context *sctx)
-{
-       sctx->dma_copy = si_dma_copy;
-}
index 8671c027c03a8dfede3dc0e415760af308fbdb6c..c58b2b103bee22f26dcff4cd5ce61c92efd23f99 100644 (file)
@@ -125,6 +125,95 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
        }
 }
 
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+                        struct pipe_resource *src, uint64_t dst_offset,
+                        uint64_t src_offset, uint64_t size)
+{ /* Copy "size" bytes from src+src_offset to dst+dst_offset by emitting copy packets into sctx->sdma_cs. */
+       struct radeon_cmdbuf *cs = sctx->sdma_cs;
+       unsigned i, ncopy, csize;
+       struct si_resource *sdst = si_resource(dst);
+       struct si_resource *ssrc = si_resource(src);
+
+       if (!cs || /* no SDMA command buffer, or either resource is sparse */
+           dst->flags & PIPE_RESOURCE_FLAG_SPARSE ||
+           src->flags & PIPE_RESOURCE_FLAG_SPARSE) {
+               si_copy_buffer(sctx, dst, src, dst_offset, src_offset, size); /* non-SDMA fallback */
+               return;
+       }
+
+       /* Mark the destination byte range [dst_offset, dst_offset + size) as
+        * valid (initialized), so that transfer_map knows it should wait for
+        * the GPU when mapping that range. Done before the offsets are rebased. */
+       util_range_add(dst, &sdst->valid_buffer_range, dst_offset,
+                      dst_offset + size);
+
+       dst_offset += sdst->gpu_address; /* from here on the offsets are absolute GPU addresses */
+       src_offset += ssrc->gpu_address;
+
+       if (sctx->chip_class == GFX6) { /* GFX6 path: SI_DMA packets, 5 dwords each */
+               unsigned max_size, sub_cmd, shift;
+
+               /* Use the dword-aligned copy only if both addresses and the size are multiples of 4; else byte-aligned. */
+               if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
+                       sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
+                       shift = 2; /* count field below becomes csize >> 2, i.e. dwords */
+                       max_size = SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE;
+               } else {
+                       sub_cmd = SI_DMA_COPY_BYTE_ALIGNED;
+                       shift = 0; /* count field below is csize in bytes */
+                       max_size = SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE;
+               }
+
+               ncopy = DIV_ROUND_UP(size, max_size);
+               si_need_dma_space(sctx, ncopy * 5, sdst, ssrc); /* 5 dwords are emitted per packet below */
+
+               for (i = 0; i < ncopy; i++) {
+                       csize = MIN2(size, max_size);
+                       radeon_emit(cs, SI_DMA_PACKET(SI_DMA_PACKET_COPY, sub_cmd,
+                                                     csize >> shift));
+                       radeon_emit(cs, dst_offset);
+                       radeon_emit(cs, src_offset);
+                       radeon_emit(cs, (dst_offset >> 32UL) & 0xff); /* only bits 39:32 of each address are emitted */
+                       radeon_emit(cs, (src_offset >> 32UL) & 0xff);
+                       dst_offset += csize;
+                       src_offset += csize;
+                       size -= csize;
+               }
+               return;
+       }
+
+       /* The following code is for every chip_class other than GFX6 (CI and later): CIK_SDMA packets, 7 dwords each. */
+       unsigned align = ~0u;
+       ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
+
+       /* If both addresses are dword-aligned but the size (> 4) is not, mask chunk sizes to a dword multiple and add one packet for the tail. */
+       if ((src_offset & 0x3) == 0 &&
+           (dst_offset & 0x3) == 0 &&
+           size > 4 &&
+           (size & 3) != 0) {
+               align = ~0x3u;
+               ncopy++;
+       }
+
+       si_need_dma_space(sctx, ncopy * 7, sdst, ssrc); /* 7 dwords are emitted per packet below */
+
+       for (i = 0; i < ncopy; i++) {
+               csize = size >= 4 ? MIN2(size & align, CIK_SDMA_COPY_MAX_SIZE) : size;
+               radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+                                               CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+                                               0));
+               radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize); /* GFX9+ gets the byte count minus one */
+               radeon_emit(cs, 0); /* src/dst endian swap */
+               radeon_emit(cs, src_offset);
+               radeon_emit(cs, src_offset >> 32);
+               radeon_emit(cs, dst_offset);
+               radeon_emit(cs, dst_offset >> 32);
+               dst_offset += csize;
+               src_offset += csize;
+               size -= csize;
+       }
+}
+
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
                       struct si_resource *dst, struct si_resource *src)
 {
index ddaf2af3349865caa2d362071269a9c5547b65af..15f3d238ac5332e254e1c9df89408f6b43497fbc 100644 (file)
@@ -119,14 +119,12 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
        ctx->sdma_uploads_in_progress = true;
        for (unsigned i = 0; i < ctx->num_sdma_uploads; i++) {
                struct si_sdma_upload *up = &ctx->sdma_uploads[i];
-               struct pipe_box box;
 
                assert(up->src_offset % 4 == 0 && up->dst_offset % 4 == 0 &&
                       up->size % 4 == 0);
 
-               u_box_1d(up->src_offset, up->size, &box);
-               ctx->dma_copy(&ctx->b, &up->dst->b.b, 0, up->dst_offset, 0, 0,
-                             &up->src->b.b, 0, &box);
+               si_sdma_copy_buffer(ctx, &up->dst->b.b, &up->src->b.b,
+                                   up->dst_offset, up->src_offset, up->size);
        }
        ctx->sdma_uploads_in_progress = false;
        si_unref_sdma_uploads(ctx);
index e5f9b29a9ec1eb76271ea326afeea22c2641f185..3f84725f8f202fa07ae517c90f51a4e5726e42ef 100644 (file)
@@ -595,7 +595,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        if (sctx->chip_class >= GFX7)
                cik_init_sdma_functions(sctx);
        else
-               si_init_dma_functions(sctx);
+               sctx->dma_copy = si_resource_copy_region;
 
        if (sscreen->debug_flags & DBG(FORCE_SDMA))
                sctx->b.resource_copy_region = sctx->dma_copy;
index e222de1e906f1aa79375e9881ac164af852c6a87..519b0050fd34ddf9a8f1ff2c35ae4857d21faf66 100644 (file)
@@ -1363,14 +1363,14 @@ void si_check_vm_faults(struct si_context *sctx,
                        struct radeon_saved_cs *saved, enum ring_type ring);
 bool si_replace_shader(unsigned num, struct si_shader_binary *binary);
 
-/* si_dma.c */
-void si_init_dma_functions(struct si_context *sctx);
-
 /* si_dma_cs.c */
 void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst,
                           uint64_t offset);
 void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
                          uint64_t offset, uint64_t size, unsigned clear_value);
+void si_sdma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
+                        struct pipe_resource *src, uint64_t dst_offset,
+                        uint64_t src_offset, uint64_t size);
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
                       struct si_resource *dst, struct si_resource *src);
 void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
index c796cc164a3bfd79685b76955796cf0a51c0c291..4eec3d124594bfd9d9e1659c494dae55a7ddd90b 100644 (file)
@@ -191,9 +191,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
                                        } else if (test_sdma) {
                                                /* SDMA */
                                                if (is_copy) {
-                                                       struct pipe_box box;
-                                                       u_box_1d(0, size, &box);
-                                                       sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, 0, &box);
+                                                       si_sdma_copy_buffer(sctx, dst, src, 0, 0, size);
                                                } else {
                                                        si_sdma_clear_buffer(sctx, dst, 0, size, clear_value);
                                                }
index 9fc669a6abdf60480cf9dbba4bb9862d816deb1b..183d5bd5294f42161c68ba68fc4360ee3cf2e777 100644 (file)
@@ -1513,14 +1513,12 @@ si_texture_create_object(struct pipe_screen *screen,
 
                        /* Copy the staging buffer to the buffer backing the texture. */
                        struct si_context *sctx = (struct si_context*)sscreen->aux_context;
-                       struct pipe_box box;
-                       u_box_1d(0, buf->b.b.width0, &box);
 
                        assert(tex->surface.dcc_retile_map_offset <= UINT_MAX);
                        simple_mtx_lock(&sscreen->aux_context_lock);
-                       sctx->dma_copy(&sctx->b, &tex->buffer.b.b, 0,
-                                      tex->surface.dcc_retile_map_offset, 0, 0,
-                                      &buf->b.b, 0, &box);
+                       si_sdma_copy_buffer(sctx, &tex->buffer.b.b, &buf->b.b,
+                                           tex->surface.dcc_retile_map_offset,
+                                           0, buf->b.b.width0);
                        sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
                        simple_mtx_unlock(&sscreen->aux_context_lock);