turnip: Implement buffer->buffer DMA copies.
author: Bas Nieuwenhuizen <basni@chromium.org>
Mon, 18 Feb 2019 15:09:27 +0000 (16:09 +0100)
committer: Chia-I Wu <olvaffe@gmail.com>
Mon, 11 Mar 2019 17:02:13 +0000 (10:02 -0700)
Passes dEQP-VK.api.copy_and_blit.core.buffer_to_buffer.*

src/freedreno/vulkan/tu_meta_buffer.c
src/freedreno/vulkan/tu_meta_copy.c

index 0b65a600fd581ccd4d1d74a06a579566e698b02b..98a73413cf2f589eb562f9978ee4a048a15c3626 100644 (file)
@@ -9,15 +9,6 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
 {
 }
 
-void
-tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
-                 VkBuffer srcBuffer,
-                 VkBuffer destBuffer,
-                 uint32_t regionCount,
-                 const VkBufferCopy *pRegions)
-{
-}
-
 void
 tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                    VkBuffer dstBuffer,
index 86d85e7b13783f6530ed23bb9b5598e9da010661..0a80aefb80e0f5046f62f53c786cc35046d88ffb 100644 (file)
 
 #include "tu_private.h"
 
+#include "a6xx.xml.h"
+#include "adreno_common.xml.h"
+#include "adreno_pm4.xml.h"
+
+#include "vk_format.h"
+
+#include "tu_cs.h"
+
+/*
+ * Compose a 2D blit control word for colour format `fmt`: the fixed
+ * 0xf00000 bits combined with the COLOR_FORMAT field for `fmt` and the IFMT
+ * field obtained from tu6_rb_fmt_to_ifmt(fmt).
+ */
+static uint32_t
+blit_control(enum a6xx_color_fmt fmt)
+{
+   const unsigned fixed_bits = 0xf00000;
+   const unsigned color_bits = A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt);
+   const unsigned ifmt_bits =
+      A6XX_RB_2D_BLIT_CNTL_IFMT(tu6_rb_fmt_to_ifmt(fmt));
+
+   return fixed_bits | color_bits | ifmt_bits;
+}
+
+/*
+ * Emit the preamble that precedes a DMA copy into the command buffer's main
+ * command stream.  In order: a PC_CCU_INVALIDATE_COLOR event, an LRZ_FLUSH
+ * event, a CP_SKIP_IB2_ENABLE_GLOBAL packet with payload 0, a wait-for-idle,
+ * and a write of 0x10000000 to RB_CCU_CNTL.
+ */
+static void
+tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
+{
+   struct tu_cs *cs = &cmdbuf->cs;
+
+   /* Space for everything emitted below. */
+   tu_cs_reserve_space(cmdbuf->device, cs, 10);
+
+   /* Two single-dword event writes. */
+   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
+   tu_cs_emit(cs, PC_CCU_INVALIDATE_COLOR);
+   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
+   tu_cs_emit(cs, LRZ_FLUSH);
+
+   tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
+   tu_cs_emit(cs, 0x0);
+
+   /* Wait for idle before touching RB_CCU_CNTL. */
+   tu_cs_emit_wfi(cs);
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
+   tu_cs_emit(cs, 0x10000000);
+}
+
+/*
+ * Record commands into cmdbuf->cs that copy `size` bytes from `src_bo` at
+ * byte offset `src_offset` to `dst_bo` at byte offset `dst_offset`.
+ *
+ * The bytes are described to the 2D blitter as a one-row, linear R8_UNORM
+ * surface and copied with one CP_BLIT packet per chunk.  For every chunk the
+ * source and destination addresses are rounded down to a multiple of 64
+ * bytes and the remainder is applied as the X origin of the blit rectangle.
+ *
+ * Both BOs are added to the command buffer's BO list (source for reading,
+ * destination for writing) before anything is emitted.
+ *
+ * NOTE(review): the results of the tu_cs_reserve_space() calls (here and in
+ * tu_dma_prepare()) are not examined -- confirm whether reservation can fail
+ * and needs to be handled.
+ */
+static void
+tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
+               struct tu_bo *src_bo,
+               uint64_t src_offset,
+               struct tu_bo *dst_bo,
+               uint64_t dst_offset,
+               uint64_t size)
+{
+   /* Every chunk except the last copies at least 0x4000 - 0x3f bytes (see
+    * size_todo below), so dividing by 0x4000 - 0x40 and rounding up gives an
+    * upper bound on the number of loop iterations.
+    */
+   const unsigned max_size_per_iter = 0x4000 - 0x40;
+   const unsigned max_iterations =
+      (size + max_size_per_iter) / max_size_per_iter;
+
+   tu_bo_list_add(&cmdbuf->bo_list, src_bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmdbuf->bo_list, dst_bo, MSM_SUBMIT_BO_WRITE);
+
+   tu_dma_prepare(cmdbuf);
+
+   /* 21 dwords are budgeted for the setup and 48 per loop iteration --
+    * presumably an upper bound on what is emitted; TODO confirm against the
+    * dword counts of the tu_cs helpers.
+    */
+   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 21 + 48 * max_iterations);
+
+   /* buffer copy setup */
+   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
+   tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+
+   /* The same control word is written to both the RB and GRAS copies of the
+    * 2D blit control register.
+    */
+   const uint32_t blit_cntl = blit_control(RB6_R8_UNORM) | 0x20000000;
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+   tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+   tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+   /* One pass per chunk until no bytes remain. */
+   for (; size;) {
+      uint64_t src_va = src_bo->iova + src_offset;
+      uint64_t dst_va = dst_bo->iova + dst_offset;
+
+      /* Offset of each address within its 64-byte block. */
+      unsigned src_shift = src_va & 0x3f;
+      unsigned dst_shift = dst_va & 0x3f;
+      unsigned max_shift = MAX2(src_shift, dst_shift);
+
+      /* Round both base addresses down to a 64-byte boundary. */
+      src_va -= src_shift;
+      dst_va -= dst_shift;
+
+      /* Cap the chunk so that shift + size_todo never exceeds 0x4000 for
+       * either side; the shared pitch is that extent rounded up to a
+       * multiple of 64.
+       */
+      uint32_t size_todo = MIN2(0x4000 - max_shift, size);
+      unsigned pitch = (size_todo + max_shift + 63) & ~63;
+
+      /*
+       * Emit source:
+       */
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+      tu_cs_emit(&cmdbuf->cs,
+                 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
+                    A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
+                    A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
+      tu_cs_emit(&cmdbuf->cs,
+                 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_shift + size_todo) |
+                    A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
+      tu_cs_emit_qw(&cmdbuf->cs, src_va);
+      tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
+
+      /* The remaining 8 dwords of the 13-dword register write are zero. */
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+
+      /*
+       * Emit destination:
+       */
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
+      tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(RB6_R8_UNORM) |
+                                 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+                                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+      tu_cs_emit_qw(&cmdbuf->cs, dst_va);
+
+      /* Pitch, then the remaining 5 dwords of the 9-dword write as zero. */
+      tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(pitch));
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+      tu_cs_emit(&cmdbuf->cs, 0x00000000);
+
+      /*
+       * Blit command:
+       */
+      /* Source rectangle: row 0, X from src_shift, size_todo texels wide
+       * (the bottom-right coordinate is inclusive).
+       */
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+      tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_shift));
+      tu_cs_emit(&cmdbuf->cs,
+                 A6XX_GRAS_2D_SRC_BR_X_X(src_shift + size_todo - 1));
+      tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
+      tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_BR_Y_Y(0));
+
+      /* Destination rectangle: same width, X from dst_shift. */
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
+      tu_cs_emit(&cmdbuf->cs,
+                 A6XX_GRAS_2D_DST_TL_X(dst_shift) | A6XX_GRAS_2D_DST_TL_Y(0));
+      tu_cs_emit(&cmdbuf->cs,
+                 A6XX_GRAS_2D_DST_BR_X(dst_shift + size_todo - 1) |
+                    A6XX_GRAS_2D_DST_BR_Y(0));
+
+      tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
+      tu_cs_emit(&cmdbuf->cs, 0x3f);
+      tu_cs_emit_wfi(&cmdbuf->cs);
+
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
+      tu_cs_emit(&cmdbuf->cs, 0);
+
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
+      tu_cs_emit(&cmdbuf->cs, 0xf180);
+
+      /* RB_UNKNOWN_8E04 is set to 0x01000000 around the blit and written
+       * back to 0 once the blit and a wait-for-idle have been emitted.
+       */
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
+      tu_cs_emit(&cmdbuf->cs, 0x01000000);
+
+      tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
+      tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+      tu_cs_emit_wfi(&cmdbuf->cs);
+
+      tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
+      tu_cs_emit(&cmdbuf->cs, 0);
+
+      /* Advance past the bytes covered by this chunk. */
+      src_offset += size_todo;
+      dst_offset += size_todo;
+      size -= size_todo;
+   }
+
+   /* Three trailing event writes after the last chunk. */
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
+}
+
+/*
+ * vkCmdCopyBuffer entry point: for each of the regionCount entries in
+ * pRegions, record a copy of `size` bytes between the BOs backing srcBuffer
+ * and destBuffer.  Region offsets are relative to the buffers, so each
+ * buffer's bo_offset is added to obtain the offset inside its BO.
+ */
+void
+tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
+                 VkBuffer srcBuffer,
+                 VkBuffer destBuffer,
+                 uint32_t regionCount,
+                 const VkBufferCopy *pRegions)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
+   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
+   TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
+
+   for (unsigned r = 0; r != regionCount; r++) {
+      const VkBufferCopy *region = &pRegions[r];
+
+      /* Translate buffer-relative offsets into BO-relative ones. */
+      const uint64_t src_start = src_buffer->bo_offset + region->srcOffset;
+      const uint64_t dst_start = dst_buffer->bo_offset + region->dstOffset;
+
+      tu_copy_buffer(cmdbuf, src_buffer->bo, src_start, dst_buffer->bo,
+                     dst_start, region->size);
+   }
+}
+
 static void
 meta_copy_buffer_to_image(struct tu_cmd_buffer *cmd_buffer,
                           struct tu_buffer *buffer,