turnip: Add image->buffer DMA copies.
authorBas Nieuwenhuizen <basni@chromium.org>
Mon, 18 Feb 2019 15:13:23 +0000 (16:13 +0100)
committerChia-I Wu <olvaffe@gmail.com>
Mon, 11 Mar 2019 17:02:13 +0000 (10:02 -0700)
Passes dEQP-VK.api.copy_and_blit.core.image_to_buffer.*

src/freedreno/vulkan/tu_meta_copy.c

index 0a80aefb80e0f5046f62f53c786cc35046d88ffb..321585cb1cadc1b80e0d681076e1e19b94b70eeb 100644 (file)
@@ -40,6 +40,29 @@ blit_control(enum a6xx_color_fmt fmt)
    return blit_cntl;
 }
 
+static uint32_t tu6_sp_2d_src_format(VkFormat format)
+{
+   const struct vk_format_description *desc = vk_format_description(format);
+   uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb);
+
+   int channel = vk_format_get_first_non_void_channel(format);
+   if (channel < 0) {
+      /* TODO special format. */
+      return reg;
+   }
+   if (desc->channel[channel].normalized) {
+      if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
+         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
+      reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
+   } else if (desc->channel[channel].pure_integer) {
+      if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
+         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
+      else
+         reg |= A6XX_SP_2D_SRC_FORMAT_UINT;
+   }
+   return reg;
+}
+
 static void
 tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
 {
@@ -193,6 +216,197 @@ tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
    tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
 }
 
+static void
+tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
+                             struct tu_image *src_image,
+                             struct tu_buffer *dst_buffer,
+                             const VkBufferImageCopy *copy_info,
+                             VkFormat format,
+                             uint32_t layer,
+                             uint64_t dst_va)
+{
+   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
+
+   uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
+   unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
+                        vk_format_get_blocksize(format);
+
+   unsigned dst_pitch;
+   unsigned dst_offset = 0;
+   if (copy_info->imageExtent.height == 1) {
+      /* Can't find this in the spec, but not having it is sort of insane? */
+      assert(dst_va % vk_format_get_blocksize(format) == 0);
+
+      dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
+      dst_va &= ~63;
+
+      dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
+   } else {
+      unsigned dst_pixel_stride = copy_info->bufferRowLength
+                                  ? copy_info->bufferRowLength
+                                  : copy_info->imageExtent.width;
+      dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
+      assert(!(dst_pitch & 63));
+      assert(!(dst_va & 63));
+   }
+   
+
+   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
+
+   /*
+    * Emit source:
+    */
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
+   tu_cs_emit(&cmdbuf->cs,
+              A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
+                 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
+                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
+   tu_cs_emit(&cmdbuf->cs,
+              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
+                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
+                    src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
+   tu_cs_emit_qw(&cmdbuf->cs, src_va);
+   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
+
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+
+   /*
+    * Emit destination:
+    */
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
+   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
+                              A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
+                              A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
+   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
+   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+   tu_cs_emit(&cmdbuf->cs, 0x00000000);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
+   tu_cs_emit(&cmdbuf->cs,
+              A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
+                                      copy_info->imageExtent.width - 1));
+   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
+   tu_cs_emit(&cmdbuf->cs,
+              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
+                                      copy_info->imageExtent.height - 1));
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
+   tu_cs_emit(&cmdbuf->cs,
+              A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
+   tu_cs_emit(&cmdbuf->cs,
+              A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
+                 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
+
+   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
+   tu_cs_emit(&cmdbuf->cs, 0x3f);
+   tu_cs_emit_wfi(&cmdbuf->cs);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
+   tu_cs_emit(&cmdbuf->cs, 0);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
+   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
+   tu_cs_emit(&cmdbuf->cs, 0x01000000);
+
+   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
+   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+   tu_cs_emit_wfi(&cmdbuf->cs);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
+   tu_cs_emit(&cmdbuf->cs, 0);
+}
+
+static void
+tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
+                        struct tu_image *src_image,
+                        struct tu_buffer *dst_buffer,
+                        const VkBufferImageCopy *copy_info)
+{
+   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
+
+   /* general setup */
+   tu_dma_prepare(cmdbuf);
+
+   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
+
+   /* buffer copy setup */
+   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
+   tu_cs_emit(&cmdbuf->cs, A2XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+
+   VkFormat format = src_image->vk_format;
+   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
+
+   unsigned dst_pixel_stride = copy_info->bufferRowLength
+                                  ? copy_info->bufferRowLength
+                                  : copy_info->imageExtent.width;
+   unsigned cpp = vk_format_get_blocksize(format);
+   unsigned dst_pitch = dst_pixel_stride * cpp;
+
+
+   const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+   tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+   tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+   for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
+       unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
+       uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
+
+       if ((dst_pitch & 63) || (dst_va & 63)) {
+         /* Do a per line copy */
+         VkBufferImageCopy line_copy_info = *copy_info;
+         line_copy_info.imageExtent.height = 1;
+         for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
+            /*
+             * if dst_va is not aligned the line copy will need to adjust. Give it
+             * room to do so.
+             */
+            unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
+            line_copy_info.imageOffset.x = copy_info->imageOffset.x;
+            line_copy_info.imageExtent.width = copy_info->imageExtent.width;
+
+            for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
+               tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
+
+               line_copy_info.imageOffset.x += max_width;
+               line_copy_info.imageExtent.width -= max_width;
+            }
+
+            line_copy_info.imageOffset.y++;
+            dst_va += dst_pitch;
+         }
+      } else {
+         tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
+      }
+   }
+
+   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
+
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
+}
+
 void
 tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
                  VkBuffer srcBuffer,
@@ -239,16 +453,6 @@ tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
                              destImageLayout, regionCount, pRegions);
 }
 
-static void
-meta_copy_image_to_buffer(struct tu_cmd_buffer *cmd_buffer,
-                          struct tu_buffer *buffer,
-                          struct tu_image *image,
-                          VkImageLayout layout,
-                          uint32_t regionCount,
-                          const VkBufferImageCopy *pRegions)
-{
-}
-
 void
 tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
                         VkImage srcImage,
@@ -261,8 +465,10 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
    TU_FROM_HANDLE(tu_image, src_image, srcImage);
    TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
 
-   meta_copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
-                             srcImageLayout, regionCount, pRegions);
+   for (unsigned i = 0; i < regionCount; ++i) {
+      tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
+                              pRegions + i);
+   }
 }
 
 static void