turnip: improve CmdCopyImage and implement CmdBlitImage
author Jonathan Marek <jonathan@marek.ca>
Sat, 5 Oct 2019 16:38:40 +0000 (12:38 -0400)
committer Jonathan Marek <jonathan@marek.ca>
Tue, 15 Oct 2019 11:56:20 +0000 (07:56 -0400)
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/vulkan/meson.build
src/freedreno/vulkan/tu_blit.c [new file with mode: 0644]
src/freedreno/vulkan/tu_blit.h [new file with mode: 0644]
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_formats.c
src/freedreno/vulkan/tu_meta_blit.c
src/freedreno/vulkan/tu_meta_copy.c
src/freedreno/vulkan/tu_private.h

index 03079a14994d88a36ed01c5261f23490760ac484..b529923251895977c9e67c56074fd7c5af8e6f4f 100644 (file)
@@ -49,8 +49,11 @@ tu_format_table_c = custom_target(
 )
 
 libtu_files = files(
+  'tu_blit.c',
+  'tu_blit.h',
   'tu_cmd_buffer.c',
   'tu_cs.c',
+  'tu_cs.h',
   'tu_device.c',
   'tu_descriptor_set.c',
   'tu_descriptor_set.h',
diff --git a/src/freedreno/vulkan/tu_blit.c b/src/freedreno/vulkan/tu_blit.c
new file mode 100644 (file)
index 0000000..33e0fd4
--- /dev/null
@@ -0,0 +1,298 @@
+/*
+ * Copyright © 2019 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan@marek.ca>
+ *
+ */
+
+#include "tu_blit.h"
+
+#include "a6xx.xml.h"
+#include "adreno_common.xml.h"
+#include "adreno_pm4.xml.h"
+
+#include "vk_format.h"
+
+#include "tu_cs.h"
+
+/* TODO:
+ *   - Avoid disabling tiling for swapped formats
+ *     (image_to_image copy doesn't deal with it)
+ *   - Fix d24_unorm_s8_uint support & aspects
+ *   - UBWC
+ */
+
+/* Map an arbitrary format to the unsigned-integer format used for raw copies.
+ *
+ * The choice depends only on vk_format_get_blocksizebits(format); block
+ * sizes other than 8/16/32/64/96/128 bits hit unreachable().
+ */
+static VkFormat
+blit_copy_format(VkFormat format)
+{
+   switch (vk_format_get_blocksizebits(format)) {
+   case 8:  return VK_FORMAT_R8_UINT;
+   case 16: return VK_FORMAT_R16_UINT;
+   /* NOTE(review): 32-bit blocks map to a four-channel 8-bit format rather
+    * than R32_UINT; presumably so the stencil_read path in tu_blit() can
+    * select a single byte through the component swap -- confirm.
+    */
+   case 32: return VK_FORMAT_R8G8B8A8_UINT;
+   case 64: return VK_FORMAT_R32G32_UINT;
+   case 96: return VK_FORMAT_R32G32B32_UINT;
+   case 128:return VK_FORMAT_R32G32B32A32_UINT;
+   default:
+      unreachable("unhandled format size");
+   }
+}
+
+/* Build the surface "info" dword (color format, tile mode, component swap,
+ * sRGB flag) for one side of a blit.
+ *
+ * img:          surface to describe
+ * src:          true when the surface is the blit source
+ * stencil_read: true to force the XYZW swap (set by tu_blit() for
+ *               D24S8 -> R8_UINT copies)
+ *
+ * The value is assembled with the A6XX_SP_PS_2D_SRC_INFO_* helpers and is
+ * also written to RB_2D_DST_INFO by emit_blit_step().
+ * NOTE(review): this assumes both registers share these field positions --
+ * confirm against the register database.
+ */
+static uint32_t
+blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read)
+{
+   const struct tu_native_format *fmt = tu6_get_native_format(img->fmt);
+   enum a6xx_color_fmt rb = fmt->rb;
+   /* tiled surfaces always use WZYX; only linear ones keep the format's swap */
+   enum a3xx_color_swap swap = img->tiled ? WZYX : fmt->swap;
+   /* 10_10_10_2 unorm is substituted by its FLOAT16 variant on the source side only */
+   if (rb == RB6_R10G10B10A2_UNORM && src)
+      rb = RB6_R10G10B10A2_FLOAT16;
+   /* X8Z24 is described to the blitter as Z24S8 */
+   if (rb == RB6_X8Z24_UNORM)
+      rb = RB6_Z24_UNORM_S8_UINT;
+
+   /* overrides whatever swap was chosen above */
+   if (stencil_read)
+      swap = XYZW;
+
+   return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) |
+          A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
+          A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) |
+          COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB);
+}
+
+/* Emit the register state and the CP_BLIT packet for one 2D blit described
+ * by `blt` into the command buffer's command stream (cmdbuf->cs).
+ *
+ * All source/destination surface state is written on every call, so the
+ * function can be invoked repeatedly with modified copies of a tu_blit
+ * (tu_blit() does this for its per-line paths).
+ */
+static void
+emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
+{
+   struct tu_cs *cs = &cmdbuf->cs;
+
+   /* reserve room for the packets emitted by this function */
+   tu_cs_reserve_space(cmdbuf->device, cs, 52);
+
+   /* hardware color format of the destination; X8Z24 is handled as Z24S8 */
+   enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb;
+   if (fmt == RB6_X8Z24_UNORM)
+      fmt = RB6_Z24_UNORM_S8_UINT;
+
+   enum a6xx_2d_ifmt ifmt = tu6_rb_fmt_to_ifmt(fmt);
+
+   /* sRGB destinations are expected to be 8-bit unorm and use the sRGB ifmt */
+   if (vk_format_is_srgb(blt->dst.fmt)) {
+      assert(ifmt == R2D_UNORM8);
+      ifmt = R2D_UNORM8_SRGB;
+   }
+
+   uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
+                        A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) | /* not required? */
+                        COND(fmt == RB6_Z24_UNORM_S8_UINT, A6XX_RB_2D_BLIT_CNTL_D24S8) |
+                        A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
+                        A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);
+
+   /* the same control word is written to both the RB and GRAS copies
+    * (&cmdbuf->cs is the same stream as `cs`)
+    */
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
+   tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
+   tu_cs_emit(&cmdbuf->cs, blit_cntl);
+
+   /*
+    * Emit source:
+    */
+   /* 10 dwords: info, size, 64-bit address, pitch, then five zero dwords */
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
+   /* NOTE(review): the meaning of the 0x500000 bits is not visible here */
+   tu_cs_emit(cs, blit_image_info(&blt->src, true, blt->stencil_read) |
+                  A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
+                  /* TODO: should disable this bit for integer formats ? */
+                  COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
+                  COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
+                  0x500000);
+   /* the source size is given as offset + extent of the region being read */
+   tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
+                  A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
+   tu_cs_emit_qw(cs, blt->src.va);
+   tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));
+
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+
+   /*
+    * Emit destination:
+    */
+   /* 9 dwords: info, 64-bit address, pitch, then five zero dwords */
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
+   tu_cs_emit(cs, blit_image_info(&blt->dst, false, false));
+   tu_cs_emit_qw(cs, blt->dst.va);
+   tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit(cs, 0x00000000);
+
+   /* source rectangle: top-left, and bottom-right as offset + size - 1 */
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
+   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
+   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
+   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
+   tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));
+
+   /* destination rectangle, same convention with x/y packed per dword */
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
+   tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
+                  A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
+   tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
+                  A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));
+
+   /* NOTE(review): event 0x3f is emitted by raw number; its name is not visible here */
+   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
+   tu_cs_emit(cs, 0x3f);
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
+   tu_cs_emit(cs, 0);
+
+   /* only affects SP_2D_SRC_FORMAT below; blit_cntl was already emitted
+    * with the unmodified format
+    */
+   if (fmt == RB6_R10G10B10A2_UNORM)
+      fmt = RB6_R16G16B16A16_FLOAT;
+
+   /* integer flags come from the source format, the color format from the
+    * (possibly adjusted) destination format
+    */
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
+   tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
+                  COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
+                  A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt) |
+                  COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
+                  A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
+
+   /* RB_UNKNOWN_8E04 is set to 0x01000000 around the blit and cleared after it */
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
+   tu_cs_emit(cs, 0x01000000);
+
+   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
+   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
+
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
+   tu_cs_emit(cs, 0);
+}
+
+/* Record a blit (copy == false) or a raw copy (copy == true) from blt->src
+ * to blt->dst, repeated for blt->layers layers.
+ *
+ * For copies the surfaces are rewritten in place: compressed formats are
+ * converted from texel to block units, both formats are replaced by the
+ * same-size UINT format from blit_copy_format(), and multisampled surfaces
+ * are widened by the sample count and treated as single-sampled.
+ * For blits the formats are used as given and dst must be single-sampled.
+ *
+ * Sources or destinations whose address or pitch is not a multiple of 64
+ * are handled one row at a time.
+ *
+ * `blt` is modified by this function: in addition to the copy-path rewrites,
+ * src.va and dst.va are advanced by layer_size after every layer.
+ */
+void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt, bool copy)
+{
+   if (copy) {
+      /* reading the stencil aspect of D24S8 into an R8_UINT destination */
+      blt->stencil_read =
+         blt->dst.fmt == VK_FORMAT_R8_UINT &&
+         blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
+
+      /* block sizes must match, except for the stencil read above */
+      assert(vk_format_get_blocksize(blt->dst.fmt) ==
+             vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
+      assert(blt->src.samples == blt->dst.samples);
+
+      /* compressed source: convert pitch, offset and extent to block units */
+      if (vk_format_is_compressed(blt->src.fmt)) {
+         unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
+         unsigned block_height = vk_format_get_blockheight(blt->src.fmt);
+
+         blt->src.pitch /= block_width;
+         blt->src.x /= block_width;
+         blt->src.y /= block_height;
+
+         /* for image_to_image copy, width/height is on the src format */
+         blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
+         blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
+      }
+
+      /* compressed destination: only pitch and offset are converted here,
+       * the extent was taken from the source above
+       */
+      if (vk_format_is_compressed(blt->dst.fmt)) {
+         unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
+         unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);
+
+         blt->dst.pitch /= block_width;
+         blt->dst.x /= block_width;
+         blt->dst.y /= block_height;
+      }
+
+      blt->src.fmt = blit_copy_format(blt->src.fmt);
+      blt->dst.fmt = blit_copy_format(blt->dst.fmt);
+
+      /* TODO: does this work correctly with tiling/etc ? */
+      /* multisampled surfaces are copied as single-sampled ones that are
+       * `samples` times wider
+       */
+      blt->src.x *= blt->src.samples;
+      blt->dst.x *= blt->dst.samples;
+      blt->src.width *= blt->src.samples;
+      blt->dst.width *= blt->dst.samples;
+      blt->src.samples = 1;
+      blt->dst.samples = 1;
+   } else {
+      assert(blt->dst.samples == 1);
+   }
+
+   /* room for the five event writes and the CP_SET_MARKER packet below */
+   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 18);
+
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, LRZ_FLUSH, false);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_COLOR, false);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_DEPTH, false);
+
+   /* buffer copy setup */
+   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
+   tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
+
+   for (unsigned layer = 0; layer < blt->layers; layer++) {
+      if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
+         /* per line copy path (buffer_to_image) */
+         assert(copy && !blt->src.tiled);
+         struct tu_blit line_blt = *blt;
+         /* address of the first source row to copy */
+         uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y;
+
+         /* each step reads a single row addressed directly through src.va */
+         line_blt.src.y = 0;
+         line_blt.src.pitch = 0;
+         line_blt.src.height = 1;
+         line_blt.dst.height = 1;
+
+         for (unsigned y = 0; y < blt->src.height; y++) {
+            /* round the address down to 64 bytes and fold the remainder
+             * into the x offset, in units of the format's block size
+             */
+            line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
+            line_blt.src.va = src_va & ~63;
+
+            emit_blit_step(cmdbuf, &line_blt);
+
+            line_blt.dst.y++;
+            src_va += blt->src.pitch;
+         }
+      } else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
+         /* per line copy path (image_to_buffer) */
+         assert(copy && !blt->dst.tiled);
+         struct tu_blit line_blt = *blt;
+         /* address of the first destination row to write */
+         uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y;
+
+         /* each step writes a single row addressed directly through dst.va */
+         line_blt.dst.y = 0;
+         line_blt.dst.pitch = 0;
+         line_blt.src.height = 1;
+         line_blt.dst.height = 1;
+
+         for (unsigned y = 0; y < blt->src.height; y++) {
+            /* same 64-byte rounding as above, applied to the destination */
+            line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
+            line_blt.dst.va = dst_va & ~63;
+
+            emit_blit_step(cmdbuf, &line_blt);
+
+            line_blt.src.y++;
+            dst_va += blt->dst.pitch;
+         }
+      } else {
+         /* both sides are 64-byte aligned: one blit covers the whole layer */
+         emit_blit_step(cmdbuf, blt);
+      }
+      /* advance the caller's surfaces to the next layer */
+      blt->dst.va += blt->dst.layer_size;
+      blt->src.va += blt->src.layer_size;
+   }
+
+   /* room for the four event writes below */
+   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17);
+
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
+   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_INVALIDATE, false);
+}
diff --git a/src/freedreno/vulkan/tu_blit.h b/src/freedreno/vulkan/tu_blit.h
new file mode 100644 (file)
index 0000000..1f4967e
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Copyright © 2019 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan@marek.ca>
+ *
+ */
+
+#ifndef TU_BLIT_H
+#define TU_BLIT_H
+
+#include "tu_private.h"
+
+#include "vk_format.h"
+
+/* One side (source or destination) of a 2D blit: where the surface lives
+ * and which rectangle of it takes part in the operation.
+ */
+struct tu_blit_surf {
+   VkFormat fmt;                    /* format the surface is accessed with */
+   enum a6xx_tile_mode tile_mode;   /* tile mode programmed into the surface info */
+   bool tiled;                      /* true when the backing image is not TILE6_LINEAR */
+   uint64_t va;                     /* GPU address of the first layer to access */
+   uint32_t pitch, layer_size;      /* row pitch; distance added to va per layer */
+   uint32_t x, y;                   /* top-left corner of the rectangle */
+   uint32_t width, height;          /* size of the rectangle */
+   unsigned samples;                /* sample count of the surface */
+};
+
+/* Describe a region of an image as a blit surface.
+ *
+ * img:     image to access
+ * subres:  mip level and base array layer to access
+ * offsets: two opposite corners of the region (array of two VkOffset3D);
+ *          they may be given in either order
+ */
+static inline struct tu_blit_surf
+tu_blit_surf(struct tu_image *img,
+             VkImageSubresourceLayers subres,
+             const VkOffset3D *offsets)
+{
+   return (struct tu_blit_surf) {
+      .fmt = img->vk_format,
+      .tile_mode = tu6_get_image_tile_mode(img, subres.mipLevel),
+      .tiled = img->tile_mode != TILE6_LINEAR,
+      /* image base + mip level offset + array layer + lowest z slice */
+      .va = img->bo->iova + img->bo_offset + img->levels[subres.mipLevel].offset +
+            subres.baseArrayLayer * img->layer_size +
+            MIN2(offsets[0].z, offsets[1].z) * img->levels[subres.mipLevel].size,
+      /* level pitch scaled by block size and sample count */
+      .pitch = img->levels[subres.mipLevel].pitch * vk_format_get_blocksize(img->vk_format) * img->samples,
+      /* 3D images step by the size of one slice of the level, others by the array layer size */
+      .layer_size = img->type == VK_IMAGE_TYPE_3D ? img->levels[subres.mipLevel].size : img->layer_size,
+      /* MIN2/abs make the rectangle independent of the order of the corners */
+      .x = MIN2(offsets[0].x, offsets[1].x),
+      .y = MIN2(offsets[0].y, offsets[1].y),
+      .width = abs(offsets[1].x - offsets[0].x),
+      .height = abs(offsets[1].y - offsets[0].y),
+      .samples = img->samples,
+   };
+}
+
+/* Variant of tu_blit_surf() that takes the region as offset + extent.
+ *
+ * The second corner is offset + (extent.width, extent.height); both corners
+ * use offset.z, so extent.depth is not consulted here.
+ */
+static inline struct tu_blit_surf
+tu_blit_surf_ext(struct tu_image *image,
+                 VkImageSubresourceLayers subres,
+                 VkOffset3D offset,
+                 VkExtent3D extent)
+{
+   return tu_blit_surf(image, subres, (VkOffset3D[]) {
+      offset, {.x = offset.x + extent.width,
+               .y = offset.y + extent.height,
+               .z = offset.z}
+   });
+}
+
+/* Describe a whole image as a blit surface: a zero-initialized subresource
+ * (mip level 0, array layer 0) and the rectangle from the origin to the
+ * image's width and height.
+ */
+static inline struct tu_blit_surf
+tu_blit_surf_whole(struct tu_image *image)
+{
+   return tu_blit_surf(image, (VkImageSubresourceLayers){}, (VkOffset3D[]) {
+      {}, {image->extent.width, image->extent.height}
+   });
+}
+
+/* Parameters of one blit/copy operation consumed by tu_blit(). */
+struct tu_blit {
+   struct tu_blit_surf dst;        /* surface written by the blit */
+   struct tu_blit_surf src;        /* surface read by the blit */
+   uint32_t layers;                /* number of layers processed, stepping va by layer_size */
+   bool filter;                    /* sets the source FILTER bit when true */
+   bool stencil_read;              /* set by tu_blit() for D24S8 -> R8_UINT copies */
+   enum a6xx_rotation rotation;    /* rotation/flip programmed into the blit control */
+};
+
+void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt, bool copy);
+
+#endif /* TU_BLIT_H */
index d701ae5fba7ee0506f5884a53d5753e9307547ec..3043740626f97d04bb650abd0ec9d3a619910a6c 100644 (file)
@@ -314,8 +314,8 @@ tu_tiling_config_get_tile(const struct tu_tiling_config *tiling,
          : tile->begin.y + tiling->tile0.extent.height;
 }
 
-static enum a3xx_msaa_samples
-tu6_msaa_samples(uint32_t samples)
+enum a3xx_msaa_samples
+tu_msaa_samples(uint32_t samples)
 {
    switch (samples) {
    case 1:
index 7e68d6103cd1ad4d7c8f230b334bc0c781329ed9..6b762441fbe29b4eb890ba15b73ca6d927cdb1cd 100644 (file)
@@ -645,8 +645,8 @@ tu_physical_device_get_format_properties(
    }
 
    if (native_fmt->rb >= 0) {
-      linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
-      tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
+      linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+      tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
    }
 
 end:
index da5ff6b12b7b5b9b0f8b6b46c1d53db361c543b3..d624eef88b4fc20af7378912c73ed96b44277bdc 100644 (file)
 
 #include "tu_private.h"
 
-#include "nir/nir_builder.h"
+#include "tu_blit.h"
+
+/* Record one VkImageBlit region as a 2D blit from src_image to dst_image.
+ *
+ * cmdbuf:    command buffer the blit is recorded into
+ * src_image: image read by the blit
+ * dst_image: image written by the blit
+ * info:      region description (subresources and corner offsets)
+ * filter:    VK_FILTER_LINEAR enables filtering, anything else disables it
+ *
+ * Mirroring in x/y is expressed through the blit rotation. Mirroring or
+ * scaling along z is not implemented; such regions are reported through
+ * tu_finishme() and skipped.
+ */
+static void
+tu_blit_image(struct tu_cmd_buffer *cmdbuf,
+              struct tu_image *src_image,
+              struct tu_image *dst_image,
+              const VkImageBlit *info,
+              VkFilter filter)
+{
+   /* indexed as rotate[mirror_y][mirror_x] */
+   static const enum a6xx_rotation rotate[2][2] = {
+      {ROTATE_0, ROTATE_HFLIP},
+      {ROTATE_VFLIP, ROTATE_180},
+   };
+   /* an axis is mirrored when exactly one of src/dst has reversed corners */
+   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
+                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
+   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
+                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
+   bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
+                   (info->dstOffsets[1].z < info->dstOffsets[0].z);
+
+   if (mirror_z) {
+      tu_finishme("blit z mirror\n");
+      return;
+   }
+
+   if (info->srcOffsets[1].z - info->srcOffsets[0].z !=
+       info->dstOffsets[1].z - info->dstOffsets[0].z) {
+      tu_finishme("blit z filter\n");
+      return;
+   }
+   assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);
+
+   /* The z corners may be given in reverse order on both sides (no mirror).
+    * The signed difference is then negative and would be converted to a huge
+    * unsigned layer count by MAX2, so use the absolute depth, matching how
+    * tu_blit_surf() normalizes the corners with MIN2/abs.
+    */
+   const uint32_t depth = abs(info->srcOffsets[1].z - info->srcOffsets[0].z);
+
+   struct tu_blit blt = {
+      .dst = tu_blit_surf(dst_image, info->dstSubresource, info->dstOffsets),
+      .src = tu_blit_surf(src_image, info->srcSubresource, info->srcOffsets),
+      .layers = MAX2(depth, info->dstSubresource.layerCount),
+      .filter = filter == VK_FILTER_LINEAR,
+      .rotation = rotate[mirror_y][mirror_x],
+   };
+
+   tu_blit(cmdbuf, &blt, false);
+}
 
 void
 tu_CmdBlitImage(VkCommandBuffer commandBuffer,
@@ -36,4 +78,14 @@ tu_CmdBlitImage(VkCommandBuffer commandBuffer,
                 VkFilter filter)
 
 {
+   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
+   TU_FROM_HANDLE(tu_image, src_image, srcImage);
+   TU_FROM_HANDLE(tu_image, dst_image, destImage);
+
+   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (uint32_t i = 0; i < regionCount; ++i) {
+      tu_blit_image(cmdbuf, src_image, dst_image, pRegions + i, filter);
+   }
 }
index fbd5dfdec9ad774e1516b23d04d0da31473e516b..616151b751fb6fb384c8b360a8f344ccbdabc631 100644 (file)
 #include "vk_format.h"
 
 #include "tu_cs.h"
-
-/*
- * TODO:
- *   - 3D textures
- *   - compressed image formats (need to divide offset/extent)
- *   - Fix d24_unorm_s8_uint support & aspects
- */
+#include "tu_blit.h"
 
 static uint32_t
 blit_control(enum a6xx_color_fmt fmt)
@@ -47,29 +41,6 @@ blit_control(enum a6xx_color_fmt fmt)
    return blit_cntl;
 }
 
-static uint32_t tu6_sp_2d_src_format(VkFormat format)
-{
-   const struct vk_format_description *desc = vk_format_description(format);
-   uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb);
-
-   int channel = vk_format_get_first_non_void_channel(format);
-   if (channel < 0) {
-      /* TODO special format. */
-      return reg;
-   }
-   if (desc->channel[channel].normalized) {
-      if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
-         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
-      reg |= A6XX_SP_2D_SRC_FORMAT_NORM;
-   } else if (desc->channel[channel].pure_integer) {
-      if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED)
-         reg |= A6XX_SP_2D_SRC_FORMAT_SINT;
-      else
-         reg |= A6XX_SP_2D_SRC_FORMAT_UINT;
-   }
-   return reg;
-}
-
 static void
 tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
 {
@@ -90,32 +61,6 @@ tu_dma_prepare(struct tu_cmd_buffer *cmdbuf)
    tu_cs_emit(&cmdbuf->cs, 0x10000000);
 }
 
-/* Always use UINT formats to avoid precision issues.
- *
- * Example failure it avoids:
- *   - dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.r16_unorm.r16_unorm.general_general
- */
-static VkFormat
-tu_canonical_copy_format(VkFormat format)
-{
-   switch (vk_format_get_blocksizebits(format)) {
-   case 8:
-      return VK_FORMAT_R8_UINT;
-   case 16:
-      return VK_FORMAT_R16_UINT;
-   case 32:
-      return VK_FORMAT_R32_UINT;
-   case 64:
-      return VK_FORMAT_R32G32_UINT;
-   case 96:
-      return VK_FORMAT_R32G32B32_UINT;
-   case 128:
-      return VK_FORMAT_R32G32B32A32_UINT;
-   default:
-      unreachable("unhandled format size");
-   }
-}
-
 static void
 tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
                struct tu_bo *src_bo,
@@ -249,548 +194,80 @@ tu_copy_buffer(struct tu_cmd_buffer *cmdbuf,
    tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
 }
 
-static void
-tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf,
-                             struct tu_buffer *src_buffer,
-                             struct tu_image *dst_image,
-                             const VkBufferImageCopy *copy_info,
-                             VkFormat format,
-                             uint32_t layer,
-                             uint64_t src_va)
+static struct tu_blit_surf
+tu_blit_buffer(struct tu_buffer *buffer,
+               VkFormat format,
+               const VkBufferImageCopy *info)
 {
-   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
+   if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+      format = VK_FORMAT_R8_UINT;
 
-   uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset;
-   unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch *
+   unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
                         vk_format_get_blocksize(format);
 
-   unsigned src_pitch;
-   unsigned src_offset = 0;
-   if (copy_info->imageExtent.height == 1) {
-      /* Can't find this in the spec, but not having it is sort of insane? */
-      assert(src_va % vk_format_get_blocksize(format) == 0);
-
-      src_offset = (src_va & 63) / vk_format_get_blocksize(format);
-      src_va &= ~63;
-
-      src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
-   } else {
-      unsigned src_pixel_stride = copy_info->bufferRowLength
-                                  ? copy_info->bufferRowLength
-                                  : copy_info->imageExtent.width;
-      src_pitch = src_pixel_stride * vk_format_get_blocksize(format);
-      assert(!(src_pitch & 63));
-      assert(!(src_va & 63));
-   }
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
-
-   /*
-    * Emit source:
-    */
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
-   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
-                              A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
-                              A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) |
-                              0x500000);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) |
-                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
-                    copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */
-   tu_cs_emit_qw(&cmdbuf->cs, src_va);
-   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
-
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   /*
-    * Emit destination:
-    */
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
-                 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
-                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
-   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
-   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1));
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1));
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) |
-                 A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x +
-                                    copy_info->imageExtent.width - 1) |
-                 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y +
-                                       copy_info->imageExtent.height - 1));
-
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
-   tu_cs_emit(&cmdbuf->cs, 0x3f);
-   tu_cs_emit_wfi(&cmdbuf->cs);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
-   tu_cs_emit(&cmdbuf->cs, 0);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
-   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
-   tu_cs_emit(&cmdbuf->cs, 0x01000000);
-
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
-   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
-   tu_cs_emit_wfi(&cmdbuf->cs);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
-   tu_cs_emit(&cmdbuf->cs, 0);
+   return (struct tu_blit_surf) {
+      .fmt = format,
+      .tile_mode = TILE6_LINEAR,
+      .va = buffer->bo->iova + buffer->bo_offset + info->bufferOffset,
+      .pitch = pitch,
+      .layer_size = (info->bufferImageHeight ?: info->imageExtent.height) * pitch / vk_format_get_blockwidth(format) / vk_format_get_blockheight(format),
+      .width = info->imageExtent.width,
+      .height = info->imageExtent.height,
+      .samples = 1,
+   };
 }
 
 static void
 tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
                         struct tu_buffer *src_buffer,
                         struct tu_image *dst_image,
-                        const VkBufferImageCopy *copy_info)
-{
-   tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
-   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
-
-   /* general setup */
-   tu_dma_prepare(cmdbuf);
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
-
-   /* buffer copy setup */
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
-   tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
-
-   VkFormat format = tu_canonical_copy_format(dst_image->vk_format);
-   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
-
-   const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
-   tu_cs_emit(&cmdbuf->cs, blit_cntl);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
-   tu_cs_emit(&cmdbuf->cs, blit_cntl);
-
-   unsigned src_pixel_stride = copy_info->bufferRowLength
-                                  ? copy_info->bufferRowLength
-                                  : copy_info->imageExtent.width;
-   unsigned cpp = vk_format_get_blocksize(format);
-   unsigned src_pitch = src_pixel_stride * cpp;
-
-   for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
-      unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
-      uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch;
-       
-      if ((src_pitch & 63) || (src_va & 63)) {
-         /* Do a per line copy */
-         VkBufferImageCopy line_copy_info = *copy_info;
-         line_copy_info.imageExtent.height = 1;
-         for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
-            /*
-             * if src_va is not aligned the line copy will need to adjust. Give it
-             * room to do so.
-             */
-            unsigned max_width = 16384 - (src_va & 0x3f) ? 64 : 0;
-            line_copy_info.imageOffset.x = copy_info->imageOffset.x;
-            line_copy_info.imageExtent.width = copy_info->imageExtent.width;
-            
-            for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
-               tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp);
-
-               line_copy_info.imageOffset.x += max_width;
-               line_copy_info.imageExtent.width -= max_width;
-            }
-   
-            line_copy_info.imageOffset.y++;
-            src_va += src_pitch;
-         }
-      } else {
-         tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va);
-      }
-   }
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
-
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
-}
-
-static void
-tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf,
-                             struct tu_image *src_image,
-                             struct tu_buffer *dst_buffer,
-                             const VkBufferImageCopy *copy_info,
-                             VkFormat format,
-                             uint32_t layer,
-                             uint64_t dst_va)
+                        const VkBufferImageCopy *info)
 {
-   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
-
-   uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset;
-   unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch *
-                        vk_format_get_blocksize(format);
-
-   unsigned dst_pitch;
-   unsigned dst_offset = 0;
-   if (copy_info->imageExtent.height == 1) {
-      /* Can't find this in the spec, but not having it is sort of insane? */
-      assert(dst_va % vk_format_get_blocksize(format) == 0);
-
-      dst_offset = (dst_va & 63) / vk_format_get_blocksize(format);
-      dst_va &= ~63;
-
-      dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64);
-   } else {
-      unsigned dst_pixel_stride = copy_info->bufferRowLength
-                                  ? copy_info->bufferRowLength
-                                  : copy_info->imageExtent.width;
-      dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format);
-      assert(!(dst_pitch & 63));
-      assert(!(dst_va & 63));
+   if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
+       vk_format_get_blocksize(dst_image->vk_format) == 4) {
+      tu_finishme("aspect mask\n");
+      return;
    }
-   
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
-
-   /*
-    * Emit source:
-    */
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
-                 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
-                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
-                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
-                    src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
-   tu_cs_emit_qw(&cmdbuf->cs, src_va);
-   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
-
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   /*
-    * Emit destination:
-    */
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
-   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
-                              A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
-                              A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
-   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
-   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x +
-                                      copy_info->imageExtent.width - 1));
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y +
-                                      copy_info->imageExtent.height - 1));
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) |
-                 A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1));
-
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
-   tu_cs_emit(&cmdbuf->cs, 0x3f);
-   tu_cs_emit_wfi(&cmdbuf->cs);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
-   tu_cs_emit(&cmdbuf->cs, 0);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
-   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
-   tu_cs_emit(&cmdbuf->cs, 0x01000000);
 
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
-   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
-   tu_cs_emit_wfi(&cmdbuf->cs);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
-   tu_cs_emit(&cmdbuf->cs, 0);
+   tu_blit(cmdbuf, &(struct tu_blit) {
+      .dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent),
+      .src = tu_blit_buffer(src_buffer, dst_image->vk_format, info),
+      .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
+   }, true);
 }
 
 static void
 tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
                         struct tu_image *src_image,
                         struct tu_buffer *dst_buffer,
-                        const VkBufferImageCopy *copy_info)
+                        const VkBufferImageCopy *info)
 {
-   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
-   tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
-
-   /* general setup */
-   tu_dma_prepare(cmdbuf);
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
-
-   /* buffer copy setup */
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
-   tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
-
-   VkFormat format = tu_canonical_copy_format(src_image->vk_format);
-   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
-
-   unsigned dst_pixel_stride = copy_info->bufferRowLength
-                                  ? copy_info->bufferRowLength
-                                  : copy_info->imageExtent.width;
-   unsigned cpp = vk_format_get_blocksize(format);
-   unsigned dst_pitch = dst_pixel_stride * cpp;
-
-
-   const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
-   tu_cs_emit(&cmdbuf->cs, blit_cntl);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
-   tu_cs_emit(&cmdbuf->cs, blit_cntl);
-
-   for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) {
-       unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset;
-       uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch;
-
-       if ((dst_pitch & 63) || (dst_va & 63)) {
-         /* Do a per line copy */
-         VkBufferImageCopy line_copy_info = *copy_info;
-         line_copy_info.imageExtent.height = 1;
-         for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) {
-            /*
-             * if dst_va is not aligned the line copy will need to adjust. Give it
-             * room to do so.
-             */
-            unsigned max_width = 16384 - (dst_va & 0x3f) ? 64 : 0;
-            line_copy_info.imageOffset.x = copy_info->imageOffset.x;
-            line_copy_info.imageExtent.width = copy_info->imageExtent.width;
-
-            for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) {
-               tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp);
-
-               line_copy_info.imageOffset.x += max_width;
-               line_copy_info.imageExtent.width -= max_width;
-            }
-
-            line_copy_info.imageOffset.y++;
-            dst_va += dst_pitch;
-         }
-      } else {
-         tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va);
-      }
-   }
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
-
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
-}
-
-static void
-tu_copy_image_to_image_step(struct tu_cmd_buffer *cmdbuf,
-                            struct tu_image *src_image,
-                            struct tu_image *dst_image,
-                            const VkImageCopy *copy_info,
-                            VkFormat format,
-                            uint32_t layer_offset)
-{
-   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
-
-   unsigned src_layer =
-      copy_info->srcSubresource.baseArrayLayer + layer_offset;
-   uint64_t src_va =
-      src_image->bo->iova + src_image->bo_offset +
-      src_image->layer_size * src_layer +
-      src_image->levels[copy_info->srcSubresource.mipLevel].offset;
-   unsigned src_pitch =
-      src_image->levels[copy_info->srcSubresource.mipLevel].pitch *
-      vk_format_get_blocksize(format);
-
-   unsigned dst_layer =
-      copy_info->dstSubresource.baseArrayLayer + layer_offset;
-   uint64_t dst_va =
-      dst_image->bo->iova + dst_image->bo_offset +
-      dst_image->layer_size * dst_layer +
-      dst_image->levels[copy_info->dstSubresource.mipLevel].offset;
-   unsigned dst_pitch =
-      src_image->levels[copy_info->dstSubresource.mipLevel].pitch *
-      vk_format_get_blocksize(format);
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48);
-
-   /*
-    * Emit source:
-    */
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) |
-                 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) |
-                 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) |
-                 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(
-                    src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */
-   tu_cs_emit_qw(&cmdbuf->cs, src_va);
-   tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch));
-
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   /*
-    * Emit destination:
-    */
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9);
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) |
-                 A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) |
-                 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
-   tu_cs_emit_qw(&cmdbuf->cs, dst_va);
-   tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch));
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-   tu_cs_emit(&cmdbuf->cs, 0x00000000);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->srcOffset.x));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_SRC_BR_X_X(copy_info->srcOffset.x +
-                                      copy_info->extent.width - 1));
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->srcOffset.y));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->srcOffset.y +
-                                      copy_info->extent.height - 1));
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2);
-   tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_TL_X(copy_info->dstOffset.x) |
-                              A6XX_GRAS_2D_DST_TL_Y(copy_info->dstOffset.y));
-   tu_cs_emit(&cmdbuf->cs,
-              A6XX_GRAS_2D_DST_BR_X(copy_info->dstOffset.x +
-                                    copy_info->extent.width - 1) |
-                 A6XX_GRAS_2D_DST_BR_Y(copy_info->dstOffset.y +
-                                       copy_info->extent.height - 1));
-
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1);
-   tu_cs_emit(&cmdbuf->cs, 0x3f);
-   tu_cs_emit_wfi(&cmdbuf->cs);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
-   tu_cs_emit(&cmdbuf->cs, 0);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
-   tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format));
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
-   tu_cs_emit(&cmdbuf->cs, 0x01000000);
-
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1);
-   tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
-
-   tu_cs_emit_wfi(&cmdbuf->cs);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
-   tu_cs_emit(&cmdbuf->cs, 0);
+   tu_blit(cmdbuf, &(struct tu_blit) {
+      .dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info),
+      .src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent),
+      .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
+   }, true);
 }
 
 static void
 tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
                        struct tu_image *src_image,
                        struct tu_image *dst_image,
-                       const VkImageCopy *copy_info)
+                       const VkImageCopy *info)
 {
-   /* TODO:
-    *  - Handle 3D images.
-    *  - In some cases where src and dst format are different this may
-    *    have tiling implications. Not sure if things happen correctly
-    *    in that case.
-    */
-
-   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
-   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
-
-   /* general setup */
-   tu_dma_prepare(cmdbuf);
-
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6);
-
-   /* buffer copy setup */
-   tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1);
-   tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
-
-   VkFormat format = tu_canonical_copy_format(src_image->vk_format);
-   const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb;
-   const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000;
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
-   tu_cs_emit(&cmdbuf->cs, blit_cntl);
-
-   tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
-   tu_cs_emit(&cmdbuf->cs, blit_cntl);
-
-   for (unsigned layer_offset = 0;
-        layer_offset < copy_info->srcSubresource.layerCount; ++layer_offset) {
-      tu_copy_image_to_image_step(cmdbuf, src_image, dst_image, copy_info,
-                                  format, layer_offset);
+   if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
+        vk_format_get_blocksize(dst_image->vk_format) == 4) ||
+       (info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
+        vk_format_get_blocksize(src_image->vk_format) == 4)) {
+      tu_finishme("aspect mask\n");
+      return;
    }
 
-   tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15);
-
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true);
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true);
-   tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true);
+   tu_blit(cmdbuf, &(struct tu_blit) {
+      .dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
+      .src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
+      .layers = info->extent.depth,
+   }, true);
 }
 
 void
@@ -821,14 +298,15 @@ tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
                         uint32_t regionCount,
                         const VkBufferImageCopy *pRegions)
 {
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
-   TU_FROM_HANDLE(tu_image, dest_image, destImage);
+   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
+   TU_FROM_HANDLE(tu_image, dst_image, destImage);
    TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
 
-   for (unsigned i = 0; i < regionCount; ++i) {
-      tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
-                              pRegions + i);
-   }
+   tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (unsigned i = 0; i < regionCount; ++i)
+      tu_copy_buffer_to_image(cmdbuf, src_buffer, dst_image, pRegions + i);
 }
 
 void
@@ -839,14 +317,15 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
                         uint32_t regionCount,
                         const VkBufferImageCopy *pRegions)
 {
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
+   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
    TU_FROM_HANDLE(tu_image, src_image, srcImage);
    TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
 
-   for (unsigned i = 0; i < regionCount; ++i) {
-      tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer,
-                              pRegions + i);
-   }
+   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (unsigned i = 0; i < regionCount; ++i)
+      tu_copy_image_to_buffer(cmdbuf, src_image, dst_buffer, pRegions + i);
 }
 
 void
@@ -858,11 +337,13 @@ tu_CmdCopyImage(VkCommandBuffer commandBuffer,
                 uint32_t regionCount,
                 const VkImageCopy *pRegions)
 {
-   TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
+   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
    TU_FROM_HANDLE(tu_image, src_image, srcImage);
-   TU_FROM_HANDLE(tu_image, dest_image, destImage);
+   TU_FROM_HANDLE(tu_image, dst_image, destImage);
 
-   for (uint32_t i = 0; i < regionCount; ++i) {
-      tu_copy_image_to_image(cmd_buffer, src_image, dest_image, pRegions + i);
-   }
+   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
+   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
+
+   for (uint32_t i = 0; i < regionCount; ++i)
+      tu_copy_image_to_image(cmdbuf, src_image, dst_image, pRegions + i);
 }
index 405fa20b4f5fe09b1d97150807d414b836fab36b..eb043ccf5624d1e875f12a7a65b15a2dc0993599 100644 (file)
@@ -1299,6 +1299,8 @@ tu_get_levelCount(const struct tu_image *image,
 
 enum a6xx_tile_mode
 tu6_get_image_tile_mode(struct tu_image *image, int level);
+enum a3xx_msaa_samples
+tu_msaa_samples(uint32_t samples);
 
 struct tu_image_view
 {