tu: Implement fallback linear staging blit for CopyImage
authorConnor Abbott <cwabbott0@gmail.com>
Tue, 12 May 2020 10:47:03 +0000 (12:47 +0200)
committerMarge Bot <eric+marge@anholt.net>
Wed, 13 May 2020 13:39:04 +0000 (13:39 +0000)
Also, rewrite the format decision code so that we correctly decide when
the linear fallback is needed, even if UBWC is disabled. As part of
that, I also moved around some of the code to handle compressed formats
to make sure that copying compressed formats with a linear staging blit
works (this is now possible since we started allowing tiled compressed
textures).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5007>

src/freedreno/vulkan/tu_clear_blit.c

index 550c66174631f97b12bf8a8a17534b2cf1c08c20..3c2411e76a42238a1d541e74db612c605bec6511 100644 (file)
@@ -1412,6 +1412,35 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
       tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i);
 }
 
+/* Tiled formats don't support swapping, which means that we can't support
+ * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
+ * formats like B5G5R5A1 have a separate linear-only format when sampling.
+ * Currently we fake support for tiled swapped formats and use the unswapped
+ * format instead, but this means that reinterpreting copies to and from
+ * swapped formats can't be performed correctly unless we can swizzle the
+ * components by reinterpreting the other image as the "correct" swapped
+ * format, i.e. only when the other image is linear.
+ */
+
+static bool
+is_swapped_format(VkFormat format)
+{
+   struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
+   struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
+   return linear.fmt != tiled.fmt || linear.swap != tiled.swap;
+}
+
+/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
+ * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
+ * versa). This should mirror the logic in fdl6_layout.
+ */
+static bool
+image_is_r8g8(struct tu_image *image)
+{
+   return image->layout.cpp == 2 &&
+      vk_format_get_nr_components(image->vk_format) == 2;
+}
+
 static void
 tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
                        struct tu_image *src_image,
@@ -1439,38 +1468,158 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
    VkOffset3D dst_offset = info->dstOffset;
    VkExtent3D extent = info->extent;
 
-   /* TODO: should check (ubwc || (tile_mode && swap)) instead */
-   if (src_image->layout.tile_mode && src_image->vk_format != VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
-      format = src_image->vk_format;
+   /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
+    * Images":
+    *
+    *    When copying between compressed and uncompressed formats the extent
+    *    members represent the texel dimensions of the source image and not
+    *    the destination. When copying from a compressed image to an
+    *    uncompressed image the image texel dimensions written to the
+    *    uncompressed image will be source extent divided by the compressed
+    *    texel block dimensions. When copying from an uncompressed image to a
+    *    compressed image the image texel dimensions written to the compressed
+    *    image will be the source extent multiplied by the compressed texel
+    *    block dimensions.
+    *
+    * This means we only have to adjust the extent if the source image is
+    * compressed.
+    */
+   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
+   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
+
+   VkFormat dst_format = vk_format_is_compressed(dst_image->vk_format) ?
+      copy_format(dst_image->vk_format) : dst_image->vk_format;
+   VkFormat src_format = vk_format_is_compressed(src_image->vk_format) ?
+      copy_format(src_image->vk_format) : src_image->vk_format;
+
+   bool use_staging_blit = false;
+
+   if (src_format == dst_format) {
+      /* Images that share a format can always be copied directly because it's
+       * the same as a blit.
+       */
+      format = src_format;
+   } else if (!src_image->layout.tile_mode) {
+      /* If an image is linear, we can always safely reinterpret it with the
+       * other image's format and then do a regular blit.
+       */
+      format = dst_format;
+   } else if (!dst_image->layout.tile_mode) {
+      format = src_format;
+   } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
+      /* We can't currently copy r8g8 images to/from other cpp=2 images,
+       * due to the different tile layout.
+       */
+      use_staging_blit = true;
+   } else if (is_swapped_format(src_format) ||
+              is_swapped_format(dst_format)) {
+      /* If either format has a non-identity swap, then we can't copy
+       * to/from it.
+       */
+      use_staging_blit = true;
+   } else if (!src_image->layout.ubwc) {
+      format = dst_format;
+   } else if (!dst_image->layout.ubwc) {
+      format = src_format;
+   } else {
+      /* Both formats use UBWC and so neither can be reinterpreted.
+       * TODO: We could do an in-place decompression of the dst instead.
+       */
+      use_staging_blit = true;
+   }
+
+   struct tu_image_view dst, src;
 
-   if (dst_image->layout.tile_mode && dst_image->vk_format != VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
-      if (format != VK_FORMAT_UNDEFINED && format != dst_image->vk_format) {
-         /* can be clever in some cases but in some cases we need and intermediate
-         * linear buffer
-         */
-         tu_finishme("image copy between two tiled/ubwc images\n");
+   if (use_staging_blit) {
+      tu_image_view_blit2(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
+      tu_image_view_blit2(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);
+
+      struct tu_image staging_image = {
+         .vk_format = src_format,
+         .type = src_image->type,
+         .tiling = VK_IMAGE_TILING_LINEAR,
+         .extent = extent,
+         .level_count = 1,
+         .layer_count = info->srcSubresource.layerCount,
+         .samples = src_image->samples,
+         .bo_offset = 0,
+      }; 
+
+      VkImageSubresourceLayers staging_subresource = {
+         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+         .mipLevel = 0,
+         .baseArrayLayer = 0,
+         .layerCount = info->srcSubresource.layerCount,
+      };
+
+      VkOffset3D staging_offset = { 0 };
+
+      staging_image.layout.tile_mode = TILE6_LINEAR;
+      staging_image.layout.ubwc = false;
+
+      fdl6_layout(&staging_image.layout,
+                  vk_format_to_pipe_format(staging_image.vk_format),
+                  staging_image.samples,
+                  staging_image.extent.width,
+                  staging_image.extent.height,
+                  staging_image.extent.depth,
+                  staging_image.level_count,
+                  staging_image.layer_count,
+                  staging_image.type == VK_IMAGE_TYPE_3D);
+
+      VkResult result = tu_get_scratch_bo(cmd->device,
+                                          staging_image.layout.size,
+                                          &staging_image.bo);
+      if (result != VK_SUCCESS) {
+         cmd->record_result = result;
          return;
       }
-      format = dst_image->vk_format;
-   }
 
-   if (format == VK_FORMAT_UNDEFINED)
-      format = copy_format(src_image->vk_format);
+      tu_bo_list_add(&cmd->bo_list, staging_image.bo,
+                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
 
-   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
-   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
+      struct tu_image_view staging;
+      tu_image_view_blit2(&staging, &staging_image, src_format,
+                          &staging_subresource, 0, false);
 
-   ops->setup(cmd, cs, format, ROTATE_0, false, mask);
-   coords(ops, cs, &dst_offset, &src_offset, &extent);
+      ops->setup(cmd, cs, src_format, ROTATE_0, false, mask);
+      coords(ops, cs, &staging_offset, &src_offset, &extent);
 
-   struct tu_image_view dst, src;
-   tu_image_view_blit2(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
-   tu_image_view_blit2(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
+      for (uint32_t i = 0; i < info->extent.depth; i++) {
+         ops->src(cmd, cs, &src, i, false);
+         ops->dst(cs, &staging, i);
+         ops->run(cmd, cs);
+      }
 
-   for (uint32_t i = 0; i < info->extent.depth; i++) {
-      ops->src(cmd, cs, &src, i, false);
-      ops->dst(cs, &dst, i);
-      ops->run(cmd, cs);
+      /* When executed by the user there has to be a pipeline barrier here,
+       * but since we're doing it manually we'll have to flush ourselves.
+       */
+      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+      tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
+      tu_image_view_blit2(&staging, &staging_image, dst_format,
+                          &staging_subresource, 0, false);
+
+      ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
+      coords(ops, cs, &dst_offset, &staging_offset, &extent);
+
+      for (uint32_t i = 0; i < info->extent.depth; i++) {
+         ops->src(cmd, cs, &staging, i, false);
+         ops->dst(cs, &dst, i);
+         ops->run(cmd, cs);
+      }
+   } else {
+      tu_image_view_blit2(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
+      tu_image_view_blit2(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
+
+      ops->setup(cmd, cs, format, ROTATE_0, false, mask);
+      coords(ops, cs, &dst_offset, &src_offset, &extent);
+
+      for (uint32_t i = 0; i < info->extent.depth; i++) {
+         ops->src(cmd, cs, &src, i, false);
+         ops->dst(cs, &dst, i);
+         ops->run(cmd, cs);
+      }
    }
 }