From b408734e5e2fe1e1ef08080c4425ad8a7ed33579 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 12 May 2020 12:47:03 +0200
Subject: [PATCH] tu: Implement fallback linear staging blit for CopyImage

Also, rewrite the format decision code so that we correctly decide when
the linear fallback is needed, even if UBWC is disabled. As part of
that, I also moved some of the compressed-format handling code around
to make sure that copying compressed formats with a linear staging blit
works (this is now possible since we started allowing tiled compressed
textures).

Part-of:
---
 src/freedreno/vulkan/tu_clear_blit.c | 197 +++++++++++++++++++++++----
 1 file changed, 173 insertions(+), 24 deletions(-)

diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 550c6617463..3c2411e76a4 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -1412,6 +1412,35 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
       tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i);
 }
 
+/* Tiled formats don't support swapping, which means that we can't support
+ * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
+ * formats like B5G5R5A1 have a separate linear-only format when sampling.
+ * Currently we fake support for tiled swapped formats and use the unswapped
+ * format instead, but this means that reinterpreting copies to and from
+ * swapped formats can't be performed correctly unless we can swizzle the
+ * components by reinterpreting the other image as the "correct" swapped
+ * format, i.e. only when the other image is linear.
+ */
+
+static bool
+is_swapped_format(VkFormat format)
+{
+   struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
+   struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
+   return linear.fmt != tiled.fmt || linear.swap != tiled.swap;
+}
+
+/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
+ * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
+ * versa). This should mirror the logic in fdl6_layout.
+ */
+static bool
+image_is_r8g8(struct tu_image *image)
+{
+   return image->layout.cpp == 2 &&
+      vk_format_get_nr_components(image->vk_format) == 2;
+}
+
 static void
 tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
                        struct tu_image *src_image,
@@ -1439,38 +1468,158 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
    VkOffset3D dst_offset = info->dstOffset;
    VkExtent3D extent = info->extent;
 
-   /* TODO: should check (ubwc || (tile_mode && swap)) instead */
-   if (src_image->layout.tile_mode && src_image->vk_format != VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
-      format = src_image->vk_format;
+   /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
+    * Images":
+    *
+    *    When copying between compressed and uncompressed formats the extent
+    *    members represent the texel dimensions of the source image and not
+    *    the destination. When copying from a compressed image to an
+    *    uncompressed image the image texel dimensions written to the
+    *    uncompressed image will be source extent divided by the compressed
+    *    texel block dimensions. When copying from an uncompressed image to a
+    *    compressed image the image texel dimensions written to the compressed
+    *    image will be the source extent multiplied by the compressed texel
+    *    block dimensions.
+    *
+    * This means we only have to adjust the extent if the source image is
+    * compressed.
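+    *
+    * For example, copying a 64x64 region from a BC1 image (4x4 blocks) to
+    * an uncompressed image uses an extent of 64x64 and writes 16x16 texels
+    * to the destination, so the extent below only needs to be divided by
+    * the source format's block dimensions.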
+    */
+   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
+   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
+
+   VkFormat dst_format = vk_format_is_compressed(dst_image->vk_format) ?
+      copy_format(dst_image->vk_format) : dst_image->vk_format;
+   VkFormat src_format = vk_format_is_compressed(src_image->vk_format) ?
+      copy_format(src_image->vk_format) : src_image->vk_format;
+
+   bool use_staging_blit = false;
+
+   if (src_format == dst_format) {
+      /* Images that share a format can always be copied directly because it's
+       * the same as a blit.
+       */
+      format = src_format;
+   } else if (!src_image->layout.tile_mode) {
+      /* If an image is linear, we can always safely reinterpret it with the
+       * other image's format and then do a regular blit.
+       */
+      format = dst_format;
+   } else if (!dst_image->layout.tile_mode) {
+      format = src_format;
+   } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
+      /* We can't currently copy r8g8 images to/from other cpp=2 images,
+       * due to the different tile layout.
+       */
+      use_staging_blit = true;
+   } else if (is_swapped_format(src_format) ||
+              is_swapped_format(dst_format)) {
+      /* If either format has a non-identity swap, then we can't copy
+       * to/from it.
+       */
+      use_staging_blit = true;
+   } else if (!src_image->layout.ubwc) {
+      format = dst_format;
+   } else if (!dst_image->layout.ubwc) {
+      format = src_format;
+   } else {
+      /* Both formats use UBWC and so neither can be reinterpreted.
+       * TODO: We could do an in-place decompression of the dst instead.
+       */
+      use_staging_blit = true;
+   }
+
+   struct tu_image_view dst, src;
 
-   if (dst_image->layout.tile_mode && dst_image->vk_format != VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
-      if (format != VK_FORMAT_UNDEFINED && format != dst_image->vk_format) {
-         /* can be clever in some cases but in some cases we need and intermediate
-          * linear buffer
-          */
-         tu_finishme("image copy between two tiled/ubwc images\n");
+   if (use_staging_blit) {
+      tu_image_view_blit2(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
+      tu_image_view_blit2(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);
+
+      struct tu_image staging_image = {
+         .vk_format = src_format,
+         .type = src_image->type,
+         .tiling = VK_IMAGE_TILING_LINEAR,
+         .extent = extent,
+         .level_count = 1,
+         .layer_count = info->srcSubresource.layerCount,
+         .samples = src_image->samples,
+         .bo_offset = 0,
+      };
+
+      VkImageSubresourceLayers staging_subresource = {
+         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+         .mipLevel = 0,
+         .baseArrayLayer = 0,
+         .layerCount = info->srcSubresource.layerCount,
+      };
+
+      VkOffset3D staging_offset = { 0 };
+
+      staging_image.layout.tile_mode = TILE6_LINEAR;
+      staging_image.layout.ubwc = false;
+
+      fdl6_layout(&staging_image.layout,
+                  vk_format_to_pipe_format(staging_image.vk_format),
+                  staging_image.samples,
+                  staging_image.extent.width,
+                  staging_image.extent.height,
+                  staging_image.extent.depth,
+                  staging_image.level_count,
+                  staging_image.layer_count,
+                  staging_image.type == VK_IMAGE_TYPE_3D);
+
+      VkResult result = tu_get_scratch_bo(cmd->device,
+                                          staging_image.layout.size,
+                                          &staging_image.bo);
+      if (result != VK_SUCCESS) {
+         cmd->record_result = result;
          return;
       }
-      format = dst_image->vk_format;
-   }
 
-   if (format == VK_FORMAT_UNDEFINED)
-      format = copy_format(src_image->vk_format);
+      tu_bo_list_add(&cmd->bo_list, staging_image.bo,
+                     MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE);
 
-   copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
-   copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
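+      /* Create the staging view with src_format so that the first blit is
+       * a raw copy into the linear staging image; the view is recreated
+       * with dst_format below, which is safe because a linear image can
+       * always be reinterpreted with the other format (see above).
+       */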
+      struct tu_image_view staging;
+      tu_image_view_blit2(&staging, &staging_image, src_format,
+                          &staging_subresource, 0, false);
 
-   ops->setup(cmd, cs, format, ROTATE_0, false, mask);
-   coords(ops, cs, &dst_offset, &src_offset, &extent);
+      ops->setup(cmd, cs, src_format, ROTATE_0, false, mask);
+      coords(ops, cs, &staging_offset, &src_offset, &extent);
 
-   struct tu_image_view dst, src;
-   tu_image_view_blit2(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
-   tu_image_view_blit2(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
+      for (uint32_t i = 0; i < info->extent.depth; i++) {
+         ops->src(cmd, cs, &src, i, false);
+         ops->dst(cs, &staging, i);
+         ops->run(cmd, cs);
+      }
 
-   for (uint32_t i = 0; i < info->extent.depth; i++) {
-      ops->src(cmd, cs, &src, i, false);
-      ops->dst(cs, &dst, i);
-      ops->run(cmd, cs);
+      /* If the user were performing this copy, a pipeline barrier would be
+       * required between the two blits; since we emit both blits ourselves,
+       * we have to flush and invalidate manually here.
+       */
+      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
+      tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+
+      tu_image_view_blit2(&staging, &staging_image, dst_format,
+                          &staging_subresource, 0, false);
+
+      ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
+      coords(ops, cs, &dst_offset, &staging_offset, &extent);
+
+      for (uint32_t i = 0; i < info->extent.depth; i++) {
+         ops->src(cmd, cs, &staging, i, false);
+         ops->dst(cs, &dst, i);
+         ops->run(cmd, cs);
+      }
+   } else {
+      tu_image_view_blit2(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
+      tu_image_view_blit2(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
+
+      ops->setup(cmd, cs, format, ROTATE_0, false, mask);
+      coords(ops, cs, &dst_offset, &src_offset, &extent);
+
+      for (uint32_t i = 0; i < info->extent.depth; i++) {
+         ops->src(cmd, cs, &src, i, false);
+         ops->dst(cs, &dst, i);
+         ops->run(cmd, cs);
+      }
    }
 }
-- 
2.30.2