From d3c9914152248a0e98d0a1dcde1f71e7bd9f2b7c Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Sat, 5 Oct 2019 12:38:40 -0400 Subject: [PATCH] turnip: improve CmdCopyImage and implement CmdBlitImage Signed-off-by: Jonathan Marek Reviewed-by: Kristian H. Kristensen --- src/freedreno/vulkan/meson.build | 3 + src/freedreno/vulkan/tu_blit.c | 298 ++++++++++++ src/freedreno/vulkan/tu_blit.h | 100 ++++ src/freedreno/vulkan/tu_cmd_buffer.c | 4 +- src/freedreno/vulkan/tu_formats.c | 4 +- src/freedreno/vulkan/tu_meta_blit.c | 54 ++- src/freedreno/vulkan/tu_meta_copy.c | 651 +++------------------------ src/freedreno/vulkan/tu_private.h | 2 + 8 files changed, 526 insertions(+), 590 deletions(-) create mode 100644 src/freedreno/vulkan/tu_blit.c create mode 100644 src/freedreno/vulkan/tu_blit.h diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index 03079a14994..b5299232518 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -49,8 +49,11 @@ tu_format_table_c = custom_target( ) libtu_files = files( + 'tu_blit.c', + 'tu_blit.h', 'tu_cmd_buffer.c', 'tu_cs.c', + 'tu_cs.h', 'tu_device.c', 'tu_descriptor_set.c', 'tu_descriptor_set.h', diff --git a/src/freedreno/vulkan/tu_blit.c b/src/freedreno/vulkan/tu_blit.c new file mode 100644 index 00000000000..33e0fd45a55 --- /dev/null +++ b/src/freedreno/vulkan/tu_blit.c @@ -0,0 +1,298 @@ +/* + * Copyright © 2019 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jonathan Marek + * + */ + +#include "tu_blit.h" + +#include "a6xx.xml.h" +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" + +#include "vk_format.h" + +#include "tu_cs.h" + +/* TODO: + * - Avoid disabling tiling for swapped formats + * (image_to_image copy doesn't deal with it) + * - Fix d24_unorm_s8_uint support & aspects + * - UBWC + */ + +static VkFormat +blit_copy_format(VkFormat format) +{ + switch (vk_format_get_blocksizebits(format)) { + case 8: return VK_FORMAT_R8_UINT; + case 16: return VK_FORMAT_R16_UINT; + case 32: return VK_FORMAT_R8G8B8A8_UINT; + case 64: return VK_FORMAT_R32G32_UINT; + case 96: return VK_FORMAT_R32G32B32_UINT; + case 128:return VK_FORMAT_R32G32B32A32_UINT; + default: + unreachable("unhandled format size"); + } +} + +static uint32_t +blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read) +{ + const struct tu_native_format *fmt = tu6_get_native_format(img->fmt); + enum a6xx_color_fmt rb = fmt->rb; + enum a3xx_color_swap swap = img->tiled ? 
WZYX : fmt->swap; + if (rb == RB6_R10G10B10A2_UNORM && src) + rb = RB6_R10G10B10A2_FLOAT16; + if (rb == RB6_X8Z24_UNORM) + rb = RB6_Z24_UNORM_S8_UINT; + + if (stencil_read) + swap = XYZW; + + return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) | + A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) | + A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) | + COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB); +} + +static void +emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt) +{ + struct tu_cs *cs = &cmdbuf->cs; + + tu_cs_reserve_space(cmdbuf->device, cs, 52); + + enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb; + if (fmt == RB6_X8Z24_UNORM) + fmt = RB6_Z24_UNORM_S8_UINT; + + enum a6xx_2d_ifmt ifmt = tu6_rb_fmt_to_ifmt(fmt); + + if (vk_format_is_srgb(blt->dst.fmt)) { + assert(ifmt == R2D_UNORM8); + ifmt = R2D_UNORM8_SRGB; + } + + uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) | + A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) | /* not required? */ + COND(fmt == RB6_Z24_UNORM_S8_UINT, A6XX_RB_2D_BLIT_CNTL_D24S8) | + A6XX_RB_2D_BLIT_CNTL_MASK(0xf) | + A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, blit_cntl); + + tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); + tu_cs_emit(&cmdbuf->cs, blit_cntl); + + /* + * Emit source: + */ + tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10); + tu_cs_emit(cs, blit_image_info(&blt->src, true, blt->stencil_read) | + A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) | + /* TODO: should disable this bit for integer formats ? 
*/ + COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) | + COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) | + 0x500000); + tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) | + A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height)); + tu_cs_emit_qw(cs, blt->src.va); + tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch)); + + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + + /* + * Emit destination: + */ + tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9); + tu_cs_emit(cs, blit_image_info(&blt->dst, false, false)); + tu_cs_emit_qw(cs, blt->dst.va); + tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch)); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); + tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x)); + tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1)); + tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y)); + tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1)); + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2); + tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) | + A6XX_GRAS_2D_DST_TL_Y(blt->dst.y)); + tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) | + A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1)); + + tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1); + tu_cs_emit(cs, 0x3f); + tu_cs_emit_wfi(cs); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1); + tu_cs_emit(cs, 0); + + if (fmt == RB6_R10G10B10A2_UNORM) + fmt = RB6_R16G16B16A16_FLOAT; + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); + tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) | + COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) | + 
A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(fmt) | + COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) | + A6XX_SP_2D_SRC_FORMAT_MASK(0xf)); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(cs, 0x01000000); + + tu_cs_emit_pkt7(cs, CP_BLIT, 1); + tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); + + tu_cs_emit_wfi(cs); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1); + tu_cs_emit(cs, 0); +} + +void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt, bool copy) +{ + if (copy) { + blt->stencil_read = + blt->dst.fmt == VK_FORMAT_R8_UINT && + blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT; + + assert(vk_format_get_blocksize(blt->dst.fmt) == + vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read); + assert(blt->src.samples == blt->dst.samples); + + if (vk_format_is_compressed(blt->src.fmt)) { + unsigned block_width = vk_format_get_blockwidth(blt->src.fmt); + unsigned block_height = vk_format_get_blockheight(blt->src.fmt); + + blt->src.pitch /= block_width; + blt->src.x /= block_width; + blt->src.y /= block_height; + + /* for image_to_image copy, width/height is on the src format */ + blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width); + blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height); + } + + if (vk_format_is_compressed(blt->dst.fmt)) { + unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt); + unsigned block_height = vk_format_get_blockheight(blt->dst.fmt); + + blt->dst.pitch /= block_width; + blt->dst.x /= block_width; + blt->dst.y /= block_height; + } + + blt->src.fmt = blit_copy_format(blt->src.fmt); + blt->dst.fmt = blit_copy_format(blt->dst.fmt); + + /* TODO: does this work correctly with tiling/etc ? 
*/ + blt->src.x *= blt->src.samples; + blt->dst.x *= blt->dst.samples; + blt->src.width *= blt->src.samples; + blt->dst.width *= blt->dst.samples; + blt->src.samples = 1; + blt->dst.samples = 1; + } else { + assert(blt->dst.samples == 1); + } + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 18); + + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, LRZ_FLUSH, false); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_COLOR, false); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, PC_CCU_INVALIDATE_DEPTH, false); + + /* buffer copy setup */ + tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); + tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); + + for (unsigned layer = 0; layer < blt->layers; layer++) { + if ((blt->src.va & 63) || (blt->src.pitch & 63)) { + /* per line copy path (buffer_to_image) */ + assert(copy && !blt->src.tiled); + struct tu_blit line_blt = *blt; + uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y; + + line_blt.src.y = 0; + line_blt.src.pitch = 0; + line_blt.src.height = 1; + line_blt.dst.height = 1; + + for (unsigned y = 0; y < blt->src.height; y++) { + line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt); + line_blt.src.va = src_va & ~63; + + emit_blit_step(cmdbuf, &line_blt); + + line_blt.dst.y++; + src_va += blt->src.pitch; + } + } else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) { + /* per line copy path (image_to_buffer) */ + assert(copy && !blt->dst.tiled); + struct tu_blit line_blt = *blt; + uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y; + + line_blt.dst.y = 0; + line_blt.dst.pitch = 0; + line_blt.src.height = 1; + line_blt.dst.height = 1; + + for (unsigned y = 0; y < blt->src.height; y++) { + line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt); + line_blt.dst.va = dst_va & ~63; + + 
emit_blit_step(cmdbuf, &line_blt); + + line_blt.src.y++; + dst_va += blt->dst.pitch; + } + } else { + emit_blit_step(cmdbuf, blt); + } + blt->dst.va += blt->dst.layer_size; + blt->src.va += blt->src.layer_size; + } + + tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17); + + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); + tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_INVALIDATE, false); +} diff --git a/src/freedreno/vulkan/tu_blit.h b/src/freedreno/vulkan/tu_blit.h new file mode 100644 index 00000000000..1f4967e845f --- /dev/null +++ b/src/freedreno/vulkan/tu_blit.h @@ -0,0 +1,100 @@ +/* + * Copyright © 2019 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jonathan Marek + * + */ + +#ifndef TU_BLIT_H +#define TU_BLIT_H + +#include "tu_private.h" + +#include "vk_format.h" + +struct tu_blit_surf { + VkFormat fmt; + enum a6xx_tile_mode tile_mode; + bool tiled; + uint64_t va; + uint32_t pitch, layer_size; + uint32_t x, y; + uint32_t width, height; + unsigned samples; +}; + +static inline struct tu_blit_surf +tu_blit_surf(struct tu_image *img, + VkImageSubresourceLayers subres, + const VkOffset3D *offsets) +{ + return (struct tu_blit_surf) { + .fmt = img->vk_format, + .tile_mode = tu6_get_image_tile_mode(img, subres.mipLevel), + .tiled = img->tile_mode != TILE6_LINEAR, + .va = img->bo->iova + img->bo_offset + img->levels[subres.mipLevel].offset + + subres.baseArrayLayer * img->layer_size + + MIN2(offsets[0].z, offsets[1].z) * img->levels[subres.mipLevel].size, + .pitch = img->levels[subres.mipLevel].pitch * vk_format_get_blocksize(img->vk_format) * img->samples, + .layer_size = img->type == VK_IMAGE_TYPE_3D ? img->levels[subres.mipLevel].size : img->layer_size, + .x = MIN2(offsets[0].x, offsets[1].x), + .y = MIN2(offsets[0].y, offsets[1].y), + .width = abs(offsets[1].x - offsets[0].x), + .height = abs(offsets[1].y - offsets[0].y), + .samples = img->samples, + }; +} + +static inline struct tu_blit_surf +tu_blit_surf_ext(struct tu_image *image, + VkImageSubresourceLayers subres, + VkOffset3D offset, + VkExtent3D extent) +{ + return tu_blit_surf(image, subres, (VkOffset3D[]) { + offset, {.x = offset.x + extent.width, + .y = offset.y + extent.height, + .z = offset.z} + }); +} + +static inline struct tu_blit_surf +tu_blit_surf_whole(struct tu_image *image) +{ + return tu_blit_surf(image, (VkImageSubresourceLayers){}, (VkOffset3D[]) { + {}, {image->extent.width, image->extent.height} + }); +} + +struct tu_blit { + struct tu_blit_surf dst; + struct tu_blit_surf src; + uint32_t layers; + bool filter; + bool stencil_read; + enum a6xx_rotation rotation; +}; + +void tu_blit(struct tu_cmd_buffer *cmdbuf, struct 
tu_blit *blt, bool copy); + +#endif /* TU_BLIT_H */ diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index d701ae5fba7..3043740626f 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -314,8 +314,8 @@ tu_tiling_config_get_tile(const struct tu_tiling_config *tiling, : tile->begin.y + tiling->tile0.extent.height; } -static enum a3xx_msaa_samples -tu6_msaa_samples(uint32_t samples) +enum a3xx_msaa_samples +tu_msaa_samples(uint32_t samples) { switch (samples) { case 1: diff --git a/src/freedreno/vulkan/tu_formats.c b/src/freedreno/vulkan/tu_formats.c index 7e68d6103cd..6b762441fbe 100644 --- a/src/freedreno/vulkan/tu_formats.c +++ b/src/freedreno/vulkan/tu_formats.c @@ -645,8 +645,8 @@ tu_physical_device_get_format_properties( } if (native_fmt->rb >= 0) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; } end: diff --git a/src/freedreno/vulkan/tu_meta_blit.c b/src/freedreno/vulkan/tu_meta_blit.c index da5ff6b12b7..d624eef88b4 100644 --- a/src/freedreno/vulkan/tu_meta_blit.c +++ b/src/freedreno/vulkan/tu_meta_blit.c @@ -23,7 +23,49 @@ #include "tu_private.h" -#include "nir/nir_builder.h" +#include "tu_blit.h" + +static void +tu_blit_image(struct tu_cmd_buffer *cmdbuf, + struct tu_image *src_image, + struct tu_image *dst_image, + const VkImageBlit *info, + VkFilter filter) +{ + static const enum a6xx_rotation rotate[2][2] = { + {ROTATE_0, ROTATE_HFLIP}, + {ROTATE_VFLIP, ROTATE_180}, + }; + bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) != + (info->dstOffsets[1].x < info->dstOffsets[0].x); + bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) != + (info->dstOffsets[1].y < 
info->dstOffsets[0].y); + bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) != + (info->dstOffsets[1].z < info->dstOffsets[0].z); + + if (mirror_z) { + tu_finishme("blit z mirror\n"); + return; + } + + if (info->srcOffsets[1].z - info->srcOffsets[0].z != + info->dstOffsets[1].z - info->dstOffsets[0].z) { + tu_finishme("blit z filter\n"); + return; + } + assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount); + + struct tu_blit blt = { + .dst = tu_blit_surf(dst_image, info->dstSubresource, info->dstOffsets), + .src = tu_blit_surf(src_image, info->srcSubresource, info->srcOffsets), + .layers = MAX2(info->srcOffsets[1].z - info->srcOffsets[0].z, + info->dstSubresource.layerCount), + .filter = filter == VK_FILTER_LINEAR, + .rotation = rotate[mirror_y][mirror_x], + }; + + tu_blit(cmdbuf, &blt, false); +} void tu_CmdBlitImage(VkCommandBuffer commandBuffer, @@ -36,4 +78,14 @@ tu_CmdBlitImage(VkCommandBuffer commandBuffer, VkFilter filter) { + TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer); + TU_FROM_HANDLE(tu_image, src_image, srcImage); + TU_FROM_HANDLE(tu_image, dst_image, destImage); + + tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); + + for (uint32_t i = 0; i < regionCount; ++i) { + tu_blit_image(cmdbuf, src_image, dst_image, pRegions + i, filter); + } } diff --git a/src/freedreno/vulkan/tu_meta_copy.c b/src/freedreno/vulkan/tu_meta_copy.c index fbd5dfdec9a..616151b751f 100644 --- a/src/freedreno/vulkan/tu_meta_copy.c +++ b/src/freedreno/vulkan/tu_meta_copy.c @@ -30,13 +30,7 @@ #include "vk_format.h" #include "tu_cs.h" - -/* - * TODO: - * - 3D textures - * - compressed image formats (need to divide offset/extent) - * - Fix d24_unorm_s8_uint support & aspects - */ +#include "tu_blit.h" static uint32_t blit_control(enum a6xx_color_fmt fmt) @@ -47,29 +41,6 @@ blit_control(enum a6xx_color_fmt fmt) return blit_cntl; } -static uint32_t 
tu6_sp_2d_src_format(VkFormat format) -{ - const struct vk_format_description *desc = vk_format_description(format); - uint32_t reg = 0xf000 | A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(tu6_get_native_format(format)->rb); - - int channel = vk_format_get_first_non_void_channel(format); - if (channel < 0) { - /* TODO special format. */ - return reg; - } - if (desc->channel[channel].normalized) { - if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) - reg |= A6XX_SP_2D_SRC_FORMAT_SINT; - reg |= A6XX_SP_2D_SRC_FORMAT_NORM; - } else if (desc->channel[channel].pure_integer) { - if (desc->channel[channel].type == VK_FORMAT_TYPE_SIGNED) - reg |= A6XX_SP_2D_SRC_FORMAT_SINT; - else - reg |= A6XX_SP_2D_SRC_FORMAT_UINT; - } - return reg; -} - static void tu_dma_prepare(struct tu_cmd_buffer *cmdbuf) { @@ -90,32 +61,6 @@ tu_dma_prepare(struct tu_cmd_buffer *cmdbuf) tu_cs_emit(&cmdbuf->cs, 0x10000000); } -/* Always use UINT formats to avoid precision issues. - * - * Example failure it avoids: - * - dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color.r16_unorm.r16_unorm.general_general - */ -static VkFormat -tu_canonical_copy_format(VkFormat format) -{ - switch (vk_format_get_blocksizebits(format)) { - case 8: - return VK_FORMAT_R8_UINT; - case 16: - return VK_FORMAT_R16_UINT; - case 32: - return VK_FORMAT_R32_UINT; - case 64: - return VK_FORMAT_R32G32_UINT; - case 96: - return VK_FORMAT_R32G32B32_UINT; - case 128: - return VK_FORMAT_R32G32B32A32_UINT; - default: - unreachable("unhandled format size"); - } -} - static void tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, struct tu_bo *src_bo, @@ -249,548 +194,80 @@ tu_copy_buffer(struct tu_cmd_buffer *cmdbuf, tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); } -static void -tu_copy_buffer_to_image_step(struct tu_cmd_buffer *cmdbuf, - struct tu_buffer *src_buffer, - struct tu_image *dst_image, - const VkBufferImageCopy *copy_info, - VkFormat format, - uint32_t layer, - uint64_t src_va) +static struct 
tu_blit_surf +tu_blit_buffer(struct tu_buffer *buffer, + VkFormat format, + const VkBufferImageCopy *info) { - const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; + if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) + format = VK_FORMAT_R8_UINT; - uint64_t dst_va = dst_image->bo->iova + dst_image->bo_offset + dst_image->layer_size * layer + dst_image->levels[copy_info->imageSubresource.mipLevel].offset; - unsigned dst_pitch = dst_image->levels[copy_info->imageSubresource.mipLevel].pitch * + unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) * vk_format_get_blocksize(format); - unsigned src_pitch; - unsigned src_offset = 0; - if (copy_info->imageExtent.height == 1) { - /* Can't find this in the spec, but not having it is sort of insane? */ - assert(src_va % vk_format_get_blocksize(format) == 0); - - src_offset = (src_va & 63) / vk_format_get_blocksize(format); - src_va &= ~63; - - src_pitch = align((src_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64); - } else { - unsigned src_pixel_stride = copy_info->bufferRowLength - ? 
copy_info->bufferRowLength - : copy_info->imageExtent.width; - src_pitch = src_pixel_stride * vk_format_get_blocksize(format); - assert(!(src_pitch & 63)); - assert(!(src_va & 63)); - } - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); - - /* - * Emit source: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); - tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | - A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | - 0x500000); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_offset + copy_info->imageExtent.width) | - A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( - copy_info->imageExtent.height)); /* SP_PS_2D_SRC_SIZE */ - tu_cs_emit_qw(&cmdbuf->cs, src_va); - tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - /* - * Emit destination: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); - tu_cs_emit(&cmdbuf->cs, - A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | - A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) | - A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); - tu_cs_emit_qw(&cmdbuf->cs, dst_va); - tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(src_offset)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_X_X(src_offset + copy_info->imageExtent.width - 1)); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(0)); - 
tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageExtent.height - 1)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_TL_X(copy_info->imageOffset.x) | - A6XX_GRAS_2D_DST_TL_Y(copy_info->imageOffset.y)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_BR_X(copy_info->imageOffset.x + - copy_info->imageExtent.width - 1) | - A6XX_GRAS_2D_DST_BR_Y(copy_info->imageOffset.y + - copy_info->imageExtent.height - 1)); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); - tu_cs_emit(&cmdbuf->cs, 0x3f); - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); - tu_cs_emit(&cmdbuf->cs, 0); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); - tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0x01000000); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); - tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); - - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0); + return (struct tu_blit_surf) { + .fmt = format, + .tile_mode = TILE6_LINEAR, + .va = buffer->bo->iova + buffer->bo_offset + info->bufferOffset, + .pitch = pitch, + .layer_size = (info->bufferImageHeight ?: info->imageExtent.height) * pitch / vk_format_get_blockwidth(format) / vk_format_get_blockheight(format), + .width = info->imageExtent.width, + .height = info->imageExtent.height, + .samples = 1, + }; } static void tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf, struct tu_buffer *src_buffer, struct tu_image *dst_image, - const VkBufferImageCopy *copy_info) -{ - tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ); - tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); - - /* general setup */ - tu_dma_prepare(cmdbuf); - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); - - /* buffer copy setup */ 
- tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); - tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - - VkFormat format = tu_canonical_copy_format(dst_image->vk_format); - const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; - - const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - unsigned src_pixel_stride = copy_info->bufferRowLength - ? copy_info->bufferRowLength - : copy_info->imageExtent.width; - unsigned cpp = vk_format_get_blocksize(format); - unsigned src_pitch = src_pixel_stride * cpp; - - for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) { - unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset; - uint64_t src_va = src_buffer->bo->iova + src_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * src_pitch; - - if ((src_pitch & 63) || (src_va & 63)) { - /* Do a per line copy */ - VkBufferImageCopy line_copy_info = *copy_info; - line_copy_info.imageExtent.height = 1; - for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) { - /* - * if src_va is not aligned the line copy will need to adjust. Give it - * room to do so. - */ - unsigned max_width = 16384 - (src_va & 0x3f) ? 
64 : 0; - line_copy_info.imageOffset.x = copy_info->imageOffset.x; - line_copy_info.imageExtent.width = copy_info->imageExtent.width; - - for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) { - tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, &line_copy_info, format, layer, src_va + c * cpp); - - line_copy_info.imageOffset.x += max_width; - line_copy_info.imageExtent.width -= max_width; - } - - line_copy_info.imageOffset.y++; - src_va += src_pitch; - } - } else { - tu_copy_buffer_to_image_step(cmdbuf, src_buffer, dst_image, copy_info, format, layer, src_va); - } - } - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); - - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); -} - -static void -tu_copy_image_to_buffer_step(struct tu_cmd_buffer *cmdbuf, - struct tu_image *src_image, - struct tu_buffer *dst_buffer, - const VkBufferImageCopy *copy_info, - VkFormat format, - uint32_t layer, - uint64_t dst_va) + const VkBufferImageCopy *info) { - const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; - - uint64_t src_va = src_image->bo->iova + src_image->bo_offset + src_image->layer_size * layer + src_image->levels[copy_info->imageSubresource.mipLevel].offset; - unsigned src_pitch = src_image->levels[copy_info->imageSubresource.mipLevel].pitch * - vk_format_get_blocksize(format); - - unsigned dst_pitch; - unsigned dst_offset = 0; - if (copy_info->imageExtent.height == 1) { - /* Can't find this in the spec, but not having it is sort of insane? */ - assert(dst_va % vk_format_get_blocksize(format) == 0); - - dst_offset = (dst_va & 63) / vk_format_get_blocksize(format); - dst_va &= ~63; - - dst_pitch = align((dst_offset + copy_info->imageExtent.width) * vk_format_get_blocksize(format), 64); - } else { - unsigned dst_pixel_stride = copy_info->bufferRowLength - ? 
copy_info->bufferRowLength - : copy_info->imageExtent.width; - dst_pitch = dst_pixel_stride * vk_format_get_blocksize(format); - assert(!(dst_pitch & 63)); - assert(!(dst_va & 63)); + if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT && + vk_format_get_blocksize(dst_image->vk_format) == 4) { + tu_finishme("aspect mask\n"); + return; } - - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); - - /* - * Emit source: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | - A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) | - A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) | - A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( - src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */ - tu_cs_emit_qw(&cmdbuf->cs, src_va); - tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - /* - * Emit destination: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); - tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | - A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) | - A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); - tu_cs_emit_qw(&cmdbuf->cs, dst_va); - tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->imageOffset.x)); - 
tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_X_X(copy_info->imageOffset.x + - copy_info->imageExtent.width - 1)); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->imageOffset.y)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->imageOffset.y + - copy_info->imageExtent.height - 1)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_TL_X(dst_offset) | A6XX_GRAS_2D_DST_TL_Y(0)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_BR_X(dst_offset + copy_info->imageExtent.width - 1) | - A6XX_GRAS_2D_DST_BR_Y(copy_info->imageExtent.height - 1)); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); - tu_cs_emit(&cmdbuf->cs, 0x3f); - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); - tu_cs_emit(&cmdbuf->cs, 0); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); - tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0x01000000); - tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); - tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); - - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0); + tu_blit(cmdbuf, &(struct tu_blit) { + .dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent), + .src = tu_blit_buffer(src_buffer, dst_image->vk_format, info), + .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount), + }, true); } static void tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf, struct tu_image *src_image, struct tu_buffer *dst_buffer, - const VkBufferImageCopy *copy_info) + const VkBufferImageCopy *info) { - tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); - tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); - - /* general setup */ - tu_dma_prepare(cmdbuf); - - 
tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); - - /* buffer copy setup */ - tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); - tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - - VkFormat format = tu_canonical_copy_format(src_image->vk_format); - const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; - - unsigned dst_pixel_stride = copy_info->bufferRowLength - ? copy_info->bufferRowLength - : copy_info->imageExtent.width; - unsigned cpp = vk_format_get_blocksize(format); - unsigned dst_pitch = dst_pixel_stride * cpp; - - - const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - for (unsigned layer_offset = 0; layer_offset < copy_info->imageSubresource.layerCount; ++layer_offset) { - unsigned layer = copy_info->imageSubresource.baseArrayLayer + layer_offset; - uint64_t dst_va = dst_buffer->bo->iova + dst_buffer->bo_offset + copy_info->bufferOffset + layer_offset * copy_info->bufferImageHeight * dst_pitch; - - if ((dst_pitch & 63) || (dst_va & 63)) { - /* Do a per line copy */ - VkBufferImageCopy line_copy_info = *copy_info; - line_copy_info.imageExtent.height = 1; - for (unsigned r = 0; r < copy_info->imageExtent.height; ++r) { - /* - * if dst_va is not aligned the line copy will need to adjust. Give it - * room to do so. - */ - unsigned max_width = 16384 - (dst_va & 0x3f) ? 
64 : 0; - line_copy_info.imageOffset.x = copy_info->imageOffset.x; - line_copy_info.imageExtent.width = copy_info->imageExtent.width; - - for (unsigned c = 0; c < copy_info->imageExtent.width; c += max_width) { - tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, &line_copy_info, format, layer, dst_va + c * cpp); - - line_copy_info.imageOffset.x += max_width; - line_copy_info.imageExtent.width -= max_width; - } - - line_copy_info.imageOffset.y++; - dst_va += dst_pitch; - } - } else { - tu_copy_image_to_buffer_step(cmdbuf, src_image, dst_buffer, copy_info, format, layer, dst_va); - } - } - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); - - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); -} - -static void -tu_copy_image_to_image_step(struct tu_cmd_buffer *cmdbuf, - struct tu_image *src_image, - struct tu_image *dst_image, - const VkImageCopy *copy_info, - VkFormat format, - uint32_t layer_offset) -{ - const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; - - unsigned src_layer = - copy_info->srcSubresource.baseArrayLayer + layer_offset; - uint64_t src_va = - src_image->bo->iova + src_image->bo_offset + - src_image->layer_size * src_layer + - src_image->levels[copy_info->srcSubresource.mipLevel].offset; - unsigned src_pitch = - src_image->levels[copy_info->srcSubresource.mipLevel].pitch * - vk_format_get_blocksize(format); - - unsigned dst_layer = - copy_info->dstSubresource.baseArrayLayer + layer_offset; - uint64_t dst_va = - dst_image->bo->iova + dst_image->bo_offset + - dst_image->layer_size * dst_layer + - dst_image->levels[copy_info->dstSubresource.mipLevel].offset; - unsigned dst_pitch = - src_image->levels[copy_info->dstSubresource.mipLevel].pitch * - vk_format_get_blocksize(format); - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 48); - - /* - * Emit source: - */ - 
tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_PS_2D_SRC_INFO, 13); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb_fmt) | - A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(src_image->tile_mode) | - A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000); - tu_cs_emit(&cmdbuf->cs, - A6XX_SP_PS_2D_SRC_SIZE_WIDTH(src_image->extent.width) | - A6XX_SP_PS_2D_SRC_SIZE_HEIGHT( - src_image->extent.height)); /* SP_PS_2D_SRC_SIZE */ - tu_cs_emit_qw(&cmdbuf->cs, src_va); - tu_cs_emit(&cmdbuf->cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(src_pitch)); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - /* - * Emit destination: - */ - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_DST_INFO, 9); - tu_cs_emit(&cmdbuf->cs, - A6XX_RB_2D_DST_INFO_COLOR_FORMAT(rb_fmt) | - A6XX_RB_2D_DST_INFO_TILE_MODE(dst_image->tile_mode) | - A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX)); - tu_cs_emit_qw(&cmdbuf->cs, dst_va); - tu_cs_emit(&cmdbuf->cs, A6XX_RB_2D_DST_SIZE_PITCH(dst_pitch)); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - tu_cs_emit(&cmdbuf->cs, 0x00000000); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_X_X(copy_info->srcOffset.x)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_X_X(copy_info->srcOffset.x + - copy_info->extent.width - 1)); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_SRC_TL_Y_Y(copy_info->srcOffset.y)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_SRC_BR_Y_Y(copy_info->srcOffset.y + - copy_info->extent.height - 1)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_DST_TL, 2); - tu_cs_emit(&cmdbuf->cs, A6XX_GRAS_2D_DST_TL_X(copy_info->dstOffset.x) | - 
A6XX_GRAS_2D_DST_TL_Y(copy_info->dstOffset.y)); - tu_cs_emit(&cmdbuf->cs, - A6XX_GRAS_2D_DST_BR_X(copy_info->dstOffset.x + - copy_info->extent.width - 1) | - A6XX_GRAS_2D_DST_BR_Y(copy_info->dstOffset.y + - copy_info->extent.height - 1)); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_EVENT_WRITE, 1); - tu_cs_emit(&cmdbuf->cs, 0x3f); - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8C01, 1); - tu_cs_emit(&cmdbuf->cs, 0); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_SP_2D_SRC_FORMAT, 1); - tu_cs_emit(&cmdbuf->cs, tu6_sp_2d_src_format(format)); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0x01000000); - - tu_cs_emit_pkt7(&cmdbuf->cs, CP_BLIT, 1); - tu_cs_emit(&cmdbuf->cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); - - tu_cs_emit_wfi(&cmdbuf->cs); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_UNKNOWN_8E04, 1); - tu_cs_emit(&cmdbuf->cs, 0); + tu_blit(cmdbuf, &(struct tu_blit) { + .dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info), + .src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent), + .layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount), + }, true); } static void tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf, struct tu_image *src_image, struct tu_image *dst_image, - const VkImageCopy *copy_info) + const VkImageCopy *info) { - /* TODO: - * - Handle 3D images. - * - In some cases where src and dst format are different this may - * have tiling implications. Not sure if things happen correctly - * in that case. 
- */ - - tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); - tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); - - /* general setup */ - tu_dma_prepare(cmdbuf); - - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 6); - - /* buffer copy setup */ - tu_cs_emit_pkt7(&cmdbuf->cs, CP_SET_MARKER, 1); - tu_cs_emit(&cmdbuf->cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE)); - - VkFormat format = tu_canonical_copy_format(src_image->vk_format); - const enum a6xx_color_fmt rb_fmt = tu6_get_native_format(format)->rb; - const uint32_t blit_cntl = blit_control(rb_fmt) | 0x20000000; - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - tu_cs_emit_pkt4(&cmdbuf->cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); - tu_cs_emit(&cmdbuf->cs, blit_cntl); - - for (unsigned layer_offset = 0; - layer_offset < copy_info->srcSubresource.layerCount; ++layer_offset) { - tu_copy_image_to_image_step(cmdbuf, src_image, dst_image, copy_info, - format, layer_offset); + if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT && + vk_format_get_blocksize(dst_image->vk_format) == 4) || + (info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT && + vk_format_get_blocksize(src_image->vk_format) == 4)) { + tu_finishme("aspect mask\n"); + return; } - tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 15); - - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, 0x1d, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, FACENESS_FLUSH, true); - tu6_emit_event_write(cmdbuf, &cmdbuf->cs, CACHE_FLUSH_TS, true); + tu_blit(cmdbuf, &(struct tu_blit) { + .dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent), + .src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent), + .layers = MAX2(info->extent.depth, info->srcSubresource.layerCount), /* slices of a 3D image, or array layers when depth is 1 */ + }, true); } void @@ -821,14 +298,15 @@ tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, uint32_t regionCount, const VkBufferImageCopy *pRegions) { -
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_image, dest_image, destImage); + TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer); + TU_FROM_HANDLE(tu_image, dst_image, destImage); TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer); - for (unsigned i = 0; i < regionCount; ++i) { - tu_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, - pRegions + i); - } + tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); + + for (unsigned i = 0; i < regionCount; ++i) + tu_copy_buffer_to_image(cmdbuf, src_buffer, dst_image, pRegions + i); } void @@ -839,14 +317,15 @@ tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, uint32_t regionCount, const VkBufferImageCopy *pRegions) { - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer); TU_FROM_HANDLE(tu_image, src_image, srcImage); TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer); - for (unsigned i = 0; i < regionCount; ++i) { - tu_copy_image_to_buffer(cmd_buffer, src_image, dst_buffer, - pRegions + i); - } + tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); + tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE); + + for (unsigned i = 0; i < regionCount; ++i) + tu_copy_image_to_buffer(cmdbuf, src_image, dst_buffer, pRegions + i); } void @@ -858,11 +337,13 @@ tu_CmdCopyImage(VkCommandBuffer commandBuffer, uint32_t regionCount, const VkImageCopy *pRegions) { - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer); TU_FROM_HANDLE(tu_image, src_image, srcImage); - TU_FROM_HANDLE(tu_image, dest_image, destImage); + TU_FROM_HANDLE(tu_image, dst_image, destImage); - for (uint32_t i = 0; i < regionCount; ++i) { - tu_copy_image_to_image(cmd_buffer, src_image, dest_image, pRegions + i); - } + tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); + 
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); + + for (uint32_t i = 0; i < regionCount; ++i) + tu_copy_image_to_image(cmdbuf, src_image, dst_image, pRegions + i); } diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 405fa20b4f5..eb043ccf562 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1299,6 +1299,8 @@ tu_get_levelCount(const struct tu_image *image, enum a6xx_tile_mode tu6_get_image_tile_mode(struct tu_image *image, int level); +enum a3xx_msaa_samples +tu_msaa_samples(uint32_t samples); struct tu_image_view { -- 2.30.2