From 773d640efa2665fc50f86cbb7d1e6b9402ba44ad Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 18 Nov 2019 16:46:39 -0500 Subject: [PATCH] turnip: implement UBWC This enables UBWC for everything except 3D textures. It breaks many image_to_image copies but those aren't important and it can be worked around later (image_to_image copy needs to be done in two steps, decode from the source format and then encode to the destination format). Signed-off-by: Jonathan Marek Reviewed-by: Eric Anholt --- src/freedreno/vulkan/tu_android.c | 10 +- src/freedreno/vulkan/tu_blit.c | 42 +++++- src/freedreno/vulkan/tu_blit.h | 30 +++-- src/freedreno/vulkan/tu_cmd_buffer.c | 75 ++++++----- src/freedreno/vulkan/tu_formats.c | 18 +++ src/freedreno/vulkan/tu_image.c | 185 +++++++++++++++++++-------- src/freedreno/vulkan/tu_meta_copy.c | 2 +- src/freedreno/vulkan/tu_private.h | 54 ++++++-- src/freedreno/vulkan/tu_wsi.c | 34 ++++- 9 files changed, 325 insertions(+), 125 deletions(-) diff --git a/src/freedreno/vulkan/tu_android.c b/src/freedreno/vulkan/tu_android.c index 1ebc9e726e8..75fdb904601 100644 --- a/src/freedreno/vulkan/tu_android.c +++ b/src/freedreno/vulkan/tu_android.c @@ -31,6 +31,8 @@ #include #include +#include "drm-uapi/drm_fourcc.h" + static int tu_hal_open(const struct hw_module_t *mod, const char *id, @@ -120,12 +122,8 @@ tu_image_from_gralloc(VkDevice device_h, struct tu_bo *bo = NULL; VkResult result; - result = tu_image_create( - device_h, - &(struct tu_image_create_info) { - .vk_info = base_info, .scanout = true, .no_metadata_planes = true }, - alloc, &image_h); - + result = tu_image_create(device_h, base_info, alloc, &image_h, + DRM_FORMAT_MOD_LINEAR); if (result != VK_SUCCESS) return result; diff --git a/src/freedreno/vulkan/tu_blit.c b/src/freedreno/vulkan/tu_blit.c index 6a43a9fbe9b..22918d62995 100644 --- a/src/freedreno/vulkan/tu_blit.c +++ b/src/freedreno/vulkan/tu_blit.c @@ -48,7 +48,7 @@ blit_copy_format(VkFormat format) switch (vk_format_get_blocksizebits(format)) { case 8: return VK_FORMAT_R8_UINT; case 16: return VK_FORMAT_R16_UINT; - case 32: return VK_FORMAT_R8G8B8A8_UINT; + case 32: return VK_FORMAT_R32_UINT; case 64: return VK_FORMAT_R32G32_UINT; case 96: return VK_FORMAT_R32G32B32_UINT; case 128:return VK_FORMAT_R32G32B32A32_UINT; @@ -74,7 +74,8 @@ blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read) return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) | A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) | A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) | - COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB); + COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB) | + COND(img->ubwc_size, A6XX_SP_PS_2D_SRC_INFO_FLAGS); } static void @@ -82,7 +83,7 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt) { struct tu_cs *cs = &cmdbuf->cs; - tu_cs_reserve_space(cmdbuf->device, cs, 52); + tu_cs_reserve_space(cmdbuf->device, cs, 66); enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb; if (fmt == RB6_Z24_UNORM_S8_UINT) @@ -135,6 +136,16 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt) tu_cs_emit(cs, 0x00000000); tu_cs_emit(cs, 0x00000000); tu_cs_emit(cs, 0x00000000); + + if (blt->src.ubwc_size) { + tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6); + tu_cs_emit_qw(cs, blt->src.ubwc_va); + tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) | + A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2)); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + } } /* @@ -150,6 +161,16 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt) tu_cs_emit(cs, 0x00000000); tu_cs_emit(cs, 0x00000000); + if (blt->dst.ubwc_size) { + tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6); + tu_cs_emit_qw(cs, blt->dst.ubwc_va); + tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) | + A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2)); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + tu_cs_emit(cs, 0x00000000); + } + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4); tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x)); tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1)); @@ -196,7 +217,7 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt) switch (blt->type) { case TU_BLIT_COPY: blt->stencil_read = - blt->dst.fmt == VK_FORMAT_R8_UINT && + blt->dst.fmt == VK_FORMAT_R8_UNORM && blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT; assert(vk_format_get_blocksize(blt->dst.fmt) == @@ -210,6 +231,7 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt) blt->src.pitch /= block_width; blt->src.x /= block_width; blt->src.y /= block_height; + blt->src.fmt = blit_copy_format(blt->src.fmt); /* for image_to_image copy, width/height is on the src format */ blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width); @@ -223,12 +245,16 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt) blt->dst.pitch /= block_width; blt->dst.x /= block_width; blt->dst.y /= block_height; + blt->dst.fmt = blit_copy_format(blt->dst.fmt); } - blt->src.fmt = blit_copy_format(blt->src.fmt); - blt->dst.fmt = blit_copy_format(blt->dst.fmt); + if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) + blt->dst.fmt = blit_copy_format(blt->dst.fmt); + + if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) + blt->src.fmt = blit_copy_format(blt->src.fmt); - /* TODO: does this work correctly with tiling/etc ? */ + /* TODO: multisample image copy does not work correctly with tiling/UBWC */ blt->src.x *= blt->src.samples; blt->dst.x *= blt->dst.samples; blt->src.width *= blt->src.samples; @@ -304,6 +330,8 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt) } blt->dst.va += blt->dst.layer_size; blt->src.va += blt->src.layer_size; + blt->dst.ubwc_va += blt->dst.ubwc_size; + blt->src.ubwc_va += blt->src.ubwc_size; } tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17); diff --git a/src/freedreno/vulkan/tu_blit.h b/src/freedreno/vulkan/tu_blit.h index b01f62c7685..9897aaeb0a1 100644 --- a/src/freedreno/vulkan/tu_blit.h +++ b/src/freedreno/vulkan/tu_blit.h @@ -41,27 +41,37 @@ struct tu_blit_surf { uint32_t x, y; uint32_t width, height; unsigned samples; + uint64_t ubwc_va; + uint32_t ubwc_pitch; + uint32_t ubwc_size; }; static inline struct tu_blit_surf -tu_blit_surf(struct tu_image *img, +tu_blit_surf(struct tu_image *image, VkImageSubresourceLayers subres, const VkOffset3D *offsets) { + unsigned layer = subres.baseArrayLayer; + if (image->type == VK_IMAGE_TYPE_3D) { + assert(layer == 0); + layer = MIN2(offsets[0].z, offsets[1].z); + } + return (struct tu_blit_surf) { - .fmt = img->vk_format, - .tile_mode = tu6_get_image_tile_mode(img, subres.mipLevel), - .tiled = img->tile_mode != TILE6_LINEAR, - .va = img->bo->iova + img->bo_offset + img->levels[subres.mipLevel].offset + - subres.baseArrayLayer * img->layer_size + - MIN2(offsets[0].z, offsets[1].z) * img->levels[subres.mipLevel].size, - .pitch = img->levels[subres.mipLevel].pitch * vk_format_get_blocksize(img->vk_format) * img->samples, - .layer_size = img->type == VK_IMAGE_TYPE_3D ? img->levels[subres.mipLevel].size : img->layer_size, + .fmt = image->vk_format, + .tile_mode = tu6_get_image_tile_mode(image, subres.mipLevel), + .tiled = image->tile_mode != TILE6_LINEAR, + .va = tu_image_base(image, subres.mipLevel, layer), + .pitch = tu_image_stride(image, subres.mipLevel), + .layer_size = tu_layer_size(image, subres.mipLevel), .x = MIN2(offsets[0].x, offsets[1].x), .y = MIN2(offsets[0].y, offsets[1].y), .width = abs(offsets[1].x - offsets[0].x), .height = abs(offsets[1].y - offsets[0].y), - .samples = img->samples, + .samples = image->samples, + .ubwc_va = tu_image_ubwc_base(image, subres.mipLevel, layer), + .ubwc_pitch = tu_image_ubwc_pitch(image, subres.mipLevel), + .ubwc_size = tu_image_ubwc_size(image, subres.mipLevel), }; } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index f7081e88fc2..29e2d872b6b 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -388,6 +388,22 @@ tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } } +static void +tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview) +{ + uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer); + uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip); + uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip); + if (iview->image->ubwc_size) { + tu_cs_emit_qw(cs, va); + tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) | + A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2)); + } else { + tu_cs_emit_qw(cs, 0); + tu_cs_emit(cs, 0); + } +} + static void tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { @@ -430,22 +446,21 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_image_level *slice = &iview->image->levels[iview->base_mip]; enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format); - uint32_t offset = slice->offset + slice->size * iview->base_layer; - uint32_t stride = slice->pitch * iview->image->cpp; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); - tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(slice->size)); - tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset + offset); + tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip))); + tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layer_size)); + tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); tu_cs_emit(cs, tiling->gmem_offsets[gmem_index]); tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + tu6_emit_flag_buffer(cs, iview); + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5); tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ @@ -475,12 +490,8 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) continue; const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_image_level *slice = - &iview->image->levels[iview->base_mip]; const enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(iview->image, iview->base_mip); - uint32_t stride = 0; - uint32_t offset = 0; mrt_comp[i] = 0xf; @@ -491,33 +502,21 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_get_native_format(iview->vk_format); assert(format && format->rb >= 0); - offset = slice->offset + slice->size * iview->base_layer; - stride = slice->pitch * iview->image->cpp; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6); tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) | A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap)); - tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size)); - tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset + - offset); /* BASE_LO/HI */ + tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip))); + tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layer_size)); + tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); tu_cs_emit( cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */ tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1); tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb)); -#if 0 - /* when we support UBWC, these would be the system memory - * addr/pitch/etc: - */ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4); - tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ - tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ - tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0)); - tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); -#endif + tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3); + tu6_emit_flag_buffer(cs, iview); } tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1); @@ -633,11 +632,6 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd, uint32_t gmem_offset, uint32_t blit_info) { - const struct tu_image_level *slice = - &iview->image->levels[iview->base_mip]; - const uint32_t offset = slice->offset + slice->size * iview->base_layer; - const uint32_t stride = slice->pitch * iview->image->cpp; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); tu_cs_emit(cs, blit_info); @@ -651,11 +645,16 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd, tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) | A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) | - A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap)); - tu_cs_emit_qw(cs, - iview->image->bo->iova + iview->image->bo_offset + offset); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size)); + A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) | + COND(iview->image->ubwc_size, A6XX_RB_BLIT_DST_INFO_FLAGS)); + tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer)); + tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip))); + tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layer_size)); + + if (iview->image->ubwc_size) { + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3); + tu6_emit_flag_buffer(cs, iview); + } tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); tu_cs_emit(cs, gmem_offset); diff --git a/src/freedreno/vulkan/tu_formats.c b/src/freedreno/vulkan/tu_formats.c index 909591e846b..120672cf0a9 100644 --- a/src/freedreno/vulkan/tu_formats.c +++ b/src/freedreno/vulkan/tu_formats.c @@ -34,6 +34,7 @@ #include "util/u_half.h" #include "vk_format.h" #include "vk_util.h" +#include "drm-uapi/drm_fourcc.h" /** * Declare a format table. A format table is an array of tu_native_format. @@ -784,6 +785,23 @@ tu_GetPhysicalDeviceFormatProperties2( tu_physical_device_get_format_properties( physical_device, format, &pFormatProperties->formatProperties); + + struct wsi_format_modifier_properties_list *list = + vk_find_struct(pFormatProperties->pNext, WSI_FORMAT_MODIFIER_PROPERTIES_LIST_MESA); + if (list) { + VK_OUTARRAY_MAKE(out, list->modifier_properties, &list->modifier_count); + + vk_outarray_append(&out, mod_props) { + mod_props->modifier = DRM_FORMAT_MOD_LINEAR; + mod_props->modifier_plane_count = 1; + } + + /* TODO: any cases where this should be disabled? */ + vk_outarray_append(&out, mod_props) { + mod_props->modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED; + mod_props->modifier_plane_count = 1; + } + } } static VkResult diff --git a/src/freedreno/vulkan/tu_image.c b/src/freedreno/vulkan/tu_image.c index 0b14a28d44d..3d6ca56f1fc 100644 --- a/src/freedreno/vulkan/tu_image.c +++ b/src/freedreno/vulkan/tu_image.c @@ -31,18 +31,20 @@ #include "util/u_atomic.h" #include "vk_format.h" #include "vk_util.h" +#include "drm-uapi/drm_fourcc.h" static inline bool -image_level_linear(struct tu_image *image, int level) +image_level_linear(struct tu_image *image, int level, bool ubwc) { unsigned w = u_minify(image->extent.width, level); - return w < 16; + /* all levels are tiled/compressed with UBWC */ + return ubwc ? false : (w < 16); } enum a6xx_tile_mode tu6_get_image_tile_mode(struct tu_image *image, int level) { - if (image_level_linear(image, level)) + if (image_level_linear(image, level, !!image->ubwc_size)) return TILE6_LINEAR; else return image->tile_mode; @@ -50,32 +52,44 @@ tu6_get_image_tile_mode(struct tu_image *image, int level) /* indexed by cpp, including msaa 2x and 4x: */ static const struct { - unsigned pitchalign; - unsigned heightalign; + uint8_t pitchalign; + uint8_t heightalign; + uint8_t ubwc_blockwidth; + uint8_t ubwc_blockheight; } tile_alignment[] = { - [1] = { 128, 32 }, - [2] = { 128, 16 }, +/* TODO: + * cpp=1 UBWC needs testing at larger texture sizes + * missing UBWC blockwidth/blockheight for npot+64 cpp + * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32 + */ + [1] = { 128, 32, 16, 4 }, + [2] = { 128, 16, 16, 4 }, [3] = { 64, 32 }, - [4] = { 64, 16 }, + [4] = { 64, 16, 16, 4 }, [6] = { 64, 16 }, - [8] = { 64, 16 }, + [8] = { 64, 16, 8, 4, }, [12] = { 64, 16 }, - [16] = { 64, 16 }, + [16] = { 64, 16, 4, 4, }, [24] = { 64, 16 }, - [32] = { 64, 16 }, + [32] = { 64, 16, 4, 2 }, [48] = { 64, 16 }, [64] = { 64, 16 }, - /* special case for r8g8: */ - [0] = { 64, 32 }, + [0] = { 64, 32, 16, 4 }, }; static void -setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) +setup_slices(struct tu_image *image, + const VkImageCreateInfo *pCreateInfo, + bool ubwc_enabled) { +#define RGB_TILE_WIDTH_ALIGNMENT 64 +#define RGB_TILE_HEIGHT_ALIGNMENT 16 +#define UBWC_PLANE_SIZE_ALIGNMENT 4096 VkFormat format = pCreateInfo->format; enum util_format_layout layout = vk_format_description(format)->layout; uint32_t layer_size = 0; + uint32_t ubwc_size = 0; int ta = image->cpp; /* The r8g8 format seems to not play by the normal tiling rules: */ @@ -84,6 +98,7 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) { struct tu_image_level *slice = &image->levels[level]; + struct tu_image_level *ubwc_slice = &image->ubwc_levels[level]; uint32_t width = u_minify(pCreateInfo->extent.width, level); uint32_t height = u_minify(pCreateInfo->extent.height, level); uint32_t depth = u_minify(pCreateInfo->extent.depth, level); @@ -91,7 +106,7 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) uint32_t blocks; uint32_t pitchalign; - if (image->tile_mode && !image_level_linear(image, level)) { + if (image->tile_mode && !image_level_linear(image, level, ubwc_enabled)) { /* tiled levels of 3D textures are rounded up to PoT dimensions: */ if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) { width = util_next_power_of_two(width); @@ -139,19 +154,47 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) } layer_size += slice->size * depth; - } + if (ubwc_enabled) { + /* with UBWC every level is aligned to 4K */ + layer_size = align(layer_size, 4096); + + uint32_t block_width = tile_alignment[ta].ubwc_blockwidth; + uint32_t block_height = tile_alignment[ta].ubwc_blockheight; + uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width), RGB_TILE_WIDTH_ALIGNMENT); + uint32_t meta_height = align(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT); + + /* it looks like mipmaps need alignment to power of two + * TODO: needs testing with large npot textures + * (needed for the first level?) + */ + if (pCreateInfo->mipLevels > 1) { + meta_pitch = util_next_power_of_two(meta_pitch); + meta_height = util_next_power_of_two(meta_height); + } + ubwc_slice->pitch = meta_pitch; + ubwc_slice->offset = ubwc_size; + ubwc_size += align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT); + } + } image->layer_size = align(layer_size, 4096); + + VkDeviceSize offset = ubwc_size * pCreateInfo->arrayLayers; + for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) + image->levels[level].offset += offset; + + image->size = offset + image->layer_size * pCreateInfo->arrayLayers; + image->ubwc_size = ubwc_size; } VkResult tu_image_create(VkDevice _device, - const struct tu_image_create_info *create_info, + const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, - VkImage *pImage) + VkImage *pImage, + uint64_t modifier) { TU_FROM_HANDLE(tu_device, device, _device); - const VkImageCreateInfo *pCreateInfo = create_info->vk_info; struct tu_image *image = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); @@ -195,21 +238,42 @@ tu_image_create(VkDevice _device, EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL; image->tile_mode = TILE6_3; + bool ubwc_enabled = true; + /* disable tiling when linear is requested and for compressed formats */ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR || - /* compressed textures can't use tiling? */ - vk_format_is_compressed(image->vk_format) || - /* scanout needs to be linear (what about tiling modifiers?) */ - create_info->scanout || - /* image_to_image copy doesn't deal with tiling+swap */ - tu6_get_native_format(image->vk_format)->swap || - /* r8g8 formats are tiled different and could break image_to_image copy */ - (image->cpp == 2 && vk_format_get_nr_components(image->vk_format) == 2)) + modifier == DRM_FORMAT_MOD_LINEAR || + vk_format_is_compressed(image->vk_format)) { image->tile_mode = TILE6_LINEAR; + ubwc_enabled = false; + } + + /* using UBWC with D24S8 breaks the "stencil read" copy path (why?) + * (causes any deqp tests that need to check stencil to fail) + * disable UBWC for this format until we properly support copy aspect masks + */ + if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) + ubwc_enabled = false; - setup_slices(image, pCreateInfo); + /* UBWC can't be used with E5B9G9R9 */ + if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) + ubwc_enabled = false; + + if (image->extent.depth > 1) { + tu_finishme("UBWC with 3D textures"); + ubwc_enabled = false; + } + + if (!tile_alignment[image->cpp].ubwc_blockwidth) { + tu_finishme("UBWC for cpp=%d", image->cpp); + ubwc_enabled = false; + } + + /* expect UBWC enabled if we asked for it */ + assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled); + + setup_slices(image, pCreateInfo, ubwc_enabled); - image->size = image->layer_size * pCreateInfo->arrayLayers; *pImage = tu_image_to_handle(image); return VK_SUCCESS; @@ -324,12 +388,13 @@ tu_image_view_init(struct tu_image_view *iview, memset(iview->descriptor, 0, sizeof(iview->descriptor)); const struct tu_native_format *fmt = tu6_get_native_format(iview->vk_format); - struct tu_image_level *slice0 = &image->levels[iview->base_mip]; - uint64_t base_addr = image->bo->iova + iview->base_layer * image->layer_size + slice0->offset; - uint32_t pitch = (slice0->pitch / vk_format_get_blockwidth(iview->vk_format)) * - vk_format_get_blocksize(iview->vk_format); - enum a6xx_tile_mode tile_mode = - image_level_linear(image, iview->base_mip) ? TILE6_LINEAR : image->tile_mode; + uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer); + uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer); + + uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format); + enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip); + uint32_t width = u_minify(image->extent.width, iview->base_mip); + uint32_t height = u_minify(image->extent.height, iview->base_mip); iview->descriptor[0] = A6XX_TEX_CONST_0_TILE_MODE(tile_mode) | @@ -339,24 +404,34 @@ tu_image_view_init(struct tu_image_view *iview, A6XX_TEX_CONST_0_SWAP(image->tile_mode ? WZYX : fmt->swap) | tu6_texswiz(&pCreateInfo->components, vk_format_description(iview->vk_format)->swizzle) | A6XX_TEX_CONST_0_MIPLVLS(iview->level_count - 1); - iview->descriptor[1] = - A6XX_TEX_CONST_1_WIDTH(u_minify(image->extent.width, iview->base_mip)) | - A6XX_TEX_CONST_1_HEIGHT(u_minify(image->extent.height, iview->base_mip)); + iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); iview->descriptor[2] = A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(iview->vk_format)) | A6XX_TEX_CONST_2_PITCH(pitch) | A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType)); - iview->descriptor[3] = 0; + iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(tu_layer_size(image, iview->base_mip)); iview->descriptor[4] = base_addr; iview->descriptor[5] = base_addr >> 32; + if (image->ubwc_size) { + uint32_t block_width = tile_alignment[image->cpp].ubwc_blockwidth; + uint32_t block_height = tile_alignment[image->cpp].ubwc_blockheight; + + iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL; + iview->descriptor[7] = ubwc_addr; + iview->descriptor[8] = ubwc_addr >> 32; + iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(tu_image_ubwc_size(image, iview->base_mip) >> 2); + iview->descriptor[10] |= + A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(tu_image_ubwc_pitch(image, iview->base_mip)) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height))); + } + if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D) { - iview->descriptor[3] |= A6XX_TEX_CONST_3_ARRAY_PITCH(image->layer_size); iview->descriptor[5] |= A6XX_TEX_CONST_5_DEPTH(iview->layer_count); } else { iview->descriptor[3] |= - A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size) | - A6XX_TEX_CONST_3_ARRAY_PITCH(slice0->size); + A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size); iview->descriptor[5] |= A6XX_TEX_CONST_5_DEPTH(u_minify(image->extent.depth, iview->base_mip)); } @@ -393,14 +468,17 @@ tu_CreateImage(VkDevice device, const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); - bool scanout = wsi_info && wsi_info->scanout; - - return tu_image_create(device, - &(struct tu_image_create_info) { - .vk_info = pCreateInfo, - .scanout = scanout, - }, - pAllocator, pImage); + uint64_t modifier = DRM_FORMAT_MOD_INVALID; + + if (wsi_info) { + modifier = DRM_FORMAT_MOD_LINEAR; + for (unsigned i = 0; i < wsi_info->modifier_count; i++) { + if (wsi_info->modifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED) + modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED; + } + } + + return tu_image_create(device, pCreateInfo, pAllocator, pImage, modifier); } void @@ -438,6 +516,13 @@ tu_GetImageSubresourceLayout(VkDevice _device, level->pitch * vk_format_get_blocksize(image->vk_format); pLayout->arrayPitch = image->layer_size; pLayout->depthPitch = level->size; + + if (image->ubwc_size) { + /* UBWC starts at offset 0 */ + pLayout->offset = 0; + /* UBWC scanout won't match what the kernel wants if we have levels/layers */ + assert(image->level_count == 1 && image->layer_count == 1); + } } VkResult diff --git a/src/freedreno/vulkan/tu_meta_copy.c b/src/freedreno/vulkan/tu_meta_copy.c index dedfab2559b..e51b31275f0 100644 --- a/src/freedreno/vulkan/tu_meta_copy.c +++ b/src/freedreno/vulkan/tu_meta_copy.c @@ -200,7 +200,7 @@ tu_blit_buffer(struct tu_buffer *buffer, const VkBufferImageCopy *info) { if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) - format = VK_FORMAT_R8_UINT; + format = VK_FORMAT_R8_UNORM; unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) * vk_format_get_blocksize(format); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 958fc169aaa..549d1de8b87 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1271,6 +1271,8 @@ struct tu_image struct tu_image_level levels[15]; unsigned tile_mode; unsigned cpp; + struct tu_image_level ubwc_levels[15]; + uint32_t ubwc_size; unsigned queue_family_mask; bool exclusive; @@ -1307,6 +1309,46 @@ tu_get_levelCount(const struct tu_image *image, : range->levelCount; } +static inline VkDeviceSize +tu_layer_size(struct tu_image *image, int level) +{ + if (image->type == VK_IMAGE_TYPE_3D) + return image->levels[level].size; + return image->layer_size; +} + +static inline uint32_t +tu_image_stride(struct tu_image *image, int level) +{ + return image->levels[level].pitch * image->cpp; +} + +static inline uint64_t +tu_image_base(struct tu_image *image, int level, int layer) +{ + return image->bo->iova + image->bo_offset + image->levels[level].offset + + layer * tu_layer_size(image, level); +} + +static inline VkDeviceSize +tu_image_ubwc_size(struct tu_image *image, int level) +{ + return image->ubwc_size; +} + +static inline uint32_t +tu_image_ubwc_pitch(struct tu_image *image, int level) +{ + return image->ubwc_levels[level].pitch; +} + +static inline uint64_t +tu_image_ubwc_base(struct tu_image *image, int level, int layer) +{ + return image->bo->iova + image->bo_offset + image->ubwc_levels[level].offset + + layer * tu_image_ubwc_size(image, level); +} + enum a6xx_tile_mode tu6_get_image_tile_mode(struct tu_image *image, int level); enum a3xx_msaa_samples @@ -1340,18 +1382,12 @@ struct tu_sampler bool needs_border; }; -struct tu_image_create_info -{ - const VkImageCreateInfo *vk_info; - bool scanout; - bool no_metadata_planes; -}; - VkResult tu_image_create(VkDevice _device, - const struct tu_image_create_info *info, + const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, - VkImage *pImage); + VkImage *pImage, + uint64_t modifier); VkResult tu_image_from_gralloc(VkDevice device_h, diff --git a/src/freedreno/vulkan/tu_wsi.c b/src/freedreno/vulkan/tu_wsi.c index 21466108b20..c80e489399f 100644 --- a/src/freedreno/vulkan/tu_wsi.c +++ b/src/freedreno/vulkan/tu_wsi.c @@ -27,6 +27,7 @@ #include "vk_util.h" #include "wsi_common.h" +#include "drm-uapi/drm_fourcc.h" static PFN_vkVoidFunction tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) @@ -34,13 +35,38 @@ tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) return tu_lookup_entrypoint_unchecked(pName); } +static uint64_t +tu_wsi_image_get_modifier(VkImage _image) +{ + TU_FROM_HANDLE(tu_image, image, _image); + + if (!image->tile_mode) + return DRM_FORMAT_MOD_LINEAR; + + if (image->ubwc_size) + return DRM_FORMAT_MOD_QCOM_COMPRESSED; + + /* TODO invent a modifier for tiled but not UBWC buffers: */ + return DRM_FORMAT_MOD_INVALID; +} + VkResult tu_wsi_init(struct tu_physical_device *physical_device) { - return wsi_device_init(&physical_device->wsi_device, - tu_physical_device_to_handle(physical_device), - tu_wsi_proc_addr, &physical_device->instance->alloc, - physical_device->master_fd, NULL); + VkResult result; + + result = wsi_device_init(&physical_device->wsi_device, + tu_physical_device_to_handle(physical_device), + tu_wsi_proc_addr, + &physical_device->instance->alloc, + physical_device->master_fd, NULL); + if (result != VK_SUCCESS) + return result; + + physical_device->wsi_device.supports_modifiers = true; + physical_device->wsi_device.image_get_modifier = tu_wsi_image_get_modifier; + + return VK_SUCCESS; } void -- 2.30.2