X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_image.c;h=6f2dc0421d064013b339d0ff90814e88e04e63a8;hb=a27823ef2cd617db8a57d6c1c645a220ac83ee21;hp=6c774de9f53c35ff63e264bf420785cb94187de8;hpb=f1efc9a1c89e3b8f1b5faad4238035ebe8a8f217;p=mesa.git diff --git a/src/freedreno/vulkan/tu_image.c b/src/freedreno/vulkan/tu_image.c index 6c774de9f53..6f2dc0421d0 100644 --- a/src/freedreno/vulkan/tu_image.c +++ b/src/freedreno/vulkan/tu_image.c @@ -29,141 +29,79 @@ #include "util/debug.h" #include "util/u_atomic.h" +#include "util/format/u_format.h" #include "vk_format.h" #include "vk_util.h" +#include "drm-uapi/drm_fourcc.h" -static inline bool -image_level_linear(struct tu_image *image, int level) +#include "tu_cs.h" + +static uint32_t +tu6_plane_count(VkFormat format) { - unsigned w = u_minify(image->extent.width, level); - return w < 16; + switch (format) { + default: + return 1; + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return 2; + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + return 3; + } } -enum a6xx_tile_mode -tu6_get_image_tile_mode(struct tu_image *image, int level) +static VkFormat +tu6_plane_format(VkFormat format, uint32_t plane) { - if (image_level_linear(image, level)) - return TILE6_LINEAR; - else - return image->tile_mode; + switch (format) { + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + /* note: with UBWC, and Y plane UBWC is different from R8_UNORM */ + return plane ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM; + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + return VK_FORMAT_R8_UNORM; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + return plane ? VK_FORMAT_S8_UINT : VK_FORMAT_D32_SFLOAT; + default: + return format; + } } -/* indexed by cpp, including msaa 2x and 4x: */ -static const struct { - unsigned pitchalign; - unsigned heightalign; -} tile_alignment[] = { - [1] = { 128, 32 }, - [2] = { 128, 16 }, - [3] = { 64, 32 }, - [4] = { 64, 16 }, - [6] = { 64, 16 }, - [8] = { 64, 16 }, - [12] = { 64, 16 }, - [16] = { 64, 16 }, - [24] = { 64, 16 }, - [32] = { 64, 16 }, - [48] = { 64, 16 }, - [64] = { 64, 16 }, - - /* special case for r8g8: */ - [0] = { 64, 32 }, -}; - -static void -setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) +static uint32_t +tu6_plane_index(VkFormat format, VkImageAspectFlags aspect_mask) { - VkFormat format = pCreateInfo->format; - enum vk_format_layout layout = vk_format_description(format)->layout; - uint32_t layer_size = 0; - int ta = image->cpp; - - /* The r8g8 format seems to not play by the normal tiling rules: */ - if (image->cpp == 2 && vk_format_get_nr_components(format) == 2) - ta = 0; - - for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) { - struct tu_image_level *slice = &image->levels[level]; - uint32_t width = u_minify(pCreateInfo->extent.width, level); - uint32_t height = u_minify(pCreateInfo->extent.height, level); - uint32_t depth = u_minify(pCreateInfo->extent.depth, level); - uint32_t aligned_height = height; - uint32_t blocks; - uint32_t pitchalign; - - if (image->tile_mode && !image_level_linear(image, level)) { - /* tiled levels of 3D textures are rounded up to PoT dimensions: */ - if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) { - width = util_next_power_of_two(width); - height = aligned_height = util_next_power_of_two(height); - } - pitchalign = tile_alignment[ta].pitchalign; - aligned_height = align(aligned_height, tile_alignment[ta].heightalign); - } else { - pitchalign = 64; - } - - /* The blits used for mem<->gmem work at a granularity of - * 32x32, which can cause faults due to over-fetch on the - * last level. The simple solution is to over-allocate a - * bit the last level to ensure any over-fetch is harmless. - * The pitch is already sufficiently aligned, but height - * may not be: - */ - if (level + 1 == pCreateInfo->mipLevels) - aligned_height = align(aligned_height, 32); - - if (layout == VK_FORMAT_LAYOUT_ASTC) - slice->pitch = - util_align_npot(width, pitchalign * vk_format_get_blockwidth(format)); - else - slice->pitch = align(width, pitchalign); - - slice->offset = layer_size; - blocks = vk_format_get_block_count(format, slice->pitch, aligned_height); - - /* 1d array and 2d array textures must all have the same layer size - * for each miplevel on a6xx. 3d textures can have different layer - * sizes for high levels, but the hw auto-sizer is buggy (or at least - * different than what this code does), so as soon as the layer size - * range gets into range, we stop reducing it. - */ - if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) { - if (level < 1 || image->levels[level - 1].size > 0xf000) { - slice->size = align(blocks * image->cpp, 4096); - } else { - slice->size = image->levels[level - 1].size; - } - } else { - slice->size = blocks * image->cpp; - } - - layer_size += slice->size * depth; + switch (aspect_mask) { + default: + return 0; + case VK_IMAGE_ASPECT_PLANE_1_BIT: + return 1; + case VK_IMAGE_ASPECT_PLANE_2_BIT: + return 2; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return format == VK_FORMAT_D32_SFLOAT_S8_UINT; } - - image->layer_size = align(layer_size, 4096); } VkResult tu_image_create(VkDevice _device, - const struct tu_image_create_info *create_info, + const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc, - VkImage *pImage) + VkImage *pImage, + uint64_t modifier, + const VkSubresourceLayout *plane_layouts) { TU_FROM_HANDLE(tu_device, device, _device); - const VkImageCreateInfo *pCreateInfo = create_info->vk_info; struct tu_image *image = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - tu_assert(pCreateInfo->mipLevels > 0); - tu_assert(pCreateInfo->arrayLayers > 0); - tu_assert(pCreateInfo->samples > 0); - tu_assert(pCreateInfo->extent.width > 0); - tu_assert(pCreateInfo->extent.height > 0); - tu_assert(pCreateInfo->extent.depth > 0); + assert(pCreateInfo->mipLevels > 0); + assert(pCreateInfo->arrayLayers > 0); + assert(pCreateInfo->samples > 0); + assert(pCreateInfo->extent.width > 0); + assert(pCreateInfo->extent.height > 0); + assert(pCreateInfo->extent.depth > 0); - image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + image = vk_object_zalloc(&device->vk, alloc, sizeof(*image), + VK_OBJECT_TYPE_IMAGE); if (!image) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -177,7 +115,6 @@ tu_image_create(VkDevice _device, image->level_count = pCreateInfo->mipLevels; image->layer_count = pCreateInfo->arrayLayers; image->samples = pCreateInfo->samples; - image->cpp = vk_format_get_blocksize(image->vk_format) * image->samples; image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { @@ -194,99 +131,293 @@ tu_image_create(VkDevice _device, vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL; - image->tile_mode = TILE6_3; - + enum a6xx_tile_mode tile_mode = TILE6_3; + bool ubwc_enabled = + !(device->physical_device->instance->debug_flags & TU_DEBUG_NOUBWC); + + /* disable tiling when linear is requested, for YUYV/UYVY, and for mutable + * images. Mutable images can be reinterpreted as any other compatible + * format, including swapped formats which aren't supported with tiling. + * This means that we have to fall back to linear almost always. However + * depth and stencil formats cannot be reintepreted as another format, and + * cannot be linear with sysmem rendering, so don't fall back for those. + * + * TODO: Be smarter and use usage bits and VK_KHR_image_format_list to + * enable tiling and/or UBWC when possible. + */ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR || - /* compressed textures can't use tiling? */ - vk_format_is_compressed(image->vk_format) || - /* scanout needs to be linear (what about tiling modifiers?) */ - create_info->scanout || - /* image_to_image copy doesn't deal with tiling+swap */ - tu6_get_native_format(image->vk_format)->swap || - /* r8g8 formats are tiled different and could break image_to_image copy */ - (image->cpp == 2 && vk_format_get_nr_components(image->vk_format) == 2)) - image->tile_mode = TILE6_LINEAR; - - setup_slices(image, pCreateInfo); - - image->size = image->layer_size * pCreateInfo->arrayLayers; + modifier == DRM_FORMAT_MOD_LINEAR || + vk_format_description(image->vk_format)->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED || + (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT && + !vk_format_is_depth_or_stencil(image->vk_format))) { + tile_mode = TILE6_LINEAR; + ubwc_enabled = false; + } + + /* UBWC is supported for these formats, but NV12 has a special UBWC + * format for accessing the Y plane aspect, which isn't implemented + * For IYUV, the blob doesn't use UBWC, but it seems to work, but + * disable it since we don't know if a special UBWC format is needed + * like NV12 + * + * Disable tiling completely, because we set the TILE_ALL bit to + * match the blob, however fdl expects the TILE_ALL bit to not be + * set for non-UBWC tiled formats + */ + if (image->vk_format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM || + image->vk_format == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM) { + tile_mode = TILE6_LINEAR; + ubwc_enabled = false; + } + + /* don't use UBWC with compressed formats */ + if (vk_format_is_compressed(image->vk_format)) + ubwc_enabled = false; + + /* UBWC can't be used with E5B9G9R9 */ + if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) + ubwc_enabled = false; + + /* separate stencil doesn't have a UBWC enable bit */ + if (image->vk_format == VK_FORMAT_S8_UINT) + ubwc_enabled = false; + + if (image->extent.depth > 1) { + tu_finishme("UBWC with 3D textures"); + ubwc_enabled = false; + } + + /* Disable UBWC for storage images. + * + * The closed GL driver skips UBWC for storage images (and additionally + * uses linear for writeonly images). We seem to have image tiling working + * in freedreno in general, so turnip matches that. freedreno also enables + * UBWC on images, but it's not really tested due to the lack of + * UBWC-enabled mipmaps in freedreno currently. Just match the closed GL + * behavior of no UBWC. + */ + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) + ubwc_enabled = false; + + /* Disable UBWC for D24S8 on A630 in some cases + * + * VK_IMAGE_ASPECT_STENCIL_BIT image view requires to be able to sample + * from the stencil component as UINT, however no format allows this + * on a630 (the special FMT6_Z24_UINT_S8_UINT format is missing) + * + * It must be sampled as FMT6_8_8_8_8_UINT, which is not UBWC-compatible + * + * Additionally, the special AS_R8G8B8A8 format is broken without UBWC, + * so we have to fallback to 8_8_8_8_UNORM when UBWC is disabled + */ + if (device->physical_device->limited_z24s8 && + image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && + (image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) { + ubwc_enabled = false; + } + + /* expect UBWC enabled if we asked for it */ + assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled); + + for (uint32_t i = 0; i < tu6_plane_count(image->vk_format); i++) { + struct fdl_layout *layout = &image->layout[i]; + VkFormat format = tu6_plane_format(image->vk_format, i); + uint32_t width0 = pCreateInfo->extent.width; + uint32_t height0 = pCreateInfo->extent.height; + + if (i > 0) { + switch (image->vk_format) { + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + /* half width/height on chroma planes */ + width0 = (width0 + 1) >> 1; + height0 = (height0 + 1) >> 1; + break; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + /* no UBWC for separate stencil */ + ubwc_enabled = false; + break; + default: + break; + } + } + + struct fdl_explicit_layout plane_layout; + + if (plane_layouts) { + /* only expect simple 2D images for now */ + if (pCreateInfo->mipLevels != 1 || + pCreateInfo->arrayLayers != 1 || + image->extent.depth != 1) + goto invalid_layout; + + plane_layout.offset = plane_layouts[i].offset; + plane_layout.pitch = plane_layouts[i].rowPitch; + /* note: use plane_layouts[0].arrayPitch to support array formats */ + } + + layout->tile_mode = tile_mode; + layout->ubwc = ubwc_enabled; + + if (!fdl6_layout(layout, vk_format_to_pipe_format(format), + image->samples, + width0, height0, + pCreateInfo->extent.depth, + pCreateInfo->mipLevels, + pCreateInfo->arrayLayers, + pCreateInfo->imageType == VK_IMAGE_TYPE_3D, + plane_layouts ? &plane_layout : NULL)) { + assert(plane_layouts); /* can only fail with explicit layout */ + goto invalid_layout; + } + + /* fdl6_layout can't take explicit offset without explicit pitch + * add offset manually for extra layouts for planes + */ + if (!plane_layouts && i > 0) { + uint32_t offset = ALIGN_POT(image->total_size, 4096); + for (int i = 0; i < pCreateInfo->mipLevels; i++) { + layout->slices[i].offset += offset; + layout->ubwc_slices[i].offset += offset; + } + layout->size += offset; + } + + image->total_size = MAX2(image->total_size, layout->size); + } + *pImage = tu_image_to_handle(image); return VK_SUCCESS; + +invalid_layout: + vk_object_free(&device->vk, alloc, image); + return vk_error(device->instance, VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); } -static enum a6xx_tex_fetchsize -tu6_fetchsize(VkFormat format) +static void +compose_swizzle(unsigned char *swiz, const VkComponentMapping *mapping) { - if (vk_format_description(format)->layout == VK_FORMAT_LAYOUT_ASTC) - return TFETCH6_16_BYTE; - - switch (vk_format_get_blocksize(format) / vk_format_get_blockwidth(format)) { - case 1: return TFETCH6_1_BYTE; - case 2: return TFETCH6_2_BYTE; - case 4: return TFETCH6_4_BYTE; - case 8: return TFETCH6_8_BYTE; - case 16: return TFETCH6_16_BYTE; - default: - unreachable("bad block size"); + unsigned char src_swiz[4] = { swiz[0], swiz[1], swiz[2], swiz[3] }; + VkComponentSwizzle vk_swiz[4] = { + mapping->r, mapping->g, mapping->b, mapping->a + }; + for (int i = 0; i < 4; i++) { + switch (vk_swiz[i]) { + case VK_COMPONENT_SWIZZLE_IDENTITY: + swiz[i] = src_swiz[i]; + break; + case VK_COMPONENT_SWIZZLE_R...VK_COMPONENT_SWIZZLE_A: + swiz[i] = src_swiz[vk_swiz[i] - VK_COMPONENT_SWIZZLE_R]; + break; + case VK_COMPONENT_SWIZZLE_ZERO: + swiz[i] = A6XX_TEX_ZERO; + break; + case VK_COMPONENT_SWIZZLE_ONE: + swiz[i] = A6XX_TEX_ONE; + break; + default: + unreachable("unexpected swizzle"); + } } } static uint32_t -tu6_texswiz(const VkComponentMapping *comps, const unsigned char *fmt_swiz) +tu6_texswiz(const VkComponentMapping *comps, + const struct tu_sampler_ycbcr_conversion *conversion, + VkFormat format, + VkImageAspectFlagBits aspect_mask, + bool limited_z24s8) { - unsigned char swiz[4] = {comps->r, comps->g, comps->b, comps->a}; - unsigned char vk_swizzle[] = { - [VK_COMPONENT_SWIZZLE_ZERO] = A6XX_TEX_ZERO, - [VK_COMPONENT_SWIZZLE_ONE] = A6XX_TEX_ONE, - [VK_COMPONENT_SWIZZLE_R] = A6XX_TEX_X, - [VK_COMPONENT_SWIZZLE_G] = A6XX_TEX_Y, - [VK_COMPONENT_SWIZZLE_B] = A6XX_TEX_Z, - [VK_COMPONENT_SWIZZLE_A] = A6XX_TEX_W, + unsigned char swiz[4] = { + A6XX_TEX_X, A6XX_TEX_Y, A6XX_TEX_Z, A6XX_TEX_W, }; - for (unsigned i = 0; i < 4; i++) { - swiz[i] = (swiz[i] == VK_COMPONENT_SWIZZLE_IDENTITY) ? i : vk_swizzle[swiz[i]]; - /* if format has 0/1 in channel, use that (needed for bc1_rgb) */ - if (swiz[i] < 4) { - switch (fmt_swiz[swiz[i]]) { - case VK_SWIZZLE_0: swiz[i] = A6XX_TEX_ZERO; break; - case VK_SWIZZLE_1: swiz[i] = A6XX_TEX_ONE; break; + + switch (format) { + case VK_FORMAT_G8B8G8R8_422_UNORM: + case VK_FORMAT_B8G8R8G8_422_UNORM: + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + swiz[0] = A6XX_TEX_Z; + swiz[1] = A6XX_TEX_X; + swiz[2] = A6XX_TEX_Y; + break; + case VK_FORMAT_BC1_RGB_UNORM_BLOCK: + case VK_FORMAT_BC1_RGB_SRGB_BLOCK: + /* same hardware format is used for BC1_RGB / BC1_RGBA */ + swiz[3] = A6XX_TEX_ONE; + break; + case VK_FORMAT_D24_UNORM_S8_UINT: + if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { + if (limited_z24s8) { + /* using FMT6_8_8_8_8_UINT */ + swiz[0] = A6XX_TEX_W; + swiz[1] = A6XX_TEX_ZERO; + } else { + /* using FMT6_Z24_UINT_S8_UINT */ + swiz[0] = A6XX_TEX_Y; + swiz[1] = A6XX_TEX_ZERO; } } + default: + break; } + compose_swizzle(swiz, comps); + if (conversion) + compose_swizzle(swiz, &conversion->components); + return A6XX_TEX_CONST_0_SWIZ_X(swiz[0]) | A6XX_TEX_CONST_0_SWIZ_Y(swiz[1]) | A6XX_TEX_CONST_0_SWIZ_Z(swiz[2]) | A6XX_TEX_CONST_0_SWIZ_W(swiz[3]); } -static enum a6xx_tex_type -tu6_tex_type(VkImageViewType type) +void +tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { - switch (type) { - default: - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - return A6XX_TEX_1D; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - return A6XX_TEX_2D; - case VK_IMAGE_VIEW_TYPE_3D: - return A6XX_TEX_3D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - return A6XX_TEX_CUBE; - } + tu_cs_emit(cs, iview->PITCH); + tu_cs_emit(cs, iview->layer_size >> 6); + tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer); +} + +void +tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) +{ + tu_cs_emit(cs, iview->stencil_PITCH); + tu_cs_emit(cs, iview->stencil_layer_size >> 6); + tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); +} + +void +tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src) +{ + tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer); + /* SP_PS_2D_SRC_PITCH has shifted pitch field */ + tu_cs_emit(cs, iview->PITCH << (src ? 9 : 0)); +} + +void +tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) +{ + tu_cs_emit_qw(cs, iview->ubwc_addr + iview->ubwc_layer_size * layer); + tu_cs_emit(cs, iview->FLAG_BUFFER_PITCH); } void tu_image_view_init(struct tu_image_view *iview, - struct tu_device *device, - const VkImageViewCreateInfo *pCreateInfo) + const VkImageViewCreateInfo *pCreateInfo, + bool limited_z24s8) { TU_FROM_HANDLE(tu_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; + VkFormat format = pCreateInfo->format; + VkImageAspectFlagBits aspect_mask = pCreateInfo->subresourceRange.aspectMask; + + const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO); + const struct tu_sampler_ycbcr_conversion *conversion = ycbcr_conversion ? + tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL; switch (image->type) { case VK_IMAGE_TYPE_1D: @@ -296,84 +427,250 @@ tu_image_view_init(struct tu_image_view *iview, break; case VK_IMAGE_TYPE_3D: assert(range->baseArrayLayer + tu_get_layerCount(image, range) <= - tu_minify(image->extent.depth, range->baseMipLevel)); + u_minify(image->extent.depth, range->baseMipLevel)); break; default: unreachable("bad VkImageType"); } iview->image = image; - iview->type = pCreateInfo->viewType; - iview->vk_format = pCreateInfo->format; - iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - - if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { - iview->vk_format = vk_format_stencil_only(iview->vk_format); - } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { - iview->vk_format = vk_format_depth_only(iview->vk_format); + + memset(iview->descriptor, 0, sizeof(iview->descriptor)); + + struct fdl_layout *layout = + &image->layout[tu6_plane_index(image->vk_format, aspect_mask)]; + + uint32_t width = u_minify(layout->width0, range->baseMipLevel); + uint32_t height = u_minify(layout->height0, range->baseMipLevel); + uint32_t storage_depth = tu_get_layerCount(image, range); + if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) { + storage_depth = u_minify(image->extent.depth, range->baseMipLevel); } - // should we minify? - iview->extent = image->extent; + uint32_t depth = storage_depth; + if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE || + pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) { + /* Cubes are treated as 2D arrays for storage images, so only divide the + * depth by 6 for the texture descriptor. + */ + depth /= 6; + } - iview->base_layer = range->baseArrayLayer; - iview->layer_count = tu_get_layerCount(image, range); - iview->base_mip = range->baseMipLevel; - iview->level_count = tu_get_levelCount(image, range); + uint64_t base_addr = image->bo->iova + image->bo_offset + + fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer); + uint64_t ubwc_addr = image->bo->iova + image->bo_offset + + fdl_ubwc_offset(layout, range->baseMipLevel, range->baseArrayLayer); - memset(iview->descriptor, 0, sizeof(iview->descriptor)); + uint32_t pitch = fdl_pitch(layout, range->baseMipLevel); + uint32_t ubwc_pitch = fdl_ubwc_pitch(layout, range->baseMipLevel); + uint32_t layer_size = fdl_layer_stride(layout, range->baseMipLevel); - const struct tu_native_format *fmt = tu6_get_native_format(iview->vk_format); - struct tu_image_level *slice0 = &image->levels[iview->base_mip]; - uint64_t base_addr = image->bo->iova + iview->base_layer * image->layer_size + slice0->offset; - uint32_t pitch = (slice0->pitch / vk_format_get_blockwidth(iview->vk_format)) * - vk_format_get_blocksize(iview->vk_format); - enum a6xx_tile_mode tile_mode = - image_level_linear(image, iview->base_mip) ? TILE6_LINEAR : image->tile_mode; + if (aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) + format = tu6_plane_format(format, tu6_plane_index(format, aspect_mask)); + + struct tu_native_format fmt = tu6_format_texture(format, layout->tile_mode); + /* note: freedreno layout assumes no TILE_ALL bit for non-UBWC + * this means smaller mipmap levels have a linear tile mode + */ + fmt.tile_mode = fdl_tile_mode(layout, range->baseMipLevel); + + bool ubwc_enabled = fdl_ubwc_enabled(layout, range->baseMipLevel); + + bool is_d24s8 = (format == VK_FORMAT_D24_UNORM_S8_UINT || + format == VK_FORMAT_X8_D24_UNORM_PACK32); + + if (is_d24s8 && ubwc_enabled) + fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + + unsigned fmt_tex = fmt.fmt; + if (is_d24s8) { + if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) + fmt_tex = FMT6_Z24_UNORM_S8_UINT; + if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) + fmt_tex = limited_z24s8 ? FMT6_8_8_8_8_UINT : FMT6_Z24_UINT_S8_UINT; + /* TODO: also use this format with storage descriptor ? */ + } iview->descriptor[0] = - A6XX_TEX_CONST_0_TILE_MODE(tile_mode) | - COND(vk_format_is_srgb(iview->vk_format), A6XX_TEX_CONST_0_SRGB) | - A6XX_TEX_CONST_0_FMT(fmt->tex) | + A6XX_TEX_CONST_0_TILE_MODE(fmt.tile_mode) | + COND(vk_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | + A6XX_TEX_CONST_0_FMT(fmt_tex) | A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) | - A6XX_TEX_CONST_0_SWAP(image->tile_mode ? WZYX : fmt->swap) | - tu6_texswiz(&pCreateInfo->components, vk_format_description(iview->vk_format)->swizzle) | - A6XX_TEX_CONST_0_MIPLVLS(iview->level_count - 1); - iview->descriptor[1] = - A6XX_TEX_CONST_1_WIDTH(u_minify(image->extent.width, iview->base_mip)) | - A6XX_TEX_CONST_1_HEIGHT(u_minify(image->extent.height, iview->base_mip)); + A6XX_TEX_CONST_0_SWAP(fmt.swap) | + tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask, limited_z24s8) | + A6XX_TEX_CONST_0_MIPLVLS(tu_get_levelCount(image, range) - 1); + iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); iview->descriptor[2] = - A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(iview->vk_format)) | + A6XX_TEX_CONST_2_PITCHALIGN(layout->pitchalign - 6) | A6XX_TEX_CONST_2_PITCH(pitch) | - A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType)); - iview->descriptor[3] = 0; + A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType, false)); + iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(layer_size); iview->descriptor[4] = base_addr; - iview->descriptor[5] = base_addr >> 32; + iview->descriptor[5] = (base_addr >> 32) | A6XX_TEX_CONST_5_DEPTH(depth); + + if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM || + format == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM) { + /* chroma offset re-uses MIPLVLS bits */ + assert(tu_get_levelCount(image, range) == 1); + if (conversion) { + if (conversion->chroma_offsets[0] == VK_CHROMA_LOCATION_MIDPOINT) + iview->descriptor[0] |= A6XX_TEX_CONST_0_CHROMA_MIDPOINT_X; + if (conversion->chroma_offsets[1] == VK_CHROMA_LOCATION_MIDPOINT) + iview->descriptor[0] |= A6XX_TEX_CONST_0_CHROMA_MIDPOINT_Y; + } - if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D) { - iview->descriptor[3] |= A6XX_TEX_CONST_3_ARRAY_PITCH(image->layer_size); - iview->descriptor[5] |= A6XX_TEX_CONST_5_DEPTH(iview->layer_count); - } else { + uint64_t base_addr[3]; + + iview->descriptor[3] |= A6XX_TEX_CONST_3_TILE_ALL; + if (ubwc_enabled) { + iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG; + /* no separate ubwc base, image must have the expected layout */ + for (uint32_t i = 0; i < 3; i++) { + base_addr[i] = image->bo->iova + image->bo_offset + + fdl_ubwc_offset(&image->layout[i], range->baseMipLevel, range->baseArrayLayer); + } + } else { + for (uint32_t i = 0; i < 3; i++) { + base_addr[i] = image->bo->iova + image->bo_offset + + fdl_surface_offset(&image->layout[i], range->baseMipLevel, range->baseArrayLayer); + } + } + + iview->descriptor[4] = base_addr[0]; + iview->descriptor[5] |= base_addr[0] >> 32; + iview->descriptor[6] = + A6XX_TEX_CONST_6_PLANE_PITCH(fdl_pitch(&image->layout[1], range->baseMipLevel)); + iview->descriptor[7] = base_addr[1]; + iview->descriptor[8] = base_addr[1] >> 32; + iview->descriptor[9] = base_addr[2]; + iview->descriptor[10] = base_addr[2] >> 32; + + assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D); + assert(!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)); + return; + } + + if (ubwc_enabled) { + uint32_t block_width, block_height; + fdl6_get_ubwc_blockwidth(layout, &block_width, &block_height); + + iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL; + iview->descriptor[7] = ubwc_addr; + iview->descriptor[8] = ubwc_addr >> 32; + iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(layout->ubwc_layer_size >> 2); + iview->descriptor[10] |= + A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_pitch) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) | + A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height))); + } + + if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) { iview->descriptor[3] |= - A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size) | - A6XX_TEX_CONST_3_ARRAY_PITCH(slice0->size); - iview->descriptor[5] |= - A6XX_TEX_CONST_5_DEPTH(u_minify(image->extent.depth, iview->base_mip)); + A6XX_TEX_CONST_3_MIN_LAYERSZ(layout->slices[image->level_count - 1].size0); } -} -unsigned -tu_image_queue_family_mask(const struct tu_image *image, - uint32_t family, - uint32_t queue_family) -{ - if (!image->exclusive) - return image->queue_family_mask; - if (family == VK_QUEUE_FAMILY_EXTERNAL) - return (1u << TU_MAX_QUEUE_FAMILIES) - 1u; - if (family == VK_QUEUE_FAMILY_IGNORED) - return 1u << queue_family; - return 1u << family; + iview->SP_PS_2D_SRC_INFO = A6XX_SP_PS_2D_SRC_INFO( + .color_format = fmt.fmt, + .tile_mode = fmt.tile_mode, + .color_swap = fmt.swap, + .flags = ubwc_enabled, + .srgb = vk_format_is_srgb(format), + .samples = tu_msaa_samples(image->samples), + .samples_average = image->samples > 1 && + !vk_format_is_int(format) && + !vk_format_is_depth_or_stencil(format), + .unk20 = 1, + .unk22 = 1).value; + iview->SP_PS_2D_SRC_SIZE = + A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height).value; + + /* note: these have same encoding for MRT and 2D (except 2D PITCH src) */ + iview->PITCH = A6XX_RB_DEPTH_BUFFER_PITCH(pitch).value; + iview->FLAG_BUFFER_PITCH = A6XX_RB_DEPTH_FLAG_BUFFER_PITCH( + .pitch = ubwc_pitch, .array_pitch = layout->ubwc_layer_size >> 2).value; + + iview->base_addr = base_addr; + iview->ubwc_addr = ubwc_addr; + iview->layer_size = layer_size; + iview->ubwc_layer_size = layout->ubwc_layer_size; + + /* Don't set fields that are only used for attachments/blit dest if COLOR + * is unsupported. + */ + if (!(fmt.supported & FMT_COLOR)) + return; + + struct tu_native_format cfmt = tu6_format_color(format, layout->tile_mode); + cfmt.tile_mode = fmt.tile_mode; + + if (is_d24s8 && ubwc_enabled) + cfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + memset(iview->storage_descriptor, 0, sizeof(iview->storage_descriptor)); + + iview->storage_descriptor[0] = + A6XX_IBO_0_FMT(fmt.fmt) | + A6XX_IBO_0_TILE_MODE(fmt.tile_mode); + iview->storage_descriptor[1] = + A6XX_IBO_1_WIDTH(width) | + A6XX_IBO_1_HEIGHT(height); + iview->storage_descriptor[2] = + A6XX_IBO_2_PITCH(pitch) | + A6XX_IBO_2_TYPE(tu6_tex_type(pCreateInfo->viewType, true)); + iview->storage_descriptor[3] = A6XX_IBO_3_ARRAY_PITCH(layer_size); + + iview->storage_descriptor[4] = base_addr; + iview->storage_descriptor[5] = (base_addr >> 32) | A6XX_IBO_5_DEPTH(storage_depth); + + if (ubwc_enabled) { + iview->storage_descriptor[3] |= A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27; + iview->storage_descriptor[7] |= ubwc_addr; + iview->storage_descriptor[8] |= ubwc_addr >> 32; + iview->storage_descriptor[9] = A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(layout->ubwc_layer_size >> 2); + iview->storage_descriptor[10] = + A6XX_IBO_10_FLAG_BUFFER_PITCH(ubwc_pitch); + } + } + + iview->extent.width = width; + iview->extent.height = height; + iview->need_y2_align = + (fmt.tile_mode == TILE6_LINEAR && range->baseMipLevel != image->level_count - 1); + + iview->ubwc_enabled = ubwc_enabled; + + iview->RB_MRT_BUF_INFO = A6XX_RB_MRT_BUF_INFO(0, + .color_tile_mode = cfmt.tile_mode, + .color_format = cfmt.fmt, + .color_swap = cfmt.swap).value; + + iview->SP_FS_MRT_REG = A6XX_SP_FS_MRT_REG(0, + .color_format = cfmt.fmt, + .color_sint = vk_format_is_sint(format), + .color_uint = vk_format_is_uint(format)).value; + + iview->RB_2D_DST_INFO = A6XX_RB_2D_DST_INFO( + .color_format = cfmt.fmt, + .tile_mode = cfmt.tile_mode, + .color_swap = cfmt.swap, + .flags = ubwc_enabled, + .srgb = vk_format_is_srgb(format)).value; + + iview->RB_BLIT_DST_INFO = A6XX_RB_BLIT_DST_INFO( + .tile_mode = cfmt.tile_mode, + .samples = tu_msaa_samples(iview->image->samples), + .color_format = cfmt.fmt, + .color_swap = cfmt.swap, + .flags = ubwc_enabled).value; + + if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + layout = &image->layout[1]; + iview->stencil_base_addr = image->bo->iova + image->bo_offset + + fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer); + iview->stencil_layer_size = fdl_layer_stride(layout, range->baseMipLevel); + iview->stencil_PITCH = A6XX_RB_STENCIL_BUFFER_PITCH(fdl_pitch(layout, range->baseMipLevel)).value; + } } VkResult @@ -391,16 +688,39 @@ tu_CreateImage(VkDevice device, pAllocator, pImage); #endif - const struct wsi_image_create_info *wsi_info = - vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); - bool scanout = wsi_info && wsi_info->scanout; + uint64_t modifier = DRM_FORMAT_MOD_INVALID; + const VkSubresourceLayout *plane_layouts = NULL; + + if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + const VkImageDrmFormatModifierListCreateInfoEXT *mod_info = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); + const VkImageDrmFormatModifierExplicitCreateInfoEXT *drm_explicit_info = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT); + + assert(mod_info || drm_explicit_info); + + if (mod_info) { + modifier = DRM_FORMAT_MOD_LINEAR; + for (unsigned i = 0; i < mod_info->drmFormatModifierCount; i++) { + if (mod_info->pDrmFormatModifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED) + modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED; + } + } else { + modifier = drm_explicit_info->drmFormatModifier; + assert(modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED); + plane_layouts = drm_explicit_info->pPlaneLayouts; + } + } else { + const struct wsi_image_create_info *wsi_info = + vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); + if (wsi_info && wsi_info->scanout) + modifier = DRM_FORMAT_MOD_LINEAR; + } - return tu_image_create(device, - &(struct tu_image_create_info) { - .vk_info = pCreateInfo, - .scanout = scanout, - }, - pAllocator, pImage); + return tu_image_create(device, pCreateInfo, pAllocator, pImage, modifier, plane_layouts); } void @@ -417,7 +737,7 @@ tu_DestroyImage(VkDevice _device, if (image->owned_memory != VK_NULL_HANDLE) tu_FreeMemory(_device, image->owned_memory, pAllocator); - vk_free2(&device->alloc, pAllocator, image); + vk_object_free(&device->vk, pAllocator, image); } void @@ -428,18 +748,48 @@ tu_GetImageSubresourceLayout(VkDevice _device, { TU_FROM_HANDLE(tu_image, image, _image); - const uint32_t layer_offset = image->layer_size * pSubresource->arrayLayer; - const struct tu_image_level *level = - image->levels + pSubresource->mipLevel; + struct fdl_layout *layout = + &image->layout[tu6_plane_index(image->vk_format, pSubresource->aspectMask)]; + const struct fdl_slice *slice = layout->slices + pSubresource->mipLevel; + + pLayout->offset = + fdl_surface_offset(layout, pSubresource->mipLevel, pSubresource->arrayLayer); + pLayout->size = slice->size0; + pLayout->rowPitch = fdl_pitch(layout, pSubresource->mipLevel); + pLayout->arrayPitch = fdl_layer_stride(layout, pSubresource->mipLevel); + pLayout->depthPitch = slice->size0; + + if (fdl_ubwc_enabled(layout, pSubresource->mipLevel)) { + /* UBWC starts at offset 0 */ + pLayout->offset = 0; + /* UBWC scanout won't match what the kernel wants if we have levels/layers */ + assert(image->level_count == 1 && image->layer_count == 1); + } +} + +VkResult tu_GetImageDrmFormatModifierPropertiesEXT( + VkDevice device, + VkImage _image, + VkImageDrmFormatModifierPropertiesEXT* pProperties) +{ + TU_FROM_HANDLE(tu_image, image, _image); + + assert(pProperties->sType == + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT); + + /* TODO invent a modifier for tiled but not UBWC buffers */ + + if (!image->layout[0].tile_mode) + pProperties->drmFormatModifier = DRM_FORMAT_MOD_LINEAR; + else if (image->layout[0].ubwc_layer_size) + pProperties->drmFormatModifier = DRM_FORMAT_MOD_QCOM_COMPRESSED; + else + pProperties->drmFormatModifier = DRM_FORMAT_MOD_INVALID; - pLayout->offset = layer_offset + level->offset; - pLayout->size = level->size; - pLayout->rowPitch = - level->pitch * vk_format_get_blocksize(image->vk_format); - pLayout->arrayPitch = image->layer_size; - pLayout->depthPitch = level->size; + return VK_SUCCESS; } + VkResult tu_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, @@ -449,12 +799,12 @@ tu_CreateImageView(VkDevice _device, TU_FROM_HANDLE(tu_device, device, _device); struct tu_image_view *view; - view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + view = vk_object_alloc(&device->vk, pAllocator, sizeof(*view), + VK_OBJECT_TYPE_IMAGE_VIEW); if (view == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - tu_image_view_init(view, device, pCreateInfo); + tu_image_view_init(view, pCreateInfo, device->physical_device->limited_z24s8); *pView = tu_image_view_to_handle(view); @@ -471,7 +821,8 @@ tu_DestroyImageView(VkDevice _device, if (!iview) return; - vk_free2(&device->alloc, pAllocator, iview); + + vk_object_free(&device->vk, pAllocator, iview); } void @@ -481,10 +832,45 @@ tu_buffer_view_init(struct tu_buffer_view *view, { TU_FROM_HANDLE(tu_buffer, buffer, pCreateInfo->buffer); - view->range = pCreateInfo->range == VK_WHOLE_SIZE - ? buffer->size - pCreateInfo->offset - : pCreateInfo->range; - view->vk_format = pCreateInfo->format; + view->buffer = buffer; + + enum VkFormat vfmt = pCreateInfo->format; + enum pipe_format pfmt = vk_format_to_pipe_format(vfmt); + const struct tu_native_format fmt = tu6_format_texture(vfmt, TILE6_LINEAR); + + uint32_t range; + if (pCreateInfo->range == VK_WHOLE_SIZE) + range = buffer->size - pCreateInfo->offset; + else + range = pCreateInfo->range; + uint32_t elements = range / util_format_get_blocksize(pfmt); + + static const VkComponentMapping components = { + .r = VK_COMPONENT_SWIZZLE_R, + .g = VK_COMPONENT_SWIZZLE_G, + .b = VK_COMPONENT_SWIZZLE_B, + .a = VK_COMPONENT_SWIZZLE_A, + }; + + uint64_t iova = tu_buffer_iova(buffer) + pCreateInfo->offset; + + memset(&view->descriptor, 0, sizeof(view->descriptor)); + + view->descriptor[0] = + A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | + A6XX_TEX_CONST_0_SWAP(fmt.swap) | + A6XX_TEX_CONST_0_FMT(fmt.fmt) | + A6XX_TEX_CONST_0_MIPLVLS(0) | + tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT, false); + COND(vk_format_is_srgb(vfmt), A6XX_TEX_CONST_0_SRGB); + view->descriptor[1] = + A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) | + A6XX_TEX_CONST_1_HEIGHT(elements >> 15); + view->descriptor[2] = + A6XX_TEX_CONST_2_UNK4 | + A6XX_TEX_CONST_2_UNK31; + view->descriptor[4] = iova; + view->descriptor[5] = iova >> 32; } VkResult @@ -496,8 +882,8 @@ tu_CreateBufferView(VkDevice _device, TU_FROM_HANDLE(tu_device, device, _device); struct tu_buffer_view *view; - view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + view = vk_object_alloc(&device->vk, pAllocator, sizeof(*view), + VK_OBJECT_TYPE_BUFFER_VIEW); if (!view) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); @@ -519,5 +905,5 @@ tu_DestroyBufferView(VkDevice _device, if (!view) return; - vk_free2(&device->alloc, pAllocator, view); + vk_object_free(&device->vk, pAllocator, view); }