turnip: remove compute emit_border_color
[mesa.git] / src / freedreno / vulkan / tu_image.c
index 50bd1dbbb1929f219a8c90054eca47b785d25b4d..83e0d9e9b2a43ba01dafc90ff3d8c9363e655274 100644 (file)
 #include "util/u_atomic.h"
 #include "vk_format.h"
 #include "vk_util.h"
+#include "drm-uapi/drm_fourcc.h"
 
 static inline bool
-image_level_linear(struct tu_image *image, int level)
+image_level_linear(struct tu_image *image, int level, bool ubwc)
 {
    unsigned w = u_minify(image->extent.width, level);
-   return w < 16;
+   /* With UBWC all levels are tiled/compressed, so no level is ever reported
+    * as linear; without UBWC a level is linear when its minified width is
+    * below 16.
+    */
+   return ubwc ? false : (w < 16);
 }
 
 enum a6xx_tile_mode
 tu6_get_image_tile_mode(struct tu_image *image, int level)
 {
-   if (image_level_linear(image, level))
+   if (image_level_linear(image, level, !!image->layout.ubwc_size))
       return TILE6_LINEAR;
    else
-      return image->tile_mode;
-}
-
-/* indexed by cpp, including msaa 2x and 4x: */
-static const struct {
-   unsigned pitchalign;
-   unsigned heightalign;
-} tile_alignment[] = {
-   [1]  = { 128, 32 },
-   [2]  = { 128, 16 },
-   [3]  = {  64, 32 },
-   [4]  = {  64, 16 },
-   [6]  = {  64, 16 },
-   [8]  = {  64, 16 },
-   [12] = {  64, 16 },
-   [16] = {  64, 16 },
-   [24] = {  64, 16 },
-   [32] = {  64, 16 },
-   [48] = {  64, 16 },
-   [64] = {  64, 16 },
-
-   /* special case for r8g8: */
-   [0]  = { 64, 32 },
-};
-
-static void
-setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
-{
-   VkFormat format = pCreateInfo->format;
-   enum vk_format_layout layout = vk_format_description(format)->layout;
-   uint32_t layer_size = 0;
-   int ta = image->cpp;
-
-   /* The r8g8 format seems to not play by the normal tiling rules: */
-   if (image->cpp == 2 && vk_format_get_nr_components(format) == 2)
-      ta = 0;
-
-   for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) {
-      struct tu_image_level *slice = &image->levels[level];
-      uint32_t width = u_minify(pCreateInfo->extent.width, level);
-      uint32_t height = u_minify(pCreateInfo->extent.height, level);
-      uint32_t depth = u_minify(pCreateInfo->extent.depth, level);
-      uint32_t aligned_height = height;
-      uint32_t blocks;
-      uint32_t pitchalign;
-
-      if (image->tile_mode && !image_level_linear(image, level)) {
-         /* tiled levels of 3D textures are rounded up to PoT dimensions: */
-         if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
-            width = util_next_power_of_two(width);
-            height = aligned_height = util_next_power_of_two(height);
-         }
-         pitchalign = tile_alignment[ta].pitchalign;
-         aligned_height = align(aligned_height, tile_alignment[ta].heightalign);
-      } else {
-         pitchalign = 64;
-      }
-
-      /* The blits used for mem<->gmem work at a granularity of
-       * 32x32, which can cause faults due to over-fetch on the
-       * last level.  The simple solution is to over-allocate a
-       * bit the last level to ensure any over-fetch is harmless.
-       * The pitch is already sufficiently aligned, but height
-       * may not be:
-       */
-      if (level + 1 == pCreateInfo->mipLevels)
-         aligned_height = align(aligned_height, 32);
-
-      if (layout == VK_FORMAT_LAYOUT_ASTC)
-         slice->pitch =
-            util_align_npot(width, pitchalign * vk_format_get_blockwidth(format));
-      else
-         slice->pitch = align(width, pitchalign);
-
-      slice->offset = layer_size;
-      blocks = vk_format_get_block_count(format, slice->pitch, aligned_height);
-
-      /* 1d array and 2d array textures must all have the same layer size
-       * for each miplevel on a6xx. 3d textures can have different layer
-       * sizes for high levels, but the hw auto-sizer is buggy (or at least
-       * different than what this code does), so as soon as the layer size
-       * range gets into range, we stop reducing it.
-       */
-      if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
-         if (level < 1 || image->levels[level - 1].size > 0xf000) {
-            slice->size = align(blocks * image->cpp, 4096);
-         } else {
-            slice->size = image->levels[level - 1].size;
-         }
-      } else {
-         slice->size = blocks * image->cpp;
-      }
-
-      layer_size += slice->size * depth;
-   }
-
-   image->layer_size = align(layer_size, 4096);
+      return image->layout.tile_mode; /* not a linear level: use the image-wide
+                                       * mode; a nonzero layout.ubwc_size is
+                                       * what flags the image as UBWC above */
 }
 
 VkResult
 tu_image_create(VkDevice _device,
-                const struct tu_image_create_info *create_info,
+                const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *alloc,
-                VkImage *pImage)
+                VkImage *pImage,
+                uint64_t modifier)
 {
    TU_FROM_HANDLE(tu_device, device, _device);
-   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
    struct tu_image *image = NULL;
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
 
@@ -177,7 +83,6 @@ tu_image_create(VkDevice _device,
    image->level_count = pCreateInfo->mipLevels;
    image->layer_count = pCreateInfo->arrayLayers;
    image->samples = pCreateInfo->samples;
-   image->cpp = vk_format_get_blocksize(image->vk_format) * image->samples;
 
    image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
    if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
@@ -194,22 +99,54 @@ tu_image_create(VkDevice _device,
       vk_find_struct_const(pCreateInfo->pNext,
                            EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL;
 
-   image->tile_mode = TILE6_3;
+   image->layout.tile_mode = TILE6_3;
+   bool ubwc_enabled = true;
 
+   /* Fall back to linear, and drop UBWC with it, when the API asks for
+    * linear tiling, when the modifier is DRM_FORMAT_MOD_LINEAR, or when the
+    * format is compressed.
+    */
    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ||
-       /* compressed textures can't use tiling? */
-       vk_format_is_compressed(image->vk_format) ||
-       /* scanout needs to be linear (what about tiling modifiers?) */
-       create_info->scanout ||
-       /* image_to_image copy doesn't deal with tiling+swap */
-       tu6_get_native_format(image->vk_format)->swap ||
-       /* r8g8 formats are tiled different and could break image_to_image copy */
-       (image->cpp == 2 && vk_format_get_nr_components(image->vk_format) == 2))
-      image->tile_mode = TILE6_LINEAR;
-
-   setup_slices(image, pCreateInfo);
-
-   image->size = image->layer_size * pCreateInfo->arrayLayers;
+       modifier == DRM_FORMAT_MOD_LINEAR ||
+       vk_format_is_compressed(image->vk_format)) {
+      image->layout.tile_mode = TILE6_LINEAR;
+      ubwc_enabled = false;
+   }
+
+   /* using UBWC with D24S8 breaks the "stencil read" copy path (why?)
+    * (causes any deqp tests that need to check stencil to fail)
+    * disable UBWC for this format until we properly support copy aspect masks
+    */
+   if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
+      ubwc_enabled = false;
+
+   /* UBWC can't be used with E5B9G9R9 */
+   if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+      ubwc_enabled = false;
+
+   if (image->extent.depth > 1) {
+      tu_finishme("UBWC with 3D textures");
+      ubwc_enabled = false;
+   }
+
+   uint32_t ubwc_blockwidth, ubwc_blockheight; /* NOTE(review): filled in
+      * before fdl6_layout() runs below -- confirm image->layout.cpp is
+      * already valid at this point */
+   fdl6_get_ubwc_blockwidth(&image->layout,
+                            &ubwc_blockwidth, &ubwc_blockheight);
+   if (!ubwc_blockwidth) {
+      tu_finishme("UBWC for cpp=%d", image->layout.cpp);
+      ubwc_enabled = false;
+   }
+
+   /* expect UBWC enabled if we asked for it
+    *
+    * NOTE(review): every check above clears ubwc_enabled without looking at
+    * the modifier, so a DRM_FORMAT_MOD_QCOM_COMPRESSED request for e.g.
+    * D24_UNORM_S8_UINT trips this assert; with asserts compiled out the
+    * image is laid out without UBWC instead.
+    */
+   assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
+
+   fdl6_layout(&image->layout, vk_format_to_pipe_format(image->vk_format),
+               image->samples,
+               pCreateInfo->extent.width,
+               pCreateInfo->extent.height,
+               pCreateInfo->extent.depth,
+               pCreateInfo->mipLevels,
+               pCreateInfo->arrayLayers,
+               pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
+               ubwc_enabled);
+
    *pImage = tu_image_to_handle(image);
 
    return VK_SUCCESS;
@@ -218,7 +155,7 @@ tu_image_create(VkDevice _device,
 static enum a6xx_tex_fetchsize
 tu6_fetchsize(VkFormat format)
 {
-   if (vk_format_description(format)->layout == VK_FORMAT_LAYOUT_ASTC)
+   if (vk_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
       return TFETCH6_16_BYTE;
 
    switch (vk_format_get_blocksize(format) / vk_format_get_blockwidth(format)) {
@@ -233,7 +170,9 @@ tu6_fetchsize(VkFormat format)
 }
 
 static uint32_t
-tu6_texswiz(const VkComponentMapping *comps, const unsigned char *fmt_swiz)
+tu6_texswiz(const VkComponentMapping *comps,
+            VkFormat format,
+            VkImageAspectFlagBits aspect_mask)
 {
    unsigned char swiz[4] = {comps->r, comps->g, comps->b, comps->a};
    unsigned char vk_swizzle[] = {
@@ -244,13 +183,18 @@ tu6_texswiz(const VkComponentMapping *comps, const unsigned char *fmt_swiz)
       [VK_COMPONENT_SWIZZLE_B] = A6XX_TEX_Z,
       [VK_COMPONENT_SWIZZLE_A] = A6XX_TEX_W,
    };
+   const unsigned char *fmt_swiz = vk_format_description(format)->swizzle;
+
    for (unsigned i = 0; i < 4; i++) {
       swiz[i] = (swiz[i] == VK_COMPONENT_SWIZZLE_IDENTITY) ? i : vk_swizzle[swiz[i]];
       /* if format has 0/1 in channel, use that (needed for bc1_rgb) */
       if (swiz[i] < 4) {
+         if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT &&
+             format == VK_FORMAT_D24_UNORM_S8_UINT)
+            swiz[i] = A6XX_TEX_Y;
          switch (fmt_swiz[swiz[i]]) {
-         case VK_SWIZZLE_0: swiz[i] = A6XX_TEX_ZERO; break;
-         case VK_SWIZZLE_1: swiz[i] = A6XX_TEX_ONE;  break;
+         case PIPE_SWIZZLE_0: swiz[i] = A6XX_TEX_ZERO; break;
+         case PIPE_SWIZZLE_1: swiz[i] = A6XX_TEX_ONE;  break;
          }
       }
    }
@@ -307,12 +251,6 @@ tu_image_view_init(struct tu_image_view *iview,
    iview->vk_format = pCreateInfo->format;
    iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
 
-   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
-      iview->vk_format = vk_format_stencil_only(iview->vk_format);
-   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
-      iview->vk_format = vk_format_depth_only(iview->vk_format);
-   }
-
    // should we minify?
    iview->extent = image->extent;
 
@@ -324,39 +262,56 @@ tu_image_view_init(struct tu_image_view *iview,
    memset(iview->descriptor, 0, sizeof(iview->descriptor));
 
    const struct tu_native_format *fmt = tu6_get_native_format(iview->vk_format);
-   struct tu_image_level *slice0 = &image->levels[iview->base_mip];
-   uint64_t base_addr = image->bo->iova + iview->base_layer * image->layer_size + slice0->offset;
-   uint32_t pitch = (slice0->pitch / vk_format_get_blockwidth(iview->vk_format)) *
-                        vk_format_get_blocksize(iview->vk_format);
-   enum a6xx_tile_mode tile_mode =
-      image_level_linear(image, iview->base_mip) ? TILE6_LINEAR : image->tile_mode;
+   uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer);
+   uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer);
+
+   uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format);
+   enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip);
+   uint32_t width = u_minify(image->extent.width, iview->base_mip);
+   uint32_t height = u_minify(image->extent.height, iview->base_mip);
+
+   unsigned fmt_tex = fmt->tex;
+   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT &&
+       iview->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
+      fmt_tex = TFMT6_S8Z24_UINT;
 
    iview->descriptor[0] =
       A6XX_TEX_CONST_0_TILE_MODE(tile_mode) |
       COND(vk_format_is_srgb(iview->vk_format), A6XX_TEX_CONST_0_SRGB) |
-      A6XX_TEX_CONST_0_FMT(fmt->tex) |
-      A6XX_TEX_CONST_0_SAMPLES(0) |
-      A6XX_TEX_CONST_0_SWAP(image->tile_mode ? WZYX : fmt->swap) |
-      tu6_texswiz(&pCreateInfo->components, vk_format_description(iview->vk_format)->swizzle) |
+      A6XX_TEX_CONST_0_FMT(fmt_tex) |
+      A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) |
+      A6XX_TEX_CONST_0_SWAP(image->layout.tile_mode ? WZYX : fmt->swap) |
+      tu6_texswiz(&pCreateInfo->components, iview->vk_format, iview->aspect_mask) |
       A6XX_TEX_CONST_0_MIPLVLS(iview->level_count - 1);
-   iview->descriptor[1] =
-      A6XX_TEX_CONST_1_WIDTH(u_minify(image->extent.width, iview->base_mip)) |
-      A6XX_TEX_CONST_1_HEIGHT(u_minify(image->extent.height, iview->base_mip));
+   iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
    iview->descriptor[2] =
       A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(iview->vk_format)) |
       A6XX_TEX_CONST_2_PITCH(pitch) |
       A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType));
-   iview->descriptor[3] = 0;
+   iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(tu_layer_size(image, iview->base_mip));
    iview->descriptor[4] = base_addr;
    iview->descriptor[5] = base_addr >> 32;
 
+   if (image->layout.ubwc_size) {
+      uint32_t block_width, block_height;
+      fdl6_get_ubwc_blockwidth(&image->layout,
+                               &block_width, &block_height);
+
+      iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
+      iview->descriptor[7] = ubwc_addr;
+      iview->descriptor[8] = ubwc_addr >> 32;
+      iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(tu_image_ubwc_size(image, iview->base_mip) >> 2);
+      iview->descriptor[10] |=
+         A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(tu_image_ubwc_pitch(image, iview->base_mip)) |
+         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) |
+         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height)));
+   }
+
    if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D) {
-      iview->descriptor[3] |= A6XX_TEX_CONST_3_ARRAY_PITCH(image->layer_size);
       iview->descriptor[5] |= A6XX_TEX_CONST_5_DEPTH(iview->layer_count);
    } else {
       iview->descriptor[3] |=
-         A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size) |
-         A6XX_TEX_CONST_3_ARRAY_PITCH(slice0->size);
+         A6XX_TEX_CONST_3_MIN_LAYERSZ(image->layout.slices[image->level_count - 1].size0);
       iview->descriptor[5] |=
          A6XX_TEX_CONST_5_DEPTH(u_minify(image->extent.depth, iview->base_mip));
    }
@@ -393,14 +348,17 @@ tu_CreateImage(VkDevice device,
 
    const struct wsi_image_create_info *wsi_info =
       vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
-   bool scanout = wsi_info && wsi_info->scanout;
-
-   return tu_image_create(device,
-                          &(struct tu_image_create_info) {
-                             .vk_info = pCreateInfo,
-                             .scanout = scanout,
-                          },
-                          pAllocator, pImage);
+   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+
+   if (wsi_info) {
+      modifier = DRM_FORMAT_MOD_LINEAR;
+      for (unsigned i = 0; i < wsi_info->modifier_count; i++) {
+         if (wsi_info->modifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
+            modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
+      }
+   }
+
+   return tu_image_create(device, pCreateInfo, pAllocator, pImage, modifier);
 }
 
 void
@@ -428,16 +386,23 @@ tu_GetImageSubresourceLayout(VkDevice _device,
 {
    TU_FROM_HANDLE(tu_image, image, _image);
 
-   const uint32_t layer_offset = image->layer_size * pSubresource->arrayLayer;
-   const struct tu_image_level *level =
-      image->levels + pSubresource->mipLevel;
+   const struct fdl_slice *slice = image->layout.slices + pSubresource->mipLevel;
 
-   pLayout->offset = layer_offset + level->offset;
-   pLayout->size = level->size;
+   pLayout->offset = fdl_surface_offset(&image->layout,
+                                        pSubresource->mipLevel,
+                                        pSubresource->arrayLayer);
+   pLayout->size = slice->size0;
    pLayout->rowPitch =
-      level->pitch * vk_format_get_blocksize(image->vk_format);
-   pLayout->arrayPitch = image->layer_size;
-   pLayout->depthPitch = level->size;
+      slice->pitch * vk_format_get_blocksize(image->vk_format);
+   pLayout->arrayPitch = image->layout.layer_size;
+   pLayout->depthPitch = slice->size0;
+
+   if (image->layout.ubwc_size) {
+      /* UBWC starts at offset 0 */
+      pLayout->offset = 0;
+      /* UBWC scanout won't match what the kernel wants if we have levels/layers */
+      assert(image->level_count == 1 && image->layer_count == 1);
+   }
 }
 
 VkResult