tu: Move UBWC layout into fdl6_layout() and use that function.
author Eric Anholt <eric@anholt.net>
Tue, 26 Nov 2019 20:29:19 +0000 (12:29 -0800)
committer Eric Anholt <eric@anholt.net>
Wed, 11 Dec 2019 04:24:18 +0000 (04:24 +0000)
This gets us shared non-UBWC layout code between gallium and turnip.
Until I fix up the rest of gallium to handle UBWC mipmapping, we do the
single-level UBWC setup in gallium as a fixup after layout.

Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/fdl/fd6_layout.c
src/freedreno/fdl/freedreno_layout.h
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_image.c
src/freedreno/vulkan/tu_private.h
src/gallium/drivers/freedreno/a6xx/fd6_resource.c

index 82b3a3e715cd285d53f5c70b3b5fbcc9b4e29071..966a3e4bbc423c1cc11f8ab26e9b4e76e10ff55f 100644 (file)
 
 #include "freedreno_layout.h"
 
-/* indexed by cpp, including msaa 2x and 4x: */
+/* indexed by cpp, including msaa 2x and 4x:
+ * TODO:
+ * cpp=1 UBWC needs testing at larger texture sizes
+ * missing UBWC blockwidth/blockheight for npot+64 cpp
+ * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
+ */
 static const struct {
        unsigned pitchalign;
        unsigned heightalign;
+       uint8_t ubwc_blockwidth;
+       uint8_t ubwc_blockheight;
 } tile_alignment[] = {
-       [1]  = { 128, 32 },
-       [2]  = { 128, 16 },
+       [1]  = { 128, 32, 16, 4 },
+       [2]  = { 128, 16, 16, 4 },
        [3]  = {  64, 32 },
-       [4]  = {  64, 16 },
+       [4]  = {  64, 16, 16, 4 },
        [6]  = {  64, 16 },
-       [8]  = {  64, 16 },
+       [8]  = {  64, 16, 8, 4, },
        [12] = {  64, 16 },
-       [16] = {  64, 16 },
+       [16] = {  64, 16, 4, 4, },
        [24] = {  64, 16 },
-       [32] = {  64, 16 },
+       [32] = {  64, 16, 4, 2 },
        [48] = {  64, 16 },
        [64] = {  64, 16 },
 
        /* special cases for r8g8: */
-       [0]  = {  64, 32 },
+       [0]  = {  64, 32, 16, 4 },
 };
 
+#define RGB_TILE_WIDTH_ALIGNMENT 64
+#define RGB_TILE_HEIGHT_ALIGNMENT 16
+#define UBWC_PLANE_SIZE_ALIGNMENT 4096
+
 /* NOTE: good way to test this is:  (for example)
  *  piglit/bin/texelFetch fs sampler3D 100x100x8
  */
@@ -58,7 +69,7 @@ void
 fdl6_layout(struct fdl_layout *layout,
                enum pipe_format format, uint32_t nr_samples,
                uint32_t width0, uint32_t height0, uint32_t depth0,
-               uint32_t mip_levels, uint32_t array_size, bool is_3d)
+               uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc)
 {
        assert(nr_samples > 0);
        layout->width0 = width0;
@@ -70,7 +81,6 @@ fdl6_layout(struct fdl_layout *layout,
 
        const struct util_format_description *format_desc =
                util_format_description(format);
-       uint32_t level;
        uint32_t depth = depth0;
        /* linear dimensions: */
        uint32_t lwidth = width0;
@@ -100,9 +110,11 @@ fdl6_layout(struct fdl_layout *layout,
        debug_assert(ta < ARRAY_SIZE(tile_alignment));
        debug_assert(tile_alignment[ta].pitchalign);
 
-       for (level = 0; level < mip_levels; level++) {
+       for (uint32_t level = 0; level < mip_levels; level++) {
                struct fdl_slice *slice = &layout->slices[level];
-               uint32_t tile_mode = fdl_tile_mode(layout, level);
+               struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
+               uint32_t tile_mode = (ubwc ?
+                               layout->tile_mode : fdl_tile_mode(layout, level));
                uint32_t width, height;
 
                /* tiled levels of 3D textures are rounded up to PoT dimensions: */
@@ -162,13 +174,30 @@ fdl6_layout(struct fdl_layout *layout,
 
                layout->size += slice->size0 * depth * layers_in_level;
 
-               if (false) {
-                       fprintf(stderr, "%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u, offset=0x%x tiling=%d\n",
-                                       util_format_name(format),
-                                       width, height, depth, layout->cpp,
-                                       level, slice->pitch * layout->cpp,
-                                       slice->size0, layout->size, aligned_height, blocks,
-                                       slice->offset, tile_mode);
+               if (ubwc) {
+                       /* with UBWC every level is aligned to 4K */
+                       layout->size = align(layout->size, 4096);
+
+                       uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
+                       uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
+                       uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width),
+                                       RGB_TILE_WIDTH_ALIGNMENT);
+                       uint32_t meta_height = align(DIV_ROUND_UP(height, block_height),
+                                       RGB_TILE_HEIGHT_ALIGNMENT);
+
+                       /* it looks like mipmaps need alignment to power of two
+                        * TODO: needs testing with large npot textures
+                        * (needed for the first level?)
+                        */
+                       if (mip_levels > 1) {
+                               meta_pitch = util_next_power_of_two(meta_pitch);
+                               meta_height = util_next_power_of_two(meta_height);
+                       }
+
+                       ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
+                       ubwc_slice->pitch = meta_pitch;
+                       ubwc_slice->offset = layout->ubwc_size;
+                       layout->ubwc_size += ubwc_slice->size0;
                }
 
                depth = u_minify(depth, 1);
@@ -182,4 +211,45 @@ fdl6_layout(struct fdl_layout *layout,
                layout->layer_size = align(layout->size, 4096);
                layout->size = layout->layer_size * array_size;
        }
+
+       /* Place the UBWC slices before the uncompressed slices, because the
+        * kernel expects UBWC to be at the start of the buffer.  In the HW, we
+        * get to program the UBWC and non-UBWC offset/strides
+        * independently.
+        */
+       if (ubwc) {
+               for (uint32_t level = 0; level < mip_levels; level++)
+                       layout->slices[level].offset += layout->ubwc_size * array_size;
+               layout->size += layout->ubwc_size * array_size;
+       }
+
+       if (false) {
+               for (uint32_t level = 0; level < mip_levels; level++) {
+                       struct fdl_slice *slice = &layout->slices[level];
+                       struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
+                       uint32_t tile_mode = (ubwc ?
+                                       layout->tile_mode : fdl_tile_mode(layout, level));
+
+                       fprintf(stderr, "%s: %ux%ux%u@%ux%u:\t%2u: stride=%4u, size=%6u,%6u, aligned_height=%3u, offset=0x%x,0x%x tiling=%d\n",
+                                       util_format_name(format),
+                                       u_minify(layout->width0, level),
+                                       u_minify(layout->height0, level),
+                                       u_minify(layout->depth0, level),
+                                       layout->cpp, nr_samples,
+                                       level,
+                                       slice->pitch * layout->cpp,
+                                       slice->size0, ubwc_slice->size0,
+                                       slice->size0 / (slice->pitch * layout->cpp),
+                                       slice->offset, ubwc_slice->offset,
+                                       tile_mode);
+               }
+       }
+}
+
+void
+fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
+               uint32_t *blockwidth, uint32_t *blockheight)
+{
+       *blockwidth = tile_alignment[layout->cpp].ubwc_blockwidth;
+       *blockheight = tile_alignment[layout->cpp].ubwc_blockheight;
 }
index be73a24ccc56988a67a71938d6d61f0f642fc12a..54f7d3720d39a890a237179ea56609e20667fe16 100644 (file)
@@ -174,6 +174,10 @@ void
 fdl6_layout(struct fdl_layout *layout,
                enum pipe_format format, uint32_t nr_samples,
                uint32_t width0, uint32_t height0, uint32_t depth0,
-               uint32_t mip_levels, uint32_t array_size, bool is_3d);
+               uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc);
+
+void
+fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
+               uint32_t *blockwidth, uint32_t *blockheight);
 
 #endif /* FREEDRENO_LAYOUT_H_ */
index 4e9841471d1f4f1a43680e12656398e65eca10c3..dc998ea7d59e96c7585e2937cb75ec95cd6b9ad6 100644 (file)
@@ -1563,7 +1563,7 @@ tu_GetImageMemoryRequirements(VkDevice _device,
    TU_FROM_HANDLE(tu_image, image, _image);
 
    pMemoryRequirements->memoryTypeBits = 1;
-   pMemoryRequirements->size = image->size;
+   pMemoryRequirements->size = image->layout.size;
    pMemoryRequirements->alignment = image->alignment;
 }
 
index 307ae2cd18056e4d265571f3ee706202f9e1e42e..853d8ddf6be1a31a5437a627fdc05d7da53dc90b 100644 (file)
@@ -50,147 +50,6 @@ tu6_get_image_tile_mode(struct tu_image *image, int level)
       return image->layout.tile_mode;
 }
 
-/* indexed by cpp, including msaa 2x and 4x: */
-static const struct {
-   uint8_t pitchalign;
-   uint8_t heightalign;
-   uint8_t ubwc_blockwidth;
-   uint8_t ubwc_blockheight;
-} tile_alignment[] = {
-/* TODO:
- * cpp=1 UBWC needs testing at larger texture sizes
- * missing UBWC blockwidth/blockheight for npot+64 cpp
- * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
- */
-   [1]  = { 128, 32, 16, 4 },
-   [2]  = { 128, 16, 16, 4 },
-   [3]  = {  64, 32 },
-   [4]  = {  64, 16, 16, 4 },
-   [6]  = {  64, 16 },
-   [8]  = {  64, 16, 8, 4, },
-   [12] = {  64, 16 },
-   [16] = {  64, 16, 4, 4, },
-   [24] = {  64, 16 },
-   [32] = {  64, 16, 4, 2 },
-   [48] = {  64, 16 },
-   [64] = {  64, 16 },
-   /* special case for r8g8: */
-   [0]  = { 64, 32, 16, 4 },
-};
-
-static void
-setup_slices(struct tu_image *image,
-             const VkImageCreateInfo *pCreateInfo,
-             bool ubwc_enabled)
-{
-#define RGB_TILE_WIDTH_ALIGNMENT 64
-#define RGB_TILE_HEIGHT_ALIGNMENT 16
-#define UBWC_PLANE_SIZE_ALIGNMENT 4096
-   VkFormat format = pCreateInfo->format;
-   enum util_format_layout layout = vk_format_description(format)->layout;
-   uint32_t layer_size = 0;
-   uint32_t ubwc_size = 0;
-   int ta = image->layout.cpp;
-
-   if (image->type != VK_IMAGE_TYPE_3D)
-      image->layout.layer_first = true;
-
-   /* The r8g8 format seems to not play by the normal tiling rules: */
-   if (image->layout.cpp == 2 && vk_format_get_nr_components(format) == 2)
-      ta = 0;
-
-   for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) {
-      struct fdl_slice *slice = &image->layout.slices[level];
-      struct fdl_slice *ubwc_slice = &image->layout.ubwc_slices[level];
-      uint32_t width = u_minify(pCreateInfo->extent.width, level);
-      uint32_t height = u_minify(pCreateInfo->extent.height, level);
-      uint32_t depth = u_minify(pCreateInfo->extent.depth, level);
-      uint32_t aligned_height = height;
-      uint32_t blocks;
-      uint32_t pitchalign;
-
-      if (image->layout.tile_mode &&
-          !image_level_linear(image, level, ubwc_enabled)) {
-         /* tiled levels of 3D textures are rounded up to PoT dimensions: */
-         if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
-            width = util_next_power_of_two(width);
-            height = aligned_height = util_next_power_of_two(height);
-         }
-         pitchalign = tile_alignment[ta].pitchalign;
-         aligned_height = align(aligned_height, tile_alignment[ta].heightalign);
-      } else {
-         pitchalign = 64;
-      }
-
-      /* The blits used for mem<->gmem work at a granularity of
-       * 32x32, which can cause faults due to over-fetch on the
-       * last level.  The simple solution is to over-allocate a
-       * bit the last level to ensure any over-fetch is harmless.
-       * The pitch is already sufficiently aligned, but height
-       * may not be:
-       */
-      if (level + 1 == pCreateInfo->mipLevels)
-         aligned_height = align(aligned_height, 32);
-
-      if (layout == UTIL_FORMAT_LAYOUT_ASTC)
-         slice->pitch =
-            util_align_npot(width, pitchalign * vk_format_get_blockwidth(format));
-      else
-         slice->pitch = align(width, pitchalign);
-
-      slice->offset = layer_size;
-      blocks = vk_format_get_block_count(format, slice->pitch, aligned_height);
-
-      /* 1d array and 2d array textures must all have the same layer size
-       * for each miplevel on a6xx. 3d textures can have different layer
-       * sizes for high levels, but the hw auto-sizer is buggy (or at least
-       * different than what this code does), so as soon as the layer size
-       * range gets into range, we stop reducing it.
-       */
-      if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
-         if (level < 1 || image->layout.slices[level - 1].size0 > 0xf000) {
-            slice->size0 = align(blocks * image->layout.cpp, 4096);
-         } else {
-            slice->size0 = image->layout.slices[level - 1].size0;
-         }
-      } else {
-         slice->size0 = blocks * image->layout.cpp;
-      }
-
-      layer_size += slice->size0 * depth;
-      if (ubwc_enabled) {
-         /* with UBWC every level is aligned to 4K */
-         layer_size = align(layer_size, 4096);
-
-         uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
-         uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
-         uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width), RGB_TILE_WIDTH_ALIGNMENT);
-         uint32_t meta_height = align(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
-
-         /* it looks like mipmaps need alignment to power of two
-          * TODO: needs testing with large npot textures
-          * (needed for the first level?)
-          */
-         if (pCreateInfo->mipLevels > 1) {
-            meta_pitch = util_next_power_of_two(meta_pitch);
-            meta_height = util_next_power_of_two(meta_height);
-         }
-
-         ubwc_slice->pitch = meta_pitch;
-         ubwc_slice->offset = ubwc_size;
-         ubwc_size += align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
-      }
-   }
-   image->layout.layer_size = align(layer_size, 4096);
-
-   VkDeviceSize offset = ubwc_size * pCreateInfo->arrayLayers;
-   for (unsigned level = 0; level < pCreateInfo->mipLevels; level++)
-      image->layout.slices[level].offset += offset;
-
-   image->size = offset + image->layout.layer_size * pCreateInfo->arrayLayers;
-   image->layout.ubwc_size = ubwc_size;
-}
-
 VkResult
 tu_image_create(VkDevice _device,
                 const VkImageCreateInfo *pCreateInfo,
@@ -224,7 +83,6 @@ tu_image_create(VkDevice _device,
    image->level_count = pCreateInfo->mipLevels;
    image->layer_count = pCreateInfo->arrayLayers;
    image->samples = pCreateInfo->samples;
-   image->layout.cpp = vk_format_get_blocksize(image->vk_format) * image->samples;
 
    image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
    if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
@@ -268,7 +126,10 @@ tu_image_create(VkDevice _device,
       ubwc_enabled = false;
    }
 
-   if (!tile_alignment[image->layout.cpp].ubwc_blockwidth) {
+   uint32_t ubwc_blockwidth, ubwc_blockheight;
+   fdl6_get_ubwc_blockwidth(&image->layout,
+                            &ubwc_blockwidth, &ubwc_blockheight);
+   if (!ubwc_blockwidth) {
       tu_finishme("UBWC for cpp=%d", image->layout.cpp);
       ubwc_enabled = false;
    }
@@ -276,7 +137,15 @@ tu_image_create(VkDevice _device,
    /* expect UBWC enabled if we asked for it */
    assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
 
-   setup_slices(image, pCreateInfo, ubwc_enabled);
+   fdl6_layout(&image->layout, vk_format_to_pipe_format(image->vk_format),
+               image->samples,
+               pCreateInfo->extent.width,
+               pCreateInfo->extent.height,
+               pCreateInfo->extent.depth,
+               pCreateInfo->mipLevels,
+               pCreateInfo->arrayLayers,
+               pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
+               ubwc_enabled);
 
    *pImage = tu_image_to_handle(image);
 
@@ -418,8 +287,9 @@ tu_image_view_init(struct tu_image_view *iview,
    iview->descriptor[5] = base_addr >> 32;
 
    if (image->layout.ubwc_size) {
-      uint32_t block_width = tile_alignment[image->layout.cpp].ubwc_blockwidth;
-      uint32_t block_height = tile_alignment[image->layout.cpp].ubwc_blockheight;
+      uint32_t block_width, block_height;
+      fdl6_get_ubwc_blockwidth(&image->layout,
+                               &block_width, &block_height);
 
       iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
       iview->descriptor[7] = ubwc_addr;
index a555dbee29b3ef28082910e386f836f291c0ec65..1edc4a24d7774df73792f4ce97119e043c48a5ac 100644 (file)
@@ -1293,7 +1293,6 @@ struct tu_image
    VkSampleCountFlagBits samples;
 
 
-   VkDeviceSize size;
    uint32_t alignment;
 
    struct fdl_layout layout;
index 8801982a7a74c44562d891e9a309a86a4a493044..942a99b41f1e328f220ad4ee94a06723844ba1ad 100644 (file)
@@ -200,7 +200,7 @@ fd6_setup_slices(struct fd_resource *rsc)
        fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
                        prsc->width0, prsc->height0, prsc->depth0,
                        prsc->last_level + 1, prsc->array_size,
-                       prsc->target == PIPE_TEXTURE_3D);
+                       prsc->target == PIPE_TEXTURE_3D, false);
 
        /* The caller does this bit of layout setup again. */
        if (rsc->layout.layer_first)