turnip: implement UBWC
author Jonathan Marek <jonathan@marek.ca>
Mon, 18 Nov 2019 21:46:39 +0000 (16:46 -0500)
committer Jonathan Marek <jonathan@marek.ca>
Thu, 21 Nov 2019 22:21:57 +0000 (22:21 +0000)
This enables UBWC for everything except 3D textures.

It breaks many image_to_image copies, but those aren't important and can be
worked around later (an image_to_image copy needs to be done in two steps:
decode from the source format, then encode to the destination format).

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/freedreno/vulkan/tu_android.c
src/freedreno/vulkan/tu_blit.c
src/freedreno/vulkan/tu_blit.h
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_formats.c
src/freedreno/vulkan/tu_image.c
src/freedreno/vulkan/tu_meta_copy.c
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_wsi.c

index 1ebc9e726e87113ec8926b327351923444495892..75fdb904601cb46c1f6a6b820c5f59ef2401310c 100644 (file)
@@ -31,6 +31,8 @@
 #include <vulkan/vk_android_native_buffer.h>
 #include <vulkan/vk_icd.h>
 
+#include "drm-uapi/drm_fourcc.h"
+
 static int
 tu_hal_open(const struct hw_module_t *mod,
             const char *id,
@@ -120,12 +122,8 @@ tu_image_from_gralloc(VkDevice device_h,
    struct tu_bo *bo = NULL;
    VkResult result;
 
-   result = tu_image_create(
-      device_h,
-      &(struct tu_image_create_info) {
-         .vk_info = base_info, .scanout = true, .no_metadata_planes = true },
-      alloc, &image_h);
-
+   result = tu_image_create(device_h, base_info, alloc, &image_h,
+                            DRM_FORMAT_MOD_LINEAR);
    if (result != VK_SUCCESS)
       return result;
 
index 6a43a9fbe9bcec34809940033425076b89ae32d3..22918d62995ea9cec2cd692aba0f96f91c852124 100644 (file)
@@ -48,7 +48,7 @@ blit_copy_format(VkFormat format)
    switch (vk_format_get_blocksizebits(format)) {
    case 8:  return VK_FORMAT_R8_UINT;
    case 16: return VK_FORMAT_R16_UINT;
-   case 32: return VK_FORMAT_R8G8B8A8_UINT;
+   case 32: return VK_FORMAT_R32_UINT;
    case 64: return VK_FORMAT_R32G32_UINT;
    case 96: return VK_FORMAT_R32G32B32_UINT;
    case 128:return VK_FORMAT_R32G32B32A32_UINT;
@@ -74,7 +74,8 @@ blit_image_info(const struct tu_blit_surf *img, bool src, bool stencil_read)
    return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(rb) |
           A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
           A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(swap) |
-          COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB);
+          COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
+          COND(img->ubwc_size, A6XX_SP_PS_2D_SRC_INFO_FLAGS);
 }
 
 static void
@@ -82,7 +83,7 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
 {
    struct tu_cs *cs = &cmdbuf->cs;
 
-   tu_cs_reserve_space(cmdbuf->device, cs, 52);
+   tu_cs_reserve_space(cmdbuf->device, cs, 66);
 
    enum a6xx_color_fmt fmt = tu6_get_native_format(blt->dst.fmt)->rb;
    if (fmt == RB6_Z24_UNORM_S8_UINT)
@@ -135,6 +136,16 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
       tu_cs_emit(cs, 0x00000000);
       tu_cs_emit(cs, 0x00000000);
       tu_cs_emit(cs, 0x00000000);
+
+      if (blt->src.ubwc_size) {
+         tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
+         tu_cs_emit_qw(cs, blt->src.ubwc_va);
+         tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) |
+            A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2));
+         tu_cs_emit(cs, 0x00000000);
+         tu_cs_emit(cs, 0x00000000);
+         tu_cs_emit(cs, 0x00000000);
+      }
    }
 
    /*
@@ -150,6 +161,16 @@ emit_blit_step(struct tu_cmd_buffer *cmdbuf, const struct tu_blit *blt)
    tu_cs_emit(cs, 0x00000000);
    tu_cs_emit(cs, 0x00000000);
 
+   if (blt->dst.ubwc_size) {
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
+      tu_cs_emit_qw(cs, blt->dst.ubwc_va);
+      tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) |
+         A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2));
+      tu_cs_emit(cs, 0x00000000);
+      tu_cs_emit(cs, 0x00000000);
+      tu_cs_emit(cs, 0x00000000);
+   }
+
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
    tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
    tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
@@ -196,7 +217,7 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
    switch (blt->type) {
    case TU_BLIT_COPY:
       blt->stencil_read =
-         blt->dst.fmt == VK_FORMAT_R8_UINT &&
+         blt->dst.fmt == VK_FORMAT_R8_UNORM &&
          blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
 
       assert(vk_format_get_blocksize(blt->dst.fmt) ==
@@ -210,6 +231,7 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
          blt->src.pitch /= block_width;
          blt->src.x /= block_width;
          blt->src.y /= block_height;
+         blt->src.fmt = blit_copy_format(blt->src.fmt);
 
          /* for image_to_image copy, width/height is on the src format */
          blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
@@ -223,12 +245,16 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
          blt->dst.pitch /= block_width;
          blt->dst.x /= block_width;
          blt->dst.y /= block_height;
+         blt->dst.fmt = blit_copy_format(blt->dst.fmt);
       }
 
-      blt->src.fmt = blit_copy_format(blt->src.fmt);
-      blt->dst.fmt = blit_copy_format(blt->dst.fmt);
+      if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+         blt->dst.fmt = blit_copy_format(blt->dst.fmt);
+
+      if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+         blt->src.fmt = blit_copy_format(blt->src.fmt);
 
-      /* TODO: does this work correctly with tiling/etc ? */
+      /* TODO: multisample image copy does not work correctly with tiling/UBWC */
       blt->src.x *= blt->src.samples;
       blt->dst.x *= blt->dst.samples;
       blt->src.width *= blt->src.samples;
@@ -304,6 +330,8 @@ void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_blit *blt)
       }
       blt->dst.va += blt->dst.layer_size;
       blt->src.va += blt->src.layer_size;
+      blt->dst.ubwc_va += blt->dst.ubwc_size;
+      blt->src.ubwc_va += blt->src.ubwc_size;
    }
 
    tu_cs_reserve_space(cmdbuf->device, &cmdbuf->cs, 17);
index b01f62c76850dbd2d699a4fe91d83d529fc87049..9897aaeb0a162850c5d02b8104cce18934b4ee1c 100644 (file)
@@ -41,27 +41,37 @@ struct tu_blit_surf {
    uint32_t x, y;
    uint32_t width, height;
    unsigned samples;
+   uint64_t ubwc_va;
+   uint32_t ubwc_pitch;
+   uint32_t ubwc_size;
 };
 
 static inline struct tu_blit_surf
-tu_blit_surf(struct tu_image *img,
+tu_blit_surf(struct tu_image *image,
              VkImageSubresourceLayers subres,
              const VkOffset3D *offsets)
 {
+   unsigned layer = subres.baseArrayLayer;
+   if (image->type == VK_IMAGE_TYPE_3D) {
+      assert(layer == 0);
+      layer = MIN2(offsets[0].z, offsets[1].z);
+   }
+
    return (struct tu_blit_surf) {
-      .fmt = img->vk_format,
-      .tile_mode = tu6_get_image_tile_mode(img, subres.mipLevel),
-      .tiled = img->tile_mode != TILE6_LINEAR,
-      .va = img->bo->iova + img->bo_offset + img->levels[subres.mipLevel].offset +
-            subres.baseArrayLayer * img->layer_size +
-            MIN2(offsets[0].z, offsets[1].z) * img->levels[subres.mipLevel].size,
-      .pitch = img->levels[subres.mipLevel].pitch * vk_format_get_blocksize(img->vk_format) * img->samples,
-      .layer_size = img->type == VK_IMAGE_TYPE_3D ? img->levels[subres.mipLevel].size : img->layer_size,
+      .fmt = image->vk_format,
+      .tile_mode = tu6_get_image_tile_mode(image, subres.mipLevel),
+      .tiled = image->tile_mode != TILE6_LINEAR,
+      .va = tu_image_base(image, subres.mipLevel, layer),
+      .pitch = tu_image_stride(image, subres.mipLevel),
+      .layer_size = tu_layer_size(image, subres.mipLevel),
       .x = MIN2(offsets[0].x, offsets[1].x),
       .y = MIN2(offsets[0].y, offsets[1].y),
       .width = abs(offsets[1].x - offsets[0].x),
       .height = abs(offsets[1].y - offsets[0].y),
-      .samples = img->samples,
+      .samples = image->samples,
+      .ubwc_va = tu_image_ubwc_base(image, subres.mipLevel, layer),
+      .ubwc_pitch = tu_image_ubwc_pitch(image, subres.mipLevel),
+      .ubwc_size = tu_image_ubwc_size(image, subres.mipLevel),
    };
 }
 
index f7081e88fc26ec518658cb9ee040897c2d0a8e58..29e2d872b6b4d0082173aafde9347f63bd0a8957 100644 (file)
@@ -388,6 +388,22 @@ tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    }
 }
 
+static void
+tu6_emit_flag_buffer(struct tu_cs *cs, const struct tu_image_view *iview)
+{
+   uint64_t va = tu_image_ubwc_base(iview->image, iview->base_mip, iview->base_layer);
+   uint32_t pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip);
+   uint32_t size = tu_image_ubwc_size(iview->image, iview->base_mip);
+   if (iview->image->ubwc_size) {
+      tu_cs_emit_qw(cs, va);
+      tu_cs_emit(cs, A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_PITCH(pitch) |
+                     A6XX_RB_DEPTH_FLAG_BUFFER_PITCH_ARRAY_PITCH(size >> 2));
+   } else {
+      tu_cs_emit_qw(cs, 0);
+      tu_cs_emit(cs, 0);
+   }
+}
+
 static void
 tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 {
@@ -430,22 +446,21 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    }
 
    const struct tu_image_view *iview = fb->attachments[a].attachment;
-   const struct tu_image_level *slice = &iview->image->levels[iview->base_mip];
    enum a6xx_depth_format fmt = tu6_pipe2depth(iview->vk_format);
 
-   uint32_t offset = slice->offset + slice->size * iview->base_layer;
-   uint32_t stride = slice->pitch * iview->image->cpp;
-
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
    tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(stride));
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(slice->size));
-   tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset + offset);
+   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
+   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layer_size));
+   tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
    tu_cs_emit(cs, tiling->gmem_offsets[gmem_index]);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
    tu_cs_emit(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
 
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
+   tu6_emit_flag_buffer(cs, iview);
+
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5);
    tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
    tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
@@ -475,12 +490,8 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
          continue;
 
       const struct tu_image_view *iview = fb->attachments[a].attachment;
-      const struct tu_image_level *slice =
-         &iview->image->levels[iview->base_mip];
       const enum a6xx_tile_mode tile_mode =
          tu6_get_image_tile_mode(iview->image, iview->base_mip);
-      uint32_t stride = 0;
-      uint32_t offset = 0;
 
       mrt_comp[i] = 0xf;
 
@@ -491,33 +502,21 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
          tu6_get_native_format(iview->vk_format);
       assert(format && format->rb >= 0);
 
-      offset = slice->offset + slice->size * iview->base_layer;
-      stride = slice->pitch * iview->image->cpp;
-
       tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
       tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
                         A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
                         A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
-      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride));
-      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size));
-      tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset +
-                           offset); /* BASE_LO/HI */
+      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
+      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layer_size));
+      tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
       tu_cs_emit(
          cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */
 
       tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1);
       tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb));
 
-#if 0
-      /* when we support UBWC, these would be the system memory
-       * addr/pitch/etc:
-       */
-      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4);
-      tu_cs_emit(cs, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
-      tu_cs_emit(cs, 0x00000000);    /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
-      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0));
-      tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
-#endif
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
+      tu6_emit_flag_buffer(cs, iview);
    }
 
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1);
@@ -633,11 +632,6 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
                    uint32_t gmem_offset,
                    uint32_t blit_info)
 {
-   const struct tu_image_level *slice =
-      &iview->image->levels[iview->base_mip];
-   const uint32_t offset = slice->offset + slice->size * iview->base_layer;
-   const uint32_t stride = slice->pitch * iview->image->cpp;
-
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
    tu_cs_emit(cs, blit_info);
 
@@ -651,11 +645,16 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
    tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) |
                      A6XX_RB_BLIT_DST_INFO_SAMPLES(tu_msaa_samples(iview->image->samples)) |
                      A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) |
-                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap));
-   tu_cs_emit_qw(cs,
-                 iview->image->bo->iova + iview->image->bo_offset + offset);
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride));
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size));
+                     A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap) |
+                     COND(iview->image->ubwc_size, A6XX_RB_BLIT_DST_INFO_FLAGS));
+   tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layer_size));
+
+   if (iview->image->ubwc_size) {
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
+      tu6_emit_flag_buffer(cs, iview);
+   }
 
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
    tu_cs_emit(cs, gmem_offset);
index 909591e846ba17b5c82900f8d2d7f8bd14263fa7..120672cf0a969fd15df72ea4ce468f0387643381 100644 (file)
@@ -34,6 +34,7 @@
 #include "util/u_half.h"
 #include "vk_format.h"
 #include "vk_util.h"
+#include "drm-uapi/drm_fourcc.h"
 
 /**
  * Declare a format table.  A format table is an array of tu_native_format.
@@ -784,6 +785,23 @@ tu_GetPhysicalDeviceFormatProperties2(
 
    tu_physical_device_get_format_properties(
       physical_device, format, &pFormatProperties->formatProperties);
+
+   struct wsi_format_modifier_properties_list *list =
+      vk_find_struct(pFormatProperties->pNext, WSI_FORMAT_MODIFIER_PROPERTIES_LIST_MESA);
+   if (list) {
+      VK_OUTARRAY_MAKE(out, list->modifier_properties, &list->modifier_count);
+
+      vk_outarray_append(&out, mod_props) {
+         mod_props->modifier = DRM_FORMAT_MOD_LINEAR;
+         mod_props->modifier_plane_count = 1;
+      }
+
+      /* TODO: any cases where this should be disabled? */
+      vk_outarray_append(&out, mod_props) {
+         mod_props->modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
+         mod_props->modifier_plane_count = 1;
+      }
+   }
 }
 
 static VkResult
index 0b14a28d44d732241c7d1be60e4c3a902e739c93..3d6ca56f1fcfea37f4dc890ff1c280bb5eec6b96 100644 (file)
 #include "util/u_atomic.h"
 #include "vk_format.h"
 #include "vk_util.h"
+#include "drm-uapi/drm_fourcc.h"
 
 static inline bool
-image_level_linear(struct tu_image *image, int level)
+image_level_linear(struct tu_image *image, int level, bool ubwc)
 {
    unsigned w = u_minify(image->extent.width, level);
-   return w < 16;
+   /* all levels are tiled/compressed with UBWC */
+   return ubwc ? false : (w < 16);
 }
 
 enum a6xx_tile_mode
 tu6_get_image_tile_mode(struct tu_image *image, int level)
 {
-   if (image_level_linear(image, level))
+   if (image_level_linear(image, level, !!image->ubwc_size))
       return TILE6_LINEAR;
    else
       return image->tile_mode;
@@ -50,32 +52,44 @@ tu6_get_image_tile_mode(struct tu_image *image, int level)
 
 /* indexed by cpp, including msaa 2x and 4x: */
 static const struct {
-   unsigned pitchalign;
-   unsigned heightalign;
+   uint8_t pitchalign;
+   uint8_t heightalign;
+   uint8_t ubwc_blockwidth;
+   uint8_t ubwc_blockheight;
 } tile_alignment[] = {
-   [1]  = { 128, 32 },
-   [2]  = { 128, 16 },
+/* TODO:
+ * cpp=1 UBWC needs testing at larger texture sizes
+ * missing UBWC blockwidth/blockheight for npot+64 cpp
+ * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
+ */
+   [1]  = { 128, 32, 16, 4 },
+   [2]  = { 128, 16, 16, 4 },
    [3]  = {  64, 32 },
-   [4]  = {  64, 16 },
+   [4]  = {  64, 16, 16, 4 },
    [6]  = {  64, 16 },
-   [8]  = {  64, 16 },
+   [8]  = {  64, 16, 8, 4, },
    [12] = {  64, 16 },
-   [16] = {  64, 16 },
+   [16] = {  64, 16, 4, 4, },
    [24] = {  64, 16 },
-   [32] = {  64, 16 },
+   [32] = {  64, 16, 4, 2 },
    [48] = {  64, 16 },
    [64] = {  64, 16 },
-
    /* special case for r8g8: */
-   [0]  = { 64, 32 },
+   [0]  = { 64, 32, 16, 4 },
 };
 
 static void
-setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
+setup_slices(struct tu_image *image,
+             const VkImageCreateInfo *pCreateInfo,
+             bool ubwc_enabled)
 {
+#define RGB_TILE_WIDTH_ALIGNMENT 64
+#define RGB_TILE_HEIGHT_ALIGNMENT 16
+#define UBWC_PLANE_SIZE_ALIGNMENT 4096
    VkFormat format = pCreateInfo->format;
    enum util_format_layout layout = vk_format_description(format)->layout;
    uint32_t layer_size = 0;
+   uint32_t ubwc_size = 0;
    int ta = image->cpp;
 
    /* The r8g8 format seems to not play by the normal tiling rules: */
@@ -84,6 +98,7 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
 
    for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) {
       struct tu_image_level *slice = &image->levels[level];
+      struct tu_image_level *ubwc_slice = &image->ubwc_levels[level];
       uint32_t width = u_minify(pCreateInfo->extent.width, level);
       uint32_t height = u_minify(pCreateInfo->extent.height, level);
       uint32_t depth = u_minify(pCreateInfo->extent.depth, level);
@@ -91,7 +106,7 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
       uint32_t blocks;
       uint32_t pitchalign;
 
-      if (image->tile_mode && !image_level_linear(image, level)) {
+      if (image->tile_mode && !image_level_linear(image, level, ubwc_enabled)) {
          /* tiled levels of 3D textures are rounded up to PoT dimensions: */
          if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
             width = util_next_power_of_two(width);
@@ -139,19 +154,47 @@ setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo)
       }
 
       layer_size += slice->size * depth;
-   }
+      if (ubwc_enabled) {
+         /* with UBWC every level is aligned to 4K */
+         layer_size = align(layer_size, 4096);
+
+         uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
+         uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
+         uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width), RGB_TILE_WIDTH_ALIGNMENT);
+         uint32_t meta_height = align(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
+
+         /* it looks like mipmaps need alignment to power of two
+          * TODO: needs testing with large npot textures
+          * (needed for the first level?)
+          */
+         if (pCreateInfo->mipLevels > 1) {
+            meta_pitch = util_next_power_of_two(meta_pitch);
+            meta_height = util_next_power_of_two(meta_height);
+         }
 
+         ubwc_slice->pitch = meta_pitch;
+         ubwc_slice->offset = ubwc_size;
+         ubwc_size += align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
+      }
+   }
    image->layer_size = align(layer_size, 4096);
+
+   VkDeviceSize offset = ubwc_size * pCreateInfo->arrayLayers;
+   for (unsigned level = 0; level < pCreateInfo->mipLevels; level++)
+      image->levels[level].offset += offset;
+
+   image->size = offset + image->layer_size * pCreateInfo->arrayLayers;
+   image->ubwc_size = ubwc_size;
 }
 
 VkResult
 tu_image_create(VkDevice _device,
-                const struct tu_image_create_info *create_info,
+                const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *alloc,
-                VkImage *pImage)
+                VkImage *pImage,
+                uint64_t modifier)
 {
    TU_FROM_HANDLE(tu_device, device, _device);
-   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
    struct tu_image *image = NULL;
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
 
@@ -195,21 +238,42 @@ tu_image_create(VkDevice _device,
                            EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL;
 
    image->tile_mode = TILE6_3;
+   bool ubwc_enabled = true;
 
+   /* disable tiling when linear is requested and for compressed formats */
    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ||
-       /* compressed textures can't use tiling? */
-       vk_format_is_compressed(image->vk_format) ||
-       /* scanout needs to be linear (what about tiling modifiers?) */
-       create_info->scanout ||
-       /* image_to_image copy doesn't deal with tiling+swap */
-       tu6_get_native_format(image->vk_format)->swap ||
-       /* r8g8 formats are tiled different and could break image_to_image copy */
-       (image->cpp == 2 && vk_format_get_nr_components(image->vk_format) == 2))
+       modifier == DRM_FORMAT_MOD_LINEAR ||
+       vk_format_is_compressed(image->vk_format)) {
       image->tile_mode = TILE6_LINEAR;
+      ubwc_enabled = false;
+   }
+
+   /* using UBWC with D24S8 breaks the "stencil read" copy path (why?)
+    * (causes any deqp tests that need to check stencil to fail)
+    * disable UBWC for this format until we properly support copy aspect masks
+    */
+   if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
+      ubwc_enabled = false;
 
-   setup_slices(image, pCreateInfo);
+   /* UBWC can't be used with E5B9G9R9 */
+   if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+      ubwc_enabled = false;
+
+   if (image->extent.depth > 1) {
+      tu_finishme("UBWC with 3D textures");
+      ubwc_enabled = false;
+   }
+
+   if (!tile_alignment[image->cpp].ubwc_blockwidth) {
+      tu_finishme("UBWC for cpp=%d", image->cpp);
+      ubwc_enabled = false;
+   }
+
+   /* expect UBWC enabled if we asked for it */
+   assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
+
+   setup_slices(image, pCreateInfo, ubwc_enabled);
 
-   image->size = image->layer_size * pCreateInfo->arrayLayers;
    *pImage = tu_image_to_handle(image);
 
    return VK_SUCCESS;
@@ -324,12 +388,13 @@ tu_image_view_init(struct tu_image_view *iview,
    memset(iview->descriptor, 0, sizeof(iview->descriptor));
 
    const struct tu_native_format *fmt = tu6_get_native_format(iview->vk_format);
-   struct tu_image_level *slice0 = &image->levels[iview->base_mip];
-   uint64_t base_addr = image->bo->iova + iview->base_layer * image->layer_size + slice0->offset;
-   uint32_t pitch = (slice0->pitch / vk_format_get_blockwidth(iview->vk_format)) *
-                        vk_format_get_blocksize(iview->vk_format);
-   enum a6xx_tile_mode tile_mode =
-      image_level_linear(image, iview->base_mip) ? TILE6_LINEAR : image->tile_mode;
+   uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer);
+   uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer);
+
+   uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format);
+   enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip);
+   uint32_t width = u_minify(image->extent.width, iview->base_mip);
+   uint32_t height = u_minify(image->extent.height, iview->base_mip);
 
    iview->descriptor[0] =
       A6XX_TEX_CONST_0_TILE_MODE(tile_mode) |
@@ -339,24 +404,34 @@ tu_image_view_init(struct tu_image_view *iview,
       A6XX_TEX_CONST_0_SWAP(image->tile_mode ? WZYX : fmt->swap) |
       tu6_texswiz(&pCreateInfo->components, vk_format_description(iview->vk_format)->swizzle) |
       A6XX_TEX_CONST_0_MIPLVLS(iview->level_count - 1);
-   iview->descriptor[1] =
-      A6XX_TEX_CONST_1_WIDTH(u_minify(image->extent.width, iview->base_mip)) |
-      A6XX_TEX_CONST_1_HEIGHT(u_minify(image->extent.height, iview->base_mip));
+   iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
    iview->descriptor[2] =
       A6XX_TEX_CONST_2_FETCHSIZE(tu6_fetchsize(iview->vk_format)) |
       A6XX_TEX_CONST_2_PITCH(pitch) |
       A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType));
-   iview->descriptor[3] = 0;
+   iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(tu_layer_size(image, iview->base_mip));
    iview->descriptor[4] = base_addr;
    iview->descriptor[5] = base_addr >> 32;
 
+   if (image->ubwc_size) {
+      uint32_t block_width = tile_alignment[image->cpp].ubwc_blockwidth;
+      uint32_t block_height = tile_alignment[image->cpp].ubwc_blockheight;
+
+      iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
+      iview->descriptor[7] = ubwc_addr;
+      iview->descriptor[8] = ubwc_addr >> 32;
+      iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(tu_image_ubwc_size(image, iview->base_mip) >> 2);
+      iview->descriptor[10] |=
+         A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(tu_image_ubwc_pitch(image, iview->base_mip)) |
+         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) |
+         A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height)));
+   }
+
    if (pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D) {
-      iview->descriptor[3] |= A6XX_TEX_CONST_3_ARRAY_PITCH(image->layer_size);
       iview->descriptor[5] |= A6XX_TEX_CONST_5_DEPTH(iview->layer_count);
    } else {
       iview->descriptor[3] |=
-         A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size) |
-         A6XX_TEX_CONST_3_ARRAY_PITCH(slice0->size);
+         A6XX_TEX_CONST_3_MIN_LAYERSZ(image->levels[image->level_count - 1].size);
       iview->descriptor[5] |=
          A6XX_TEX_CONST_5_DEPTH(u_minify(image->extent.depth, iview->base_mip));
    }
@@ -393,14 +468,17 @@ tu_CreateImage(VkDevice device,
 
    const struct wsi_image_create_info *wsi_info =
       vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
-   bool scanout = wsi_info && wsi_info->scanout;
-
-   return tu_image_create(device,
-                          &(struct tu_image_create_info) {
-                             .vk_info = pCreateInfo,
-                             .scanout = scanout,
-                          },
-                          pAllocator, pImage);
+   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+
+   if (wsi_info) {
+      modifier = DRM_FORMAT_MOD_LINEAR;
+      for (unsigned i = 0; i < wsi_info->modifier_count; i++) {
+         if (wsi_info->modifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
+            modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
+      }
+   }
+
+   return tu_image_create(device, pCreateInfo, pAllocator, pImage, modifier);
 }
 
 void
@@ -438,6 +516,13 @@ tu_GetImageSubresourceLayout(VkDevice _device,
       level->pitch * vk_format_get_blocksize(image->vk_format);
    pLayout->arrayPitch = image->layer_size;
    pLayout->depthPitch = level->size;
+
+   if (image->ubwc_size) {
+      /* UBWC starts at offset 0 */
+      pLayout->offset = 0;
+      /* UBWC scanout won't match what the kernel wants if we have levels/layers */
+      assert(image->level_count == 1 && image->layer_count == 1);
+   }
 }
 
 VkResult
index dedfab2559bebe9d7949fe5c2c788021fb32328a..e51b31275f07501f633b433002ecc921ef7b7c1e 100644 (file)
@@ -200,7 +200,7 @@ tu_blit_buffer(struct tu_buffer *buffer,
                const VkBufferImageCopy *info)
 {
    if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
-      format = VK_FORMAT_R8_UINT;
+      format = VK_FORMAT_R8_UNORM;
 
    unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
                         vk_format_get_blocksize(format);
index 958fc169aaa297595455cc4fcd35e4ba060e560d..549d1de8b8711ab7da092f37e1ba272f42146804 100644 (file)
@@ -1271,6 +1271,8 @@ struct tu_image
    struct tu_image_level levels[15];
    unsigned tile_mode;
    unsigned cpp;
+   struct tu_image_level ubwc_levels[15];
+   uint32_t ubwc_size;
 
    unsigned queue_family_mask;
    bool exclusive;
@@ -1307,6 +1309,46 @@ tu_get_levelCount(const struct tu_image *image,
              : range->levelCount;
 }
 
+static inline VkDeviceSize
+tu_layer_size(struct tu_image *image, int level)
+{
+   if (image->type == VK_IMAGE_TYPE_3D)
+      return image->levels[level].size;
+   return image->layer_size;
+}
+
+static inline uint32_t
+tu_image_stride(struct tu_image *image, int level)
+{
+   return image->levels[level].pitch * image->cpp;
+}
+
+static inline uint64_t
+tu_image_base(struct tu_image *image, int level, int layer)
+{
+   return image->bo->iova + image->bo_offset + image->levels[level].offset +
+          layer * tu_layer_size(image, level);
+}
+
+static inline VkDeviceSize
+tu_image_ubwc_size(struct tu_image *image, int level)
+{
+   return image->ubwc_size;
+}
+
+static inline uint32_t
+tu_image_ubwc_pitch(struct tu_image *image, int level)
+{
+   return image->ubwc_levels[level].pitch;
+}
+
+static inline uint64_t
+tu_image_ubwc_base(struct tu_image *image, int level, int layer)
+{
+   return image->bo->iova + image->bo_offset + image->ubwc_levels[level].offset +
+          layer * tu_image_ubwc_size(image, level);
+}
+
 enum a6xx_tile_mode
 tu6_get_image_tile_mode(struct tu_image *image, int level);
 enum a3xx_msaa_samples
@@ -1340,18 +1382,12 @@ struct tu_sampler
    bool needs_border;
 };
 
-struct tu_image_create_info
-{
-   const VkImageCreateInfo *vk_info;
-   bool scanout;
-   bool no_metadata_planes;
-};
-
 VkResult
 tu_image_create(VkDevice _device,
-                const struct tu_image_create_info *info,
+                const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *alloc,
-                VkImage *pImage);
+                VkImage *pImage,
+                uint64_t modifier);
 
 VkResult
 tu_image_from_gralloc(VkDevice device_h,
index 21466108b20e907ff2ed0bb6074ce64b74e2201f..c80e489399f09ed38dc92844cc766bcf9bc659cb 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "vk_util.h"
 #include "wsi_common.h"
+#include "drm-uapi/drm_fourcc.h"
 
 static PFN_vkVoidFunction
 tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
@@ -34,13 +35,38 @@ tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
    return tu_lookup_entrypoint_unchecked(pName);
 }
 
+static uint64_t
+tu_wsi_image_get_modifier(VkImage _image)
+{
+   TU_FROM_HANDLE(tu_image, image, _image);
+
+   if (!image->tile_mode)
+      return DRM_FORMAT_MOD_LINEAR;
+
+   if (image->ubwc_size)
+      return DRM_FORMAT_MOD_QCOM_COMPRESSED;
+
+   /* TODO invent a modifier for tiled but not UBWC buffers: */
+   return DRM_FORMAT_MOD_INVALID;
+}
+
 VkResult
 tu_wsi_init(struct tu_physical_device *physical_device)
 {
-   return wsi_device_init(&physical_device->wsi_device,
-                          tu_physical_device_to_handle(physical_device),
-                          tu_wsi_proc_addr, &physical_device->instance->alloc,
-                          physical_device->master_fd, NULL);
+   VkResult result;
+
+   result = wsi_device_init(&physical_device->wsi_device,
+                            tu_physical_device_to_handle(physical_device),
+                            tu_wsi_proc_addr,
+                            &physical_device->instance->alloc,
+                            physical_device->master_fd, NULL);
+   if (result != VK_SUCCESS)
+      return result;
+
+   physical_device->wsi_device.supports_modifiers = true;
+   physical_device->wsi_device.image_get_modifier = tu_wsi_image_get_modifier;
+
+   return VK_SUCCESS;
 }
 
 void