turnip: workaround for a630 d24_unorm_s8_uint fails
author: Jonathan Marek <jonathan@marek.ca>
Thu, 11 Jun 2020 21:57:54 +0000 (17:57 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Mon, 3 Aug 2020 21:07:30 +0000 (21:07 +0000)
A630 doesn't have the HW format we use to sample stencil
(FMT6_Z24_UINT_S8_UINT), so it needs a workaround. It also has a bug where
the AS_R8G8B8A8 format doesn't work when UBWC is disabled, so use
8_8_8_8_UNORM instead in that case (the choice between AS_R8G8B8A8 and
8_8_8_8_UNORM should only matter with UBWC).

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5438>

.gitlab-ci/deqp-freedreno-a630-bypass-fails.txt
.gitlab-ci/deqp-freedreno-a630-fails.txt
src/freedreno/registers/adreno/a6xx.xml
src/freedreno/vulkan/tu_clear_blit.c
src/freedreno/vulkan/tu_cmd_buffer.c
src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_formats.c
src/freedreno/vulkan/tu_image.c
src/freedreno/vulkan/tu_private.h

index db865ecf467b802150549052757dff0c31fff041..a6913591d3c661dbddd7e3bc1163fd9fa693cae3 100644 (file)
@@ -85,28 +85,3 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_depth_2d
 dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_depth_2d_array
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_depth_2d
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_depth_2d_array
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.dont_care.clear_draw
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.store.draw_use_input_aspect_stencil_read_only
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.draw_use_input_aspect_stencil_read_only
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.dont_care.store.clear_draw_use_input_aspect_stencil_read_only
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.dont_care.store.draw_use_input_aspect
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.dont_care.clear_draw_use_input_aspect
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.dont_care.draw_stencil_read_only
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.clear_draw_stencil_read_only
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.draw
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.clear.dont_care.draw_use_input_aspect_stencil_read_only
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.clear.store.clear_draw_use_input_aspect_stencil_read_only
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.clear.store.draw_use_input_aspect
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.clear_draw_use_input_aspect_stencil_read_only
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.draw_use_input_aspect
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.store.clear_draw_use_input_aspect
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.store.draw_stencil_read_only
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.load.dont_care.clear_draw_stencil_read_only
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.load.dont_care.draw
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.load.store.clear_draw
-dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.store.self_dep_clear_draw_stencil_read_only
-dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.store.self_dep_draw
-dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.self_dep_draw_stencil_read_only
-dEQP-VK.renderpass2.suballocation.formats.d24_unorm_s8_uint.input.clear.store.self_dep_draw_stencil_read_only
-dEQP-VK.renderpass2.suballocation.multisample.d24_unorm_s8_uint.samples_4
-dEQP-VK.renderpass2.suballocation.multisample.separate_stencil_usage.d24_unorm_s8_uint.samples_2.test_stencil
index ed5b3c4e6a77fddeae382ccbc28403d4646d634e..93887266e80622a89ba77a66f7df3fb95528e0bc 100644 (file)
@@ -7,10 +7,6 @@ dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithi
 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
 dEQP-VK.glsl.linkage.varying.struct.mat3x2
 dEQP-VK.graphicsfuzz.mat-array-deep-control-flow
-dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.self_dep_draw_stencil_read_only
-dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.dont_care.clear_draw
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.draw_use_input_aspect
-dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.store.draw_stencil_read_only
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.negate_denorm_preserve
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_out_prod
 dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_geom
index 559f253f3a04f737c1c6921c996d3e3c3295e740..63653c2fb4547040602f186f45a3c22f111a8203 100644 (file)
@@ -150,8 +150,8 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
        <value value="0xcd" name="FMT6_ASTC_12x10"/>
        <value value="0xce" name="FMT6_ASTC_12x12"/>
 
-       <!-- same as X8Z24_UNORM but for sampling stencil (integer, 2nd channel) -->
-       <value value="0xea" name="FMT6_S8Z24_UINT"/>
+       <!-- for sampling stencil (integer, 2nd channel), not available on a630 -->
+       <value value="0xea" name="FMT6_Z24_UINT_S8_UINT"/>
 
        <!-- Not a hw enum, used internally in driver -->
        <value value="0xff" name="FMT6_NONE"/>
index 6ada2be88b72e9c0051c8d01d03b9398cecff915..1d25147a6cfcdd316764c737d431a14a96bb5192 100644 (file)
@@ -250,12 +250,18 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
                  VkImageAspectFlags aspect_mask,
                  enum a6xx_rotation rotation,
                  bool clear,
+                 bool ubwc,
                  bool scissor)
 {
    enum a6xx_format format = tu6_base_format(vk_format);
    enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
    uint32_t unknown_8c01 = 0;
 
+   if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
+        vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
+      format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+   }
+
    /* note: the only format with partial clearing is D24S8 */
    if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
       /* preserve stencil channel */
@@ -302,11 +308,12 @@ r2d_setup(struct tu_cmd_buffer *cmd,
           VkFormat vk_format,
           VkImageAspectFlags aspect_mask,
           enum a6xx_rotation rotation,
-          bool clear)
+          bool clear,
+          bool ubwc)
 {
    tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
 
-   r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, false);
+   r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false);
 }
 
 static void
@@ -715,8 +722,16 @@ r3d_setup(struct tu_cmd_buffer *cmd,
           VkFormat vk_format,
           VkImageAspectFlags aspect_mask,
           enum a6xx_rotation rotation,
-          bool clear)
+          bool clear,
+          bool ubwc)
 {
+   enum a6xx_format format = tu6_base_format(vk_format);
+
+   if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
+        vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
+      format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+   }
+
    if (!cmd->state.pass) {
       tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
       tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
@@ -756,7 +771,7 @@ r3d_setup(struct tu_cmd_buffer *cmd,
    tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));
 
    tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
-                        .color_format = tu6_base_format(vk_format),
+                        .color_format = format,
                         .color_sint = vk_format_is_sint(vk_format),
                         .color_uint = vk_format_is_uint(vk_format)));
 
@@ -802,7 +817,8 @@ struct blit_ops {
                  VkFormat vk_format,
                  VkImageAspectFlags aspect_mask,
                  enum a6xx_rotation rotation,
-                 bool clear);
+                 bool clear,
+                 bool ubwc);
    void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
 };
 
@@ -901,7 +917,7 @@ tu_image_view_copy_blit(struct tu_image_view *iview,
          .baseArrayLayer = subres->baseArrayLayer + layer,
          .layerCount = 1,
       },
-   });
+   }, false);
 }
 
 static void
@@ -985,7 +1001,7 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
     */
 
    ops->setup(cmd, cs, dst_image->vk_format, info->dstSubresource.aspectMask,
-              rotate[mirror_y][mirror_x], false);
+              rotate[mirror_y][mirror_x], false, dst_image->layout[0].ubwc);
 
    if (ops == &r3d_ops) {
       r3d_coords_raw(cs, (float[]) {
@@ -1100,7 +1116,7 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
 
    ops->setup(cmd, cs,
               copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false),
-              info->imageSubresource.aspectMask, ROTATE_0, false);
+              info->imageSubresource.aspectMask, ROTATE_0, false, dst_image->layout[0].ubwc);
 
    struct tu_image_view dst;
    tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false);
@@ -1174,7 +1190,7 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
    uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
    uint32_t layer_size = pitch * dst_height;
 
-   ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
+   ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false);
 
    struct tu_image_view src;
    tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read);
@@ -1378,7 +1394,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
       tu_image_view_copy(&staging, &staging_image, src_format,
                          &staging_subresource, 0, false);
 
-      ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
+      ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false);
       coords(ops, cs, &staging_offset, &src_offset, &extent);
 
       for (uint32_t i = 0; i < info->extent.depth; i++) {
@@ -1396,7 +1412,8 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
       tu_image_view_copy(&staging, &staging_image, dst_format,
                          &staging_subresource, 0, false);
 
-      ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, ROTATE_0, false);
+      ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask,
+                 ROTATE_0, false, dst_image->layout[0].ubwc);
       coords(ops, cs, &dst_offset, &staging_offset, &extent);
 
       for (uint32_t i = 0; i < info->extent.depth; i++) {
@@ -1408,7 +1425,8 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
       tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
       tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false);
 
-      ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, ROTATE_0, false);
+      ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
+                 ROTATE_0, false, dst_image->layout[0].ubwc);
       coords(ops, cs, &dst_offset, &src_offset, &extent);
 
       for (uint32_t i = 0; i < info->extent.depth; i++) {
@@ -1451,7 +1469,7 @@ copy_buffer(struct tu_cmd_buffer *cmd,
    VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
    uint64_t blocks = size / block_size;
 
-   ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
+   ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false);
 
    while (blocks) {
       uint32_t src_x = (src_va & 63) / block_size;
@@ -1534,7 +1552,7 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
    uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
    uint32_t blocks = fillSize / 4;
 
-   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true);
+   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true, false);
    ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});
 
    while (blocks) {
@@ -1568,7 +1586,8 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer,
    tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
    tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
 
-   ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
+   ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
+              ROTATE_0, false, dst_image->layout[0].ubwc);
 
    for (uint32_t i = 0; i < regionCount; ++i) {
       const VkImageResolve *info = &pRegions[i];
@@ -1606,7 +1625,8 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
 
    assert(src->image->vk_format == dst->image->vk_format);
 
-   ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false);
+   ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
+              ROTATE_0, false, dst->ubwc_enabled);
    ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
 
    for (uint32_t i = 0; i < layers; i++) {
@@ -1636,7 +1656,7 @@ clear_image(struct tu_cmd_buffer *cmd,
 
    const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops;
 
-   ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true);
+   ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true, image->layout[0].ubwc);
    ops->clear_value(cs, image->vk_format, clear_value);
 
    for (unsigned j = 0; j < level_count; j++) {
@@ -1754,7 +1774,8 @@ tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd,
          const struct tu_image_view *iview =
             cmd->state.framebuffer->attachments[a].attachment;
 
-         ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask, ROTATE_0, true);
+         ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask,
+                    ROTATE_0, true, iview->ubwc_enabled);
          ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue);
 
          /* Wait for the flushes we triggered manually to complete */
@@ -2083,7 +2104,8 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
    if (attachment->samples > 1)
       ops = &r3d_ops;
 
-   ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0, true);
+   ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0,
+              true, iview->ubwc_enabled);
    ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
    ops->clear_value(cs, attachment->format, &info->pClearValues[a]);
 
@@ -2255,7 +2277,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
       return;
    }
 
-   r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, true);
+   r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT,
+                    ROTATE_0, false, iview->ubwc_enabled, true);
    r2d_dst(cs, iview, 0);
    r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
 
index deebd7132d53d693a1783c653cabdc6727e15504..b7b43ba485f4f4ca8fe5978d4e07c098daab1da0 100644 (file)
@@ -1087,11 +1087,19 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd,
          dst[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
             A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
             A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK);
-         dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_S8Z24_UINT) |
-            A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_Y) |
-            A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) |
-            A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) |
-            A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE);
+         if (cmd->device->physical_device->limited_z24s8) {
+            dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_8_8_8_UINT) |
+               A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_W) |
+               A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) |
+               A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) |
+               A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE);
+         } else {
+            dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_Z24_UINT_S8_UINT) |
+               A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_Y) |
+               A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) |
+               A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) |
+               A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE);
+         }
       }
 
       if (!gmem)
index a1dd04e58fbfb60828e3ec78ff0eeb8d1f28b551..380b5d9340c5dbb2d9385e573e133b9841ca97b5 100644 (file)
@@ -261,6 +261,8 @@ tu_physical_device_init(struct tu_physical_device *device,
    memset(device->name, 0, sizeof(device->name));
    sprintf(device->name, "FD%d", device->gpu_id);
 
+   device->limited_z24s8 = (device->gpu_id == 630);
+
    switch (device->gpu_id) {
    case 618:
       device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? */
index 6394ba138b8ce9886cb666f1f6f62e0c9c130a90..958cc9b407ea71c924ea9525622f6075a6fbd01c 100644 (file)
@@ -215,13 +215,17 @@ static const struct tu_native_format tu6_format_table[] = {
    TU6_VTC(B10G11R11_UFLOAT_PACK32,    11_11_10_FLOAT,    WZYX), /* 122 */
    TU6_xTx(E5B9G9R9_UFLOAT_PACK32,     9_9_9_E5_FLOAT,    WZYX), /* 123 */
 
-   /* depth/stencil */
+   /* depth/stencil
+    * X8_D24_UNORM/D24_UNORM_S8_UINT should be Z24_UNORM_S8_UINT_AS_R8G8B8A8
+    * but the format doesn't work on A630 when UBWC is disabled, so use
+    * 8_8_8_8_UNORM as the default and override it when UBWC is enabled
+    */
    TU6_xTC(D16_UNORM,                  16_UNORM,                      WZYX), /* 124 */
-   TU6_xTC(X8_D24_UNORM_PACK32,        Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 125 */
+   TU6_xTC(X8_D24_UNORM_PACK32,        8_8_8_8_UNORM,                 WZYX), /* 125 */
    TU6_xTC(D32_SFLOAT,                 32_FLOAT,                      WZYX), /* 126 */
    TU6_xTC(S8_UINT,                    8_UINT,                        WZYX), /* 127 */
    TU6_xxx(D16_UNORM_S8_UINT,          X8Z16_UNORM,                   WZYX), /* 128 */
-   TU6_xTC(D24_UNORM_S8_UINT,          Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 129 */
+   TU6_xTC(D24_UNORM_S8_UINT,          8_8_8_8_UNORM,                 WZYX), /* 129 */
    TU6_xxx(D32_SFLOAT_S8_UINT,         x,                             WZYX), /* 130 */
 
    /* compressed */
index 9087806fe47fd06f53421f6f755fcb96a3a6397f..2e4a24491938d186afe431fa3bb50e4a57fd10cf 100644 (file)
@@ -194,6 +194,23 @@ tu_image_create(VkDevice _device,
    if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
       ubwc_enabled = false;
 
+   /* Disable UBWC for D24S8 on A630 in some cases
+    *
+    * A VK_IMAGE_ASPECT_STENCIL_BIT image view must be able to sample the
+    * stencil component as UINT, but no format allows this on a630
+    * (the special FMT6_Z24_UINT_S8_UINT format is missing).
+    *
+    * It must be sampled as FMT6_8_8_8_8_UINT, which is not UBWC-compatible.
+    *
+    * Additionally, the special AS_R8G8B8A8 format is broken without UBWC,
+    * so we have to fall back to 8_8_8_8_UNORM when UBWC is disabled.
+    */
+   if (device->physical_device->limited_z24s8 &&
+       image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
+       (image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) {
+      ubwc_enabled = false;
+   }
+
    /* expect UBWC enabled if we asked for it */
    assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
 
@@ -300,7 +317,8 @@ static uint32_t
 tu6_texswiz(const VkComponentMapping *comps,
             const struct tu_sampler_ycbcr_conversion *conversion,
             VkFormat format,
-            VkImageAspectFlagBits aspect_mask)
+            VkImageAspectFlagBits aspect_mask,
+            bool limited_z24s8)
 {
    unsigned char swiz[4] = {
       A6XX_TEX_X, A6XX_TEX_Y, A6XX_TEX_Z, A6XX_TEX_W,
@@ -321,10 +339,16 @@ tu6_texswiz(const VkComponentMapping *comps,
       swiz[3] = A6XX_TEX_ONE;
       break;
    case VK_FORMAT_D24_UNORM_S8_UINT:
-      /* for D24S8, stencil is in the 2nd channel of the hardware format */
       if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
-         swiz[0] = A6XX_TEX_Y;
-         swiz[1] = A6XX_TEX_ZERO;
+         if (limited_z24s8) {
+            /* using FMT6_8_8_8_8_UINT */
+            swiz[0] = A6XX_TEX_W;
+            swiz[1] = A6XX_TEX_ZERO;
+         } else {
+            /* using FMT6_Z24_UINT_S8_UINT */
+            swiz[0] = A6XX_TEX_Y;
+            swiz[1] = A6XX_TEX_ZERO;
+         }
       }
    default:
       break;
@@ -365,7 +389,8 @@ tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32
 
 void
 tu_image_view_init(struct tu_image_view *iview,
-                   const VkImageViewCreateInfo *pCreateInfo)
+                   const VkImageViewCreateInfo *pCreateInfo,
+                   bool limited_z24s8)
 {
    TU_FROM_HANDLE(tu_image, image, pCreateInfo->image);
    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
@@ -430,12 +455,18 @@ tu_image_view_init(struct tu_image_view *iview,
 
    bool ubwc_enabled = fdl_ubwc_enabled(layout, range->baseMipLevel);
 
+   bool is_d24s8 = (format == VK_FORMAT_D24_UNORM_S8_UINT ||
+                    format == VK_FORMAT_X8_D24_UNORM_PACK32);
+
+   if (is_d24s8 && ubwc_enabled)
+      fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+
    unsigned fmt_tex = fmt.fmt;
-   if (fmt_tex == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
+   if (is_d24s8) {
       if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
          fmt_tex = FMT6_Z24_UNORM_S8_UINT;
       if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
-         fmt_tex = FMT6_S8Z24_UINT;
+         fmt_tex = limited_z24s8 ? FMT6_8_8_8_8_UINT : FMT6_Z24_UINT_S8_UINT;
       /* TODO: also use this format with storage descriptor ? */
    }
 
@@ -445,7 +476,7 @@ tu_image_view_init(struct tu_image_view *iview,
       A6XX_TEX_CONST_0_FMT(fmt_tex) |
       A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) |
       A6XX_TEX_CONST_0_SWAP(fmt.swap) |
-      tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask) |
+      tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask, limited_z24s8) |
       A6XX_TEX_CONST_0_MIPLVLS(tu_get_levelCount(image, range) - 1);
    iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
    iview->descriptor[2] =
@@ -551,6 +582,9 @@ tu_image_view_init(struct tu_image_view *iview,
    struct tu_native_format cfmt = tu6_format_color(format, layout->tile_mode);
    cfmt.tile_mode = fmt.tile_mode;
 
+   if (is_d24s8 && ubwc_enabled)
+      cfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
+
    if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
       memset(iview->storage_descriptor, 0, sizeof(iview->storage_descriptor));
 
@@ -589,6 +623,7 @@ tu_image_view_init(struct tu_image_view *iview,
                               .color_tile_mode = cfmt.tile_mode,
                               .color_format = cfmt.fmt,
                               .color_swap = cfmt.swap).value;
+
    iview->SP_FS_MRT_REG = A6XX_SP_FS_MRT_REG(0,
                               .color_format = cfmt.fmt,
                               .color_sint = vk_format_is_sint(format),
@@ -740,7 +775,7 @@ tu_CreateImageView(VkDevice _device,
    if (view == NULL)
       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   tu_image_view_init(view, pCreateInfo);
+   tu_image_view_init(view, pCreateInfo, device->physical_device->limited_z24s8);
 
    *pView = tu_image_view_to_handle(view);
 
@@ -797,7 +832,7 @@ tu_buffer_view_init(struct tu_buffer_view *view,
       A6XX_TEX_CONST_0_SWAP(fmt.swap) |
       A6XX_TEX_CONST_0_FMT(fmt.fmt) |
       A6XX_TEX_CONST_0_MIPLVLS(0) |
-      tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT);
+      tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT, false);
       COND(vk_format_is_srgb(vfmt), A6XX_TEX_CONST_0_SRGB);
    view->descriptor[1] =
       A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
index 291d3f81ca32aebbe028f95cdb6fc83ba782ae46..90f0b31cac5528b7dc22b9e1c2e62f0ac74ff6d9 100644 (file)
@@ -225,6 +225,8 @@ struct tu_physical_device
    int msm_major_version;
    int msm_minor_version;
 
+   bool limited_z24s8;
+
    /* This is the drivers on-disk cache used as a fallback as opposed to
     * the pipeline cache defined by apps.
     */
@@ -1392,8 +1394,9 @@ tu_image_from_gralloc(VkDevice device_h,
                       VkImage *out_image_h);
 
 void
-tu_image_view_init(struct tu_image_view *view,
-                   const VkImageViewCreateInfo *pCreateInfo);
+tu_image_view_init(struct tu_image_view *iview,
+                   const VkImageViewCreateInfo *pCreateInfo,
+                   bool limited_z24s8);
 
 struct tu_buffer_view
 {