From 5afaec3741384da0702d3bf75aebd0af6fd07d8b Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 11 Jun 2020 17:57:54 -0400 Subject: [PATCH] turnip: workaround for a630 d24_unorm_s8_uint fails A630 doesn't have the HW format we use to sample stencil, so it needs a workaround. It also has a bug around the AS_R8G8B8A8 format, which doesn't work when UBWC is disabled, so use 8_8_8_8_UNORM instead when UBWC is disabled (using AS_R8G8B8A8 or 8_8_8_8_UNORM should only matter with UBWC) Signed-off-by: Jonathan Marek Part-of: --- .../deqp-freedreno-a630-bypass-fails.txt | 25 -------- .gitlab-ci/deqp-freedreno-a630-fails.txt | 4 -- src/freedreno/registers/adreno/a6xx.xml | 4 +- src/freedreno/vulkan/tu_clear_blit.c | 63 +++++++++++++------ src/freedreno/vulkan/tu_cmd_buffer.c | 18 ++++-- src/freedreno/vulkan/tu_device.c | 2 + src/freedreno/vulkan/tu_formats.c | 10 ++- src/freedreno/vulkan/tu_image.c | 55 +++++++++++++--- src/freedreno/vulkan/tu_private.h | 7 ++- 9 files changed, 117 insertions(+), 71 deletions(-) diff --git a/.gitlab-ci/deqp-freedreno-a630-bypass-fails.txt b/.gitlab-ci/deqp-freedreno-a630-bypass-fails.txt index db865ecf467..a6913591d3c 100644 --- a/.gitlab-ci/deqp-freedreno-a630-bypass-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-bypass-fails.txt @@ -85,28 +85,3 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_depth_2d dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_depth_2d_array dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_depth_2d dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_depth_2d_array -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.dont_care.clear_draw -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.store.draw_use_input_aspect_stencil_read_only -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.draw_use_input_aspect_stencil_read_only -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.dont_care.store.clear_draw_use_input_aspect_stencil_read_only -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.dont_care.store.draw_use_input_aspect -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.dont_care.clear_draw_use_input_aspect -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.dont_care.draw_stencil_read_only -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.clear_draw_stencil_read_only -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.draw -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.clear.dont_care.draw_use_input_aspect_stencil_read_only -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.clear.store.clear_draw_use_input_aspect_stencil_read_only -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.clear.store.draw_use_input_aspect -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.clear_draw_use_input_aspect_stencil_read_only -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.draw_use_input_aspect -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.store.clear_draw_use_input_aspect -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.store.draw_stencil_read_only -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.load.dont_care.clear_draw_stencil_read_only -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.load.dont_care.draw -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.load.store.clear_draw -dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.store.self_dep_clear_draw_stencil_read_only -dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.store.self_dep_draw -dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.self_dep_draw_stencil_read_only -dEQP-VK.renderpass2.suballocation.formats.d24_unorm_s8_uint.input.clear.store.self_dep_draw_stencil_read_only -dEQP-VK.renderpass2.suballocation.multisample.d24_unorm_s8_uint.samples_4 -dEQP-VK.renderpass2.suballocation.multisample.separate_stencil_usage.d24_unorm_s8_uint.samples_2.test_stencil diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt index ed5b3c4e6a7..93887266e80 100644 --- a/.gitlab-ci/deqp-freedreno-a630-fails.txt +++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt @@ -7,10 +7,6 @@ dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithi dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3 dEQP-VK.glsl.linkage.varying.struct.mat3x2 dEQP-VK.graphicsfuzz.mat-array-deep-control-flow -dEQP-VK.renderpass2.dedicated_allocation.formats.d24_unorm_s8_uint.input.load.store.self_dep_draw_stencil_read_only -dEQP-VK.renderpass.dedicated_allocation.formats.d24_unorm_s8_uint.input.clear.dont_care.clear_draw -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.dont_care.draw_use_input_aspect -dEQP-VK.renderpass.suballocation.formats.d24_unorm_s8_uint.input.dont_care.store.draw_stencil_read_only dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.negate_denorm_preserve dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp32.input_args.rounding_rtz_out_prod dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_geom diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 559f253f3a0..63653c2fb45 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -150,8 +150,8 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> - - + + diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 6ada2be88b7..1d25147a6cf 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -250,12 +250,18 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, VkImageAspectFlags aspect_mask, enum a6xx_rotation rotation, bool clear, + bool ubwc, bool scissor) { enum a6xx_format format = tu6_base_format(vk_format); enum a6xx_2d_ifmt ifmt = format_to_ifmt(format); uint32_t unknown_8c01 = 0; + if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) { + format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + } + /* note: the only format with partial clearing is D24S8 */ if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { /* preserve stencil channel */ @@ -302,11 +308,12 @@ r2d_setup(struct tu_cmd_buffer *cmd, VkFormat vk_format, VkImageAspectFlags aspect_mask, enum a6xx_rotation rotation, - bool clear) + bool clear, + bool ubwc) { tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); - r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, false); + r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false); } static void @@ -715,8 +722,16 @@ r3d_setup(struct tu_cmd_buffer *cmd, VkFormat vk_format, VkImageAspectFlags aspect_mask, enum a6xx_rotation rotation, - bool clear) + bool clear, + bool ubwc) { + enum a6xx_format format = tu6_base_format(vk_format); + + if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) { + format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + } + if (!cmd->state.pass) { tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff); @@ -756,7 +771,7 @@ r3d_setup(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf)); tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0, - .color_format = tu6_base_format(vk_format), + .color_format = format, .color_sint = vk_format_is_sint(vk_format), .color_uint = vk_format_is_uint(vk_format))); @@ -802,7 +817,8 @@ struct blit_ops { VkFormat vk_format, VkImageAspectFlags aspect_mask, enum a6xx_rotation rotation, - bool clear); + bool clear, + bool ubwc); void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs); }; @@ -901,7 +917,7 @@ tu_image_view_copy_blit(struct tu_image_view *iview, .baseArrayLayer = subres->baseArrayLayer + layer, .layerCount = 1, }, - }); + }, false); } static void @@ -985,7 +1001,7 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, */ ops->setup(cmd, cs, dst_image->vk_format, info->dstSubresource.aspectMask, - rotate[mirror_y][mirror_x], false); + rotate[mirror_y][mirror_x], false, dst_image->layout[0].ubwc); if (ops == &r3d_ops) { r3d_coords_raw(cs, (float[]) { @@ -1100,7 +1116,7 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, ops->setup(cmd, cs, copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false), - info->imageSubresource.aspectMask, ROTATE_0, false); + info->imageSubresource.aspectMask, ROTATE_0, false, dst_image->layout[0].ubwc); struct tu_image_view dst; tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false); @@ -1174,7 +1190,7 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format); uint32_t layer_size = pitch * dst_height; - ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); + ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); struct tu_image_view src; tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read); @@ -1378,7 +1394,7 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, tu_image_view_copy(&staging, &staging_image, src_format, &staging_subresource, 0, false); - ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); + ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); coords(ops, cs, &staging_offset, &src_offset, &extent); for (uint32_t i = 0; i < info->extent.depth; i++) { @@ -1396,7 +1412,8 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, tu_image_view_copy(&staging, &staging_image, dst_format, &staging_subresource, 0, false); - ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, ROTATE_0, false); + ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask, + ROTATE_0, false, dst_image->layout[0].ubwc); coords(ops, cs, &dst_offset, &staging_offset, &extent); for (uint32_t i = 0; i < info->extent.depth; i++) { @@ -1408,7 +1425,8 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd, tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false); tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false); - ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, ROTATE_0, false); + ops->setup(cmd, cs, format, info->dstSubresource.aspectMask, + ROTATE_0, false, dst_image->layout[0].ubwc); coords(ops, cs, &dst_offset, &src_offset, &extent); for (uint32_t i = 0; i < info->extent.depth; i++) { @@ -1451,7 +1469,7 @@ copy_buffer(struct tu_cmd_buffer *cmd, VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM; uint64_t blocks = size / block_size; - ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); + ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, false); while (blocks) { uint32_t src_x = (src_va & 63) / block_size; @@ -1534,7 +1552,7 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer, uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset; uint32_t blocks = fillSize / 4; - ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true); + ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, true, false); ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); while (blocks) { @@ -1568,7 +1586,8 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer, tu_bo_list_add(&cmd->bo_list, src_image->bo, MSM_SUBMIT_BO_READ); tu_bo_list_add(&cmd->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE); - ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); + ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, + ROTATE_0, false, dst_image->layout[0].ubwc); for (uint32_t i = 0; i < regionCount; ++i) { const VkImageResolve *info = &pRegions[i]; @@ -1606,7 +1625,8 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd, assert(src->image->vk_format == dst->image->vk_format); - ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false); + ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT, + ROTATE_0, false, dst->ubwc_enabled); ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); for (uint32_t i = 0; i < layers; i++) { @@ -1636,7 +1656,7 @@ clear_image(struct tu_cmd_buffer *cmd, const struct blit_ops *ops = image->samples > 1 ? &r3d_ops : &r2d_ops; - ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true); + ops->setup(cmd, cs, format, range->aspectMask, ROTATE_0, true, image->layout[0].ubwc); ops->clear_value(cs, image->vk_format, clear_value); for (unsigned j = 0; j < level_count; j++) { @@ -1754,7 +1774,8 @@ tu_clear_sysmem_attachments_2d(struct tu_cmd_buffer *cmd, const struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment; - ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask, ROTATE_0, true); + ops->setup(cmd, cs, iview->image->vk_format, attachments[j].aspectMask, + ROTATE_0, true, iview->ubwc_enabled); ops->clear_value(cs, iview->image->vk_format, &attachments[j].clearValue); /* Wait for the flushes we triggered manually to complete */ @@ -2083,7 +2104,8 @@ tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, if (attachment->samples > 1) ops = &r3d_ops; - ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0, true); + ops->setup(cmd, cs, attachment->format, attachment->clear_mask, ROTATE_0, + true, iview->ubwc_enabled); ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent); ops->clear_value(cs, attachment->format, &info->pClearValues[a]); @@ -2255,7 +2277,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, return; } - r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT, ROTATE_0, false, true); + r2d_setup_common(cmd, cs, dst->format, VK_IMAGE_ASPECT_COLOR_BIT, + ROTATE_0, false, iview->ubwc_enabled, true); r2d_dst(cs, iview, 0); r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index deebd7132d5..b7b43ba485f 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -1087,11 +1087,19 @@ tu_emit_input_attachments(struct tu_cmd_buffer *cmd, dst[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK | A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK | A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK); - dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_S8Z24_UINT) | - A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_Y) | - A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) | - A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) | - A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE); + if (cmd->device->physical_device->limited_z24s8) { + dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_8_8_8_8_UINT) | + A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_W) | + A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) | + A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) | + A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE); + } else { + dst[0] |= A6XX_TEX_CONST_0_FMT(FMT6_Z24_UINT_S8_UINT) | + A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_Y) | + A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_ZERO) | + A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_ZERO) | + A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_ONE); + } } if (!gmem) diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index a1dd04e58fb..380b5d9340c 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -261,6 +261,8 @@ tu_physical_device_init(struct tu_physical_device *device, memset(device->name, 0, sizeof(device->name)); sprintf(device->name, "FD%d", device->gpu_id); + device->limited_z24s8 = (device->gpu_id == 630); + switch (device->gpu_id) { case 618: device->ccu_offset_gmem = 0x7c000; /* 0x7e000 in some cases? */ diff --git a/src/freedreno/vulkan/tu_formats.c b/src/freedreno/vulkan/tu_formats.c index 6394ba138b8..958cc9b407e 100644 --- a/src/freedreno/vulkan/tu_formats.c +++ b/src/freedreno/vulkan/tu_formats.c @@ -215,13 +215,17 @@ static const struct tu_native_format tu6_format_table[] = { TU6_VTC(B10G11R11_UFLOAT_PACK32, 11_11_10_FLOAT, WZYX), /* 122 */ TU6_xTx(E5B9G9R9_UFLOAT_PACK32, 9_9_9_E5_FLOAT, WZYX), /* 123 */ - /* depth/stencil */ + /* depth/stencil + * X8_D24_UNORM/D24_UNORM_S8_UINT should be Z24_UNORM_S8_UINT_AS_R8G8B8A8 + * but the format doesn't work on A630 when UBWC is disabled, so use + * 8_8_8_8_UNORM as the default and override it when UBWC is enabled + */ TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */ - TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 125 */ + TU6_xTC(X8_D24_UNORM_PACK32, 8_8_8_8_UNORM, WZYX), /* 125 */ TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */ TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */ TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */ - TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 129 */ + TU6_xTC(D24_UNORM_S8_UINT, 8_8_8_8_UNORM, WZYX), /* 129 */ TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */ /* compressed */ diff --git a/src/freedreno/vulkan/tu_image.c b/src/freedreno/vulkan/tu_image.c index 9087806fe47..2e4a2449193 100644 --- a/src/freedreno/vulkan/tu_image.c +++ b/src/freedreno/vulkan/tu_image.c @@ -194,6 +194,23 @@ tu_image_create(VkDevice _device, if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) ubwc_enabled = false; + /* Disable UBWC for D24S8 on A630 in some cases + * + * VK_IMAGE_ASPECT_STENCIL_BIT image view requires to be able to sample + * from the stencil component as UINT, however no format allows this + * on a630 (the special FMT6_Z24_UINT_S8_UINT format is missing) + * + * It must be sampled as FMT6_8_8_8_8_UINT, which is not UBWC-compatible + * + * Additionally, the special AS_R8G8B8A8 format is broken without UBWC, + * so we have to fallback to 8_8_8_8_UNORM when UBWC is disabled + */ + if (device->physical_device->limited_z24s8 && + image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && + (image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) { + ubwc_enabled = false; + } + /* expect UBWC enabled if we asked for it */ assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled); @@ -300,7 +317,8 @@ static uint32_t tu6_texswiz(const VkComponentMapping *comps, const struct tu_sampler_ycbcr_conversion *conversion, VkFormat format, - VkImageAspectFlagBits aspect_mask) + VkImageAspectFlagBits aspect_mask, + bool limited_z24s8) { unsigned char swiz[4] = { A6XX_TEX_X, A6XX_TEX_Y, A6XX_TEX_Z, A6XX_TEX_W, @@ -321,10 +339,16 @@ tu6_texswiz(const VkComponentMapping *comps, swiz[3] = A6XX_TEX_ONE; break; case VK_FORMAT_D24_UNORM_S8_UINT: - /* for D24S8, stencil is in the 2nd channel of the hardware format */ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { - swiz[0] = A6XX_TEX_Y; - swiz[1] = A6XX_TEX_ZERO; + if (limited_z24s8) { + /* using FMT6_8_8_8_8_UINT */ + swiz[0] = A6XX_TEX_W; + swiz[1] = A6XX_TEX_ZERO; + } else { + /* using FMT6_Z24_UINT_S8_UINT */ + swiz[0] = A6XX_TEX_Y; + swiz[1] = A6XX_TEX_ZERO; + } } default: break; @@ -365,7 +389,8 @@ tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32 void tu_image_view_init(struct tu_image_view *iview, - const VkImageViewCreateInfo *pCreateInfo) + const VkImageViewCreateInfo *pCreateInfo, + bool limited_z24s8) { TU_FROM_HANDLE(tu_image, image, pCreateInfo->image); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; @@ -430,12 +455,18 @@ tu_image_view_init(struct tu_image_view *iview, bool ubwc_enabled = fdl_ubwc_enabled(layout, range->baseMipLevel); + bool is_d24s8 = (format == VK_FORMAT_D24_UNORM_S8_UINT || + format == VK_FORMAT_X8_D24_UNORM_PACK32); + + if (is_d24s8 && ubwc_enabled) + fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + unsigned fmt_tex = fmt.fmt; - if (fmt_tex == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) { + if (is_d24s8) { if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) fmt_tex = FMT6_Z24_UNORM_S8_UINT; if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) - fmt_tex = FMT6_S8Z24_UINT; + fmt_tex = limited_z24s8 ? FMT6_8_8_8_8_UINT : FMT6_Z24_UINT_S8_UINT; /* TODO: also use this format with storage descriptor ? */ } @@ -445,7 +476,7 @@ tu_image_view_init(struct tu_image_view *iview, A6XX_TEX_CONST_0_FMT(fmt_tex) | A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) | A6XX_TEX_CONST_0_SWAP(fmt.swap) | - tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask) | + tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask, limited_z24s8) | A6XX_TEX_CONST_0_MIPLVLS(tu_get_levelCount(image, range) - 1); iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); iview->descriptor[2] = @@ -551,6 +582,9 @@ tu_image_view_init(struct tu_image_view *iview, struct tu_native_format cfmt = tu6_format_color(format, layout->tile_mode); cfmt.tile_mode = fmt.tile_mode; + if (is_d24s8 && ubwc_enabled) + cfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; + if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) { memset(iview->storage_descriptor, 0, sizeof(iview->storage_descriptor)); @@ -589,6 +623,7 @@ tu_image_view_init(struct tu_image_view *iview, .color_tile_mode = cfmt.tile_mode, .color_format = cfmt.fmt, .color_swap = cfmt.swap).value; + iview->SP_FS_MRT_REG = A6XX_SP_FS_MRT_REG(0, .color_format = cfmt.fmt, .color_sint = vk_format_is_sint(format), @@ -740,7 +775,7 @@ tu_CreateImageView(VkDevice _device, if (view == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - tu_image_view_init(view, pCreateInfo); + tu_image_view_init(view, pCreateInfo, device->physical_device->limited_z24s8); *pView = tu_image_view_to_handle(view); @@ -797,7 +832,7 @@ tu_buffer_view_init(struct tu_buffer_view *view, A6XX_TEX_CONST_0_SWAP(fmt.swap) | A6XX_TEX_CONST_0_FMT(fmt.fmt) | A6XX_TEX_CONST_0_MIPLVLS(0) | - tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT); + tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT, false); COND(vk_format_is_srgb(vfmt), A6XX_TEX_CONST_0_SRGB); view->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) | diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 291d3f81ca3..90f0b31cac5 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -225,6 +225,8 @@ struct tu_physical_device int msm_major_version; int msm_minor_version; + bool limited_z24s8; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -1392,8 +1394,9 @@ tu_image_from_gralloc(VkDevice device_h, VkImage *out_image_h); void -tu_image_view_init(struct tu_image_view *view, - const VkImageViewCreateInfo *pCreateInfo); +tu_image_view_init(struct tu_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + bool limited_z24s8); struct tu_buffer_view { -- 2.30.2