From 824cfc1ee5e0aba15b676b9363ff32046d96eb42 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 3 Dec 2018 22:45:03 +0100 Subject: [PATCH] radv: rework the TC-compat HTILE hardware bug with COND_EXEC After investigating on this, it appears that COND_WRITE doesn't work correctly in some situations. I don't know exactly why does it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK also uses COND_EXEC I think there is a reason. Now the driver stores a new metadata value in order to reflect the last fast depth clear state. If a TC-compat HTILE is fast cleared with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to work around that hardware bug. This fixes rendering issues with The Forest and DXVK and doesn't seem to introduce any regressions. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914 Fixes: 68dead112e7 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug") Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 91 ++++++++++++++++++++++---------- src/amd/vulkan/radv_image.c | 10 +++- src/amd/vulkan/radv_private.h | 8 +++ 3 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ccf762ead46..23909a0f7dd 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1067,7 +1067,7 @@ static void radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image *image, VkImageLayout layout, - bool requires_cond_write) + bool requires_cond_exec) { uint32_t db_z_info = ds->db_z_info; uint32_t db_z_info_reg; @@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, } /* When we don't know the last fast clear value we need to emit a - * conditional packet, otherwise we can update DB_Z_INFO directly. + * conditional packet that will eventually skip the following + * SET_CONTEXT_REG packet. */ - if (requires_cond_write) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0)); - - const uint32_t write_space = 0 << 8; /* register */ - const uint32_t poll_space = 1 << 4; /* memory */ - const uint32_t function = 3 << 0; /* equal to the reference */ - const uint32_t options = write_space | poll_space | function; - radeon_emit(cmd_buffer->cs, options); - - /* poll address - location of the depth clear value */ + if (requires_cond_exec) { uint64_t va = radv_buffer_get_va(image->bo); - va += image->offset + image->clear_value_offset; - - /* In presence of stencil format, we have to adjust the base - * address because the first value is the stencil clear value. - */ - if (vk_format_is_stencil(image->vk_format)) - va += 4; + va += image->offset + image->tc_compat_zrange_offset; + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); - - radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */ - radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */ - radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */ - radeon_emit(cmd_buffer->cs, 0u); /* write address high */ - radeon_emit(cmd_buffer->cs, db_z_info); - } else { - radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ } + + radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); } static void @@ -1269,6 +1252,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, fui(ds_clear_value.depth)); } +/** + * Update the TC-compat metadata value for this image. + */ +static void +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + uint32_t value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, value); +} + +static void +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkClearDepthStencilValue ds_clear_value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + uint32_t cond_val; + + /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last + * depth clear value is 0.0f. + */ + cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; + + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val); +} + /** * Update the clear depth/stencil values for this image. */ @@ -1282,6 +1304,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects); + if (radv_image_is_tc_compat_htile(image) && + (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + radv_update_tc_compat_zrange_metadata(cmd_buffer, image, + ds_clear_value); + } + radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value, aspects); } @@ -4229,6 +4257,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects); + + if (radv_image_is_tc_compat_htile(image)) { + /* Initialize the TC-compat metada value to 0 because by + * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only + * need have to conditionally update its value when performing + * a fast depth clear. + */ + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0); + } } static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 4032906f3c6..090ca70a327 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image) /* + 8 for storing the clear values */ image->clear_value_offset = image->htile_offset + image->surface.htile_size; image->size = image->clear_value_offset + 8; + if (radv_image_is_tc_compat_htile(image)) { + /* Metadata for the TC-compatible HTILE hardware bug which + * have to be fixed by updating ZRANGE_PRECISION when doing + * fast depth clears to 0.0f. + */ + image->tc_compat_zrange_offset = image->clear_value_offset + 8; + image->size = image->clear_value_offset + 16; + } image->alignment = align64(image->alignment, image->surface.htile_alignment); } @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device, /* Otherwise, try to enable HTILE for depth surfaces. */ if (radv_image_can_enable_htile(image) && !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) { - radv_image_alloc_htile(image); image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; + radv_image_alloc_htile(image); } else { image->surface.htile_size = 0; } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index ac756f2c247..8d69b016d6f 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1503,6 +1503,14 @@ struct radv_image { uint64_t clear_value_offset; uint64_t fce_pred_offset; + /* + * Metadata for the TC-compat zrange workaround. If the 32-bit value + * stored at this offset is UINT_MAX, the driver will emit + * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the + * SET_CONTEXT_REG packet. + */ + uint64_t tc_compat_zrange_offset; + /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ VkDeviceMemory owned_memory; }; -- 2.30.2