radv: Implement TC compatible HTILE.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 9 May 2017 06:26:07 +0000 (08:26 +0200)
committer: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 4 Oct 2017 07:19:26 +0000 (09:19 +0200)
The situations where we enable it are quite limited, but it works,
even for madmax, so let's just enable it.

Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_private.h

index 402c948e5231039cbac40c03e8380befcc54c3f5..aa7fe35d87e8a69015517b270e4e1a637e2b1d64 100644 (file)
@@ -3249,6 +3249,18 @@ radv_initialise_ds_surface(struct radv_device *device,
                if (iview->image->surface.htile_size && !level) {
                        ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
 
+                       if (iview->image->tc_compatible_htile) {
+                               unsigned max_zplanes = 4;
+
+                               if (iview->vk_format == VK_FORMAT_D16_UNORM  &&
+                                   iview->image->info.samples > 1)
+                                       max_zplanes = 2;
+
+                               ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
+                                         S_028038_ITERATE_FLUSH(1);
+                               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+                       }
+
                        if (!iview->image->surface.has_stencil)
                                /* Use all of the htile_buffer for depth if there's no stencil. */
                                ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
@@ -3268,7 +3280,7 @@ radv_initialise_ds_surface(struct radv_device *device,
                z_offs += iview->image->surface.u.legacy.level[level].offset;
                s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
 
-               ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+               ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
                ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
                ds->db_stencil_info = S_028044_FORMAT(stencil_format);
 
@@ -3312,7 +3324,8 @@ radv_initialise_ds_surface(struct radv_device *device,
                if (iview->image->surface.htile_size && !level) {
                        ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
 
-                       if (!iview->image->surface.has_stencil)
+                       if (!iview->image->surface.has_stencil &&
+                           !iview->image->tc_compatible_htile)
                                /* Use all of the htile_buffer for depth if there's no stencil. */
                                ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
 
@@ -3320,6 +3333,17 @@ radv_initialise_ds_surface(struct radv_device *device,
                                iview->image->htile_offset;
                        ds->db_htile_data_base = va >> 8;
                        ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+                       if (iview->image->tc_compatible_htile) {
+                               ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+                               if (iview->image->info.samples <= 1)
+                                       ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+                               else if (iview->image->info.samples <= 4)
+                                       ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+                               else
+                                       ds->db_z_info|= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+                       }
                }
        }
 
index 35c58f45ab586d63c3d13711bb762a6e2c760bf4..bf30281abaa632aac30acdb03be8442688286037 100644 (file)
@@ -109,6 +109,15 @@ radv_init_surface(struct radv_device *device,
 
        if (is_depth) {
                surface->flags |= RADEON_SURF_ZBUFFER;
+               if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+                   !(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
+                   pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
+                   pCreateInfo->mipLevels <= 1 &&
+                   device->physical_device->rad_info.chip_class >= VI &&
+                   (pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
+                    (device->physical_device->rad_info.chip_class >= GFX9 &&
+                     pCreateInfo->format == VK_FORMAT_D16_UNORM)))
+                       surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
        }
 
        if (is_stencil)
@@ -255,6 +264,11 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
                        meta_va = gpu_address + image->dcc_offset;
                        if (chip_class <= VI)
                                meta_va += base_level_info->dcc_offset;
+               } else if(image->tc_compatible_htile && image->surface.htile_size) {
+                       meta_va = gpu_address + image->htile_offset;
+               }
+
+               if (meta_va) {
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        state[7] = meta_va >> 8;
                        state[7] |= image->surface.tile_swizzle;
@@ -898,6 +912,7 @@ radv_image_create(VkDevice _device,
                if (radv_image_can_enable_htile(image) &&
                    !(device->debug_flags & RADV_DEBUG_NO_HIZ)) {
                        radv_image_alloc_htile(image);
+                       image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
                } else {
                        image->surface.htile_size = 0;
                }
@@ -1040,6 +1055,9 @@ bool radv_layout_has_htile(const struct radv_image *image,
                            VkImageLayout layout,
                            unsigned queue_mask)
 {
+       if (image->surface.htile_size && image->tc_compatible_htile)
+               return layout != VK_IMAGE_LAYOUT_GENERAL;
+
        return image->surface.htile_size &&
               (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
                layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -1050,6 +1068,9 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                      VkImageLayout layout,
                                      unsigned queue_mask)
 {
+       if (image->surface.htile_size && image->tc_compatible_htile)
+               return layout != VK_IMAGE_LAYOUT_GENERAL;
+
        return image->surface.htile_size &&
               (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
                layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
index 1133024d588eef43051aebf1fe9ab94c10b35fa6..fd2caf3d0ce95781774aae7015ed1563983e0012 100644 (file)
@@ -543,8 +543,10 @@ create_depthstencil_pipeline(struct radv_device *device,
 
 static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
                                      const struct radv_image_view *iview,
+                                     VkImageAspectFlags aspects,
                                      VkImageLayout layout,
-                                     const VkClearRect *clear_rect)
+                                     const VkClearRect *clear_rect,
+                                     VkClearDepthStencilValue clear_value)
 {
        uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
                                                           cmd_buffer->queue_family_index,
@@ -553,7 +555,13 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
            clear_rect->rect.extent.width != iview->extent.width ||
            clear_rect->rect.extent.height != iview->extent.height)
                return false;
-       if (iview->base_mip == 0 &&
+       if (iview->image->tc_compatible_htile &&
+           (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
+             clear_value.depth != 1.0) ||
+            ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
+               return false;
+       if (iview->image->surface.htile_size &&
+           iview->base_mip == 0 &&
            iview->base_layer == 0 &&
            radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
            !radv_image_extent_compare(iview->image, &iview->extent))
@@ -571,7 +579,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
                           const VkClearRect *clear_rect,
                           VkClearDepthStencilValue clear_value)
 {
-       bool fast = depth_view_can_fast_clear(cmd_buffer, iview, layout, clear_rect);
+       bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, clear_rect, clear_value);
        int index = DEPTH_CLEAR_SLOW;
 
        if (fast) {
@@ -641,7 +649,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
                                     pipeline);
        }
 
-       if (depth_view_can_fast_clear(cmd_buffer, iview, subpass->depth_stencil_attachment.layout, clear_rect))
+       if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
+                                     subpass->depth_stencil_attachment.layout,
+                                     clear_rect, clear_value))
                radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
 
        radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
index 5cab40721161c6a593cf4800af0f91b13bbf20d5..c2d78a7e2c7bb87e1c11e91c5d0e802e66bb4f45 100644 (file)
@@ -1218,6 +1218,7 @@ struct radv_image {
        VkDeviceSize offset;
        uint32_t dcc_offset;
        uint32_t htile_offset;
+       bool tc_compatible_htile;
        struct radeon_surf surface;
 
        struct radv_fmask_info fmask;