radv: Implement TC compatible HTILE.
[mesa.git] / src / amd / vulkan / radv_image.c
index df288666bebc3812680d5c3d027c4d1dda190ce8..bf30281abaa632aac30acdb03be8442688286037 100644 (file)
@@ -109,6 +109,15 @@ radv_init_surface(struct radv_device *device,
 
        if (is_depth) {
                surface->flags |= RADEON_SURF_ZBUFFER;
+               if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+                   !(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
+                   pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
+                   pCreateInfo->mipLevels <= 1 &&
+                   device->physical_device->rad_info.chip_class >= VI &&
+                   (pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
+                    (device->physical_device->rad_info.chip_class >= GFX9 &&
+                     pCreateInfo->format == VK_FORMAT_D16_UNORM)))
+                       surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
        }
 
        if (is_stencil)
@@ -116,20 +125,40 @@ radv_init_surface(struct radv_device *device,
 
        surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
 
+       bool dcc_compatible_formats = !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
+       if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+               const struct  VkImageFormatListCreateInfoKHR *format_list =
+                         (const struct  VkImageFormatListCreateInfoKHR *)
+                               vk_find_struct_const(pCreateInfo->pNext,
+                                                    IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
+
+               /* We have to ignore the existence of the list if viewFormatCount = 0 */
+               if (format_list && format_list->viewFormatCount) {
+                       /* compatibility is transitive, so we only need to check
+                        * one format with everything else. */
+                       for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+                               if (!radv_dcc_formats_compatible(pCreateInfo->format,
+                                                                format_list->pViewFormats[i]))
+                                       dcc_compatible_formats = false;
+                       }
+               } else {
+                       dcc_compatible_formats = false;
+               }
+       }
+
        if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                   VK_IMAGE_USAGE_STORAGE_BIT)) ||
-           (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
+           !dcc_compatible_formats ||
             (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
             pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
             device->physical_device->rad_info.chip_class < VI ||
-            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
-            !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
+            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC))
                surface->flags |= RADEON_SURF_DISABLE_DCC;
        if (create_info->scanout)
                surface->flags |= RADEON_SURF_SCANOUT;
        return 0;
 }
-#define ATI_VENDOR_ID 0x1002
+
 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
 {
        return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
@@ -172,7 +201,7 @@ radv_make_buffer_descriptor(struct radv_device *device,
 {
        const struct vk_format_description *desc;
        unsigned stride;
-       uint64_t gpu_address = device->ws->buffer_get_va(buffer->bo);
+       uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
        uint64_t va = gpu_address + buffer->offset;
        unsigned num_format, data_format;
        int first_non_void;
@@ -209,7 +238,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
                               unsigned block_width, bool is_stencil,
                               uint32_t *state)
 {
-       uint64_t gpu_address = image->bo ? device->ws->buffer_get_va(image->bo) + image->offset : 0;
+       uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
        uint64_t va = gpu_address;
        enum chip_class chip_class = device->physical_device->rad_info.chip_class;
        uint64_t meta_va = 0;
@@ -231,10 +260,15 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
        if (chip_class >= VI) {
                state[6] &= C_008F28_COMPRESSION_EN;
                state[7] = 0;
-               if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {
+               if (radv_vi_dcc_enabled(image, first_level)) {
                        meta_va = gpu_address + image->dcc_offset;
                        if (chip_class <= VI)
                                meta_va += base_level_info->dcc_offset;
+               } else if(image->tc_compatible_htile && image->surface.htile_size) {
+                       meta_va = gpu_address + image->htile_offset;
+               }
+
+               if (meta_va) {
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        state[7] = meta_va >> 8;
                        state[7] |= image->surface.tile_swizzle;
@@ -450,7 +484,7 @@ si_make_texture_descriptor(struct radv_device *device,
        /* Initialize the sampler view for FMASK. */
        if (image->fmask.size) {
                uint32_t fmask_format, num_format;
-               uint64_t gpu_address = device->ws->buffer_get_va(image->bo);
+               uint64_t gpu_address = radv_buffer_get_va(image->bo);
                uint64_t va;
 
                va = gpu_address + image->offset + image->fmask.offset;
@@ -744,8 +778,7 @@ radv_image_alloc_cmask(struct radv_device *device,
 }
 
 static void
-radv_image_alloc_dcc(struct radv_device *device,
-                      struct radv_image *image)
+radv_image_alloc_dcc(struct radv_image *image)
 {
        image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
        /* + 16 for storing the clear values + dcc pred */
@@ -756,14 +789,8 @@ radv_image_alloc_dcc(struct radv_device *device,
 }
 
 static void
-radv_image_alloc_htile(struct radv_device *device,
-                      struct radv_image *image)
+radv_image_alloc_htile(struct radv_image *image)
 {
-       if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->info.levels > 1) {
-               image->surface.htile_size = 0;
-               return;
-       }
-
        image->htile_offset = align64(image->size, image->surface.htile_alignment);
 
        /* + 8 for storing the clear values */
@@ -772,6 +799,41 @@ radv_image_alloc_htile(struct radv_device *device,
        image->alignment = align64(image->alignment, image->surface.htile_alignment);
 }
 
+static inline bool
+radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
+{
+       return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
+              (image->exclusive || image->queue_family_mask == 1);
+}
+
+static inline bool
+radv_image_can_enable_dcc(struct radv_image *image)
+{
+       return radv_image_can_enable_dcc_or_cmask(image) &&
+              image->surface.dcc_size;
+}
+
+static inline bool
+radv_image_can_enable_cmask(struct radv_image *image)
+{
+       return radv_image_can_enable_dcc_or_cmask(image) &&
+              image->info.levels == 1 &&
+              image->info.depth == 1 &&
+              !image->surface.is_linear;
+}
+
+static inline bool
+radv_image_can_enable_fmask(struct radv_image *image)
+{
+       return image->info.samples > 1 && vk_format_is_color(image->vk_format);
+}
+
+static inline bool
+radv_image_can_enable_htile(struct radv_image *image)
+{
+       return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
+}
+
 VkResult
 radv_image_create(VkDevice _device,
                  const struct radv_image_create_info *create_info,
@@ -781,7 +843,6 @@ radv_image_create(VkDevice _device,
        RADV_FROM_HANDLE(radv_device, device, _device);
        const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
        struct radv_image *image = NULL;
-       bool can_cmask_dcc = false;
        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
 
        radv_assert(pCreateInfo->mipLevels > 0);
@@ -832,26 +893,29 @@ radv_image_create(VkDevice _device,
        image->size = image->surface.surf_size;
        image->alignment = image->surface.surf_alignment;
 
-       if (image->exclusive || image->queue_family_mask == 1)
-               can_cmask_dcc = true;
-
-       if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
-           image->surface.dcc_size && can_cmask_dcc)
-               radv_image_alloc_dcc(device, image);
-       else
+       /* Try to enable DCC first. */
+       if (radv_image_can_enable_dcc(image)) {
+               radv_image_alloc_dcc(image);
+       } else {
+               /* When DCC cannot be enabled, try CMASK. */
                image->surface.dcc_size = 0;
+               if (radv_image_can_enable_cmask(image)) {
+                       radv_image_alloc_cmask(device, image);
+               }
+       }
 
-       if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
-           pCreateInfo->mipLevels == 1 &&
-           !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc &&
-           !image->surface.is_linear)
-               radv_image_alloc_cmask(device, image);
-
-       if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
+       /* Try to enable FMASK for multisampled images. */
+       if (radv_image_can_enable_fmask(image)) {
                radv_image_alloc_fmask(device, image);
-       } else if (vk_format_is_depth(pCreateInfo->format)) {
-
-               radv_image_alloc_htile(device, image);
+       } else {
+               /* Otherwise, try to enable HTILE for depth surfaces. */
+               if (radv_image_can_enable_htile(image) &&
+                   !(device->debug_flags & RADV_DEBUG_NO_HIZ)) {
+                       radv_image_alloc_htile(image);
+                       image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+               } else {
+                       image->surface.htile_size = 0;
+               }
        }
 
        if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
@@ -991,6 +1055,9 @@ bool radv_layout_has_htile(const struct radv_image *image,
                            VkImageLayout layout,
                            unsigned queue_mask)
 {
+       if (image->surface.htile_size && image->tc_compatible_htile)
+               return layout != VK_IMAGE_LAYOUT_GENERAL;
+
        return image->surface.htile_size &&
               (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
                layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -1001,6 +1068,9 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                      VkImageLayout layout,
                                      unsigned queue_mask)
 {
+       if (image->surface.htile_size && image->tc_compatible_htile)
+               return layout != VK_IMAGE_LAYOUT_GENERAL;
+
        return image->surface.htile_size &&
               (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
                layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -1059,23 +1129,34 @@ radv_DestroyImage(VkDevice _device, VkImage _image,
 }
 
 void radv_GetImageSubresourceLayout(
-       VkDevice                                    device,
+       VkDevice                                    _device,
        VkImage                                     _image,
        const VkImageSubresource*                   pSubresource,
        VkSubresourceLayout*                        pLayout)
 {
        RADV_FROM_HANDLE(radv_image, image, _image);
+       RADV_FROM_HANDLE(radv_device, device, _device);
        int level = pSubresource->mipLevel;
        int layer = pSubresource->arrayLayer;
        struct radeon_surf *surface = &image->surface;
 
-       pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
-       pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
-       pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
-       pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
-       pLayout->size = surface->u.legacy.level[level].slice_size;
-       if (image->type == VK_IMAGE_TYPE_3D)
-               pLayout->size *= u_minify(image->info.depth, level);
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
+               pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+               pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
+               pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
+               pLayout->size = surface->u.gfx9.surf_slice_size;
+               if (image->type == VK_IMAGE_TYPE_3D)
+                       pLayout->size *= u_minify(image->info.depth, level);
+       } else {
+               pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
+               pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
+               pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
+               pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
+               pLayout->size = surface->u.legacy.level[level].slice_size;
+               if (image->type == VK_IMAGE_TYPE_3D)
+                       pLayout->size *= u_minify(image->info.depth, level);
+       }
 }
 
 
@@ -1114,8 +1195,7 @@ radv_DestroyImageView(VkDevice _device, VkImageView _iview,
 
 void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
-                          const VkBufferViewCreateInfo* pCreateInfo,
-                          struct radv_cmd_buffer *cmd_buffer)
+                          const VkBufferViewCreateInfo* pCreateInfo)
 {
        RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
 
@@ -1142,7 +1222,7 @@ radv_CreateBufferView(VkDevice _device,
        if (!view)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-       radv_buffer_view_init(view, device, pCreateInfo, NULL);
+       radv_buffer_view_init(view, device, pCreateInfo);
 
        *pView = radv_buffer_view_to_handle(view);