amd/common: add declare_vs_input_vgprs() helper
[mesa.git] src/amd/vulkan/radv_image.c
index cff1b8d03f87d82423ee179d8d34d6b4a33988cb..b1c4f3340eddd75011e1db77cd34fc0ec46278a2 100644
@@ -109,6 +109,17 @@ radv_init_surface(struct radv_device *device,
 
        if (is_depth) {
                surface->flags |= RADEON_SURF_ZBUFFER;
+               if (!(pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+                   !(pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+                                           VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) &&
+                   pCreateInfo->tiling != VK_IMAGE_TILING_LINEAR &&
+                   pCreateInfo->mipLevels <= 1 &&
+                   device->physical_device->rad_info.chip_class >= VI &&
+                   ((pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
+                     pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT) ||
+                    (device->physical_device->rad_info.chip_class >= GFX9 &&
+                     pCreateInfo->format == VK_FORMAT_D16_UNORM)))
+                       surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
        }
 
        if (is_stencil)
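
For readability, the TC-compatible HTILE test added above can be read as a single predicate: storage usage, mutable or extended-usage flags, linear tiling and mipmaps all disqualify the image, and the format must be a 32-bit depth format on VI+ (or D16 on GFX9+). The sketch below only restates that condition as a standalone helper; radv_use_tc_compat_htile() is a hypothetical name, not something this patch adds.

/* Illustrative only: the same eligibility test as the hunk above,
 * factored into a predicate. Not added by this patch. */
static bool
radv_use_tc_compat_htile(const struct radv_device *device,
                         const VkImageCreateInfo *pCreateInfo)
{
	/* Storage usage, mutable/extended-usage flags, linear tiling and
	 * mipmapping all rule out TC-compatible HTILE. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
	                           VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR)) ||
	    pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR ||
	    pCreateInfo->mipLevels > 1)
		return false;

	if (device->physical_device->rad_info.chip_class < VI)
		return false;

	/* VI+ handles 32-bit depth formats; GFX9 additionally handles D16. */
	if (pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
	    pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
		return true;

	return device->physical_device->rad_info.chip_class >= GFX9 &&
	       pCreateInfo->format == VK_FORMAT_D16_UNORM;
}
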
@@ -116,13 +127,15 @@ radv_init_surface(struct radv_device *device,
 
        surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
 
-       bool dcc_compatible_formats = !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
+       bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
        if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
                const struct  VkImageFormatListCreateInfoKHR *format_list =
                          (const struct  VkImageFormatListCreateInfoKHR *)
                                vk_find_struct_const(pCreateInfo->pNext,
                                                     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
-               if (format_list) {
+
+               /* We have to ignore the existence of the list if viewFormatCount = 0 */
+               if (format_list && format_list->viewFormatCount) {
                        /* compatibility is transitive, so we only need to check
                         * one format with everything else. */
                        for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
@@ -137,17 +150,19 @@ radv_init_surface(struct radv_device *device,
 
        if ((pCreateInfo->usage & (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                   VK_IMAGE_USAGE_STORAGE_BIT)) ||
+           (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
            !dcc_compatible_formats ||
             (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
             pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
             device->physical_device->rad_info.chip_class < VI ||
-            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC))
+            create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC) ||
+           pCreateInfo->samples >= 2)
                surface->flags |= RADEON_SURF_DISABLE_DCC;
        if (create_info->scanout)
                surface->flags |= RADEON_SURF_SCANOUT;
        return 0;
 }
-#define ATI_VENDOR_ID 0x1002
+
 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
 {
        return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
@@ -249,10 +264,15 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
        if (chip_class >= VI) {
                state[6] &= C_008F28_COMPRESSION_EN;
                state[7] = 0;
-               if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {
+               if (radv_vi_dcc_enabled(image, first_level)) {
                        meta_va = gpu_address + image->dcc_offset;
                        if (chip_class <= VI)
                                meta_va += base_level_info->dcc_offset;
+               } else if (image->tc_compatible_htile && image->surface.htile_size) {
+                       meta_va = gpu_address + image->htile_offset;
+               }
+
+               if (meta_va) {
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        state[7] = meta_va >> 8;
                        state[7] |= image->surface.tile_swizzle;
@@ -396,6 +416,12 @@ si_make_texture_descriptor(struct radv_device *device,
                data_format = 0;
        }
 
+       /* S8 with Z32 HTILE needs a special format. */
+       if (device->physical_device->rad_info.chip_class >= GFX9 &&
+           vk_format == VK_FORMAT_S8_UINT &&
+           image->tc_compatible_htile)
+               data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
+
        type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                            is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
        if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@@ -762,8 +788,7 @@ radv_image_alloc_cmask(struct radv_device *device,
 }
 
 static void
-radv_image_alloc_dcc(struct radv_device *device,
-                      struct radv_image *image)
+radv_image_alloc_dcc(struct radv_image *image)
 {
        image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
        /* + 16 for storing the clear values + dcc pred */
@@ -774,14 +799,8 @@ radv_image_alloc_dcc(struct radv_device *device,
 }
 
 static void
-radv_image_alloc_htile(struct radv_device *device,
-                      struct radv_image *image)
+radv_image_alloc_htile(struct radv_image *image)
 {
-       if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->info.levels > 1) {
-               image->surface.htile_size = 0;
-               return;
-       }
-
        image->htile_offset = align64(image->size, image->surface.htile_alignment);
 
        /* + 8 for storing the clear values */
@@ -790,6 +809,59 @@ radv_image_alloc_htile(struct radv_device *device,
        image->alignment = align64(image->alignment, image->surface.htile_alignment);
 }
 
+static inline bool
+radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
+{
+       if (image->info.samples <= 1 &&
+           image->info.width * image->info.height <= 512 * 512) {
+               /* Do not enable CMASK or DCC for small surfaces where the cost
+                * of the eliminate pass can be higher than the benefit of fast
+                * clear. RadeonSI does this, but the image threshold is
+                * different.
+                */
+               return false;
+       }
+
+       return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
+              (image->exclusive || image->queue_family_mask == 1);
+}
+
+static inline bool
+radv_image_can_enable_dcc(struct radv_image *image)
+{
+       return radv_image_can_enable_dcc_or_cmask(image) &&
+              image->surface.dcc_size;
+}
+
+static inline bool
+radv_image_can_enable_cmask(struct radv_image *image)
+{
+       if (image->surface.bpe > 8 && image->info.samples == 1) {
+               /* Do not enable CMASK for non-MSAA images (fast color clear)
+                * because 128 bit formats are not supported, but FMASK might
+                * still be used.
+                */
+               return false;
+       }
+
+       return radv_image_can_enable_dcc_or_cmask(image) &&
+              image->info.levels == 1 &&
+              image->info.depth == 1 &&
+              !image->surface.is_linear;
+}
+
+static inline bool
+radv_image_can_enable_fmask(struct radv_image *image)
+{
+       return image->info.samples > 1 && vk_format_is_color(image->vk_format);
+}
+
+static inline bool
+radv_image_can_enable_htile(struct radv_image *image)
+{
+       return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
+}
+
 VkResult
 radv_image_create(VkDevice _device,
                  const struct radv_image_create_info *create_info,
@@ -799,7 +871,6 @@ radv_image_create(VkDevice _device,
        RADV_FROM_HANDLE(radv_device, device, _device);
        const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
        struct radv_image *image = NULL;
-       bool can_cmask_dcc = false;
        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
 
        radv_assert(pCreateInfo->mipLevels > 0);
@@ -809,12 +880,11 @@ radv_image_create(VkDevice _device,
        radv_assert(pCreateInfo->extent.height > 0);
        radv_assert(pCreateInfo->extent.depth > 0);
 
-       image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
-                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+       image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (!image)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-       memset(image, 0, sizeof(*image));
        image->type = pCreateInfo->imageType;
        image->info.width = pCreateInfo->extent.width;
        image->info.height = pCreateInfo->extent.height;
@@ -850,26 +920,29 @@ radv_image_create(VkDevice _device,
        image->size = image->surface.surf_size;
        image->alignment = image->surface.surf_alignment;
 
-       if (image->exclusive || image->queue_family_mask == 1)
-               can_cmask_dcc = true;
-
-       if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
-           image->surface.dcc_size && can_cmask_dcc)
-               radv_image_alloc_dcc(device, image);
-       else
+       /* Try to enable DCC first. */
+       if (radv_image_can_enable_dcc(image)) {
+               radv_image_alloc_dcc(image);
+       } else {
+               /* When DCC cannot be enabled, try CMASK. */
                image->surface.dcc_size = 0;
+               if (radv_image_can_enable_cmask(image)) {
+                       radv_image_alloc_cmask(device, image);
+               }
+       }
 
-       if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
-           pCreateInfo->mipLevels == 1 &&
-           !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc &&
-           !image->surface.is_linear)
-               radv_image_alloc_cmask(device, image);
-
-       if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
+       /* Try to enable FMASK for multisampled images. */
+       if (radv_image_can_enable_fmask(image)) {
                radv_image_alloc_fmask(device, image);
-       } else if (vk_format_is_depth(pCreateInfo->format)) {
-
-               radv_image_alloc_htile(device, image);
+       } else {
+               /* Otherwise, try to enable HTILE for depth surfaces. */
+               if (radv_image_can_enable_htile(image) &&
+                   !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
+                       radv_image_alloc_htile(image);
+                       image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+               } else {
+                       image->surface.htile_size = 0;
+               }
        }
 
        if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
@@ -900,15 +973,12 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
        bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
        uint32_t blk_w;
        uint32_t *descriptor;
-       uint32_t *fmask_descriptor;
        uint32_t hw_level = 0;
 
        if (is_storage_image) {
                descriptor = iview->storage_descriptor;
-               fmask_descriptor = iview->storage_fmask_descriptor;
        } else {
                descriptor = iview->descriptor;
-               fmask_descriptor = iview->fmask_descriptor;
        }
 
        assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
@@ -927,7 +997,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
                                   iview->extent.height,
                                   iview->extent.depth,
                                   descriptor,
-                                  fmask_descriptor);
+                                  descriptor + 8);
 
        const struct legacy_surf_level *base_level_info = NULL;
        if (device->physical_device->rad_info.chip_class <= GFX9) {
@@ -1009,6 +1079,9 @@ bool radv_layout_has_htile(const struct radv_image *image,
                            VkImageLayout layout,
                            unsigned queue_mask)
 {
+       if (image->surface.htile_size && image->tc_compatible_htile)
+               return layout != VK_IMAGE_LAYOUT_GENERAL;
+
        return image->surface.htile_size &&
               (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
                layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -1019,6 +1092,9 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                      VkImageLayout layout,
                                      unsigned queue_mask)
 {
+       if (image->surface.htile_size && image->tc_compatible_htile)
+               return layout != VK_IMAGE_LAYOUT_GENERAL;
+
        return image->surface.htile_size &&
               (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
                layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
@@ -1051,11 +1127,15 @@ radv_CreateImage(VkDevice device,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
 {
+       const struct wsi_image_create_info *wsi_info =
+               vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
+       bool scanout = wsi_info && wsi_info->scanout;
+
        return radv_image_create(device,
                                 &(struct radv_image_create_info) {
                                         .vk_info = pCreateInfo,
-                                                .scanout = false,
-                                                },
+                                        .scanout = scanout,
+                                },
                                 pAllocator,
                                 pImage);
 }
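
radv_CreateImage() now derives the scanout flag from the WSI_IMAGE_CREATE_INFO_MESA structure chained into pCreateInfo->pNext rather than hardcoding false. As a reminder of what a vk_find_struct_const()-style lookup amounts to, here is a simplified stand-in (illustrative code, not Mesa's actual helper):

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Walk the pNext chain and return the first extension structure whose
 * sType matches. Every extensible Vulkan struct begins with sType/pNext. */
static const void *
find_struct_in_chain(const void *pnext, VkStructureType wanted)
{
	const struct {
		VkStructureType sType;
		const void *pNext;
	} *iter = pnext;

	while (iter != NULL) {
		if (iter->sType == wanted)
			return iter;
		iter = iter->pNext;
	}
	return NULL;
}
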
@@ -1097,11 +1177,11 @@ void radv_GetImageSubresourceLayout(
                if (image->type == VK_IMAGE_TYPE_3D)
                        pLayout->size *= u_minify(image->info.depth, level);
        } else {
-               pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
+               pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
                pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
-               pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
-               pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
-               pLayout->size = surface->u.legacy.level[level].slice_size;
+               pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+               pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+               pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
                if (image->type == VK_IMAGE_TYPE_3D)
                        pLayout->size *= u_minify(image->info.depth, level);
        }
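
In the last hunk the legacy slice_size field, which the old code used directly as a byte size, is replaced by slice_size_dw, which is stored in dwords, so every use is scaled by 4 and widened to 64 bits before the per-layer multiply. A small self-contained example of why the widening matters (the sizes are illustrative, not taken from the patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* One 16384x16384 layer at 4 bytes per pixel:
	 * slice_size_dw = 16384 * 16384 = 268435456 dwords, i.e. 1 GiB. */
	uint32_t slice_size_dw = 16384u * 16384u;
	uint32_t layer = 4;

	/* Pure 32-bit math: 1 GiB * 4 layers needs 33 bits and wraps to 0. */
	uint32_t wrapped = slice_size_dw * 4u * layer;

	/* Widening first, as the new code does, keeps the full offset. */
	uint64_t offset = (uint64_t)slice_size_dw * 4 * layer;

	assert(wrapped == 0);
	assert(offset == 4294967296ull);
	return 0;
}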