radv: Use bo metadata for imported image tiling on Android.
[mesa.git] / src / amd / vulkan / radv_image.c
index 92409d147f1f3e3263f60c78bf9a977f0a83f34c..afb426fc3c588b4c02be54446decf62590f036e4 100644 (file)
@@ -31,9 +31,9 @@
 #include "vk_util.h"
 #include "radv_radeon_winsys.h"
 #include "sid.h"
-#include "gfx9d.h"
 #include "util/debug.h"
 #include "util/u_atomic.h"
+
 static unsigned
 radv_choose_tiling(struct radv_device *device,
                   const struct radv_image_create_info *create_info)
@@ -47,7 +47,7 @@ radv_choose_tiling(struct radv_device *device,
 
        if (!vk_format_is_compressed(pCreateInfo->format) &&
            !vk_format_is_depth_or_stencil(pCreateInfo->format)
-           && device->physical_device->rad_info.chip_class <= VI) {
+           && device->physical_device->rad_info.chip_class <= GFX8) {
                /* this causes hangs in some VK CTS tests on GFX9. */
                /* Textures with a very small height are recommended to be linear. */
                if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
@@ -69,13 +69,20 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device,
                                   const VkImageCreateInfo *pCreateInfo)
 {
        /* TC-compat HTILE is only available for GFX8+. */
-       if (device->physical_device->rad_info.chip_class < VI)
+       if (device->physical_device->rad_info.chip_class < GFX8)
                return false;
 
        if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
            (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
                return false;
 
+       /* TODO: Implement layout transitions with variable sample locations
+        * before enabling HTILE for depth/stencil images created with this
+        * flag, because the depth decompress pass needs to know them.
+        */
+       if (pCreateInfo->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)
+               return false;
+
        if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
                return false;
 
@@ -120,6 +127,22 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device,
        return true;
 }
 
+static bool
+radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
+{ /* Reports whether the image counts as scanout, per the create info or its BO metadata. */
+       if (info->scanout)
+               return true;
+       /* Not requested explicitly: consult the BO metadata, when there is any. */
+       if (!info->bo_metadata)
+               return false;
+       /* NOTE(review): presumably swizzle mode 0 is linear and (mode % 4) == 2 selects the display modes -- confirm. */
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               return info->bo_metadata->u.gfx9.swizzle_mode == 0 || info->bo_metadata->u.gfx9.swizzle_mode % 4 == 2;
+       } else {
+               return info->bo_metadata->u.legacy.scanout;
+       }
+}
+
 static bool
 radv_use_dcc_for_image(struct radv_device *device,
                       const struct radv_image *image,
@@ -130,7 +153,7 @@ radv_use_dcc_for_image(struct radv_device *device,
        bool blendable;
 
        /* DCC (Delta Color Compression) is only available for GFX8+. */
-       if (device->physical_device->rad_info.chip_class < VI)
+       if (device->physical_device->rad_info.chip_class < GFX8)
                return false;
 
        if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
@@ -157,7 +180,7 @@ radv_use_dcc_for_image(struct radv_device *device,
        if (pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1)
                return false;
 
-       if (create_info->scanout)
+       if (radv_surface_has_scanout(device, create_info))
                return false;
 
        /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
@@ -199,6 +222,37 @@ radv_use_dcc_for_image(struct radv_device *device,
        return true;
 }
 
+static void
+radv_prefill_surface_from_metadata(struct radv_device *device,
+                                   struct radeon_surf *surface,
+                                   const struct radv_image_create_info *create_info)
+{ /* Seeds surface's tiling mode and tiling parameters from create_info->bo_metadata. */
+       const struct radeon_bo_metadata *md = create_info->bo_metadata; /* dereferenced below without a NULL check */
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               if (md->u.gfx9.swizzle_mode > 0)
+                       surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+               else
+                       surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
+               /* The mode is OR-ed into flags above; the swizzle mode itself is copied verbatim. */
+               surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
+       } else {
+               surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
+               surface->u.legacy.bankw = md->u.legacy.bankw;
+               surface->u.legacy.bankh = md->u.legacy.bankh;
+               surface->u.legacy.tile_split = md->u.legacy.tile_split;
+               surface->u.legacy.mtilea = md->u.legacy.mtilea;
+               surface->u.legacy.num_banks = md->u.legacy.num_banks;
+               /* Macrotiling takes precedence over microtiling; anything else is linear-aligned. */
+               if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+                       surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+               else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+                       surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
+               else
+                       surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
+
+       }
+}
+
 static int
 radv_init_surface(struct radv_device *device,
                  const struct radv_image *image,
@@ -223,7 +277,11 @@ radv_init_surface(struct radv_device *device,
        if (surface->bpe == 3) {
                surface->bpe = 4;
        }
-       surface->flags = RADEON_SURF_SET(array_mode, MODE);
+       if (create_info->bo_metadata) {
+               radv_prefill_surface_from_metadata(device, surface, create_info);
+       } else {
+               surface->flags = RADEON_SURF_SET(array_mode, MODE);
+       }
 
        switch (pCreateInfo->imageType){
        case VK_IMAGE_TYPE_1D:
@@ -265,8 +323,9 @@ radv_init_surface(struct radv_device *device,
        if (!radv_use_dcc_for_image(device, image, create_info, pCreateInfo))
                surface->flags |= RADEON_SURF_DISABLE_DCC;
 
-       if (create_info->scanout)
+       if (radv_surface_has_scanout(device, create_info))
                surface->flags |= RADEON_SURF_SCANOUT;
+
        return 0;
 }
 
@@ -328,7 +387,7 @@ radv_make_buffer_descriptor(struct radv_device *device,
        state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                S_008F04_STRIDE(stride);
 
-       if (device->physical_device->rad_info.chip_class != VI && stride) {
+       if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
                range /= stride;
        }
 
@@ -370,12 +429,12 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
        state[1] &= C_008F14_BASE_ADDRESS_HI;
        state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
 
-       if (chip_class >= VI) {
+       if (chip_class >= GFX8) {
                state[6] &= C_008F28_COMPRESSION_EN;
                state[7] = 0;
                if (!is_storage_image && radv_dcc_enabled(image, first_level)) {
                        meta_va = gpu_address + image->dcc_offset;
-                       if (chip_class <= VI)
+                       if (chip_class <= GFX8)
                                meta_va += base_level_info->dcc_offset;
                } else if (!is_storage_image &&
                           radv_image_is_tc_compat_htile(image)) {
@@ -391,14 +450,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 
        if (chip_class >= GFX9) {
                state[3] &= C_008F1C_SW_MODE;
-               state[4] &= C_008F20_PITCH_GFX9;
+               state[4] &= C_008F20_PITCH;
 
                if (is_stencil) {
                        state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
-                       state[4] |= S_008F20_PITCH_GFX9(plane->surface.u.gfx9.stencil.epitch);
+                       state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
                } else {
                        state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
-                       state[4] |= S_008F20_PITCH_GFX9(plane->surface.u.gfx9.surf.epitch);
+                       state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
                }
 
                state[5] &= C_008F24_META_DATA_ADDRESS &
@@ -417,14 +476,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
                                    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
                }
        } else {
-               /* SI-CI-VI */
+               /* GFX6-GFX8 */
                unsigned pitch = base_level_info->nblk_x * block_width;
                unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
 
                state[3] &= C_008F1C_TILING_INDEX;
                state[3] |= S_008F1C_TILING_INDEX(index);
-               state[4] &= C_008F20_PITCH_GFX6;
-               state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
+               state[4] &= C_008F20_PITCH;
+               state[4] |= S_008F20_PITCH(pitch - 1);
        }
 }
 
@@ -548,8 +607,8 @@ si_make_texture_descriptor(struct radv_device *device,
                depth = image->info.array_size / 6;
 
        state[0] = 0;
-       state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
-                   S_008F14_NUM_FORMAT_GFX6(num_format));
+       state[1] = (S_008F14_DATA_FORMAT(data_format) |
+                   S_008F14_NUM_FORMAT(num_format));
        state[2] = (S_008F18_WIDTH(width - 1) |
                    S_008F18_HEIGHT(height - 1) |
                    S_008F18_PERF_MOD(4));
@@ -596,7 +655,7 @@ si_make_texture_descriptor(struct radv_device *device,
                /* The last dword is unused by hw. The shader uses it to clear
                 * bits in the first dword of sampler state.
                 */
-               if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
+               if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
                        if (first_level == last_level)
                                state[7] = C_008F30_MAX_ANISO_RATIO;
                        else
@@ -650,8 +709,8 @@ si_make_texture_descriptor(struct radv_device *device,
                fmask_state[0] = va >> 8;
                fmask_state[0] |= image->fmask.tile_swizzle;
                fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
-                       S_008F14_DATA_FORMAT_GFX6(fmask_format) |
-                       S_008F14_NUM_FORMAT_GFX6(num_format);
+                       S_008F14_DATA_FORMAT(fmask_format) |
+                       S_008F14_NUM_FORMAT(num_format);
                fmask_state[2] = S_008F18_WIDTH(width - 1) |
                        S_008F18_HEIGHT(height - 1);
                fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
@@ -667,13 +726,13 @@ si_make_texture_descriptor(struct radv_device *device,
                if (device->physical_device->rad_info.chip_class >= GFX9) {
                        fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
                        fmask_state[4] |= S_008F20_DEPTH(last_layer) |
-                                         S_008F20_PITCH_GFX9(image->planes[0].surface.u.gfx9.fmask.epitch);
+                                         S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
                        fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
                                          S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
                } else {
                        fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
                        fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
-                               S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
+                               S_008F20_PITCH(image->fmask.pitch_in_pixels - 1);
                        fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
                }
        } else if (fmask_state)
@@ -725,7 +784,7 @@ radv_query_opaque_metadata(struct radv_device *device,
        memcpy(&md->metadata[2], desc, sizeof(desc));
 
        /* Dwords [10:..] contain the mipmap level offsets. */
-       if (device->physical_device->rad_info.chip_class <= VI) {
+       if (device->physical_device->rad_info.chip_class <= GFX8) {
                for (i = 0; i <= image->info.levels - 1; i++)
                        md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
                md->size_metadata = (11 + image->info.levels - 1) * 4;
@@ -760,6 +819,34 @@ radv_init_metadata(struct radv_device *device,
        radv_query_opaque_metadata(device, image, metadata);
 }
 
+void
+radv_image_override_offset_stride(struct radv_device *device,
+                                  struct radv_image *image,
+                                  uint64_t offset, uint32_t stride)
+{ /* Overrides plane 0's surface offset and row stride; a stride of 0 keeps the existing pitch. */
+       struct radeon_surf *surface = &image->planes[0].surface;
+       unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8; /* block size in bytes */
+
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               if (stride) {
+                       surface->u.gfx9.surf_pitch = stride;
+                       surface->u.gfx9.surf_slice_size =
+                               (uint64_t)stride * surface->u.gfx9.surf_height * bpe;
+               }
+               surface->u.gfx9.surf_offset = offset; /* stored as-is, even when 0 */
+       } else {
+               if (stride) { /* stride == 0 must not zero nblk_x / slice_size_dw; mirrors the GFX9 guard */
+                       surface->u.legacy.level[0].nblk_x = stride;
+                       surface->u.legacy.level[0].slice_size_dw =
+                               ((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;
+               }
+               if (offset) { /* added to every mip level entry, not only level 0 */
+                       for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+                               surface->u.legacy.level[i].offset += offset;
+               }
+       }
+}
+
 /* The number of samples can be specified independently of the texture. */
 static void
 radv_image_get_fmask_info(struct radv_device *device,
@@ -1022,7 +1109,8 @@ radv_image_create(VkDevice _device,
 
        image->shareable = vk_find_struct_const(pCreateInfo->pNext,
                                                EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL;
-       if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
+       if (!vk_format_is_depth_or_stencil(pCreateInfo->format) &&
+           !radv_surface_has_scanout(device, create_info) && !image->shareable) {
                image->info.surf_index = &device->image_mrt_offset_counter;
        }
 
@@ -1428,7 +1516,19 @@ void radv_GetImageSubresourceLayout(
 
        if (device->physical_device->rad_info.chip_class >= GFX9) {
                pLayout->offset = plane->offset + surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
-               pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+               if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+                   image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+                   image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+                       /* Adjust the number of bytes between each row because
+                        * the pitch is actually the number of components per
+                        * row.
+                        */
+                       pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
+               } else {
+                       assert(util_is_power_of_two_nonzero(surface->bpe));
+                       pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+               }
+
                pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
                pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
                pLayout->size = surface->u.gfx9.surf_slice_size;