#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
-#include "gfx9d.h"
#include "util/debug.h"
#include "util/u_atomic.h"
+
static unsigned
radv_choose_tiling(struct radv_device *device,
const struct radv_image_create_info *create_info)
if (!vk_format_is_compressed(pCreateInfo->format) &&
!vk_format_is_depth_or_stencil(pCreateInfo->format)
- && device->physical_device->rad_info.chip_class <= VI) {
+ && device->physical_device->rad_info.chip_class <= GFX8) {
/* this causes hangs in some VK CTS tests on GFX9. */
/* Textures with a very small height are recommended to be linear. */
if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
const VkImageCreateInfo *pCreateInfo)
{
/* TC-compat HTILE is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < VI)
+ if (device->physical_device->rad_info.chip_class < GFX8)
return false;
if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
(pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
return false;
+ /* TODO: Implement layout transitions with variable sample locations
+ * before enabling HTILE for depth/stencil images created with this
+ * flag because the depth decompress pass needs to know them.
+ */
+ if (pCreateInfo->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)
+ return false;
+
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
return false;
return true;
}
+/**
+ * Decide whether the surface of an image being created must be flagged as
+ * scanout.
+ *
+ * \param device  device whose chip class selects how the metadata is read
+ * \param info    driver-internal image creation info
+ * \return true when scanout was requested explicitly, or when the attached
+ *         BO metadata describes a scanout layout; false otherwise
+ */
+static bool
+radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
+{
+	const struct radeon_bo_metadata *md = info->bo_metadata;
+
+	/* An explicit request always wins, metadata or not. */
+	if (info->scanout)
+		return true;
+
+	/* No BO metadata attached: nothing else can ask for scanout. */
+	if (md == NULL)
+		return false;
+
+	/* Pre-GFX9 metadata carries the scanout bit directly. */
+	if (device->physical_device->rad_info.chip_class < GFX9)
+		return md->u.legacy.scanout;
+
+	/* GFX9+: swizzle mode 0 is the linear layout; the "% 4 == 2" modes are
+	 * presumably the display swizzle modes - TODO confirm against the
+	 * address library's swizzle mode enumeration.
+	 */
+	return md->u.gfx9.swizzle_mode == 0 ||
+	       md->u.gfx9.swizzle_mode % 4 == 2;
+}
+
static bool
radv_use_dcc_for_image(struct radv_device *device,
const struct radv_image *image,
bool blendable;
/* DCC (Delta Color Compression) is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < VI)
+ if (device->physical_device->rad_info.chip_class < GFX8)
return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
if (pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1)
return false;
- if (create_info->scanout)
+ if (radv_surface_has_scanout(device, create_info))
return false;
/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
return true;
}
+/**
+ * Seed a surface description from the BO metadata attached to the image
+ * creation info, so the layout follows the metadata instead of being chosen
+ * by the driver.
+ *
+ * The array mode derived from the metadata is OR'ed into surface->flags;
+ * other flag bits are left untouched.
+ *
+ * \param device       device whose chip class selects the metadata layout
+ * \param surface      surface to prefill
+ * \param create_info  creation info; create_info->bo_metadata is
+ *                     dereferenced unconditionally, so it must be non-NULL
+ */
+static void
+radv_prefill_surface_from_metadata(struct radv_device *device,
+                                   struct radeon_surf *surface,
+                                   const struct radv_image_create_info *create_info)
+{
+	const struct radeon_bo_metadata *md = create_info->bo_metadata;
+	/* Linear-aligned unless the metadata describes a tiled layout. */
+	unsigned mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
+
+		/* Any non-zero swizzle mode is reported as 2D tiling. */
+		if (md->u.gfx9.swizzle_mode > 0)
+			mode = RADEON_SURF_MODE_2D;
+	} else {
+		/* Copy the legacy tiling parameters over verbatim. */
+		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
+		surface->u.legacy.bankw = md->u.legacy.bankw;
+		surface->u.legacy.bankh = md->u.legacy.bankh;
+		surface->u.legacy.tile_split = md->u.legacy.tile_split;
+		surface->u.legacy.mtilea = md->u.legacy.mtilea;
+		surface->u.legacy.num_banks = md->u.legacy.num_banks;
+
+		/* Macro-tiling takes precedence over micro-tiling. */
+		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+			mode = RADEON_SURF_MODE_2D;
+		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+			mode = RADEON_SURF_MODE_1D;
+	}
+
+	surface->flags |= RADEON_SURF_SET(mode, MODE);
+}
+
static int
radv_init_surface(struct radv_device *device,
const struct radv_image *image,
if (surface->bpe == 3) {
surface->bpe = 4;
}
- surface->flags = RADEON_SURF_SET(array_mode, MODE);
+ if (create_info->bo_metadata) {
+ radv_prefill_surface_from_metadata(device, surface, create_info);
+ } else {
+ surface->flags = RADEON_SURF_SET(array_mode, MODE);
+ }
switch (pCreateInfo->imageType){
case VK_IMAGE_TYPE_1D:
if (!radv_use_dcc_for_image(device, image, create_info, pCreateInfo))
surface->flags |= RADEON_SURF_DISABLE_DCC;
- if (create_info->scanout)
+ if (radv_surface_has_scanout(device, create_info))
surface->flags |= RADEON_SURF_SCANOUT;
+
return 0;
}
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride);
- if (device->physical_device->rad_info.chip_class != VI && stride) {
+ if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
range /= stride;
}
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
- if (chip_class >= VI) {
+ if (chip_class >= GFX8) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
if (!is_storage_image && radv_dcc_enabled(image, first_level)) {
meta_va = gpu_address + image->dcc_offset;
- if (chip_class <= VI)
+ if (chip_class <= GFX8)
meta_va += base_level_info->dcc_offset;
} else if (!is_storage_image &&
radv_image_is_tc_compat_htile(image)) {
if (chip_class >= GFX9) {
state[3] &= C_008F1C_SW_MODE;
- state[4] &= C_008F20_PITCH_GFX9;
+ state[4] &= C_008F20_PITCH;
if (is_stencil) {
state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
- state[4] |= S_008F20_PITCH_GFX9(plane->surface.u.gfx9.stencil.epitch);
+ state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
} else {
state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
- state[4] |= S_008F20_PITCH_GFX9(plane->surface.u.gfx9.surf.epitch);
+ state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
}
state[5] &= C_008F24_META_DATA_ADDRESS &
S_008F24_META_RB_ALIGNED(meta.rb_aligned);
}
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
unsigned pitch = base_level_info->nblk_x * block_width;
unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
state[3] &= C_008F1C_TILING_INDEX;
state[3] |= S_008F1C_TILING_INDEX(index);
- state[4] &= C_008F20_PITCH_GFX6;
- state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
+ state[4] &= C_008F20_PITCH;
+ state[4] |= S_008F20_PITCH(pitch - 1);
}
}
depth = image->info.array_size / 6;
state[0] = 0;
- state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
- S_008F14_NUM_FORMAT_GFX6(num_format));
+ state[1] = (S_008F14_DATA_FORMAT(data_format) |
+ S_008F14_NUM_FORMAT(num_format));
state[2] = (S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1) |
S_008F18_PERF_MOD(4));
/* The last dword is unused by hw. The shader uses it to clear
* bits in the first dword of sampler state.
*/
- if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
+ if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
if (first_level == last_level)
state[7] = C_008F30_MAX_ANISO_RATIO;
else
fmask_state[0] = va >> 8;
fmask_state[0] |= image->fmask.tile_swizzle;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
- S_008F14_DATA_FORMAT_GFX6(fmask_format) |
- S_008F14_NUM_FORMAT_GFX6(num_format);
+ S_008F14_DATA_FORMAT(fmask_format) |
+ S_008F14_NUM_FORMAT(num_format);
fmask_state[2] = S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1);
fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
if (device->physical_device->rad_info.chip_class >= GFX9) {
fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
fmask_state[4] |= S_008F20_DEPTH(last_layer) |
- S_008F20_PITCH_GFX9(image->planes[0].surface.u.gfx9.fmask.epitch);
+ S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
} else {
fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
- S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
+ S_008F20_PITCH(image->fmask.pitch_in_pixels - 1);
fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
} else if (fmask_state)
memcpy(&md->metadata[2], desc, sizeof(desc));
/* Dwords [10:..] contain the mipmap level offsets. */
- if (device->physical_device->rad_info.chip_class <= VI) {
+ if (device->physical_device->rad_info.chip_class <= GFX8) {
for (i = 0; i <= image->info.levels - 1; i++)
md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
md->size_metadata = (11 + image->info.levels - 1) * 4;
radv_query_opaque_metadata(device, image, metadata);
}
+/**
+ * Override the offset and row stride of the first plane of an image with
+ * externally supplied values.
+ *
+ * \param device  device whose chip class selects which surface layout is
+ *                patched
+ * \param image   image whose plane 0 surface is modified in place
+ * \param offset  byte offset; on GFX9+ it replaces surf_offset, on older
+ *                chips it is added to every legacy level offset
+ * \param stride  new row pitch, in the units the surface pitch fields use
+ *                (presumably elements, not bytes - TODO confirm against
+ *                callers); 0 means "keep the computed pitch"
+ */
+void
+radv_image_override_offset_stride(struct radv_device *device,
+                                  struct radv_image *image,
+                                  uint64_t offset, uint32_t stride)
+{
+	struct radeon_surf *surface = &image->planes[0].surface;
+	unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;
+
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		if (stride) {
+			surface->u.gfx9.surf_pitch = stride;
+			surface->u.gfx9.surf_slice_size =
+				(uint64_t)stride * surface->u.gfx9.surf_height * bpe;
+		}
+		surface->u.gfx9.surf_offset = offset;
+	} else {
+		/* Only patch the pitch when one was actually supplied. Writing
+		 * a zero stride here would zero nblk_x and the slice size of
+		 * level 0; the GFX9 path above already skips stride == 0.
+		 */
+		if (stride) {
+			surface->u.legacy.level[0].nblk_x = stride;
+			surface->u.legacy.level[0].slice_size_dw =
+				((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;
+		}
+
+		if (offset) {
+			/* Shift every mip level, not just level 0. */
+			for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+				surface->u.legacy.level[i].offset += offset;
+		}
+	}
+}
+
/* The number of samples can be specified independently of the texture. */
static void
radv_image_get_fmask_info(struct radv_device *device,
image->shareable = vk_find_struct_const(pCreateInfo->pNext,
EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL;
- if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
+ if (!vk_format_is_depth_or_stencil(pCreateInfo->format) &&
+ !radv_surface_has_scanout(device, create_info) && !image->shareable) {
image->info.surf_index = &device->image_mrt_offset_counter;
}
if (device->physical_device->rad_info.chip_class >= GFX9) {
pLayout->offset = plane->offset + surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
- pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+ if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+ /* Adjust the number of bytes between each row because
+ * the pitch is actually the number of components per
+ * row.
+ */
+ pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
+ } else {
+ assert(util_is_power_of_two_nonzero(surface->bpe));
+ pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+ }
+
pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
pLayout->size = surface->u.gfx9.surf_slice_size;