/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
32 #include "radv_radeon_winsys.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
38 unsigned img_format
:9;
40 /* Various formats are only supported with workarounds for vertex fetch,
41 * and some 32_32_32 formats are supported natively, but only for buffers
42 * (possibly with some image support, actually, but no filtering). */
46 #include "gfx10_format_table.h"
49 radv_choose_tiling(struct radv_device
*device
,
50 const VkImageCreateInfo
*pCreateInfo
,
53 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
) {
54 assert(pCreateInfo
->samples
<= 1);
55 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
58 if (!vk_format_is_compressed(format
) &&
59 !vk_format_is_depth_or_stencil(format
)
60 && device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
61 /* this causes hangs in some VK CTS tests on GFX9. */
62 /* Textures with a very small height are recommended to be linear. */
63 if (pCreateInfo
->imageType
== VK_IMAGE_TYPE_1D
||
64 /* Only very thin and long 2D textures should benefit from
66 (pCreateInfo
->extent
.width
> 8 && pCreateInfo
->extent
.height
<= 2))
67 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
70 /* MSAA resources must be 2D tiled. */
71 if (pCreateInfo
->samples
> 1)
72 return RADEON_SURF_MODE_2D
;
74 return RADEON_SURF_MODE_2D
;
78 radv_use_tc_compat_htile_for_image(struct radv_device
*device
,
79 const VkImageCreateInfo
*pCreateInfo
,
82 /* TC-compat HTILE is only available for GFX8+. */
83 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
86 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
89 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
92 if (pCreateInfo
->mipLevels
> 1)
95 /* Do not enable TC-compatible HTILE if the image isn't readable by a
96 * shader because no texture fetches will happen.
98 if (!(pCreateInfo
->usage
& (VK_IMAGE_USAGE_SAMPLED_BIT
|
99 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT
|
100 VK_IMAGE_USAGE_TRANSFER_SRC_BIT
)))
103 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
104 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
106 if (pCreateInfo
->samples
>= 2 &&
107 (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
108 (format
== VK_FORMAT_D32_SFLOAT
&&
109 device
->physical_device
->rad_info
.chip_class
== GFX10
)))
112 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
113 * supports 32-bit. Though, it's possible to enable TC-compat for
114 * 16-bit depth surfaces if no Z planes are compressed.
116 if (format
!= VK_FORMAT_D32_SFLOAT_S8_UINT
&&
117 format
!= VK_FORMAT_D32_SFLOAT
&&
118 format
!= VK_FORMAT_D16_UNORM
)
121 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
122 const struct VkImageFormatListCreateInfo
*format_list
=
123 (const struct VkImageFormatListCreateInfo
*)
124 vk_find_struct_const(pCreateInfo
->pNext
,
125 IMAGE_FORMAT_LIST_CREATE_INFO
);
127 /* We have to ignore the existence of the list if viewFormatCount = 0 */
128 if (format_list
&& format_list
->viewFormatCount
) {
129 /* compatibility is transitive, so we only need to check
130 * one format with everything else.
132 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
133 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
136 if (format
!= format_list
->pViewFormats
[i
])
148 radv_surface_has_scanout(struct radv_device
*device
, const struct radv_image_create_info
*info
)
150 if (info
->bo_metadata
) {
151 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
152 return info
->bo_metadata
->u
.gfx9
.scanout
;
154 return info
->bo_metadata
->u
.legacy
.scanout
;
157 return info
->scanout
;
161 radv_use_dcc_for_image(struct radv_device
*device
,
162 const struct radv_image
*image
,
163 const VkImageCreateInfo
*pCreateInfo
,
166 bool dcc_compatible_formats
;
169 /* DCC (Delta Color Compression) is only available for GFX8+. */
170 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
173 if (device
->instance
->debug_flags
& RADV_DEBUG_NO_DCC
)
176 if (image
->shareable
)
179 /* TODO: Enable DCC for storage images. */
180 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
183 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
186 if (vk_format_is_subsampled(format
) ||
187 vk_format_get_plane_count(format
) > 1)
190 /* TODO: Enable DCC for mipmaps on GFX9+. */
191 if ((pCreateInfo
->arrayLayers
> 1 || pCreateInfo
->mipLevels
> 1) &&
192 device
->physical_device
->rad_info
.chip_class
>= GFX9
)
195 /* Do not enable DCC for mipmapped arrays because performance is worse. */
196 if (pCreateInfo
->arrayLayers
> 1 && pCreateInfo
->mipLevels
> 1)
199 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
200 * 2x can be enabled with an option.
202 if (pCreateInfo
->samples
> 2 ||
203 (pCreateInfo
->samples
== 2 &&
204 !device
->physical_device
->dcc_msaa_allowed
))
207 /* Determine if the formats are DCC compatible. */
208 dcc_compatible_formats
=
209 radv_is_colorbuffer_format_supported(format
,
212 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
213 const struct VkImageFormatListCreateInfo
*format_list
=
214 (const struct VkImageFormatListCreateInfo
*)
215 vk_find_struct_const(pCreateInfo
->pNext
,
216 IMAGE_FORMAT_LIST_CREATE_INFO
);
218 /* We have to ignore the existence of the list if viewFormatCount = 0 */
219 if (format_list
&& format_list
->viewFormatCount
) {
220 /* compatibility is transitive, so we only need to check
221 * one format with everything else. */
222 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
223 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
226 if (!radv_dcc_formats_compatible(format
,
227 format_list
->pViewFormats
[i
]))
228 dcc_compatible_formats
= false;
231 dcc_compatible_formats
= false;
235 if (!dcc_compatible_formats
)
242 radv_use_tc_compat_cmask_for_image(struct radv_device
*device
,
243 struct radv_image
*image
)
245 if (!(device
->instance
->perftest_flags
& RADV_PERFTEST_TC_COMPAT_CMASK
))
248 /* TC-compat CMASK is only available for GFX8+. */
249 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
252 if (image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)
255 if (radv_image_has_dcc(image
))
258 if (!radv_image_has_cmask(image
))
264 static uint32_t si_get_bo_metadata_word1(const struct radv_device
*device
)
266 return (ATI_VENDOR_ID
<< 16) | device
->physical_device
->rad_info
.pci_id
;
270 radv_is_valid_opaque_metadata(const struct radv_device
*device
,
271 const struct radeon_bo_metadata
*md
)
273 if (md
->metadata
[0] != 1 ||
274 md
->metadata
[1] != si_get_bo_metadata_word1(device
))
277 if (md
->size_metadata
< 40)
284 radv_patch_surface_from_metadata(struct radv_device
*device
,
285 struct radeon_surf
*surface
,
286 const struct radeon_bo_metadata
*md
)
288 surface
->flags
= RADEON_SURF_CLR(surface
->flags
, MODE
);
290 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
291 if (md
->u
.gfx9
.swizzle_mode
> 0)
292 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
294 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
296 surface
->u
.gfx9
.surf
.swizzle_mode
= md
->u
.gfx9
.swizzle_mode
;
298 surface
->u
.legacy
.pipe_config
= md
->u
.legacy
.pipe_config
;
299 surface
->u
.legacy
.bankw
= md
->u
.legacy
.bankw
;
300 surface
->u
.legacy
.bankh
= md
->u
.legacy
.bankh
;
301 surface
->u
.legacy
.tile_split
= md
->u
.legacy
.tile_split
;
302 surface
->u
.legacy
.mtilea
= md
->u
.legacy
.mtilea
;
303 surface
->u
.legacy
.num_banks
= md
->u
.legacy
.num_banks
;
305 if (md
->u
.legacy
.macrotile
== RADEON_LAYOUT_TILED
)
306 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
307 else if (md
->u
.legacy
.microtile
== RADEON_LAYOUT_TILED
)
308 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_1D
, MODE
);
310 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
316 radv_patch_image_dimensions(struct radv_device
*device
,
317 struct radv_image
*image
,
318 const struct radv_image_create_info
*create_info
,
319 struct ac_surf_info
*image_info
)
321 unsigned width
= image
->info
.width
;
322 unsigned height
= image
->info
.height
;
325 * minigbm sometimes allocates bigger images which is going to result in
326 * weird strides and other properties. Lets be lenient where possible and
327 * fail it on GFX10 (as we cannot cope there).
329 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
331 if (create_info
->bo_metadata
&&
332 radv_is_valid_opaque_metadata(device
, create_info
->bo_metadata
)) {
333 const struct radeon_bo_metadata
*md
= create_info
->bo_metadata
;
335 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
336 width
= G_00A004_WIDTH_LO(md
->metadata
[3]) +
337 (G_00A008_WIDTH_HI(md
->metadata
[4]) << 2) + 1;
338 height
= S_00A008_HEIGHT(md
->metadata
[4]) + 1;
340 width
= G_008F18_WIDTH(md
->metadata
[4]) + 1;
341 height
= G_008F18_HEIGHT(md
->metadata
[4]) + 1;
345 if (image
->info
.width
== width
&& image
->info
.height
== height
)
348 if (width
< image
->info
.width
|| height
< image
->info
.height
) {
350 "The imported image has smaller dimensions than the internal\n"
351 "dimensions. Using it is going to fail badly, so we reject\n"
353 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
354 image
->info
.width
, image
->info
.height
, width
, height
);
355 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
356 } else if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
358 "Tried to import an image with inconsistent width on GFX10.\n"
359 "As GFX10 has no separate stride fields we cannot cope with\n"
360 "an inconsistency in width and will fail this import.\n"
361 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
362 image
->info
.width
, image
->info
.height
, width
, height
);
363 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
366 "Tried to import an image with inconsistent width on pre-GFX10.\n"
367 "As GFX10 has no separate stride fields we cannot cope with\n"
368 "an inconsistency and would fail on GFX10.\n"
369 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
370 image
->info
.width
, image
->info
.height
, width
, height
);
372 image_info
->width
= width
;
373 image_info
->height
= height
;
379 radv_patch_image_from_extra_info(struct radv_device
*device
,
380 struct radv_image
*image
,
381 const struct radv_image_create_info
*create_info
,
382 struct ac_surf_info
*image_info
)
384 VkResult result
= radv_patch_image_dimensions(device
, image
, create_info
, image_info
);
385 if (result
!= VK_SUCCESS
)
388 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
389 if (create_info
->bo_metadata
) {
390 radv_patch_surface_from_metadata(device
, &image
->planes
[plane
].surface
,
391 create_info
->bo_metadata
);
394 if (radv_surface_has_scanout(device
, create_info
)) {
395 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_SCANOUT
;
396 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_DISABLE_DCC
;
398 image
->info
.surf_index
= NULL
;
405 radv_init_surface(struct radv_device
*device
,
406 const struct radv_image
*image
,
407 struct radeon_surf
*surface
,
409 const VkImageCreateInfo
*pCreateInfo
,
410 VkFormat image_format
)
412 unsigned array_mode
= radv_choose_tiling(device
, pCreateInfo
, image_format
);
413 VkFormat format
= vk_format_get_plane_format(image_format
, plane_id
);
414 const struct vk_format_description
*desc
= vk_format_description(format
);
415 bool is_depth
, is_stencil
;
417 is_depth
= vk_format_has_depth(desc
);
418 is_stencil
= vk_format_has_stencil(desc
);
420 surface
->blk_w
= vk_format_get_blockwidth(format
);
421 surface
->blk_h
= vk_format_get_blockheight(format
);
423 surface
->bpe
= vk_format_get_blocksize(vk_format_depth_only(format
));
424 /* align byte per element on dword */
425 if (surface
->bpe
== 3) {
429 surface
->flags
= RADEON_SURF_SET(array_mode
, MODE
);
431 switch (pCreateInfo
->imageType
){
432 case VK_IMAGE_TYPE_1D
:
433 if (pCreateInfo
->arrayLayers
> 1)
434 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY
, TYPE
);
436 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D
, TYPE
);
438 case VK_IMAGE_TYPE_2D
:
439 if (pCreateInfo
->arrayLayers
> 1)
440 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY
, TYPE
);
442 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D
, TYPE
);
444 case VK_IMAGE_TYPE_3D
:
445 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_3D
, TYPE
);
448 unreachable("unhandled image type");
452 surface
->flags
|= RADEON_SURF_ZBUFFER
;
453 if (radv_use_tc_compat_htile_for_image(device
, pCreateInfo
, image_format
))
454 surface
->flags
|= RADEON_SURF_TC_COMPATIBLE_HTILE
;
458 surface
->flags
|= RADEON_SURF_SBUFFER
;
460 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
461 pCreateInfo
->imageType
== VK_IMAGE_TYPE_3D
&&
462 vk_format_get_blocksizebits(image_format
) == 128 &&
463 vk_format_is_compressed(image_format
))
464 surface
->flags
|= RADEON_SURF_NO_RENDER_TARGET
;
466 if (!radv_use_dcc_for_image(device
, image
, pCreateInfo
, image_format
))
467 surface
->flags
|= RADEON_SURF_DISABLE_DCC
;
472 static inline unsigned
473 si_tile_mode_index(const struct radv_image_plane
*plane
, unsigned level
, bool stencil
)
476 return plane
->surface
.u
.legacy
.stencil_tiling_index
[level
];
478 return plane
->surface
.u
.legacy
.tiling_index
[level
];
481 static unsigned radv_map_swizzle(unsigned swizzle
)
485 return V_008F0C_SQ_SEL_Y
;
487 return V_008F0C_SQ_SEL_Z
;
489 return V_008F0C_SQ_SEL_W
;
491 return V_008F0C_SQ_SEL_0
;
493 return V_008F0C_SQ_SEL_1
;
494 default: /* VK_SWIZZLE_X */
495 return V_008F0C_SQ_SEL_X
;
500 radv_make_buffer_descriptor(struct radv_device
*device
,
501 struct radv_buffer
*buffer
,
507 const struct vk_format_description
*desc
;
509 uint64_t gpu_address
= radv_buffer_get_va(buffer
->bo
);
510 uint64_t va
= gpu_address
+ buffer
->offset
;
511 unsigned num_format
, data_format
;
513 desc
= vk_format_description(vk_format
);
514 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
515 stride
= desc
->block
.bits
/ 8;
519 state
[1] = S_008F04_BASE_ADDRESS_HI(va
>> 32) |
520 S_008F04_STRIDE(stride
);
522 if (device
->physical_device
->rad_info
.chip_class
!= GFX8
&& stride
) {
527 state
[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc
->swizzle
[0])) |
528 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc
->swizzle
[1])) |
529 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc
->swizzle
[2])) |
530 S_008F0C_DST_SEL_W(radv_map_swizzle(desc
->swizzle
[3]));
532 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
533 const struct gfx10_format
*fmt
= gfx10_format_description(vk_format
);
535 /* OOB_SELECT chooses the out-of-bounds check:
536 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
537 * - 1: index >= NUM_RECORDS
538 * - 2: NUM_RECORDS == 0
539 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
540 * else: swizzle_address >= NUM_RECORDS
542 state
[3] |= S_008F0C_FORMAT(fmt
->img_format
) |
543 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET
) |
544 S_008F0C_RESOURCE_LEVEL(1);
546 num_format
= radv_translate_buffer_numformat(desc
, first_non_void
);
547 data_format
= radv_translate_buffer_dataformat(desc
, first_non_void
);
549 assert(data_format
!= V_008F0C_BUF_DATA_FORMAT_INVALID
);
550 assert(num_format
!= ~0);
552 state
[3] |= S_008F0C_NUM_FORMAT(num_format
) |
553 S_008F0C_DATA_FORMAT(data_format
);
558 si_set_mutable_tex_desc_fields(struct radv_device
*device
,
559 struct radv_image
*image
,
560 const struct legacy_surf_level
*base_level_info
,
562 unsigned base_level
, unsigned first_level
,
563 unsigned block_width
, bool is_stencil
,
564 bool is_storage_image
, bool disable_compression
,
567 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
568 uint64_t gpu_address
= image
->bo
? radv_buffer_get_va(image
->bo
) + image
->offset
: 0;
569 uint64_t va
= gpu_address
+ plane
->offset
;
570 enum chip_class chip_class
= device
->physical_device
->rad_info
.chip_class
;
571 uint64_t meta_va
= 0;
572 if (chip_class
>= GFX9
) {
574 va
+= plane
->surface
.u
.gfx9
.stencil_offset
;
576 va
+= plane
->surface
.u
.gfx9
.surf_offset
;
578 va
+= base_level_info
->offset
;
581 if (chip_class
>= GFX9
||
582 base_level_info
->mode
== RADEON_SURF_MODE_2D
)
583 state
[0] |= plane
->surface
.tile_swizzle
;
584 state
[1] &= C_008F14_BASE_ADDRESS_HI
;
585 state
[1] |= S_008F14_BASE_ADDRESS_HI(va
>> 40);
587 if (chip_class
>= GFX8
) {
588 state
[6] &= C_008F28_COMPRESSION_EN
;
590 if (!disable_compression
&& radv_dcc_enabled(image
, first_level
)) {
591 meta_va
= gpu_address
+ image
->dcc_offset
;
592 if (chip_class
<= GFX8
)
593 meta_va
+= base_level_info
->dcc_offset
;
595 unsigned dcc_tile_swizzle
= plane
->surface
.tile_swizzle
<< 8;
596 dcc_tile_swizzle
&= plane
->surface
.dcc_alignment
- 1;
597 meta_va
|= dcc_tile_swizzle
;
598 } else if (!disable_compression
&&
599 radv_image_is_tc_compat_htile(image
)) {
600 meta_va
= gpu_address
+ image
->htile_offset
;
604 state
[6] |= S_008F28_COMPRESSION_EN(1);
605 if (chip_class
<= GFX9
)
606 state
[7] = meta_va
>> 8;
610 if (chip_class
>= GFX10
) {
611 state
[3] &= C_00A00C_SW_MODE
;
614 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
616 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
619 state
[6] &= C_00A018_META_DATA_ADDRESS_LO
&
620 C_00A018_META_PIPE_ALIGNED
;
623 struct gfx9_surf_meta_flags meta
= {
628 if (image
->dcc_offset
)
629 meta
= plane
->surface
.u
.gfx9
.dcc
;
631 state
[6] |= S_00A018_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
632 S_00A018_META_DATA_ADDRESS_LO(meta_va
>> 8);
635 state
[7] = meta_va
>> 16;
636 } else if (chip_class
== GFX9
) {
637 state
[3] &= C_008F1C_SW_MODE
;
638 state
[4] &= C_008F20_PITCH
;
641 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
642 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.stencil
.epitch
);
644 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
645 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.surf
.epitch
);
648 state
[5] &= C_008F24_META_DATA_ADDRESS
&
649 C_008F24_META_PIPE_ALIGNED
&
650 C_008F24_META_RB_ALIGNED
;
652 struct gfx9_surf_meta_flags meta
= {
657 if (image
->dcc_offset
)
658 meta
= plane
->surface
.u
.gfx9
.dcc
;
660 state
[5] |= S_008F24_META_DATA_ADDRESS(meta_va
>> 40) |
661 S_008F24_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
662 S_008F24_META_RB_ALIGNED(meta
.rb_aligned
);
666 unsigned pitch
= base_level_info
->nblk_x
* block_width
;
667 unsigned index
= si_tile_mode_index(plane
, base_level
, is_stencil
);
669 state
[3] &= C_008F1C_TILING_INDEX
;
670 state
[3] |= S_008F1C_TILING_INDEX(index
);
671 state
[4] &= C_008F20_PITCH
;
672 state
[4] |= S_008F20_PITCH(pitch
- 1);
676 static unsigned radv_tex_dim(VkImageType image_type
, VkImageViewType view_type
,
677 unsigned nr_layers
, unsigned nr_samples
, bool is_storage_image
, bool gfx9
)
679 if (view_type
== VK_IMAGE_VIEW_TYPE_CUBE
|| view_type
== VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
)
680 return is_storage_image
? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_CUBE
;
682 /* GFX9 allocates 1D textures as 2D. */
683 if (gfx9
&& image_type
== VK_IMAGE_TYPE_1D
)
684 image_type
= VK_IMAGE_TYPE_2D
;
685 switch (image_type
) {
686 case VK_IMAGE_TYPE_1D
:
687 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY
: V_008F1C_SQ_RSRC_IMG_1D
;
688 case VK_IMAGE_TYPE_2D
:
690 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D_MSAA
;
692 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D
;
693 case VK_IMAGE_TYPE_3D
:
694 if (view_type
== VK_IMAGE_VIEW_TYPE_3D
)
695 return V_008F1C_SQ_RSRC_IMG_3D
;
697 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY
;
699 unreachable("illegal image type");
703 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle
[4])
705 unsigned bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
707 if (swizzle
[3] == VK_SWIZZLE_X
) {
708 /* For the pre-defined border color values (white, opaque
709 * black, transparent black), the only thing that matters is
710 * that the alpha channel winds up in the correct place
711 * (because the RGB channels are all the same) so either of
712 * these enumerations will work.
714 if (swizzle
[2] == VK_SWIZZLE_Y
)
715 bc_swizzle
= V_008F20_BC_SWIZZLE_WZYX
;
717 bc_swizzle
= V_008F20_BC_SWIZZLE_WXYZ
;
718 } else if (swizzle
[0] == VK_SWIZZLE_X
) {
719 if (swizzle
[1] == VK_SWIZZLE_Y
)
720 bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
722 bc_swizzle
= V_008F20_BC_SWIZZLE_XWYZ
;
723 } else if (swizzle
[1] == VK_SWIZZLE_X
) {
724 bc_swizzle
= V_008F20_BC_SWIZZLE_YXWZ
;
725 } else if (swizzle
[2] == VK_SWIZZLE_X
) {
726 bc_swizzle
= V_008F20_BC_SWIZZLE_ZYXW
;
732 bool vi_alpha_is_on_msb(struct radv_device
*device
, VkFormat format
)
734 const struct vk_format_description
*desc
= vk_format_description(format
);
736 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
&& desc
->nr_channels
== 1)
737 return desc
->swizzle
[3] == VK_SWIZZLE_X
;
739 return radv_translate_colorswap(format
, false) <= 1;
742 * Build the sampler view descriptor for a texture (GFX10).
745 gfx10_make_texture_descriptor(struct radv_device
*device
,
746 struct radv_image
*image
,
747 bool is_storage_image
,
748 VkImageViewType view_type
,
750 const VkComponentMapping
*mapping
,
751 unsigned first_level
, unsigned last_level
,
752 unsigned first_layer
, unsigned last_layer
,
753 unsigned width
, unsigned height
, unsigned depth
,
755 uint32_t *fmask_state
)
757 const struct vk_format_description
*desc
;
758 enum vk_swizzle swizzle
[4];
762 desc
= vk_format_description(vk_format
);
763 img_format
= gfx10_format_description(vk_format
)->img_format
;
765 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
766 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
767 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
769 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
772 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
773 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
774 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
776 depth
= image
->info
.array_size
;
777 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
778 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
779 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
780 depth
= image
->info
.array_size
;
781 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
782 depth
= image
->info
.array_size
/ 6;
785 state
[1] = S_00A004_FORMAT(img_format
) |
786 S_00A004_WIDTH_LO(width
- 1);
787 state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
788 S_00A008_HEIGHT(height
- 1) |
789 S_00A008_RESOURCE_LEVEL(1);
790 state
[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
791 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
792 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
793 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
794 S_00A00C_BASE_LEVEL(image
->info
.samples
> 1 ?
796 S_00A00C_LAST_LEVEL(image
->info
.samples
> 1 ?
797 util_logbase2(image
->info
.samples
) :
799 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle
)) |
801 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
802 * to know the total number of layers.
804 state
[4] = S_00A010_DEPTH(type
== V_008F1C_SQ_RSRC_IMG_3D
? depth
- 1 : last_layer
) |
805 S_00A010_BASE_ARRAY(first_layer
);
806 state
[5] = S_00A014_ARRAY_PITCH(0) |
807 S_00A014_MAX_MIP(image
->info
.samples
> 1 ?
808 util_logbase2(image
->info
.samples
) :
809 image
->info
.levels
- 1) |
810 S_00A014_PERF_MOD(4);
814 if (radv_dcc_enabled(image
, first_level
)) {
815 state
[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B
) |
816 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B
) |
817 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
820 /* Initialize the sampler view for FMASK. */
821 if (radv_image_has_fmask(image
)) {
822 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
826 assert(image
->plane_count
== 1);
828 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
830 switch (image
->info
.samples
) {
832 format
= V_008F0C_IMG_FORMAT_FMASK8_S2_F2
;
835 format
= V_008F0C_IMG_FORMAT_FMASK8_S4_F4
;
838 format
= V_008F0C_IMG_FORMAT_FMASK32_S8_F8
;
841 unreachable("invalid nr_samples");
844 fmask_state
[0] = (va
>> 8) | image
->planes
[0].surface
.fmask_tile_swizzle
;
845 fmask_state
[1] = S_00A004_BASE_ADDRESS_HI(va
>> 40) |
846 S_00A004_FORMAT(format
) |
847 S_00A004_WIDTH_LO(width
- 1);
848 fmask_state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
849 S_00A008_HEIGHT(height
- 1) |
850 S_00A008_RESOURCE_LEVEL(1);
851 fmask_state
[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
852 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
853 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
854 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
855 S_00A00C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
) |
856 S_00A00C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
857 fmask_state
[4] = S_00A010_DEPTH(last_layer
) |
858 S_00A010_BASE_ARRAY(first_layer
);
860 fmask_state
[6] = S_00A018_META_PIPE_ALIGNED(1);
862 } else if (fmask_state
)
863 memset(fmask_state
, 0, 8 * 4);
867 * Build the sampler view descriptor for a texture (SI-GFX9)
870 si_make_texture_descriptor(struct radv_device
*device
,
871 struct radv_image
*image
,
872 bool is_storage_image
,
873 VkImageViewType view_type
,
875 const VkComponentMapping
*mapping
,
876 unsigned first_level
, unsigned last_level
,
877 unsigned first_layer
, unsigned last_layer
,
878 unsigned width
, unsigned height
, unsigned depth
,
880 uint32_t *fmask_state
)
882 const struct vk_format_description
*desc
;
883 enum vk_swizzle swizzle
[4];
885 unsigned num_format
, data_format
, type
;
887 desc
= vk_format_description(vk_format
);
889 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
890 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
891 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
893 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
896 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
898 num_format
= radv_translate_tex_numformat(vk_format
, desc
, first_non_void
);
899 if (num_format
== ~0) {
903 data_format
= radv_translate_tex_dataformat(vk_format
, desc
, first_non_void
);
904 if (data_format
== ~0) {
908 /* S8 with either Z16 or Z32 HTILE need a special format. */
909 if (device
->physical_device
->rad_info
.chip_class
== GFX9
&&
910 vk_format
== VK_FORMAT_S8_UINT
&&
911 radv_image_is_tc_compat_htile(image
)) {
912 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
913 data_format
= V_008F14_IMG_DATA_FORMAT_S8_32
;
914 else if (image
->vk_format
== VK_FORMAT_D16_UNORM_S8_UINT
)
915 data_format
= V_008F14_IMG_DATA_FORMAT_S8_16
;
917 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
918 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
919 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
921 depth
= image
->info
.array_size
;
922 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
923 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
924 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
925 depth
= image
->info
.array_size
;
926 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
927 depth
= image
->info
.array_size
/ 6;
930 state
[1] = (S_008F14_DATA_FORMAT(data_format
) |
931 S_008F14_NUM_FORMAT(num_format
));
932 state
[2] = (S_008F18_WIDTH(width
- 1) |
933 S_008F18_HEIGHT(height
- 1) |
934 S_008F18_PERF_MOD(4));
935 state
[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
936 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
937 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
938 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
939 S_008F1C_BASE_LEVEL(image
->info
.samples
> 1 ?
941 S_008F1C_LAST_LEVEL(image
->info
.samples
> 1 ?
942 util_logbase2(image
->info
.samples
) :
944 S_008F1C_TYPE(type
));
946 state
[5] = S_008F24_BASE_ARRAY(first_layer
);
950 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
951 unsigned bc_swizzle
= gfx9_border_color_swizzle(swizzle
);
953 /* Depth is the last accessible layer on Gfx9.
954 * The hw doesn't need to know the total number of layers.
956 if (type
== V_008F1C_SQ_RSRC_IMG_3D
)
957 state
[4] |= S_008F20_DEPTH(depth
- 1);
959 state
[4] |= S_008F20_DEPTH(last_layer
);
961 state
[4] |= S_008F20_BC_SWIZZLE(bc_swizzle
);
962 state
[5] |= S_008F24_MAX_MIP(image
->info
.samples
> 1 ?
963 util_logbase2(image
->info
.samples
) :
964 image
->info
.levels
- 1);
966 state
[3] |= S_008F1C_POW2_PAD(image
->info
.levels
> 1);
967 state
[4] |= S_008F20_DEPTH(depth
- 1);
968 state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
970 if (image
->dcc_offset
) {
971 state
[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
973 /* The last dword is unused by hw. The shader uses it to clear
974 * bits in the first dword of sampler state.
976 if (device
->physical_device
->rad_info
.chip_class
<= GFX7
&& image
->info
.samples
<= 1) {
977 if (first_level
== last_level
)
978 state
[7] = C_008F30_MAX_ANISO_RATIO
;
980 state
[7] = 0xffffffff;
984 /* Initialize the sampler view for FMASK. */
985 if (radv_image_has_fmask(image
)) {
986 uint32_t fmask_format
, num_format
;
987 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
990 assert(image
->plane_count
== 1);
992 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
994 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
995 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK
;
996 switch (image
->info
.samples
) {
998 num_format
= V_008F14_IMG_FMASK_8_2_2
;
1001 num_format
= V_008F14_IMG_FMASK_8_4_4
;
1004 num_format
= V_008F14_IMG_FMASK_32_8_8
;
1007 unreachable("invalid nr_samples");
1010 switch (image
->info
.samples
) {
1012 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2
;
1015 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4
;
1018 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8
;
1022 fmask_format
= V_008F14_IMG_DATA_FORMAT_INVALID
;
1024 num_format
= V_008F14_IMG_NUM_FORMAT_UINT
;
1027 fmask_state
[0] = va
>> 8;
1028 fmask_state
[0] |= image
->planes
[0].surface
.fmask_tile_swizzle
;
1029 fmask_state
[1] = S_008F14_BASE_ADDRESS_HI(va
>> 40) |
1030 S_008F14_DATA_FORMAT(fmask_format
) |
1031 S_008F14_NUM_FORMAT(num_format
);
1032 fmask_state
[2] = S_008F18_WIDTH(width
- 1) |
1033 S_008F18_HEIGHT(height
- 1);
1034 fmask_state
[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
1035 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
1036 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
1037 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
1038 S_008F1C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
1040 fmask_state
[5] = S_008F24_BASE_ARRAY(first_layer
);
1044 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
1045 fmask_state
[3] |= S_008F1C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
);
1046 fmask_state
[4] |= S_008F20_DEPTH(last_layer
) |
1047 S_008F20_PITCH(image
->planes
[0].surface
.u
.gfx9
.fmask
.epitch
);
1048 fmask_state
[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1049 S_008F24_META_RB_ALIGNED(1);
1051 if (radv_image_is_tc_compat_cmask(image
)) {
1052 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1054 fmask_state
[5] |= S_008F24_META_DATA_ADDRESS(va
>> 40);
1055 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1056 fmask_state
[7] |= va
>> 8;
1059 fmask_state
[3] |= S_008F1C_TILING_INDEX(image
->planes
[0].surface
.u
.legacy
.fmask
.tiling_index
);
1060 fmask_state
[4] |= S_008F20_DEPTH(depth
- 1) |
1061 S_008F20_PITCH(image
->planes
[0].surface
.u
.legacy
.fmask
.pitch_in_pixels
- 1);
1062 fmask_state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
1064 if (radv_image_is_tc_compat_cmask(image
)) {
1065 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1067 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1068 fmask_state
[7] |= va
>> 8;
1071 } else if (fmask_state
)
1072 memset(fmask_state
, 0, 8 * 4);
/* Build a texture (image) descriptor, dispatching on chip generation:
 * GFX10+ uses the gfx10 descriptor layout, older chips the SI/GFX6-9 one.
 * Fills 'state' (8 dwords) and, when applicable, 'fmask_state'.
 * NOTE(review): the 'vk_format' and 'state' parameter lines were lost in the
 * source mangling and are reconstructed from the call sites below. */
static void
radv_make_texture_descriptor(struct radv_device *device,
			     struct radv_image *image,
			     bool is_storage_image,
			     VkImageViewType view_type,
			     VkFormat vk_format,
			     const VkComponentMapping *mapping,
			     unsigned first_level, unsigned last_level,
			     unsigned first_layer, unsigned last_layer,
			     unsigned width, unsigned height, unsigned depth,
			     uint32_t *state,
			     uint32_t *fmask_state)
{
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		gfx10_make_texture_descriptor(device, image, is_storage_image,
					      view_type, vk_format, mapping,
					      first_level, last_level,
					      first_layer, last_layer,
					      width, height, depth,
					      state, fmask_state);
	} else {
		si_make_texture_descriptor(device, image, is_storage_image,
					   view_type, vk_format, mapping,
					   first_level, last_level,
					   first_layer, last_layer,
					   width, height, depth,
					   state, fmask_state);
	}
}
/* Serialize an opaque metadata blob describing the image into *md, used when
 * sharing the image with other processes/drivers. Only single-plane images
 * are supported (asserted below). */
static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	assert(image->plane_count == 1);

	/* Metadata image format format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */

	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	/* Build a full-resource descriptor with an identity swizzle. */
	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
/* Populate *metadata with the tiling/layout information of plane 0 so the
 * buffer can be exported; appends the opaque metadata blob at the end.
 * GFX9+ only needs the swizzle mode and scanout flag; older chips export
 * the full legacy tiling parameters. */
void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->planes[0].surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
		metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	} else {
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		/* Row pitch in bytes. */
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}
	radv_query_opaque_metadata(device, image, metadata);
}
/* Thin wrapper: let the common AMD surface code re-derive the layout of
 * plane 0 for an externally imposed base offset and row stride (used for
 * imported/external images). */
static void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	ac_surface_override_offset_stride(&device->physical_device->rad_info,
	                                  &image->planes[0].surface,
	                                  image->info.levels, offset, stride);
}
/* Append FMASK metadata storage to the image: place it at the next aligned
 * offset past the current image size and grow size/alignment accordingly. */
static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;

	image->fmask_offset = align64(image->size, fmask_alignment);
	image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
	image->alignment = MAX2(image->alignment, fmask_alignment);
}
/* Append CMASK metadata storage to the image, plus 8 bytes for the fast
 * clear values if no other metadata has claimed the clear-value slot yet. */
static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
	unsigned cmask_size = image->planes[0].surface.cmask_size;
	uint32_t clear_value_size = 0;

	if (!cmask_size)
		return;

	assert(cmask_alignment);

	image->cmask_offset = align64(image->size, cmask_alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask_offset + cmask_size;
		clear_value_size = 8;
	}
	image->size = image->cmask_offset + cmask_size + clear_value_size;
	image->alignment = MAX2(image->alignment, cmask_alignment);
}
/* Append DCC metadata storage to the image, followed by per-mip driver
 * metadata: 8 bytes of clear values, 8 bytes of FCE predicate and 8 bytes
 * of DCC predicate per level (24 bytes/level total). */
static void
radv_image_alloc_dcc(struct radv_image *image)
{
	assert(image->plane_count == 1);

	image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
	/* + 24 for storing the clear values + fce pred + dcc pred for each mip */
	image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
	image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
	image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
	image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
	image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}
/* Append HTILE (depth metadata) storage to the image plus 8 bytes of clear
 * values per mip level, and, when the TC-compat-zrange hardware bug applies,
 * an extra 4 bytes per level of ZRANGE_PRECISION workaround metadata. */
static void
radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
{
	image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
	image->size = image->clear_value_offset + image->info.levels * 8;
	if (radv_image_is_tc_compat_htile(image) &&
	    device->physical_device->rad_info.has_tc_compat_zrange_bug) {
		/* Metadata for the TC-compatible HTILE hardware bug which
		 * have to be fixed by updating ZRANGE_PRECISION when doing
		 * fast depth clears to 0.0f.
		 */
		image->tc_compat_zrange_offset = image->size;
		image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
	}
	image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
}
/* Common precondition for DCC and CMASK: the image must be a color
 * attachment that is effectively single-queue-owned, and not so small that
 * the fast-clear eliminate pass would cost more than it saves. */
static bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}
/* Whether DCC can be enabled for this image: the generic color-metadata
 * check must pass, the surface layout must have DCC, and on GFX8 layered
 * images the per-slice DCC size must match the fast-clear size. */
static bool
radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
{
	if (!radv_image_can_enable_dcc_or_cmask(image) ||
	    !radv_image_has_dcc(image))
		return false;

	/* On GFX8, DCC layers can be interleaved and it's currently only
	 * enabled if slice size is equal to the per slice fast clear size
	 * because the driver assumes that portions of multiple layers are
	 * contiguous during fast clears.
	 */
	if (image->info.array_size > 1) {
		const struct legacy_surf_level *surf_level =
			&image->planes[0].surface.u.legacy.level[0];

		assert(device->physical_device->rad_info.chip_class == GFX8);

		if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
			return false;
	}

	return true;
}
/* Whether CMASK can be enabled: rejects single-sample 128-bit formats,
 * then requires the common color-metadata check plus a single-mip,
 * non-3D, tiled surface. */
static bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128 bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1 &&
	       !image->planes[0].surface.is_linear;
}
1325 radv_image_can_enable_fmask(struct radv_image
*image
)
1327 return image
->info
.samples
> 1 &&
1328 image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
;
1332 radv_image_can_enable_htile(struct radv_image
*image
)
1334 return radv_image_has_htile(image
) &&
1335 image
->info
.levels
== 1 &&
1336 image
->info
.width
* image
->info
.height
>= 8 * 8;
1339 static void radv_image_disable_dcc(struct radv_image
*image
)
1341 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1342 image
->planes
[i
].surface
.dcc_size
= 0;
1345 static void radv_image_disable_htile(struct radv_image
*image
)
1347 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1348 image
->planes
[i
].surface
.htile_size
= 0;
/* Compute the final memory layout of an image: initialize every plane's
 * surface, concatenate the planes, then (unless disabled) append DCC/CMASK/
 * FMASK/HTILE metadata as applicable. Sets image->size and image->alignment. */
VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			/* Chroma planes of multi-planar formats are subsampled. */
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		image->alignment = image->planes[plane].surface.surf_alignment;

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	if (!create_info.no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(device, image)) {
			radv_image_alloc_dcc(image);
			if (image->info.samples > 1) {
				/* CMASK should be enabled because DCC fast
				 * clear with MSAA needs it.
				 */
				assert(radv_image_can_enable_cmask(image));
				radv_image_alloc_cmask(device, image);
			}
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			radv_image_disable_dcc(image);
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);

			if (radv_use_tc_compat_cmask_for_image(device, image))
				image->tc_compatible_cmask = true;
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
				radv_image_alloc_htile(device, image);
			} else {
				radv_image_disable_htile(image);
			}
		}
	} else {
		radv_image_disable_dcc(image);
		radv_image_disable_htile(image);
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
/* Create a radv_image object: allocate the struct (with trailing per-plane
 * storage), copy the creation parameters, initialize each plane's surface
 * description, and — unless the layout is delayed for an Android hardware
 * buffer — compute the full layout and, for sparse images, create a virtual
 * BO backing. NOTE(review): the ANDROID preprocessor guards and a few
 * dropped condition lines are reconstructed from the surrounding code. */
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
	                                                      pCreateInfo->format);
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
		                     EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;

	image->shareable = external_info;
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	/* Android hardware buffers get their layout at bind time. */
	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);

		/* Sparse images are backed by a virtual address range only. */
		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
		                                      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->vk.alloc, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}
/* Fill one plane's hardware descriptor for an image view (sampled or
 * storage variant selected by is_storage_image), then patch in the
 * mutable (mip/tiling dependent) fields. */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	/* Block width in the view format's units. */
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	/* On GFX9+ the base mip goes into the descriptor itself. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width  / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}
1593 radv_plane_from_aspect(VkImageAspectFlags mask
)
1596 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1598 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
/* Return the format to use for a given image aspect: the per-plane format
 * for plane aspects, the stencil- or depth-only variant for depth/stencil
 * aspects, and the image's own format otherwise. */
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_0_BIT:
		return image->planes[0].format;
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return image->planes[1].format;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return image->planes[2].format;
	case VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_stencil_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT:
		return vk_format_depth_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_depth_only(image->vk_format);
	default:
		return image->vk_format;
	}
}
/* Initialize an image view: validate the subresource range, resolve the
 * view format (aspect/Android adjustments), compute the view extent
 * (including the GFX9 compressed->uncompressed mip-chain workaround), and
 * build the sampled + storage descriptors for each selected plane. */
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+ descriptors take the base level size; the HW minifies. */
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width  = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth  = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		/* Sampled descriptor... */
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		/* ...and storage descriptor for the same plane. */
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}
/* Whether the image keeps HTILE compression in the given layout.
 * TC-compatible HTILE may stay compressed even in GENERAL when outside a
 * render loop and the image is not used for storage. */
bool radv_layout_is_htile_compressed(const struct radv_image *image,
			             VkImageLayout layout,
				     bool in_render_loop,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image)) {
		if (layout == VK_IMAGE_LAYOUT_GENERAL &&
		    !in_render_loop &&
		    !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
			/* It should be safe to enable TC-compat HTILE with
			 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
			 * loop and if the image doesn't have the storage bit
			 * set. This improves performance for apps that use
			 * GENERAL for the main depth pass because this allows
			 * compression and this reduces the number of
			 * decompressions from/to GENERAL.
			 */
			return true;
		}

		return layout != VK_IMAGE_LAYOUT_GENERAL;
	}

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
		layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
		(layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	         queue_mask == (1u << RADV_QUEUE_GENERAL)));
}
1781 bool radv_layout_can_fast_clear(const struct radv_image
*image
,
1782 VkImageLayout layout
,
1783 bool in_render_loop
,
1784 unsigned queue_mask
)
1786 return layout
== VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
;
/* Whether the image keeps DCC compression in the given layout: never for a
 * compute-queue transfer destination (no compressed image stores), and
 * never in GENERAL. */
bool radv_layout_dcc_compressed(const struct radv_device *device,
				const struct radv_image *image,
			        VkImageLayout layout,
				bool in_render_loop,
			        unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
		return false;

	return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}
1804 unsigned radv_image_queue_family_mask(const struct radv_image
*image
, uint32_t family
, uint32_t queue_family
)
1806 if (!image
->exclusive
)
1807 return image
->queue_family_mask
;
1808 if (family
== VK_QUEUE_FAMILY_EXTERNAL
||
1809 family
== VK_QUEUE_FAMILY_FOREIGN_EXT
)
1810 return (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1811 if (family
== VK_QUEUE_FAMILY_IGNORED
)
1812 return 1u << queue_family
;
1813 return 1u << family
;
/* vkCreateImage entry point: route Android gralloc images to the gralloc
 * path, otherwise create a regular image, forwarding the WSI scanout hint.
 * NOTE(review): the ANDROID preprocessor guards and the tail of the
 * compound literal were lost in the source mangling and are reconstructed. */
VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
		                              pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}
/* vkDestroyImage entry point: release the sparse virtual BO and any
 * driver-owned memory (Android images), then free the object. */
void
radv_DestroyImage(VkDevice _device, VkImage _image,
		  const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (!image)
		return;

	/* Sparse images own a virtual BO created in radv_image_create(). */
	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(image->bo);

	if (image->owned_memory != VK_NULL_HANDLE)
		radv_FreeMemory(_device, image->owned_memory, pAllocator);

	vk_object_base_finish(&image->base);
	vk_free2(&device->vk.alloc, pAllocator, image);
}
/* vkGetImageSubresourceLayout entry point: report offset/pitches/size of
 * one mip level + layer of one plane, using the gfx9+ or legacy surface
 * layout as appropriate. */
void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* Tiled gfx9 surfaces keep all levels at offset 0 of the slice. */
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		/* Legacy layout stores slice size in dwords. */
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
/* vkCreateImageView entry point: allocate and initialize a radv_image_view. */
VkResult
radv_CreateImageView(VkDevice _device,
		     const VkImageViewCreateInfo *pCreateInfo,
		     const VkAllocationCallbacks *pAllocator,
		     VkImageView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_image_view *view;

	view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (view == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &view->base,
			    VK_OBJECT_TYPE_IMAGE_VIEW);

	radv_image_view_init(view, device, pCreateInfo, NULL);

	*pView = radv_image_view_to_handle(view);

	return VK_SUCCESS;
}
/* vkDestroyImageView entry point. NULL handles are a no-op per spec. */
void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
		      const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image_view, iview, _iview);

	if (!iview)
		return;

	vk_object_base_finish(&iview->base);
	vk_free2(&device->vk.alloc, pAllocator, iview);
}
/* Initialize a buffer view: resolve VK_WHOLE_SIZE to the remaining buffer
 * range and build the hardware buffer descriptor in view->state. */
void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

	view->bo = buffer->bo;
	view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
		buffer->size - pCreateInfo->offset : pCreateInfo->range;
	view->vk_format = pCreateInfo->format;

	radv_make_buffer_descriptor(device, buffer, view->vk_format,
				    pCreateInfo->offset, view->range, view->state);
}
/* vkCreateBufferView entry point: allocate and initialize a radv_buffer_view. */
VkResult
radv_CreateBufferView(VkDevice _device,
		      const VkBufferViewCreateInfo *pCreateInfo,
		      const VkAllocationCallbacks *pAllocator,
		      VkBufferView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer_view *view;

	view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!view)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &view->base,
			    VK_OBJECT_TYPE_BUFFER_VIEW);

	radv_buffer_view_init(view, device, pCreateInfo);

	*pView = radv_buffer_view_to_handle(view);

	return VK_SUCCESS;
}
/* vkDestroyBufferView entry point. NULL handles are a no-op per spec. */
void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
		       const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

	if (!view)
		return;

	vk_object_base_finish(&view->base);
	vk_free2(&device->vk.alloc, pAllocator, view);
}