2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
32 #include "radv_radeon_winsys.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
38 radv_choose_tiling(struct radv_device
*device
,
39 const VkImageCreateInfo
*pCreateInfo
,
42 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
) {
43 assert(pCreateInfo
->samples
<= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
47 if (!vk_format_is_compressed(format
) &&
48 !vk_format_is_depth_or_stencil(format
)
49 && device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
50 /* this causes hangs in some VK CTS tests on GFX9. */
51 /* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo
->imageType
== VK_IMAGE_TYPE_1D
||
53 /* Only very thin and long 2D textures should benefit from
55 (pCreateInfo
->extent
.width
> 8 && pCreateInfo
->extent
.height
<= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
59 /* MSAA resources must be 2D tiled. */
60 if (pCreateInfo
->samples
> 1)
61 return RADEON_SURF_MODE_2D
;
63 return RADEON_SURF_MODE_2D
;
67 radv_use_tc_compat_htile_for_image(struct radv_device
*device
,
68 const VkImageCreateInfo
*pCreateInfo
,
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
75 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
) ||
76 (pCreateInfo
->flags
& VK_IMAGE_CREATE_EXTENDED_USAGE_BIT
))
79 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
82 if (pCreateInfo
->mipLevels
> 1)
85 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
86 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
88 if (pCreateInfo
->samples
>= 2 &&
89 (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
90 (format
== VK_FORMAT_D32_SFLOAT
&&
91 device
->physical_device
->rad_info
.chip_class
== GFX10
)))
94 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
95 * supports 32-bit. Though, it's possible to enable TC-compat for
96 * 16-bit depth surfaces if no Z planes are compressed.
98 if (format
!= VK_FORMAT_D32_SFLOAT_S8_UINT
&&
99 format
!= VK_FORMAT_D32_SFLOAT
&&
100 format
!= VK_FORMAT_D16_UNORM
)
103 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
104 const struct VkImageFormatListCreateInfo
*format_list
=
105 (const struct VkImageFormatListCreateInfo
*)
106 vk_find_struct_const(pCreateInfo
->pNext
,
107 IMAGE_FORMAT_LIST_CREATE_INFO
);
109 /* We have to ignore the existence of the list if viewFormatCount = 0 */
110 if (format_list
&& format_list
->viewFormatCount
) {
111 /* compatibility is transitive, so we only need to check
112 * one format with everything else.
114 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
115 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
118 if (format
!= format_list
->pViewFormats
[i
])
130 radv_surface_has_scanout(struct radv_device
*device
, const struct radv_image_create_info
*info
)
132 if (info
->bo_metadata
) {
133 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
134 return info
->bo_metadata
->u
.gfx9
.scanout
;
136 return info
->bo_metadata
->u
.legacy
.scanout
;
139 return info
->scanout
;
143 radv_use_dcc_for_image(struct radv_device
*device
,
144 const struct radv_image
*image
,
145 const VkImageCreateInfo
*pCreateInfo
,
148 bool dcc_compatible_formats
;
151 /* DCC (Delta Color Compression) is only available for GFX8+. */
152 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
155 if (device
->instance
->debug_flags
& RADV_DEBUG_NO_DCC
)
158 if (image
->shareable
)
161 /* TODO: Enable DCC for storage images. */
162 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
) ||
163 (pCreateInfo
->flags
& VK_IMAGE_CREATE_EXTENDED_USAGE_BIT
))
166 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
169 if (vk_format_is_subsampled(format
) ||
170 vk_format_get_plane_count(format
) > 1)
173 /* TODO: Enable DCC for mipmaps on GFX9+. */
174 if ((pCreateInfo
->arrayLayers
> 1 || pCreateInfo
->mipLevels
> 1) &&
175 device
->physical_device
->rad_info
.chip_class
>= GFX9
)
178 /* Do not enable DCC for mipmapped arrays because performance is worse. */
179 if (pCreateInfo
->arrayLayers
> 1 && pCreateInfo
->mipLevels
> 1)
182 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
183 * 2x can be enabled with an option.
185 if (pCreateInfo
->samples
> 2 ||
186 (pCreateInfo
->samples
== 2 &&
187 !device
->physical_device
->dcc_msaa_allowed
))
190 /* Determine if the formats are DCC compatible. */
191 dcc_compatible_formats
=
192 radv_is_colorbuffer_format_supported(format
,
195 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
196 const struct VkImageFormatListCreateInfo
*format_list
=
197 (const struct VkImageFormatListCreateInfo
*)
198 vk_find_struct_const(pCreateInfo
->pNext
,
199 IMAGE_FORMAT_LIST_CREATE_INFO
);
201 /* We have to ignore the existence of the list if viewFormatCount = 0 */
202 if (format_list
&& format_list
->viewFormatCount
) {
203 /* compatibility is transitive, so we only need to check
204 * one format with everything else. */
205 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
206 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
209 if (!radv_dcc_formats_compatible(format
,
210 format_list
->pViewFormats
[i
]))
211 dcc_compatible_formats
= false;
214 dcc_compatible_formats
= false;
218 if (!dcc_compatible_formats
)
225 radv_use_tc_compat_cmask_for_image(struct radv_device
*device
,
226 struct radv_image
*image
)
228 if (!(device
->instance
->perftest_flags
& RADV_PERFTEST_TC_COMPAT_CMASK
))
231 /* TC-compat CMASK is only available for GFX8+. */
232 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
235 if (image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)
238 if (radv_image_has_dcc(image
))
241 if (!radv_image_has_cmask(image
))
247 static uint32_t si_get_bo_metadata_word1(const struct radv_device
*device
)
249 return (ATI_VENDOR_ID
<< 16) | device
->physical_device
->rad_info
.pci_id
;
253 radv_is_valid_opaque_metadata(const struct radv_device
*device
,
254 const struct radeon_bo_metadata
*md
)
256 if (md
->metadata
[0] != 1 ||
257 md
->metadata
[1] != si_get_bo_metadata_word1(device
))
260 if (md
->size_metadata
< 40)
267 radv_patch_surface_from_metadata(struct radv_device
*device
,
268 struct radeon_surf
*surface
,
269 const struct radeon_bo_metadata
*md
)
271 surface
->flags
= RADEON_SURF_CLR(surface
->flags
, MODE
);
273 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
274 if (md
->u
.gfx9
.swizzle_mode
> 0)
275 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
277 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
279 surface
->u
.gfx9
.surf
.swizzle_mode
= md
->u
.gfx9
.swizzle_mode
;
281 surface
->u
.legacy
.pipe_config
= md
->u
.legacy
.pipe_config
;
282 surface
->u
.legacy
.bankw
= md
->u
.legacy
.bankw
;
283 surface
->u
.legacy
.bankh
= md
->u
.legacy
.bankh
;
284 surface
->u
.legacy
.tile_split
= md
->u
.legacy
.tile_split
;
285 surface
->u
.legacy
.mtilea
= md
->u
.legacy
.mtilea
;
286 surface
->u
.legacy
.num_banks
= md
->u
.legacy
.num_banks
;
288 if (md
->u
.legacy
.macrotile
== RADEON_LAYOUT_TILED
)
289 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
290 else if (md
->u
.legacy
.microtile
== RADEON_LAYOUT_TILED
)
291 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_1D
, MODE
);
293 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
299 radv_patch_image_dimensions(struct radv_device
*device
,
300 struct radv_image
*image
,
301 const struct radv_image_create_info
*create_info
,
302 struct ac_surf_info
*image_info
)
304 unsigned width
= image
->info
.width
;
305 unsigned height
= image
->info
.height
;
308 * minigbm sometimes allocates bigger images which is going to result in
309 * weird strides and other properties. Lets be lenient where possible and
310 * fail it on GFX10 (as we cannot cope there).
312 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
314 if (create_info
->bo_metadata
&&
315 radv_is_valid_opaque_metadata(device
, create_info
->bo_metadata
)) {
316 const struct radeon_bo_metadata
*md
= create_info
->bo_metadata
;
318 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
319 width
= G_00A004_WIDTH_LO(md
->metadata
[3]) +
320 (G_00A008_WIDTH_HI(md
->metadata
[4]) << 2) + 1;
321 height
= S_00A008_HEIGHT(md
->metadata
[4]) + 1;
323 width
= G_008F18_WIDTH(md
->metadata
[4]) + 1;
324 height
= G_008F18_HEIGHT(md
->metadata
[4]) + 1;
328 if (image
->info
.width
== width
&& image
->info
.height
== height
)
331 if (width
< image
->info
.width
|| height
< image
->info
.height
) {
333 "The imported image has smaller dimensions than the internal\n"
334 "dimensions. Using it is going to fail badly, so we reject\n"
336 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
337 image
->info
.width
, image
->info
.height
, width
, height
);
338 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
339 } else if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
341 "Tried to import an image with inconsistent width on GFX10.\n"
342 "As GFX10 has no separate stride fields we cannot cope with\n"
343 "an inconsistency in width and will fail this import.\n"
344 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
345 image
->info
.width
, image
->info
.height
, width
, height
);
346 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
349 "Tried to import an image with inconsistent width on pre-GFX10.\n"
350 "As GFX10 has no separate stride fields we cannot cope with\n"
351 "an inconsistency and would fail on GFX10.\n"
352 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
353 image
->info
.width
, image
->info
.height
, width
, height
);
355 image_info
->width
= width
;
356 image_info
->height
= height
;
362 radv_patch_image_from_extra_info(struct radv_device
*device
,
363 struct radv_image
*image
,
364 const struct radv_image_create_info
*create_info
,
365 struct ac_surf_info
*image_info
)
367 VkResult result
= radv_patch_image_dimensions(device
, image
, create_info
, image_info
);
368 if (result
!= VK_SUCCESS
)
371 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
372 if (create_info
->bo_metadata
) {
373 radv_patch_surface_from_metadata(device
, &image
->planes
[plane
].surface
,
374 create_info
->bo_metadata
);
377 if (radv_surface_has_scanout(device
, create_info
)) {
378 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_SCANOUT
;
379 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_DISABLE_DCC
;
381 image
->info
.surf_index
= NULL
;
388 radv_init_surface(struct radv_device
*device
,
389 const struct radv_image
*image
,
390 struct radeon_surf
*surface
,
392 const VkImageCreateInfo
*pCreateInfo
,
393 VkFormat image_format
)
395 unsigned array_mode
= radv_choose_tiling(device
, pCreateInfo
, image_format
);
396 VkFormat format
= vk_format_get_plane_format(image_format
, plane_id
);
397 const struct vk_format_description
*desc
= vk_format_description(format
);
398 bool is_depth
, is_stencil
;
400 is_depth
= vk_format_has_depth(desc
);
401 is_stencil
= vk_format_has_stencil(desc
);
403 surface
->blk_w
= vk_format_get_blockwidth(format
);
404 surface
->blk_h
= vk_format_get_blockheight(format
);
406 surface
->bpe
= vk_format_get_blocksize(vk_format_depth_only(format
));
407 /* align byte per element on dword */
408 if (surface
->bpe
== 3) {
412 surface
->flags
= RADEON_SURF_SET(array_mode
, MODE
);
414 switch (pCreateInfo
->imageType
){
415 case VK_IMAGE_TYPE_1D
:
416 if (pCreateInfo
->arrayLayers
> 1)
417 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY
, TYPE
);
419 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D
, TYPE
);
421 case VK_IMAGE_TYPE_2D
:
422 if (pCreateInfo
->arrayLayers
> 1)
423 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY
, TYPE
);
425 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D
, TYPE
);
427 case VK_IMAGE_TYPE_3D
:
428 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_3D
, TYPE
);
431 unreachable("unhandled image type");
435 surface
->flags
|= RADEON_SURF_ZBUFFER
;
436 if (radv_use_tc_compat_htile_for_image(device
, pCreateInfo
, image_format
))
437 surface
->flags
|= RADEON_SURF_TC_COMPATIBLE_HTILE
;
441 surface
->flags
|= RADEON_SURF_SBUFFER
;
443 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
444 pCreateInfo
->imageType
== VK_IMAGE_TYPE_3D
&&
445 vk_format_get_blocksizebits(image_format
) == 128 &&
446 vk_format_is_compressed(image_format
))
447 surface
->flags
|= RADEON_SURF_NO_RENDER_TARGET
;
449 surface
->flags
|= RADEON_SURF_OPTIMIZE_FOR_SPACE
;
451 if (!radv_use_dcc_for_image(device
, image
, pCreateInfo
, image_format
))
452 surface
->flags
|= RADEON_SURF_DISABLE_DCC
;
457 static inline unsigned
458 si_tile_mode_index(const struct radv_image_plane
*plane
, unsigned level
, bool stencil
)
461 return plane
->surface
.u
.legacy
.stencil_tiling_index
[level
];
463 return plane
->surface
.u
.legacy
.tiling_index
[level
];
466 static unsigned radv_map_swizzle(unsigned swizzle
)
470 return V_008F0C_SQ_SEL_Y
;
472 return V_008F0C_SQ_SEL_Z
;
474 return V_008F0C_SQ_SEL_W
;
476 return V_008F0C_SQ_SEL_0
;
478 return V_008F0C_SQ_SEL_1
;
479 default: /* VK_SWIZZLE_X */
480 return V_008F0C_SQ_SEL_X
;
485 radv_make_buffer_descriptor(struct radv_device
*device
,
486 struct radv_buffer
*buffer
,
492 const struct vk_format_description
*desc
;
494 uint64_t gpu_address
= radv_buffer_get_va(buffer
->bo
);
495 uint64_t va
= gpu_address
+ buffer
->offset
;
496 unsigned num_format
, data_format
;
498 desc
= vk_format_description(vk_format
);
499 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
500 stride
= desc
->block
.bits
/ 8;
504 state
[1] = S_008F04_BASE_ADDRESS_HI(va
>> 32) |
505 S_008F04_STRIDE(stride
);
507 if (device
->physical_device
->rad_info
.chip_class
!= GFX8
&& stride
) {
512 state
[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc
->swizzle
[0])) |
513 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc
->swizzle
[1])) |
514 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc
->swizzle
[2])) |
515 S_008F0C_DST_SEL_W(radv_map_swizzle(desc
->swizzle
[3]));
517 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
518 const struct gfx10_format
*fmt
= &gfx10_format_table
[vk_format
];
520 /* OOB_SELECT chooses the out-of-bounds check:
521 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
522 * - 1: index >= NUM_RECORDS
523 * - 2: NUM_RECORDS == 0
524 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
525 * else: swizzle_address >= NUM_RECORDS
527 state
[3] |= S_008F0C_FORMAT(fmt
->img_format
) |
528 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET
) |
529 S_008F0C_RESOURCE_LEVEL(1);
531 num_format
= radv_translate_buffer_numformat(desc
, first_non_void
);
532 data_format
= radv_translate_buffer_dataformat(desc
, first_non_void
);
534 assert(data_format
!= V_008F0C_BUF_DATA_FORMAT_INVALID
);
535 assert(num_format
!= ~0);
537 state
[3] |= S_008F0C_NUM_FORMAT(num_format
) |
538 S_008F0C_DATA_FORMAT(data_format
);
543 si_set_mutable_tex_desc_fields(struct radv_device
*device
,
544 struct radv_image
*image
,
545 const struct legacy_surf_level
*base_level_info
,
547 unsigned base_level
, unsigned first_level
,
548 unsigned block_width
, bool is_stencil
,
549 bool is_storage_image
, bool disable_compression
,
552 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
553 uint64_t gpu_address
= image
->bo
? radv_buffer_get_va(image
->bo
) + image
->offset
: 0;
554 uint64_t va
= gpu_address
+ plane
->offset
;
555 enum chip_class chip_class
= device
->physical_device
->rad_info
.chip_class
;
556 uint64_t meta_va
= 0;
557 if (chip_class
>= GFX9
) {
559 va
+= plane
->surface
.u
.gfx9
.stencil_offset
;
561 va
+= plane
->surface
.u
.gfx9
.surf_offset
;
563 va
+= base_level_info
->offset
;
566 if (chip_class
>= GFX9
||
567 base_level_info
->mode
== RADEON_SURF_MODE_2D
)
568 state
[0] |= plane
->surface
.tile_swizzle
;
569 state
[1] &= C_008F14_BASE_ADDRESS_HI
;
570 state
[1] |= S_008F14_BASE_ADDRESS_HI(va
>> 40);
572 if (chip_class
>= GFX8
) {
573 state
[6] &= C_008F28_COMPRESSION_EN
;
575 if (!disable_compression
&& radv_dcc_enabled(image
, first_level
)) {
576 meta_va
= gpu_address
+ image
->dcc_offset
;
577 if (chip_class
<= GFX8
)
578 meta_va
+= base_level_info
->dcc_offset
;
580 unsigned dcc_tile_swizzle
= plane
->surface
.tile_swizzle
<< 8;
581 dcc_tile_swizzle
&= plane
->surface
.dcc_alignment
- 1;
582 meta_va
|= dcc_tile_swizzle
;
583 } else if (!disable_compression
&&
584 radv_image_is_tc_compat_htile(image
)) {
585 meta_va
= gpu_address
+ image
->htile_offset
;
589 state
[6] |= S_008F28_COMPRESSION_EN(1);
590 if (chip_class
<= GFX9
)
591 state
[7] = meta_va
>> 8;
595 if (chip_class
>= GFX10
) {
596 state
[3] &= C_00A00C_SW_MODE
;
599 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
601 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
604 state
[6] &= C_00A018_META_DATA_ADDRESS_LO
&
605 C_00A018_META_PIPE_ALIGNED
;
608 struct gfx9_surf_meta_flags meta
;
610 if (image
->dcc_offset
)
611 meta
= plane
->surface
.u
.gfx9
.dcc
;
613 meta
= plane
->surface
.u
.gfx9
.htile
;
615 state
[6] |= S_00A018_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
616 S_00A018_META_DATA_ADDRESS_LO(meta_va
>> 8);
619 state
[7] = meta_va
>> 16;
620 } else if (chip_class
== GFX9
) {
621 state
[3] &= C_008F1C_SW_MODE
;
622 state
[4] &= C_008F20_PITCH
;
625 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
626 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.stencil
.epitch
);
628 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
629 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.surf
.epitch
);
632 state
[5] &= C_008F24_META_DATA_ADDRESS
&
633 C_008F24_META_PIPE_ALIGNED
&
634 C_008F24_META_RB_ALIGNED
;
636 struct gfx9_surf_meta_flags meta
;
638 if (image
->dcc_offset
)
639 meta
= plane
->surface
.u
.gfx9
.dcc
;
641 meta
= plane
->surface
.u
.gfx9
.htile
;
643 state
[5] |= S_008F24_META_DATA_ADDRESS(meta_va
>> 40) |
644 S_008F24_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
645 S_008F24_META_RB_ALIGNED(meta
.rb_aligned
);
649 unsigned pitch
= base_level_info
->nblk_x
* block_width
;
650 unsigned index
= si_tile_mode_index(plane
, base_level
, is_stencil
);
652 state
[3] &= C_008F1C_TILING_INDEX
;
653 state
[3] |= S_008F1C_TILING_INDEX(index
);
654 state
[4] &= C_008F20_PITCH
;
655 state
[4] |= S_008F20_PITCH(pitch
- 1);
659 static unsigned radv_tex_dim(VkImageType image_type
, VkImageViewType view_type
,
660 unsigned nr_layers
, unsigned nr_samples
, bool is_storage_image
, bool gfx9
)
662 if (view_type
== VK_IMAGE_VIEW_TYPE_CUBE
|| view_type
== VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
)
663 return is_storage_image
? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_CUBE
;
665 /* GFX9 allocates 1D textures as 2D. */
666 if (gfx9
&& image_type
== VK_IMAGE_TYPE_1D
)
667 image_type
= VK_IMAGE_TYPE_2D
;
668 switch (image_type
) {
669 case VK_IMAGE_TYPE_1D
:
670 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY
: V_008F1C_SQ_RSRC_IMG_1D
;
671 case VK_IMAGE_TYPE_2D
:
673 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D_MSAA
;
675 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D
;
676 case VK_IMAGE_TYPE_3D
:
677 if (view_type
== VK_IMAGE_VIEW_TYPE_3D
)
678 return V_008F1C_SQ_RSRC_IMG_3D
;
680 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY
;
682 unreachable("illegal image type");
686 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle
[4])
688 unsigned bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
690 if (swizzle
[3] == VK_SWIZZLE_X
) {
691 /* For the pre-defined border color values (white, opaque
692 * black, transparent black), the only thing that matters is
693 * that the alpha channel winds up in the correct place
694 * (because the RGB channels are all the same) so either of
695 * these enumerations will work.
697 if (swizzle
[2] == VK_SWIZZLE_Y
)
698 bc_swizzle
= V_008F20_BC_SWIZZLE_WZYX
;
700 bc_swizzle
= V_008F20_BC_SWIZZLE_WXYZ
;
701 } else if (swizzle
[0] == VK_SWIZZLE_X
) {
702 if (swizzle
[1] == VK_SWIZZLE_Y
)
703 bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
705 bc_swizzle
= V_008F20_BC_SWIZZLE_XWYZ
;
706 } else if (swizzle
[1] == VK_SWIZZLE_X
) {
707 bc_swizzle
= V_008F20_BC_SWIZZLE_YXWZ
;
708 } else if (swizzle
[2] == VK_SWIZZLE_X
) {
709 bc_swizzle
= V_008F20_BC_SWIZZLE_ZYXW
;
715 bool vi_alpha_is_on_msb(struct radv_device
*device
, VkFormat format
)
717 const struct vk_format_description
*desc
= vk_format_description(format
);
719 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
&& desc
->nr_channels
== 1)
720 return desc
->swizzle
[3] == VK_SWIZZLE_X
;
722 return radv_translate_colorswap(format
, false) <= 1;
725 * Build the sampler view descriptor for a texture (GFX10).
728 gfx10_make_texture_descriptor(struct radv_device
*device
,
729 struct radv_image
*image
,
730 bool is_storage_image
,
731 VkImageViewType view_type
,
733 const VkComponentMapping
*mapping
,
734 unsigned first_level
, unsigned last_level
,
735 unsigned first_layer
, unsigned last_layer
,
736 unsigned width
, unsigned height
, unsigned depth
,
738 uint32_t *fmask_state
)
740 const struct vk_format_description
*desc
;
741 enum vk_swizzle swizzle
[4];
745 desc
= vk_format_description(vk_format
);
746 img_format
= gfx10_format_table
[vk_format
].img_format
;
748 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
749 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
750 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
752 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
755 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
756 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
757 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
759 depth
= image
->info
.array_size
;
760 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
761 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
762 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
763 depth
= image
->info
.array_size
;
764 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
765 depth
= image
->info
.array_size
/ 6;
768 state
[1] = S_00A004_FORMAT(img_format
) |
769 S_00A004_WIDTH_LO(width
- 1);
770 state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
771 S_00A008_HEIGHT(height
- 1) |
772 S_00A008_RESOURCE_LEVEL(1);
773 state
[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
774 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
775 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
776 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
777 S_00A00C_BASE_LEVEL(image
->info
.samples
> 1 ?
779 S_00A00C_LAST_LEVEL(image
->info
.samples
> 1 ?
780 util_logbase2(image
->info
.samples
) :
782 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle
)) |
784 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
785 * to know the total number of layers.
787 state
[4] = S_00A010_DEPTH(type
== V_008F1C_SQ_RSRC_IMG_3D
? depth
- 1 : last_layer
) |
788 S_00A010_BASE_ARRAY(first_layer
);
789 state
[5] = S_00A014_ARRAY_PITCH(0) |
790 S_00A014_MAX_MIP(image
->info
.samples
> 1 ?
791 util_logbase2(image
->info
.samples
) :
792 image
->info
.levels
- 1) |
793 S_00A014_PERF_MOD(4);
797 if (radv_dcc_enabled(image
, first_level
)) {
798 state
[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B
) |
799 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B
) |
800 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
803 /* Initialize the sampler view for FMASK. */
804 if (radv_image_has_fmask(image
)) {
805 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
809 assert(image
->plane_count
== 1);
811 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
813 switch (image
->info
.samples
) {
815 format
= V_008F0C_IMG_FORMAT_FMASK8_S2_F2
;
818 format
= V_008F0C_IMG_FORMAT_FMASK8_S4_F4
;
821 format
= V_008F0C_IMG_FORMAT_FMASK32_S8_F8
;
824 unreachable("invalid nr_samples");
827 fmask_state
[0] = (va
>> 8) | image
->planes
[0].surface
.fmask_tile_swizzle
;
828 fmask_state
[1] = S_00A004_BASE_ADDRESS_HI(va
>> 40) |
829 S_00A004_FORMAT(format
) |
830 S_00A004_WIDTH_LO(width
- 1);
831 fmask_state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
832 S_00A008_HEIGHT(height
- 1) |
833 S_00A008_RESOURCE_LEVEL(1);
834 fmask_state
[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
835 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
836 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
837 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
838 S_00A00C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
) |
839 S_00A00C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
840 fmask_state
[4] = S_00A010_DEPTH(last_layer
) |
841 S_00A010_BASE_ARRAY(first_layer
);
843 fmask_state
[6] = S_00A018_META_PIPE_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.pipe_aligned
);
845 } else if (fmask_state
)
846 memset(fmask_state
, 0, 8 * 4);
850 * Build the sampler view descriptor for a texture (SI-GFX9)
853 si_make_texture_descriptor(struct radv_device
*device
,
854 struct radv_image
*image
,
855 bool is_storage_image
,
856 VkImageViewType view_type
,
858 const VkComponentMapping
*mapping
,
859 unsigned first_level
, unsigned last_level
,
860 unsigned first_layer
, unsigned last_layer
,
861 unsigned width
, unsigned height
, unsigned depth
,
863 uint32_t *fmask_state
)
865 const struct vk_format_description
*desc
;
866 enum vk_swizzle swizzle
[4];
868 unsigned num_format
, data_format
, type
;
870 desc
= vk_format_description(vk_format
);
872 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
873 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
874 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
876 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
879 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
881 num_format
= radv_translate_tex_numformat(vk_format
, desc
, first_non_void
);
882 if (num_format
== ~0) {
886 data_format
= radv_translate_tex_dataformat(vk_format
, desc
, first_non_void
);
887 if (data_format
== ~0) {
891 /* S8 with either Z16 or Z32 HTILE need a special format. */
892 if (device
->physical_device
->rad_info
.chip_class
== GFX9
&&
893 vk_format
== VK_FORMAT_S8_UINT
&&
894 radv_image_is_tc_compat_htile(image
)) {
895 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
896 data_format
= V_008F14_IMG_DATA_FORMAT_S8_32
;
897 else if (image
->vk_format
== VK_FORMAT_D16_UNORM_S8_UINT
)
898 data_format
= V_008F14_IMG_DATA_FORMAT_S8_16
;
900 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
901 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
902 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
904 depth
= image
->info
.array_size
;
905 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
906 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
907 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
908 depth
= image
->info
.array_size
;
909 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
910 depth
= image
->info
.array_size
/ 6;
913 state
[1] = (S_008F14_DATA_FORMAT(data_format
) |
914 S_008F14_NUM_FORMAT(num_format
));
915 state
[2] = (S_008F18_WIDTH(width
- 1) |
916 S_008F18_HEIGHT(height
- 1) |
917 S_008F18_PERF_MOD(4));
918 state
[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
919 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
920 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
921 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
922 S_008F1C_BASE_LEVEL(image
->info
.samples
> 1 ?
924 S_008F1C_LAST_LEVEL(image
->info
.samples
> 1 ?
925 util_logbase2(image
->info
.samples
) :
927 S_008F1C_TYPE(type
));
929 state
[5] = S_008F24_BASE_ARRAY(first_layer
);
933 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
934 unsigned bc_swizzle
= gfx9_border_color_swizzle(swizzle
);
936 /* Depth is the last accessible layer on Gfx9.
937 * The hw doesn't need to know the total number of layers.
939 if (type
== V_008F1C_SQ_RSRC_IMG_3D
)
940 state
[4] |= S_008F20_DEPTH(depth
- 1);
942 state
[4] |= S_008F20_DEPTH(last_layer
);
944 state
[4] |= S_008F20_BC_SWIZZLE(bc_swizzle
);
945 state
[5] |= S_008F24_MAX_MIP(image
->info
.samples
> 1 ?
946 util_logbase2(image
->info
.samples
) :
947 image
->info
.levels
- 1);
949 state
[3] |= S_008F1C_POW2_PAD(image
->info
.levels
> 1);
950 state
[4] |= S_008F20_DEPTH(depth
- 1);
951 state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
953 if (image
->dcc_offset
) {
954 state
[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
956 /* The last dword is unused by hw. The shader uses it to clear
957 * bits in the first dword of sampler state.
959 if (device
->physical_device
->rad_info
.chip_class
<= GFX7
&& image
->info
.samples
<= 1) {
960 if (first_level
== last_level
)
961 state
[7] = C_008F30_MAX_ANISO_RATIO
;
963 state
[7] = 0xffffffff;
967 /* Initialize the sampler view for FMASK. */
968 if (radv_image_has_fmask(image
)) {
969 uint32_t fmask_format
, num_format
;
970 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
973 assert(image
->plane_count
== 1);
975 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
977 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
978 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK
;
979 switch (image
->info
.samples
) {
981 num_format
= V_008F14_IMG_FMASK_8_2_2
;
984 num_format
= V_008F14_IMG_FMASK_8_4_4
;
987 num_format
= V_008F14_IMG_FMASK_32_8_8
;
990 unreachable("invalid nr_samples");
993 switch (image
->info
.samples
) {
995 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2
;
998 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4
;
1001 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8
;
1005 fmask_format
= V_008F14_IMG_DATA_FORMAT_INVALID
;
1007 num_format
= V_008F14_IMG_NUM_FORMAT_UINT
;
1010 fmask_state
[0] = va
>> 8;
1011 fmask_state
[0] |= image
->planes
[0].surface
.fmask_tile_swizzle
;
1012 fmask_state
[1] = S_008F14_BASE_ADDRESS_HI(va
>> 40) |
1013 S_008F14_DATA_FORMAT(fmask_format
) |
1014 S_008F14_NUM_FORMAT(num_format
);
1015 fmask_state
[2] = S_008F18_WIDTH(width
- 1) |
1016 S_008F18_HEIGHT(height
- 1);
1017 fmask_state
[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
1018 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
1019 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
1020 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
1021 S_008F1C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
1023 fmask_state
[5] = S_008F24_BASE_ARRAY(first_layer
);
1027 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
1028 fmask_state
[3] |= S_008F1C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
);
1029 fmask_state
[4] |= S_008F20_DEPTH(last_layer
) |
1030 S_008F20_PITCH(image
->planes
[0].surface
.u
.gfx9
.fmask
.epitch
);
1031 fmask_state
[5] |= S_008F24_META_PIPE_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.pipe_aligned
) |
1032 S_008F24_META_RB_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.rb_aligned
);
1034 if (radv_image_is_tc_compat_cmask(image
)) {
1035 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1037 fmask_state
[5] |= S_008F24_META_DATA_ADDRESS(va
>> 40);
1038 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1039 fmask_state
[7] |= va
>> 8;
1042 fmask_state
[3] |= S_008F1C_TILING_INDEX(image
->planes
[0].surface
.u
.legacy
.fmask
.tiling_index
);
1043 fmask_state
[4] |= S_008F20_DEPTH(depth
- 1) |
1044 S_008F20_PITCH(image
->planes
[0].surface
.u
.legacy
.fmask
.pitch_in_pixels
- 1);
1045 fmask_state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
1047 if (radv_image_is_tc_compat_cmask(image
)) {
1048 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1050 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1051 fmask_state
[7] |= va
>> 8;
1054 } else if (fmask_state
)
1055 memset(fmask_state
, 0, 8 * 4);
1059 radv_make_texture_descriptor(struct radv_device
*device
,
1060 struct radv_image
*image
,
1061 bool is_storage_image
,
1062 VkImageViewType view_type
,
1064 const VkComponentMapping
*mapping
,
1065 unsigned first_level
, unsigned last_level
,
1066 unsigned first_layer
, unsigned last_layer
,
1067 unsigned width
, unsigned height
, unsigned depth
,
1069 uint32_t *fmask_state
)
1071 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
1072 gfx10_make_texture_descriptor(device
, image
, is_storage_image
,
1073 view_type
, vk_format
, mapping
,
1074 first_level
, last_level
,
1075 first_layer
, last_layer
,
1076 width
, height
, depth
,
1077 state
, fmask_state
);
1079 si_make_texture_descriptor(device
, image
, is_storage_image
,
1080 view_type
, vk_format
, mapping
,
1081 first_level
, last_level
,
1082 first_layer
, last_layer
,
1083 width
, height
, depth
,
1084 state
, fmask_state
);
1089 radv_query_opaque_metadata(struct radv_device
*device
,
1090 struct radv_image
*image
,
1091 struct radeon_bo_metadata
*md
)
1093 static const VkComponentMapping fixedmapping
;
1094 uint32_t desc
[8], i
;
1096 assert(image
->plane_count
== 1);
1098 /* Metadata image format format version 1:
1099 * [0] = 1 (metadata format identifier)
1100 * [1] = (VENDOR_ID << 16) | PCI_ID
1101 * [2:9] = image descriptor for the whole resource
1102 * [2] is always 0, because the base address is cleared
1103 * [9] is the DCC offset bits [39:8] from the beginning of
1105 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1107 md
->metadata
[0] = 1; /* metadata image format version 1 */
1109 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1110 md
->metadata
[1] = si_get_bo_metadata_word1(device
);
1113 radv_make_texture_descriptor(device
, image
, false,
1114 (VkImageViewType
)image
->type
, image
->vk_format
,
1115 &fixedmapping
, 0, image
->info
.levels
- 1, 0,
1116 image
->info
.array_size
- 1,
1117 image
->info
.width
, image
->info
.height
,
1121 si_set_mutable_tex_desc_fields(device
, image
, &image
->planes
[0].surface
.u
.legacy
.level
[0], 0, 0, 0,
1122 image
->planes
[0].surface
.blk_w
, false, false, false, desc
);
1124 /* Clear the base address and set the relative DCC offset. */
1126 desc
[1] &= C_008F14_BASE_ADDRESS_HI
;
1127 desc
[7] = image
->dcc_offset
>> 8;
1129 /* Dwords [2:9] contain the image descriptor. */
1130 memcpy(&md
->metadata
[2], desc
, sizeof(desc
));
1132 /* Dwords [10:..] contain the mipmap level offsets. */
1133 if (device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
1134 for (i
= 0; i
<= image
->info
.levels
- 1; i
++)
1135 md
->metadata
[10+i
] = image
->planes
[0].surface
.u
.legacy
.level
[i
].offset
>> 8;
1136 md
->size_metadata
= (11 + image
->info
.levels
- 1) * 4;
1138 md
->size_metadata
= 10 * 4;
1142 radv_init_metadata(struct radv_device
*device
,
1143 struct radv_image
*image
,
1144 struct radeon_bo_metadata
*metadata
)
1146 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
1148 memset(metadata
, 0, sizeof(*metadata
));
1150 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1151 metadata
->u
.gfx9
.swizzle_mode
= surface
->u
.gfx9
.surf
.swizzle_mode
;
1152 metadata
->u
.gfx9
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
1154 metadata
->u
.legacy
.microtile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_1D
?
1155 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
1156 metadata
->u
.legacy
.macrotile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_2D
?
1157 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
1158 metadata
->u
.legacy
.pipe_config
= surface
->u
.legacy
.pipe_config
;
1159 metadata
->u
.legacy
.bankw
= surface
->u
.legacy
.bankw
;
1160 metadata
->u
.legacy
.bankh
= surface
->u
.legacy
.bankh
;
1161 metadata
->u
.legacy
.tile_split
= surface
->u
.legacy
.tile_split
;
1162 metadata
->u
.legacy
.mtilea
= surface
->u
.legacy
.mtilea
;
1163 metadata
->u
.legacy
.num_banks
= surface
->u
.legacy
.num_banks
;
1164 metadata
->u
.legacy
.stride
= surface
->u
.legacy
.level
[0].nblk_x
* surface
->bpe
;
1165 metadata
->u
.legacy
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
1167 radv_query_opaque_metadata(device
, image
, metadata
);
1171 radv_image_override_offset_stride(struct radv_device
*device
,
1172 struct radv_image
*image
,
1173 uint64_t offset
, uint32_t stride
)
1175 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
1176 unsigned bpe
= vk_format_get_blocksizebits(image
->vk_format
) / 8;
1178 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1180 surface
->u
.gfx9
.surf_pitch
= stride
;
1181 surface
->u
.gfx9
.surf_slice_size
=
1182 (uint64_t)stride
* surface
->u
.gfx9
.surf_height
* bpe
;
1184 surface
->u
.gfx9
.surf_offset
= offset
;
1186 surface
->u
.legacy
.level
[0].nblk_x
= stride
;
1187 surface
->u
.legacy
.level
[0].slice_size_dw
=
1188 ((uint64_t)stride
* surface
->u
.legacy
.level
[0].nblk_y
* bpe
) / 4;
1191 for (unsigned i
= 0; i
< ARRAY_SIZE(surface
->u
.legacy
.level
); ++i
)
1192 surface
->u
.legacy
.level
[i
].offset
+= offset
;
1199 radv_image_alloc_fmask(struct radv_device
*device
,
1200 struct radv_image
*image
)
1202 unsigned fmask_alignment
= image
->planes
[0].surface
.fmask_alignment
;
1204 image
->fmask_offset
= align64(image
->size
, fmask_alignment
);
1205 image
->size
= image
->fmask_offset
+ image
->planes
[0].surface
.fmask_size
;
1206 image
->alignment
= MAX2(image
->alignment
, fmask_alignment
);
1210 radv_image_alloc_cmask(struct radv_device
*device
,
1211 struct radv_image
*image
)
1213 unsigned cmask_alignment
= image
->planes
[0].surface
.cmask_alignment
;
1214 unsigned cmask_size
= image
->planes
[0].surface
.cmask_size
;
1215 uint32_t clear_value_size
= 0;
1220 assert(cmask_alignment
);
1222 image
->cmask_offset
= align64(image
->size
, cmask_alignment
);
1223 /* + 8 for storing the clear values */
1224 if (!image
->clear_value_offset
) {
1225 image
->clear_value_offset
= image
->cmask_offset
+ cmask_size
;
1226 clear_value_size
= 8;
1228 image
->size
= image
->cmask_offset
+ cmask_size
+ clear_value_size
;
1229 image
->alignment
= MAX2(image
->alignment
, cmask_alignment
);
1233 radv_image_alloc_dcc(struct radv_image
*image
)
1235 assert(image
->plane_count
== 1);
1237 image
->dcc_offset
= align64(image
->size
, image
->planes
[0].surface
.dcc_alignment
);
1238 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1239 image
->clear_value_offset
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
;
1240 image
->fce_pred_offset
= image
->clear_value_offset
+ 8 * image
->info
.levels
;
1241 image
->dcc_pred_offset
= image
->clear_value_offset
+ 16 * image
->info
.levels
;
1242 image
->size
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
+ 24 * image
->info
.levels
;
1243 image
->alignment
= MAX2(image
->alignment
, image
->planes
[0].surface
.dcc_alignment
);
1247 radv_image_alloc_htile(struct radv_device
*device
, struct radv_image
*image
)
1249 image
->htile_offset
= align64(image
->size
, image
->planes
[0].surface
.htile_alignment
);
1251 /* + 8 for storing the clear values */
1252 image
->clear_value_offset
= image
->htile_offset
+ image
->planes
[0].surface
.htile_size
;
1253 image
->size
= image
->clear_value_offset
+ image
->info
.levels
* 8;
1254 if (radv_image_is_tc_compat_htile(image
) &&
1255 device
->physical_device
->rad_info
.has_tc_compat_zrange_bug
) {
1256 /* Metadata for the TC-compatible HTILE hardware bug which
1257 * have to be fixed by updating ZRANGE_PRECISION when doing
1258 * fast depth clears to 0.0f.
1260 image
->tc_compat_zrange_offset
= image
->size
;
1261 image
->size
= image
->tc_compat_zrange_offset
+ image
->info
.levels
* 4;
1263 image
->alignment
= align64(image
->alignment
, image
->planes
[0].surface
.htile_alignment
);
1267 radv_image_can_enable_dcc_or_cmask(struct radv_image
*image
)
1269 if (image
->info
.samples
<= 1 &&
1270 image
->info
.width
* image
->info
.height
<= 512 * 512) {
1271 /* Do not enable CMASK or DCC for small surfaces where the cost
1272 * of the eliminate pass can be higher than the benefit of fast
1273 * clear. RadeonSI does this, but the image threshold is
1279 return image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
&&
1280 (image
->exclusive
|| image
->queue_family_mask
== 1);
1284 radv_image_can_enable_dcc(struct radv_device
*device
, struct radv_image
*image
)
1286 if (!radv_image_can_enable_dcc_or_cmask(image
) ||
1287 !radv_image_has_dcc(image
))
1290 /* On GFX8, DCC layers can be interleaved and it's currently only
1291 * enabled if slice size is equal to the per slice fast clear size
1292 * because the driver assumes that portions of multiple layers are
1293 * contiguous during fast clears.
1295 if (image
->info
.array_size
> 1) {
1296 const struct legacy_surf_level
*surf_level
=
1297 &image
->planes
[0].surface
.u
.legacy
.level
[0];
1299 assert(device
->physical_device
->rad_info
.chip_class
== GFX8
);
1301 if (image
->planes
[0].surface
.dcc_slice_size
!= surf_level
->dcc_fast_clear_size
)
1309 radv_image_can_enable_cmask(struct radv_image
*image
)
1311 if (image
->planes
[0].surface
.bpe
> 8 && image
->info
.samples
== 1) {
1312 /* Do not enable CMASK for non-MSAA images (fast color clear)
1313 * because 128 bit formats are not supported, but FMASK might
1319 return radv_image_can_enable_dcc_or_cmask(image
) &&
1320 image
->info
.levels
== 1 &&
1321 image
->info
.depth
== 1 &&
1322 !image
->planes
[0].surface
.is_linear
;
1326 radv_image_can_enable_fmask(struct radv_image
*image
)
1328 return image
->info
.samples
> 1 && vk_format_is_color(image
->vk_format
);
1332 radv_image_can_enable_htile(struct radv_image
*image
)
1334 return radv_image_has_htile(image
) &&
1335 image
->info
.levels
== 1 &&
1336 image
->info
.width
* image
->info
.height
>= 8 * 8;
1339 static void radv_image_disable_dcc(struct radv_image
*image
)
1341 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1342 image
->planes
[i
].surface
.dcc_size
= 0;
1345 static void radv_image_disable_htile(struct radv_image
*image
)
1347 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1348 image
->planes
[i
].surface
.htile_size
= 0;
1352 radv_image_create_layout(struct radv_device
*device
,
1353 struct radv_image_create_info create_info
,
1354 struct radv_image
*image
)
1356 /* Check that we did not initialize things earlier */
1357 assert(!image
->planes
[0].surface
.surf_size
);
1359 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1360 * common internal case. */
1361 create_info
.vk_info
= NULL
;
1363 struct ac_surf_info image_info
= image
->info
;
1364 VkResult result
= radv_patch_image_from_extra_info(device
, image
, &create_info
, &image_info
);
1365 if (result
!= VK_SUCCESS
)
1369 image
->alignment
= 1;
1370 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
1371 struct ac_surf_info info
= image_info
;
1374 const struct vk_format_description
*desc
= vk_format_description(image
->vk_format
);
1375 assert(info
.width
% desc
->width_divisor
== 0);
1376 assert(info
.height
% desc
->height_divisor
== 0);
1378 info
.width
/= desc
->width_divisor
;
1379 info
.height
/= desc
->height_divisor
;
1382 device
->ws
->surface_init(device
->ws
, &info
, &image
->planes
[plane
].surface
);
1384 image
->planes
[plane
].offset
= align(image
->size
, image
->planes
[plane
].surface
.surf_alignment
);
1385 image
->size
= image
->planes
[plane
].offset
+ image
->planes
[plane
].surface
.surf_size
;
1386 image
->alignment
= image
->planes
[plane
].surface
.surf_alignment
;
1388 image
->planes
[plane
].format
= vk_format_get_plane_format(image
->vk_format
, plane
);
1391 if (!create_info
.no_metadata_planes
) {
1392 /* Try to enable DCC first. */
1393 if (radv_image_can_enable_dcc(device
, image
)) {
1394 radv_image_alloc_dcc(image
);
1395 if (image
->info
.samples
> 1) {
1396 /* CMASK should be enabled because DCC fast
1397 * clear with MSAA needs it.
1399 assert(radv_image_can_enable_cmask(image
));
1400 radv_image_alloc_cmask(device
, image
);
1403 /* When DCC cannot be enabled, try CMASK. */
1404 radv_image_disable_dcc(image
);
1405 if (radv_image_can_enable_cmask(image
)) {
1406 radv_image_alloc_cmask(device
, image
);
1410 /* Try to enable FMASK for multisampled images. */
1411 if (radv_image_can_enable_fmask(image
)) {
1412 radv_image_alloc_fmask(device
, image
);
1414 if (radv_use_tc_compat_cmask_for_image(device
, image
))
1415 image
->tc_compatible_cmask
= true;
1417 /* Otherwise, try to enable HTILE for depth surfaces. */
1418 if (radv_image_can_enable_htile(image
) &&
1419 !(device
->instance
->debug_flags
& RADV_DEBUG_NO_HIZ
)) {
1420 image
->tc_compatible_htile
= image
->planes
[0].surface
.flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
;
1421 radv_image_alloc_htile(device
, image
);
1423 radv_image_disable_htile(image
);
1427 radv_image_disable_dcc(image
);
1428 radv_image_disable_htile(image
);
1431 assert(image
->planes
[0].surface
.surf_size
);
1436 radv_image_create(VkDevice _device
,
1437 const struct radv_image_create_info
*create_info
,
1438 const VkAllocationCallbacks
* alloc
,
1441 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1442 const VkImageCreateInfo
*pCreateInfo
= create_info
->vk_info
;
1443 struct radv_image
*image
= NULL
;
1444 VkFormat format
= radv_select_android_external_format(pCreateInfo
->pNext
,
1445 pCreateInfo
->format
);
1446 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO
);
1448 const unsigned plane_count
= vk_format_get_plane_count(format
);
1449 const size_t image_struct_size
= sizeof(*image
) + sizeof(struct radv_image_plane
) * plane_count
;
1451 radv_assert(pCreateInfo
->mipLevels
> 0);
1452 radv_assert(pCreateInfo
->arrayLayers
> 0);
1453 radv_assert(pCreateInfo
->samples
> 0);
1454 radv_assert(pCreateInfo
->extent
.width
> 0);
1455 radv_assert(pCreateInfo
->extent
.height
> 0);
1456 radv_assert(pCreateInfo
->extent
.depth
> 0);
1458 image
= vk_zalloc2(&device
->alloc
, alloc
, image_struct_size
, 8,
1459 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1461 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1463 image
->type
= pCreateInfo
->imageType
;
1464 image
->info
.width
= pCreateInfo
->extent
.width
;
1465 image
->info
.height
= pCreateInfo
->extent
.height
;
1466 image
->info
.depth
= pCreateInfo
->extent
.depth
;
1467 image
->info
.samples
= pCreateInfo
->samples
;
1468 image
->info
.storage_samples
= pCreateInfo
->samples
;
1469 image
->info
.array_size
= pCreateInfo
->arrayLayers
;
1470 image
->info
.levels
= pCreateInfo
->mipLevels
;
1471 image
->info
.num_channels
= vk_format_get_nr_components(format
);
1473 image
->vk_format
= format
;
1474 image
->tiling
= pCreateInfo
->tiling
;
1475 image
->usage
= pCreateInfo
->usage
;
1476 image
->flags
= pCreateInfo
->flags
;
1477 image
->plane_count
= plane_count
;
1479 image
->exclusive
= pCreateInfo
->sharingMode
== VK_SHARING_MODE_EXCLUSIVE
;
1480 if (pCreateInfo
->sharingMode
== VK_SHARING_MODE_CONCURRENT
) {
1481 for (uint32_t i
= 0; i
< pCreateInfo
->queueFamilyIndexCount
; ++i
)
1482 if (pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_EXTERNAL
||
1483 pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_FOREIGN_EXT
)
1484 image
->queue_family_mask
|= (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1486 image
->queue_family_mask
|= 1u << pCreateInfo
->pQueueFamilyIndices
[i
];
1489 const VkExternalMemoryImageCreateInfo
*external_info
=
1490 vk_find_struct_const(pCreateInfo
->pNext
,
1491 EXTERNAL_MEMORY_IMAGE_CREATE_INFO
) ;
1493 image
->shareable
= external_info
;
1494 if (!vk_format_is_depth_or_stencil(format
) && !image
->shareable
) {
1495 image
->info
.surf_index
= &device
->image_mrt_offset_counter
;
1498 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
1499 radv_init_surface(device
, image
, &image
->planes
[plane
].surface
, plane
, pCreateInfo
, format
);
1502 bool delay_layout
= external_info
&&
1503 (external_info
->handleTypes
& VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID
);
1506 *pImage
= radv_image_to_handle(image
);
1507 assert (!(image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
));
1511 ASSERTED VkResult result
= radv_image_create_layout(device
, *create_info
, image
);
1512 assert(result
== VK_SUCCESS
);
1514 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
) {
1515 image
->alignment
= MAX2(image
->alignment
, 4096);
1516 image
->size
= align64(image
->size
, image
->alignment
);
1519 image
->bo
= device
->ws
->buffer_create(device
->ws
, image
->size
, image
->alignment
,
1520 0, RADEON_FLAG_VIRTUAL
, RADV_BO_PRIORITY_VIRTUAL
);
1522 vk_free2(&device
->alloc
, alloc
, image
);
1523 return vk_error(device
->instance
, VK_ERROR_OUT_OF_DEVICE_MEMORY
);
1527 *pImage
= radv_image_to_handle(image
);
1533 radv_image_view_make_descriptor(struct radv_image_view
*iview
,
1534 struct radv_device
*device
,
1536 const VkComponentMapping
*components
,
1537 bool is_storage_image
, bool disable_compression
,
1538 unsigned plane_id
, unsigned descriptor_plane_id
)
1540 struct radv_image
*image
= iview
->image
;
1541 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1542 const struct vk_format_description
*format_desc
= vk_format_description(image
->vk_format
);
1543 bool is_stencil
= iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
;
1545 union radv_descriptor
*descriptor
;
1546 uint32_t hw_level
= 0;
1548 if (is_storage_image
) {
1549 descriptor
= &iview
->storage_descriptor
;
1551 descriptor
= &iview
->descriptor
;
1554 assert(vk_format_get_plane_count(vk_format
) == 1);
1555 assert(plane
->surface
.blk_w
% vk_format_get_blockwidth(plane
->format
) == 0);
1556 blk_w
= plane
->surface
.blk_w
/ vk_format_get_blockwidth(plane
->format
) * vk_format_get_blockwidth(vk_format
);
1558 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
1559 hw_level
= iview
->base_mip
;
1560 radv_make_texture_descriptor(device
, image
, is_storage_image
,
1564 hw_level
, hw_level
+ iview
->level_count
- 1,
1566 iview
->base_layer
+ iview
->layer_count
- 1,
1567 iview
->extent
.width
/ (plane_id
? format_desc
->width_divisor
: 1),
1568 iview
->extent
.height
/ (plane_id
? format_desc
->height_divisor
: 1),
1569 iview
->extent
.depth
,
1570 descriptor
->plane_descriptors
[descriptor_plane_id
],
1571 descriptor_plane_id
? NULL
: descriptor
->fmask_descriptor
);
1573 const struct legacy_surf_level
*base_level_info
= NULL
;
1574 if (device
->physical_device
->rad_info
.chip_class
<= GFX9
) {
1576 base_level_info
= &plane
->surface
.u
.legacy
.stencil_level
[iview
->base_mip
];
1578 base_level_info
= &plane
->surface
.u
.legacy
.level
[iview
->base_mip
];
1580 si_set_mutable_tex_desc_fields(device
, image
,
1585 blk_w
, is_stencil
, is_storage_image
,
1586 is_storage_image
|| disable_compression
,
1587 descriptor
->plane_descriptors
[descriptor_plane_id
]);
1591 radv_plane_from_aspect(VkImageAspectFlags mask
)
1594 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1596 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1604 radv_get_aspect_format(struct radv_image
*image
, VkImageAspectFlags mask
)
1607 case VK_IMAGE_ASPECT_PLANE_0_BIT
:
1608 return image
->planes
[0].format
;
1609 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1610 return image
->planes
[1].format
;
1611 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1612 return image
->planes
[2].format
;
1613 case VK_IMAGE_ASPECT_STENCIL_BIT
:
1614 return vk_format_stencil_only(image
->vk_format
);
1615 case VK_IMAGE_ASPECT_DEPTH_BIT
:
1616 return vk_format_depth_only(image
->vk_format
);
1617 case VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
:
1618 return vk_format_depth_only(image
->vk_format
);
1620 return image
->vk_format
;
1625 radv_image_view_init(struct radv_image_view
*iview
,
1626 struct radv_device
*device
,
1627 const VkImageViewCreateInfo
* pCreateInfo
,
1628 const struct radv_image_view_extra_create_info
* extra_create_info
)
1630 RADV_FROM_HANDLE(radv_image
, image
, pCreateInfo
->image
);
1631 const VkImageSubresourceRange
*range
= &pCreateInfo
->subresourceRange
;
1633 switch (image
->type
) {
1634 case VK_IMAGE_TYPE_1D
:
1635 case VK_IMAGE_TYPE_2D
:
1636 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1 <= image
->info
.array_size
);
1638 case VK_IMAGE_TYPE_3D
:
1639 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1
1640 <= radv_minify(image
->info
.depth
, range
->baseMipLevel
));
1643 unreachable("bad VkImageType");
1645 iview
->image
= image
;
1646 iview
->bo
= image
->bo
;
1647 iview
->type
= pCreateInfo
->viewType
;
1648 iview
->plane_id
= radv_plane_from_aspect(pCreateInfo
->subresourceRange
.aspectMask
);
1649 iview
->aspect_mask
= pCreateInfo
->subresourceRange
.aspectMask
;
1650 iview
->multiple_planes
= vk_format_get_plane_count(image
->vk_format
) > 1 && iview
->aspect_mask
== VK_IMAGE_ASPECT_COLOR_BIT
;
1652 iview
->vk_format
= pCreateInfo
->format
;
1654 /* If the image has an Android external format, pCreateInfo->format will be
1655 * VK_FORMAT_UNDEFINED. */
1656 if (iview
->vk_format
== VK_FORMAT_UNDEFINED
)
1657 iview
->vk_format
= image
->vk_format
;
1659 if (iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1660 iview
->vk_format
= vk_format_stencil_only(iview
->vk_format
);
1661 } else if (iview
->aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
) {
1662 iview
->vk_format
= vk_format_depth_only(iview
->vk_format
);
1665 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1666 iview
->extent
= (VkExtent3D
) {
1667 .width
= image
->info
.width
,
1668 .height
= image
->info
.height
,
1669 .depth
= image
->info
.depth
,
1672 iview
->extent
= (VkExtent3D
) {
1673 .width
= radv_minify(image
->info
.width
, range
->baseMipLevel
),
1674 .height
= radv_minify(image
->info
.height
, range
->baseMipLevel
),
1675 .depth
= radv_minify(image
->info
.depth
, range
->baseMipLevel
),
1679 if (iview
->vk_format
!= image
->planes
[iview
->plane_id
].format
) {
1680 unsigned view_bw
= vk_format_get_blockwidth(iview
->vk_format
);
1681 unsigned view_bh
= vk_format_get_blockheight(iview
->vk_format
);
1682 unsigned img_bw
= vk_format_get_blockwidth(image
->vk_format
);
1683 unsigned img_bh
= vk_format_get_blockheight(image
->vk_format
);
1685 iview
->extent
.width
= round_up_u32(iview
->extent
.width
* view_bw
, img_bw
);
1686 iview
->extent
.height
= round_up_u32(iview
->extent
.height
* view_bh
, img_bh
);
1688 /* Comment ported from amdvlk -
1689 * If we have the following image:
1690 * Uncompressed pixels Compressed block sizes (4x4)
1691 * mip0: 22 x 22 6 x 6
1692 * mip1: 11 x 11 3 x 3
1697 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1698 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1699 * divide-by-two integer math):
1705 * This means that mip2 will be missing texels.
1707 * Fix this by calculating the base mip's width and height, then convert that, and round it
1708 * back up to get the level 0 size.
1709 * Clamp the converted size between the original values, and next power of two, which
1710 * means we don't oversize the image.
1712 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
1713 vk_format_is_compressed(image
->vk_format
) &&
1714 !vk_format_is_compressed(iview
->vk_format
)) {
1715 unsigned lvl_width
= radv_minify(image
->info
.width
, range
->baseMipLevel
);
1716 unsigned lvl_height
= radv_minify(image
->info
.height
, range
->baseMipLevel
);
1718 lvl_width
= round_up_u32(lvl_width
* view_bw
, img_bw
);
1719 lvl_height
= round_up_u32(lvl_height
* view_bh
, img_bh
);
1721 lvl_width
<<= range
->baseMipLevel
;
1722 lvl_height
<<= range
->baseMipLevel
;
1724 iview
->extent
.width
= CLAMP(lvl_width
, iview
->extent
.width
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_pitch
);
1725 iview
->extent
.height
= CLAMP(lvl_height
, iview
->extent
.height
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_height
);
1729 iview
->base_layer
= range
->baseArrayLayer
;
1730 iview
->layer_count
= radv_get_layerCount(image
, range
);
1731 iview
->base_mip
= range
->baseMipLevel
;
1732 iview
->level_count
= radv_get_levelCount(image
, range
);
1734 bool disable_compression
= extra_create_info
? extra_create_info
->disable_compression
: false;
1735 for (unsigned i
= 0; i
< (iview
->multiple_planes
? vk_format_get_plane_count(image
->vk_format
) : 1); ++i
) {
1736 VkFormat format
= vk_format_get_plane_format(iview
->vk_format
, i
);
1737 radv_image_view_make_descriptor(iview
, device
, format
,
1738 &pCreateInfo
->components
,
1739 false, disable_compression
,
1740 iview
->plane_id
+ i
, i
);
1741 radv_image_view_make_descriptor(iview
, device
,
1742 format
, &pCreateInfo
->components
,
1743 true, disable_compression
,
1744 iview
->plane_id
+ i
, i
);
1748 bool radv_layout_has_htile(const struct radv_image
*image
,
1749 VkImageLayout layout
,
1750 bool in_render_loop
,
1751 unsigned queue_mask
)
1753 if (radv_image_is_tc_compat_htile(image
))
1754 return layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1756 return radv_image_has_htile(image
) &&
1757 (layout
== VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
||
1758 layout
== VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR
||
1759 layout
== VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR
||
1760 (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1761 queue_mask
== (1u << RADV_QUEUE_GENERAL
)));
1764 bool radv_layout_is_htile_compressed(const struct radv_image
*image
,
1765 VkImageLayout layout
,
1766 bool in_render_loop
,
1767 unsigned queue_mask
)
1769 if (radv_image_is_tc_compat_htile(image
))
1770 return layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1772 return radv_image_has_htile(image
) &&
1773 (layout
== VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
||
1774 layout
== VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR
||
1775 layout
== VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR
||
1776 (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1777 queue_mask
== (1u << RADV_QUEUE_GENERAL
)));
1780 bool radv_layout_can_fast_clear(const struct radv_image
*image
,
1781 VkImageLayout layout
,
1782 bool in_render_loop
,
1783 unsigned queue_mask
)
1785 return layout
== VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
;
1788 bool radv_layout_dcc_compressed(const struct radv_device
*device
,
1789 const struct radv_image
*image
,
1790 VkImageLayout layout
,
1791 bool in_render_loop
,
1792 unsigned queue_mask
)
1794 /* Don't compress compute transfer dst, as image stores are not supported. */
1795 if (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1796 (queue_mask
& (1u << RADV_QUEUE_COMPUTE
)))
1799 return radv_image_has_dcc(image
) && layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1803 unsigned radv_image_queue_family_mask(const struct radv_image
*image
, uint32_t family
, uint32_t queue_family
)
1805 if (!image
->exclusive
)
1806 return image
->queue_family_mask
;
1807 if (family
== VK_QUEUE_FAMILY_EXTERNAL
||
1808 family
== VK_QUEUE_FAMILY_FOREIGN_EXT
)
1809 return (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1810 if (family
== VK_QUEUE_FAMILY_IGNORED
)
1811 return 1u << queue_family
;
1812 return 1u << family
;
1816 radv_CreateImage(VkDevice device
,
1817 const VkImageCreateInfo
*pCreateInfo
,
1818 const VkAllocationCallbacks
*pAllocator
,
1822 const VkNativeBufferANDROID
*gralloc_info
=
1823 vk_find_struct_const(pCreateInfo
->pNext
, NATIVE_BUFFER_ANDROID
);
1826 return radv_image_from_gralloc(device
, pCreateInfo
, gralloc_info
,
1827 pAllocator
, pImage
);
1830 const struct wsi_image_create_info
*wsi_info
=
1831 vk_find_struct_const(pCreateInfo
->pNext
, WSI_IMAGE_CREATE_INFO_MESA
);
1832 bool scanout
= wsi_info
&& wsi_info
->scanout
;
1834 return radv_image_create(device
,
1835 &(struct radv_image_create_info
) {
1836 .vk_info
= pCreateInfo
,
1844 radv_DestroyImage(VkDevice _device
, VkImage _image
,
1845 const VkAllocationCallbacks
*pAllocator
)
1847 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1848 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1853 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
)
1854 device
->ws
->buffer_destroy(image
->bo
);
1856 if (image
->owned_memory
!= VK_NULL_HANDLE
)
1857 radv_FreeMemory(_device
, image
->owned_memory
, pAllocator
);
1859 vk_free2(&device
->alloc
, pAllocator
, image
);
1862 void radv_GetImageSubresourceLayout(
1865 const VkImageSubresource
* pSubresource
,
1866 VkSubresourceLayout
* pLayout
)
1868 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1869 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1870 int level
= pSubresource
->mipLevel
;
1871 int layer
= pSubresource
->arrayLayer
;
1873 unsigned plane_id
= radv_plane_from_aspect(pSubresource
->aspectMask
);
1875 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1876 struct radeon_surf
*surface
= &plane
->surface
;
1878 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1879 uint64_t level_offset
= surface
->is_linear
? surface
->u
.gfx9
.offset
[level
] : 0;
1881 pLayout
->offset
= plane
->offset
+ level_offset
+ surface
->u
.gfx9
.surf_slice_size
* layer
;
1882 if (image
->vk_format
== VK_FORMAT_R32G32B32_UINT
||
1883 image
->vk_format
== VK_FORMAT_R32G32B32_SINT
||
1884 image
->vk_format
== VK_FORMAT_R32G32B32_SFLOAT
) {
1885 /* Adjust the number of bytes between each row because
1886 * the pitch is actually the number of components per
1889 pLayout
->rowPitch
= surface
->u
.gfx9
.surf_pitch
* surface
->bpe
/ 3;
1891 uint32_t pitch
= surface
->is_linear
? surface
->u
.gfx9
.pitch
[level
] : surface
->u
.gfx9
.surf_pitch
;
1893 assert(util_is_power_of_two_nonzero(surface
->bpe
));
1894 pLayout
->rowPitch
= pitch
* surface
->bpe
;
1897 pLayout
->arrayPitch
= surface
->u
.gfx9
.surf_slice_size
;
1898 pLayout
->depthPitch
= surface
->u
.gfx9
.surf_slice_size
;
1899 pLayout
->size
= surface
->u
.gfx9
.surf_slice_size
;
1900 if (image
->type
== VK_IMAGE_TYPE_3D
)
1901 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
1903 pLayout
->offset
= plane
->offset
+ surface
->u
.legacy
.level
[level
].offset
+ (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4 * layer
;
1904 pLayout
->rowPitch
= surface
->u
.legacy
.level
[level
].nblk_x
* surface
->bpe
;
1905 pLayout
->arrayPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1906 pLayout
->depthPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1907 pLayout
->size
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1908 if (image
->type
== VK_IMAGE_TYPE_3D
)
1909 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
1915 radv_CreateImageView(VkDevice _device
,
1916 const VkImageViewCreateInfo
*pCreateInfo
,
1917 const VkAllocationCallbacks
*pAllocator
,
1920 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1921 struct radv_image_view
*view
;
1923 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1924 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1926 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1928 radv_image_view_init(view
, device
, pCreateInfo
, NULL
);
1930 *pView
= radv_image_view_to_handle(view
);
1936 radv_DestroyImageView(VkDevice _device
, VkImageView _iview
,
1937 const VkAllocationCallbacks
*pAllocator
)
1939 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1940 RADV_FROM_HANDLE(radv_image_view
, iview
, _iview
);
1944 vk_free2(&device
->alloc
, pAllocator
, iview
);
1947 void radv_buffer_view_init(struct radv_buffer_view
*view
,
1948 struct radv_device
*device
,
1949 const VkBufferViewCreateInfo
* pCreateInfo
)
1951 RADV_FROM_HANDLE(radv_buffer
, buffer
, pCreateInfo
->buffer
);
1953 view
->bo
= buffer
->bo
;
1954 view
->range
= pCreateInfo
->range
== VK_WHOLE_SIZE
?
1955 buffer
->size
- pCreateInfo
->offset
: pCreateInfo
->range
;
1956 view
->vk_format
= pCreateInfo
->format
;
1958 radv_make_buffer_descriptor(device
, buffer
, view
->vk_format
,
1959 pCreateInfo
->offset
, view
->range
, view
->state
);
1963 radv_CreateBufferView(VkDevice _device
,
1964 const VkBufferViewCreateInfo
*pCreateInfo
,
1965 const VkAllocationCallbacks
*pAllocator
,
1966 VkBufferView
*pView
)
1968 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1969 struct radv_buffer_view
*view
;
1971 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1972 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1974 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1976 radv_buffer_view_init(view
, device
, pCreateInfo
);
1978 *pView
= radv_buffer_view_to_handle(view
);
1984 radv_DestroyBufferView(VkDevice _device
, VkBufferView bufferView
,
1985 const VkAllocationCallbacks
*pAllocator
)
1987 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1988 RADV_FROM_HANDLE(radv_buffer_view
, view
, bufferView
);
1993 vk_free2(&device
->alloc
, pAllocator
, view
);