/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
32 #include "radv_radeon_winsys.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
38 radv_choose_tiling(struct radv_device
*device
,
39 const VkImageCreateInfo
*pCreateInfo
,
42 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
) {
43 assert(pCreateInfo
->samples
<= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
47 if (!vk_format_is_compressed(format
) &&
48 !vk_format_is_depth_or_stencil(format
)
49 && device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
50 /* this causes hangs in some VK CTS tests on GFX9. */
51 /* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo
->imageType
== VK_IMAGE_TYPE_1D
||
53 /* Only very thin and long 2D textures should benefit from
55 (pCreateInfo
->extent
.width
> 8 && pCreateInfo
->extent
.height
<= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
59 /* MSAA resources must be 2D tiled. */
60 if (pCreateInfo
->samples
> 1)
61 return RADEON_SURF_MODE_2D
;
63 return RADEON_SURF_MODE_2D
;
67 radv_use_tc_compat_htile_for_image(struct radv_device
*device
,
68 const VkImageCreateInfo
*pCreateInfo
,
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
75 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
78 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
81 if (pCreateInfo
->mipLevels
> 1)
84 /* Do not enable TC-compatible HTILE if the image isn't readable by a
85 * shader because no texture fetches will happen.
87 if (!(pCreateInfo
->usage
& (VK_IMAGE_USAGE_SAMPLED_BIT
|
88 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT
|
89 VK_IMAGE_USAGE_TRANSFER_SRC_BIT
)))
92 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
93 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
95 if (pCreateInfo
->samples
>= 2 &&
96 (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
97 (format
== VK_FORMAT_D32_SFLOAT
&&
98 device
->physical_device
->rad_info
.chip_class
== GFX10
)))
101 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
102 * supports 32-bit. Though, it's possible to enable TC-compat for
103 * 16-bit depth surfaces if no Z planes are compressed.
105 if (format
!= VK_FORMAT_D32_SFLOAT_S8_UINT
&&
106 format
!= VK_FORMAT_D32_SFLOAT
&&
107 format
!= VK_FORMAT_D16_UNORM
)
110 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
111 const struct VkImageFormatListCreateInfo
*format_list
=
112 (const struct VkImageFormatListCreateInfo
*)
113 vk_find_struct_const(pCreateInfo
->pNext
,
114 IMAGE_FORMAT_LIST_CREATE_INFO
);
116 /* We have to ignore the existence of the list if viewFormatCount = 0 */
117 if (format_list
&& format_list
->viewFormatCount
) {
118 /* compatibility is transitive, so we only need to check
119 * one format with everything else.
121 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
122 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
125 if (format
!= format_list
->pViewFormats
[i
])
137 radv_surface_has_scanout(struct radv_device
*device
, const struct radv_image_create_info
*info
)
139 if (info
->bo_metadata
) {
140 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
141 return info
->bo_metadata
->u
.gfx9
.scanout
;
143 return info
->bo_metadata
->u
.legacy
.scanout
;
146 return info
->scanout
;
150 radv_use_dcc_for_image(struct radv_device
*device
,
151 const struct radv_image
*image
,
152 const VkImageCreateInfo
*pCreateInfo
,
155 bool dcc_compatible_formats
;
158 /* DCC (Delta Color Compression) is only available for GFX8+. */
159 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
162 if (device
->instance
->debug_flags
& RADV_DEBUG_NO_DCC
)
165 if (image
->shareable
)
168 /* TODO: Enable DCC for storage images. */
169 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
172 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
175 if (vk_format_is_subsampled(format
) ||
176 vk_format_get_plane_count(format
) > 1)
179 /* TODO: Enable DCC for mipmaps on GFX9+. */
180 if ((pCreateInfo
->arrayLayers
> 1 || pCreateInfo
->mipLevels
> 1) &&
181 device
->physical_device
->rad_info
.chip_class
>= GFX9
)
184 /* Do not enable DCC for mipmapped arrays because performance is worse. */
185 if (pCreateInfo
->arrayLayers
> 1 && pCreateInfo
->mipLevels
> 1)
188 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
189 * 2x can be enabled with an option.
191 if (pCreateInfo
->samples
> 2 ||
192 (pCreateInfo
->samples
== 2 &&
193 !device
->physical_device
->dcc_msaa_allowed
))
196 /* Determine if the formats are DCC compatible. */
197 dcc_compatible_formats
=
198 radv_is_colorbuffer_format_supported(format
,
201 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
202 const struct VkImageFormatListCreateInfo
*format_list
=
203 (const struct VkImageFormatListCreateInfo
*)
204 vk_find_struct_const(pCreateInfo
->pNext
,
205 IMAGE_FORMAT_LIST_CREATE_INFO
);
207 /* We have to ignore the existence of the list if viewFormatCount = 0 */
208 if (format_list
&& format_list
->viewFormatCount
) {
209 /* compatibility is transitive, so we only need to check
210 * one format with everything else. */
211 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
212 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
215 if (!radv_dcc_formats_compatible(format
,
216 format_list
->pViewFormats
[i
]))
217 dcc_compatible_formats
= false;
220 dcc_compatible_formats
= false;
224 if (!dcc_compatible_formats
)
231 radv_use_tc_compat_cmask_for_image(struct radv_device
*device
,
232 struct radv_image
*image
)
234 if (!(device
->instance
->perftest_flags
& RADV_PERFTEST_TC_COMPAT_CMASK
))
237 /* TC-compat CMASK is only available for GFX8+. */
238 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
241 if (image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)
244 if (radv_image_has_dcc(image
))
247 if (!radv_image_has_cmask(image
))
253 static uint32_t si_get_bo_metadata_word1(const struct radv_device
*device
)
255 return (ATI_VENDOR_ID
<< 16) | device
->physical_device
->rad_info
.pci_id
;
259 radv_is_valid_opaque_metadata(const struct radv_device
*device
,
260 const struct radeon_bo_metadata
*md
)
262 if (md
->metadata
[0] != 1 ||
263 md
->metadata
[1] != si_get_bo_metadata_word1(device
))
266 if (md
->size_metadata
< 40)
273 radv_patch_surface_from_metadata(struct radv_device
*device
,
274 struct radeon_surf
*surface
,
275 const struct radeon_bo_metadata
*md
)
277 surface
->flags
= RADEON_SURF_CLR(surface
->flags
, MODE
);
279 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
280 if (md
->u
.gfx9
.swizzle_mode
> 0)
281 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
283 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
285 surface
->u
.gfx9
.surf
.swizzle_mode
= md
->u
.gfx9
.swizzle_mode
;
287 surface
->u
.legacy
.pipe_config
= md
->u
.legacy
.pipe_config
;
288 surface
->u
.legacy
.bankw
= md
->u
.legacy
.bankw
;
289 surface
->u
.legacy
.bankh
= md
->u
.legacy
.bankh
;
290 surface
->u
.legacy
.tile_split
= md
->u
.legacy
.tile_split
;
291 surface
->u
.legacy
.mtilea
= md
->u
.legacy
.mtilea
;
292 surface
->u
.legacy
.num_banks
= md
->u
.legacy
.num_banks
;
294 if (md
->u
.legacy
.macrotile
== RADEON_LAYOUT_TILED
)
295 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
296 else if (md
->u
.legacy
.microtile
== RADEON_LAYOUT_TILED
)
297 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_1D
, MODE
);
299 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
305 radv_patch_image_dimensions(struct radv_device
*device
,
306 struct radv_image
*image
,
307 const struct radv_image_create_info
*create_info
,
308 struct ac_surf_info
*image_info
)
310 unsigned width
= image
->info
.width
;
311 unsigned height
= image
->info
.height
;
314 * minigbm sometimes allocates bigger images which is going to result in
315 * weird strides and other properties. Lets be lenient where possible and
316 * fail it on GFX10 (as we cannot cope there).
318 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
320 if (create_info
->bo_metadata
&&
321 radv_is_valid_opaque_metadata(device
, create_info
->bo_metadata
)) {
322 const struct radeon_bo_metadata
*md
= create_info
->bo_metadata
;
324 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
325 width
= G_00A004_WIDTH_LO(md
->metadata
[3]) +
326 (G_00A008_WIDTH_HI(md
->metadata
[4]) << 2) + 1;
327 height
= S_00A008_HEIGHT(md
->metadata
[4]) + 1;
329 width
= G_008F18_WIDTH(md
->metadata
[4]) + 1;
330 height
= G_008F18_HEIGHT(md
->metadata
[4]) + 1;
334 if (image
->info
.width
== width
&& image
->info
.height
== height
)
337 if (width
< image
->info
.width
|| height
< image
->info
.height
) {
339 "The imported image has smaller dimensions than the internal\n"
340 "dimensions. Using it is going to fail badly, so we reject\n"
342 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
343 image
->info
.width
, image
->info
.height
, width
, height
);
344 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
345 } else if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
347 "Tried to import an image with inconsistent width on GFX10.\n"
348 "As GFX10 has no separate stride fields we cannot cope with\n"
349 "an inconsistency in width and will fail this import.\n"
350 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
351 image
->info
.width
, image
->info
.height
, width
, height
);
352 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
355 "Tried to import an image with inconsistent width on pre-GFX10.\n"
356 "As GFX10 has no separate stride fields we cannot cope with\n"
357 "an inconsistency and would fail on GFX10.\n"
358 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
359 image
->info
.width
, image
->info
.height
, width
, height
);
361 image_info
->width
= width
;
362 image_info
->height
= height
;
368 radv_patch_image_from_extra_info(struct radv_device
*device
,
369 struct radv_image
*image
,
370 const struct radv_image_create_info
*create_info
,
371 struct ac_surf_info
*image_info
)
373 VkResult result
= radv_patch_image_dimensions(device
, image
, create_info
, image_info
);
374 if (result
!= VK_SUCCESS
)
377 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
378 if (create_info
->bo_metadata
) {
379 radv_patch_surface_from_metadata(device
, &image
->planes
[plane
].surface
,
380 create_info
->bo_metadata
);
383 if (radv_surface_has_scanout(device
, create_info
)) {
384 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_SCANOUT
;
385 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_DISABLE_DCC
;
387 image
->info
.surf_index
= NULL
;
394 radv_init_surface(struct radv_device
*device
,
395 const struct radv_image
*image
,
396 struct radeon_surf
*surface
,
398 const VkImageCreateInfo
*pCreateInfo
,
399 VkFormat image_format
)
401 unsigned array_mode
= radv_choose_tiling(device
, pCreateInfo
, image_format
);
402 VkFormat format
= vk_format_get_plane_format(image_format
, plane_id
);
403 const struct vk_format_description
*desc
= vk_format_description(format
);
404 bool is_depth
, is_stencil
;
406 is_depth
= vk_format_has_depth(desc
);
407 is_stencil
= vk_format_has_stencil(desc
);
409 surface
->blk_w
= vk_format_get_blockwidth(format
);
410 surface
->blk_h
= vk_format_get_blockheight(format
);
412 surface
->bpe
= vk_format_get_blocksize(vk_format_depth_only(format
));
413 /* align byte per element on dword */
414 if (surface
->bpe
== 3) {
418 surface
->flags
= RADEON_SURF_SET(array_mode
, MODE
);
420 switch (pCreateInfo
->imageType
){
421 case VK_IMAGE_TYPE_1D
:
422 if (pCreateInfo
->arrayLayers
> 1)
423 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY
, TYPE
);
425 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D
, TYPE
);
427 case VK_IMAGE_TYPE_2D
:
428 if (pCreateInfo
->arrayLayers
> 1)
429 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY
, TYPE
);
431 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D
, TYPE
);
433 case VK_IMAGE_TYPE_3D
:
434 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_3D
, TYPE
);
437 unreachable("unhandled image type");
441 surface
->flags
|= RADEON_SURF_ZBUFFER
;
444 surface
->flags
|= RADEON_SURF_SBUFFER
;
446 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
447 pCreateInfo
->imageType
== VK_IMAGE_TYPE_3D
&&
448 vk_format_get_blocksizebits(image_format
) == 128 &&
449 vk_format_is_compressed(image_format
))
450 surface
->flags
|= RADEON_SURF_NO_RENDER_TARGET
;
452 if (!radv_use_dcc_for_image(device
, image
, pCreateInfo
, image_format
))
453 surface
->flags
|= RADEON_SURF_DISABLE_DCC
;
458 static inline unsigned
459 si_tile_mode_index(const struct radv_image_plane
*plane
, unsigned level
, bool stencil
)
462 return plane
->surface
.u
.legacy
.stencil_tiling_index
[level
];
464 return plane
->surface
.u
.legacy
.tiling_index
[level
];
467 static unsigned radv_map_swizzle(unsigned swizzle
)
471 return V_008F0C_SQ_SEL_Y
;
473 return V_008F0C_SQ_SEL_Z
;
475 return V_008F0C_SQ_SEL_W
;
477 return V_008F0C_SQ_SEL_0
;
479 return V_008F0C_SQ_SEL_1
;
480 default: /* VK_SWIZZLE_X */
481 return V_008F0C_SQ_SEL_X
;
486 radv_make_buffer_descriptor(struct radv_device
*device
,
487 struct radv_buffer
*buffer
,
493 const struct vk_format_description
*desc
;
495 uint64_t gpu_address
= radv_buffer_get_va(buffer
->bo
);
496 uint64_t va
= gpu_address
+ buffer
->offset
;
497 unsigned num_format
, data_format
;
499 desc
= vk_format_description(vk_format
);
500 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
501 stride
= desc
->block
.bits
/ 8;
505 state
[1] = S_008F04_BASE_ADDRESS_HI(va
>> 32) |
506 S_008F04_STRIDE(stride
);
508 if (device
->physical_device
->rad_info
.chip_class
!= GFX8
&& stride
) {
513 state
[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc
->swizzle
[0])) |
514 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc
->swizzle
[1])) |
515 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc
->swizzle
[2])) |
516 S_008F0C_DST_SEL_W(radv_map_swizzle(desc
->swizzle
[3]));
518 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
519 const struct gfx10_format
*fmt
= &gfx10_format_table
[vk_format
];
521 /* OOB_SELECT chooses the out-of-bounds check:
522 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
523 * - 1: index >= NUM_RECORDS
524 * - 2: NUM_RECORDS == 0
525 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
526 * else: swizzle_address >= NUM_RECORDS
528 state
[3] |= S_008F0C_FORMAT(fmt
->img_format
) |
529 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET
) |
530 S_008F0C_RESOURCE_LEVEL(1);
532 num_format
= radv_translate_buffer_numformat(desc
, first_non_void
);
533 data_format
= radv_translate_buffer_dataformat(desc
, first_non_void
);
535 assert(data_format
!= V_008F0C_BUF_DATA_FORMAT_INVALID
);
536 assert(num_format
!= ~0);
538 state
[3] |= S_008F0C_NUM_FORMAT(num_format
) |
539 S_008F0C_DATA_FORMAT(data_format
);
544 si_set_mutable_tex_desc_fields(struct radv_device
*device
,
545 struct radv_image
*image
,
546 const struct legacy_surf_level
*base_level_info
,
548 unsigned base_level
, unsigned first_level
,
549 unsigned block_width
, bool is_stencil
,
550 bool is_storage_image
, bool disable_compression
,
553 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
554 uint64_t gpu_address
= image
->bo
? radv_buffer_get_va(image
->bo
) + image
->offset
: 0;
555 uint64_t va
= gpu_address
+ plane
->offset
;
556 enum chip_class chip_class
= device
->physical_device
->rad_info
.chip_class
;
557 uint64_t meta_va
= 0;
558 if (chip_class
>= GFX9
) {
560 va
+= plane
->surface
.u
.gfx9
.stencil_offset
;
562 va
+= plane
->surface
.u
.gfx9
.surf_offset
;
564 va
+= base_level_info
->offset
;
567 if (chip_class
>= GFX9
||
568 base_level_info
->mode
== RADEON_SURF_MODE_2D
)
569 state
[0] |= plane
->surface
.tile_swizzle
;
570 state
[1] &= C_008F14_BASE_ADDRESS_HI
;
571 state
[1] |= S_008F14_BASE_ADDRESS_HI(va
>> 40);
573 if (chip_class
>= GFX8
) {
574 state
[6] &= C_008F28_COMPRESSION_EN
;
576 if (!disable_compression
&& radv_dcc_enabled(image
, first_level
)) {
577 meta_va
= gpu_address
+ image
->dcc_offset
;
578 if (chip_class
<= GFX8
)
579 meta_va
+= base_level_info
->dcc_offset
;
581 unsigned dcc_tile_swizzle
= plane
->surface
.tile_swizzle
<< 8;
582 dcc_tile_swizzle
&= plane
->surface
.dcc_alignment
- 1;
583 meta_va
|= dcc_tile_swizzle
;
584 } else if (!disable_compression
&&
585 radv_image_is_tc_compat_htile(image
)) {
586 meta_va
= gpu_address
+ image
->htile_offset
;
590 state
[6] |= S_008F28_COMPRESSION_EN(1);
591 if (chip_class
<= GFX9
)
592 state
[7] = meta_va
>> 8;
596 if (chip_class
>= GFX10
) {
597 state
[3] &= C_00A00C_SW_MODE
;
600 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
602 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
605 state
[6] &= C_00A018_META_DATA_ADDRESS_LO
&
606 C_00A018_META_PIPE_ALIGNED
;
609 struct gfx9_surf_meta_flags meta
;
611 if (image
->dcc_offset
)
612 meta
= plane
->surface
.u
.gfx9
.dcc
;
614 meta
= plane
->surface
.u
.gfx9
.htile
;
616 state
[6] |= S_00A018_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
617 S_00A018_META_DATA_ADDRESS_LO(meta_va
>> 8);
620 state
[7] = meta_va
>> 16;
621 } else if (chip_class
== GFX9
) {
622 state
[3] &= C_008F1C_SW_MODE
;
623 state
[4] &= C_008F20_PITCH
;
626 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
627 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.stencil
.epitch
);
629 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
630 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.surf
.epitch
);
633 state
[5] &= C_008F24_META_DATA_ADDRESS
&
634 C_008F24_META_PIPE_ALIGNED
&
635 C_008F24_META_RB_ALIGNED
;
637 struct gfx9_surf_meta_flags meta
;
639 if (image
->dcc_offset
)
640 meta
= plane
->surface
.u
.gfx9
.dcc
;
642 meta
= plane
->surface
.u
.gfx9
.htile
;
644 state
[5] |= S_008F24_META_DATA_ADDRESS(meta_va
>> 40) |
645 S_008F24_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
646 S_008F24_META_RB_ALIGNED(meta
.rb_aligned
);
650 unsigned pitch
= base_level_info
->nblk_x
* block_width
;
651 unsigned index
= si_tile_mode_index(plane
, base_level
, is_stencil
);
653 state
[3] &= C_008F1C_TILING_INDEX
;
654 state
[3] |= S_008F1C_TILING_INDEX(index
);
655 state
[4] &= C_008F20_PITCH
;
656 state
[4] |= S_008F20_PITCH(pitch
- 1);
660 static unsigned radv_tex_dim(VkImageType image_type
, VkImageViewType view_type
,
661 unsigned nr_layers
, unsigned nr_samples
, bool is_storage_image
, bool gfx9
)
663 if (view_type
== VK_IMAGE_VIEW_TYPE_CUBE
|| view_type
== VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
)
664 return is_storage_image
? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_CUBE
;
666 /* GFX9 allocates 1D textures as 2D. */
667 if (gfx9
&& image_type
== VK_IMAGE_TYPE_1D
)
668 image_type
= VK_IMAGE_TYPE_2D
;
669 switch (image_type
) {
670 case VK_IMAGE_TYPE_1D
:
671 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY
: V_008F1C_SQ_RSRC_IMG_1D
;
672 case VK_IMAGE_TYPE_2D
:
674 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D_MSAA
;
676 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D
;
677 case VK_IMAGE_TYPE_3D
:
678 if (view_type
== VK_IMAGE_VIEW_TYPE_3D
)
679 return V_008F1C_SQ_RSRC_IMG_3D
;
681 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY
;
683 unreachable("illegal image type");
687 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle
[4])
689 unsigned bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
691 if (swizzle
[3] == VK_SWIZZLE_X
) {
692 /* For the pre-defined border color values (white, opaque
693 * black, transparent black), the only thing that matters is
694 * that the alpha channel winds up in the correct place
695 * (because the RGB channels are all the same) so either of
696 * these enumerations will work.
698 if (swizzle
[2] == VK_SWIZZLE_Y
)
699 bc_swizzle
= V_008F20_BC_SWIZZLE_WZYX
;
701 bc_swizzle
= V_008F20_BC_SWIZZLE_WXYZ
;
702 } else if (swizzle
[0] == VK_SWIZZLE_X
) {
703 if (swizzle
[1] == VK_SWIZZLE_Y
)
704 bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
706 bc_swizzle
= V_008F20_BC_SWIZZLE_XWYZ
;
707 } else if (swizzle
[1] == VK_SWIZZLE_X
) {
708 bc_swizzle
= V_008F20_BC_SWIZZLE_YXWZ
;
709 } else if (swizzle
[2] == VK_SWIZZLE_X
) {
710 bc_swizzle
= V_008F20_BC_SWIZZLE_ZYXW
;
716 bool vi_alpha_is_on_msb(struct radv_device
*device
, VkFormat format
)
718 const struct vk_format_description
*desc
= vk_format_description(format
);
720 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
&& desc
->nr_channels
== 1)
721 return desc
->swizzle
[3] == VK_SWIZZLE_X
;
723 return radv_translate_colorswap(format
, false) <= 1;
726 * Build the sampler view descriptor for a texture (GFX10).
729 gfx10_make_texture_descriptor(struct radv_device
*device
,
730 struct radv_image
*image
,
731 bool is_storage_image
,
732 VkImageViewType view_type
,
734 const VkComponentMapping
*mapping
,
735 unsigned first_level
, unsigned last_level
,
736 unsigned first_layer
, unsigned last_layer
,
737 unsigned width
, unsigned height
, unsigned depth
,
739 uint32_t *fmask_state
)
741 const struct vk_format_description
*desc
;
742 enum vk_swizzle swizzle
[4];
746 desc
= vk_format_description(vk_format
);
747 img_format
= gfx10_format_table
[vk_format
].img_format
;
749 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
750 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
751 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
753 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
756 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
757 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
758 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
760 depth
= image
->info
.array_size
;
761 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
762 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
763 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
764 depth
= image
->info
.array_size
;
765 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
766 depth
= image
->info
.array_size
/ 6;
769 state
[1] = S_00A004_FORMAT(img_format
) |
770 S_00A004_WIDTH_LO(width
- 1);
771 state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
772 S_00A008_HEIGHT(height
- 1) |
773 S_00A008_RESOURCE_LEVEL(1);
774 state
[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
775 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
776 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
777 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
778 S_00A00C_BASE_LEVEL(image
->info
.samples
> 1 ?
780 S_00A00C_LAST_LEVEL(image
->info
.samples
> 1 ?
781 util_logbase2(image
->info
.samples
) :
783 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle
)) |
785 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
786 * to know the total number of layers.
788 state
[4] = S_00A010_DEPTH(type
== V_008F1C_SQ_RSRC_IMG_3D
? depth
- 1 : last_layer
) |
789 S_00A010_BASE_ARRAY(first_layer
);
790 state
[5] = S_00A014_ARRAY_PITCH(0) |
791 S_00A014_MAX_MIP(image
->info
.samples
> 1 ?
792 util_logbase2(image
->info
.samples
) :
793 image
->info
.levels
- 1) |
794 S_00A014_PERF_MOD(4);
798 if (radv_dcc_enabled(image
, first_level
)) {
799 state
[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B
) |
800 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B
) |
801 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
804 /* Initialize the sampler view for FMASK. */
805 if (radv_image_has_fmask(image
)) {
806 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
810 assert(image
->plane_count
== 1);
812 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
814 switch (image
->info
.samples
) {
816 format
= V_008F0C_IMG_FORMAT_FMASK8_S2_F2
;
819 format
= V_008F0C_IMG_FORMAT_FMASK8_S4_F4
;
822 format
= V_008F0C_IMG_FORMAT_FMASK32_S8_F8
;
825 unreachable("invalid nr_samples");
828 fmask_state
[0] = (va
>> 8) | image
->planes
[0].surface
.fmask_tile_swizzle
;
829 fmask_state
[1] = S_00A004_BASE_ADDRESS_HI(va
>> 40) |
830 S_00A004_FORMAT(format
) |
831 S_00A004_WIDTH_LO(width
- 1);
832 fmask_state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
833 S_00A008_HEIGHT(height
- 1) |
834 S_00A008_RESOURCE_LEVEL(1);
835 fmask_state
[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
836 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
837 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
838 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
839 S_00A00C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
) |
840 S_00A00C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
841 fmask_state
[4] = S_00A010_DEPTH(last_layer
) |
842 S_00A010_BASE_ARRAY(first_layer
);
844 fmask_state
[6] = S_00A018_META_PIPE_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.pipe_aligned
);
846 } else if (fmask_state
)
847 memset(fmask_state
, 0, 8 * 4);
851 * Build the sampler view descriptor for a texture (SI-GFX9)
854 si_make_texture_descriptor(struct radv_device
*device
,
855 struct radv_image
*image
,
856 bool is_storage_image
,
857 VkImageViewType view_type
,
859 const VkComponentMapping
*mapping
,
860 unsigned first_level
, unsigned last_level
,
861 unsigned first_layer
, unsigned last_layer
,
862 unsigned width
, unsigned height
, unsigned depth
,
864 uint32_t *fmask_state
)
866 const struct vk_format_description
*desc
;
867 enum vk_swizzle swizzle
[4];
869 unsigned num_format
, data_format
, type
;
871 desc
= vk_format_description(vk_format
);
873 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
874 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
875 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
877 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
880 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
882 num_format
= radv_translate_tex_numformat(vk_format
, desc
, first_non_void
);
883 if (num_format
== ~0) {
887 data_format
= radv_translate_tex_dataformat(vk_format
, desc
, first_non_void
);
888 if (data_format
== ~0) {
892 /* S8 with either Z16 or Z32 HTILE need a special format. */
893 if (device
->physical_device
->rad_info
.chip_class
== GFX9
&&
894 vk_format
== VK_FORMAT_S8_UINT
&&
895 radv_image_is_tc_compat_htile(image
)) {
896 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
897 data_format
= V_008F14_IMG_DATA_FORMAT_S8_32
;
898 else if (image
->vk_format
== VK_FORMAT_D16_UNORM_S8_UINT
)
899 data_format
= V_008F14_IMG_DATA_FORMAT_S8_16
;
901 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
902 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
903 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
905 depth
= image
->info
.array_size
;
906 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
907 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
908 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
909 depth
= image
->info
.array_size
;
910 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
911 depth
= image
->info
.array_size
/ 6;
914 state
[1] = (S_008F14_DATA_FORMAT(data_format
) |
915 S_008F14_NUM_FORMAT(num_format
));
916 state
[2] = (S_008F18_WIDTH(width
- 1) |
917 S_008F18_HEIGHT(height
- 1) |
918 S_008F18_PERF_MOD(4));
919 state
[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
920 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
921 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
922 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
923 S_008F1C_BASE_LEVEL(image
->info
.samples
> 1 ?
925 S_008F1C_LAST_LEVEL(image
->info
.samples
> 1 ?
926 util_logbase2(image
->info
.samples
) :
928 S_008F1C_TYPE(type
));
930 state
[5] = S_008F24_BASE_ARRAY(first_layer
);
934 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
935 unsigned bc_swizzle
= gfx9_border_color_swizzle(swizzle
);
937 /* Depth is the last accessible layer on Gfx9.
938 * The hw doesn't need to know the total number of layers.
940 if (type
== V_008F1C_SQ_RSRC_IMG_3D
)
941 state
[4] |= S_008F20_DEPTH(depth
- 1);
943 state
[4] |= S_008F20_DEPTH(last_layer
);
945 state
[4] |= S_008F20_BC_SWIZZLE(bc_swizzle
);
946 state
[5] |= S_008F24_MAX_MIP(image
->info
.samples
> 1 ?
947 util_logbase2(image
->info
.samples
) :
948 image
->info
.levels
- 1);
950 state
[3] |= S_008F1C_POW2_PAD(image
->info
.levels
> 1);
951 state
[4] |= S_008F20_DEPTH(depth
- 1);
952 state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
954 if (image
->dcc_offset
) {
955 state
[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
957 /* The last dword is unused by hw. The shader uses it to clear
958 * bits in the first dword of sampler state.
960 if (device
->physical_device
->rad_info
.chip_class
<= GFX7
&& image
->info
.samples
<= 1) {
961 if (first_level
== last_level
)
962 state
[7] = C_008F30_MAX_ANISO_RATIO
;
964 state
[7] = 0xffffffff;
968 /* Initialize the sampler view for FMASK. */
969 if (radv_image_has_fmask(image
)) {
970 uint32_t fmask_format
, num_format
;
971 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
974 assert(image
->plane_count
== 1);
976 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
978 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
979 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK
;
980 switch (image
->info
.samples
) {
982 num_format
= V_008F14_IMG_FMASK_8_2_2
;
985 num_format
= V_008F14_IMG_FMASK_8_4_4
;
988 num_format
= V_008F14_IMG_FMASK_32_8_8
;
991 unreachable("invalid nr_samples");
994 switch (image
->info
.samples
) {
996 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2
;
999 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4
;
1002 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8
;
1006 fmask_format
= V_008F14_IMG_DATA_FORMAT_INVALID
;
1008 num_format
= V_008F14_IMG_NUM_FORMAT_UINT
;
1011 fmask_state
[0] = va
>> 8;
1012 fmask_state
[0] |= image
->planes
[0].surface
.fmask_tile_swizzle
;
1013 fmask_state
[1] = S_008F14_BASE_ADDRESS_HI(va
>> 40) |
1014 S_008F14_DATA_FORMAT(fmask_format
) |
1015 S_008F14_NUM_FORMAT(num_format
);
1016 fmask_state
[2] = S_008F18_WIDTH(width
- 1) |
1017 S_008F18_HEIGHT(height
- 1);
1018 fmask_state
[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
1019 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
1020 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
1021 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
1022 S_008F1C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
1024 fmask_state
[5] = S_008F24_BASE_ARRAY(first_layer
);
1028 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
1029 fmask_state
[3] |= S_008F1C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
);
1030 fmask_state
[4] |= S_008F20_DEPTH(last_layer
) |
1031 S_008F20_PITCH(image
->planes
[0].surface
.u
.gfx9
.fmask
.epitch
);
1032 fmask_state
[5] |= S_008F24_META_PIPE_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.pipe_aligned
) |
1033 S_008F24_META_RB_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.rb_aligned
);
1035 if (radv_image_is_tc_compat_cmask(image
)) {
1036 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1038 fmask_state
[5] |= S_008F24_META_DATA_ADDRESS(va
>> 40);
1039 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1040 fmask_state
[7] |= va
>> 8;
1043 fmask_state
[3] |= S_008F1C_TILING_INDEX(image
->planes
[0].surface
.u
.legacy
.fmask
.tiling_index
);
1044 fmask_state
[4] |= S_008F20_DEPTH(depth
- 1) |
1045 S_008F20_PITCH(image
->planes
[0].surface
.u
.legacy
.fmask
.pitch_in_pixels
- 1);
1046 fmask_state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
1048 if (radv_image_is_tc_compat_cmask(image
)) {
1049 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1051 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1052 fmask_state
[7] |= va
>> 8;
1055 } else if (fmask_state
)
1056 memset(fmask_state
, 0, 8 * 4);
1060 radv_make_texture_descriptor(struct radv_device
*device
,
1061 struct radv_image
*image
,
1062 bool is_storage_image
,
1063 VkImageViewType view_type
,
1065 const VkComponentMapping
*mapping
,
1066 unsigned first_level
, unsigned last_level
,
1067 unsigned first_layer
, unsigned last_layer
,
1068 unsigned width
, unsigned height
, unsigned depth
,
1070 uint32_t *fmask_state
)
1072 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
1073 gfx10_make_texture_descriptor(device
, image
, is_storage_image
,
1074 view_type
, vk_format
, mapping
,
1075 first_level
, last_level
,
1076 first_layer
, last_layer
,
1077 width
, height
, depth
,
1078 state
, fmask_state
);
1080 si_make_texture_descriptor(device
, image
, is_storage_image
,
1081 view_type
, vk_format
, mapping
,
1082 first_level
, last_level
,
1083 first_layer
, last_layer
,
1084 width
, height
, depth
,
1085 state
, fmask_state
);
1090 radv_query_opaque_metadata(struct radv_device
*device
,
1091 struct radv_image
*image
,
1092 struct radeon_bo_metadata
*md
)
1094 static const VkComponentMapping fixedmapping
;
1095 uint32_t desc
[8], i
;
1097 assert(image
->plane_count
== 1);
1099 /* Metadata image format format version 1:
1100 * [0] = 1 (metadata format identifier)
1101 * [1] = (VENDOR_ID << 16) | PCI_ID
1102 * [2:9] = image descriptor for the whole resource
1103 * [2] is always 0, because the base address is cleared
1104 * [9] is the DCC offset bits [39:8] from the beginning of
1106 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1108 md
->metadata
[0] = 1; /* metadata image format version 1 */
1110 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1111 md
->metadata
[1] = si_get_bo_metadata_word1(device
);
1114 radv_make_texture_descriptor(device
, image
, false,
1115 (VkImageViewType
)image
->type
, image
->vk_format
,
1116 &fixedmapping
, 0, image
->info
.levels
- 1, 0,
1117 image
->info
.array_size
- 1,
1118 image
->info
.width
, image
->info
.height
,
1122 si_set_mutable_tex_desc_fields(device
, image
, &image
->planes
[0].surface
.u
.legacy
.level
[0], 0, 0, 0,
1123 image
->planes
[0].surface
.blk_w
, false, false, false, desc
);
1125 /* Clear the base address and set the relative DCC offset. */
1127 desc
[1] &= C_008F14_BASE_ADDRESS_HI
;
1128 desc
[7] = image
->dcc_offset
>> 8;
1130 /* Dwords [2:9] contain the image descriptor. */
1131 memcpy(&md
->metadata
[2], desc
, sizeof(desc
));
1133 /* Dwords [10:..] contain the mipmap level offsets. */
1134 if (device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
1135 for (i
= 0; i
<= image
->info
.levels
- 1; i
++)
1136 md
->metadata
[10+i
] = image
->planes
[0].surface
.u
.legacy
.level
[i
].offset
>> 8;
1137 md
->size_metadata
= (11 + image
->info
.levels
- 1) * 4;
1139 md
->size_metadata
= 10 * 4;
1143 radv_init_metadata(struct radv_device
*device
,
1144 struct radv_image
*image
,
1145 struct radeon_bo_metadata
*metadata
)
1147 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
1149 memset(metadata
, 0, sizeof(*metadata
));
1151 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1152 metadata
->u
.gfx9
.swizzle_mode
= surface
->u
.gfx9
.surf
.swizzle_mode
;
1153 metadata
->u
.gfx9
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
1155 metadata
->u
.legacy
.microtile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_1D
?
1156 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
1157 metadata
->u
.legacy
.macrotile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_2D
?
1158 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
1159 metadata
->u
.legacy
.pipe_config
= surface
->u
.legacy
.pipe_config
;
1160 metadata
->u
.legacy
.bankw
= surface
->u
.legacy
.bankw
;
1161 metadata
->u
.legacy
.bankh
= surface
->u
.legacy
.bankh
;
1162 metadata
->u
.legacy
.tile_split
= surface
->u
.legacy
.tile_split
;
1163 metadata
->u
.legacy
.mtilea
= surface
->u
.legacy
.mtilea
;
1164 metadata
->u
.legacy
.num_banks
= surface
->u
.legacy
.num_banks
;
1165 metadata
->u
.legacy
.stride
= surface
->u
.legacy
.level
[0].nblk_x
* surface
->bpe
;
1166 metadata
->u
.legacy
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
1168 radv_query_opaque_metadata(device
, image
, metadata
);
1172 radv_image_override_offset_stride(struct radv_device
*device
,
1173 struct radv_image
*image
,
1174 uint64_t offset
, uint32_t stride
)
1176 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
1177 unsigned bpe
= vk_format_get_blocksizebits(image
->vk_format
) / 8;
1179 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1181 surface
->u
.gfx9
.surf_pitch
= stride
;
1182 surface
->u
.gfx9
.surf_slice_size
=
1183 (uint64_t)stride
* surface
->u
.gfx9
.surf_height
* bpe
;
1185 surface
->u
.gfx9
.surf_offset
= offset
;
1187 surface
->u
.legacy
.level
[0].nblk_x
= stride
;
1188 surface
->u
.legacy
.level
[0].slice_size_dw
=
1189 ((uint64_t)stride
* surface
->u
.legacy
.level
[0].nblk_y
* bpe
) / 4;
1192 for (unsigned i
= 0; i
< ARRAY_SIZE(surface
->u
.legacy
.level
); ++i
)
1193 surface
->u
.legacy
.level
[i
].offset
+= offset
;
1200 radv_image_alloc_fmask(struct radv_device
*device
,
1201 struct radv_image
*image
)
1203 unsigned fmask_alignment
= image
->planes
[0].surface
.fmask_alignment
;
1205 image
->fmask_offset
= align64(image
->size
, fmask_alignment
);
1206 image
->size
= image
->fmask_offset
+ image
->planes
[0].surface
.fmask_size
;
1207 image
->alignment
= MAX2(image
->alignment
, fmask_alignment
);
1211 radv_image_alloc_cmask(struct radv_device
*device
,
1212 struct radv_image
*image
)
1214 unsigned cmask_alignment
= image
->planes
[0].surface
.cmask_alignment
;
1215 unsigned cmask_size
= image
->planes
[0].surface
.cmask_size
;
1216 uint32_t clear_value_size
= 0;
1221 assert(cmask_alignment
);
1223 image
->cmask_offset
= align64(image
->size
, cmask_alignment
);
1224 /* + 8 for storing the clear values */
1225 if (!image
->clear_value_offset
) {
1226 image
->clear_value_offset
= image
->cmask_offset
+ cmask_size
;
1227 clear_value_size
= 8;
1229 image
->size
= image
->cmask_offset
+ cmask_size
+ clear_value_size
;
1230 image
->alignment
= MAX2(image
->alignment
, cmask_alignment
);
1234 radv_image_alloc_dcc(struct radv_image
*image
)
1236 assert(image
->plane_count
== 1);
1238 image
->dcc_offset
= align64(image
->size
, image
->planes
[0].surface
.dcc_alignment
);
1239 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1240 image
->clear_value_offset
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
;
1241 image
->fce_pred_offset
= image
->clear_value_offset
+ 8 * image
->info
.levels
;
1242 image
->dcc_pred_offset
= image
->clear_value_offset
+ 16 * image
->info
.levels
;
1243 image
->size
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
+ 24 * image
->info
.levels
;
1244 image
->alignment
= MAX2(image
->alignment
, image
->planes
[0].surface
.dcc_alignment
);
1248 radv_image_alloc_htile(struct radv_device
*device
, struct radv_image
*image
)
1250 image
->htile_offset
= align64(image
->size
, image
->planes
[0].surface
.htile_alignment
);
1252 /* + 8 for storing the clear values */
1253 image
->clear_value_offset
= image
->htile_offset
+ image
->planes
[0].surface
.htile_size
;
1254 image
->size
= image
->clear_value_offset
+ image
->info
.levels
* 8;
1255 if (radv_image_is_tc_compat_htile(image
) &&
1256 device
->physical_device
->rad_info
.has_tc_compat_zrange_bug
) {
1257 /* Metadata for the TC-compatible HTILE hardware bug which
1258 * have to be fixed by updating ZRANGE_PRECISION when doing
1259 * fast depth clears to 0.0f.
1261 image
->tc_compat_zrange_offset
= image
->size
;
1262 image
->size
= image
->tc_compat_zrange_offset
+ image
->info
.levels
* 4;
1264 image
->alignment
= align64(image
->alignment
, image
->planes
[0].surface
.htile_alignment
);
1268 radv_image_can_enable_dcc_or_cmask(struct radv_image
*image
)
1270 if (image
->info
.samples
<= 1 &&
1271 image
->info
.width
* image
->info
.height
<= 512 * 512) {
1272 /* Do not enable CMASK or DCC for small surfaces where the cost
1273 * of the eliminate pass can be higher than the benefit of fast
1274 * clear. RadeonSI does this, but the image threshold is
1280 return image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
&&
1281 (image
->exclusive
|| image
->queue_family_mask
== 1);
1285 radv_image_can_enable_dcc(struct radv_device
*device
, struct radv_image
*image
)
1287 if (!radv_image_can_enable_dcc_or_cmask(image
) ||
1288 !radv_image_has_dcc(image
))
1291 /* On GFX8, DCC layers can be interleaved and it's currently only
1292 * enabled if slice size is equal to the per slice fast clear size
1293 * because the driver assumes that portions of multiple layers are
1294 * contiguous during fast clears.
1296 if (image
->info
.array_size
> 1) {
1297 const struct legacy_surf_level
*surf_level
=
1298 &image
->planes
[0].surface
.u
.legacy
.level
[0];
1300 assert(device
->physical_device
->rad_info
.chip_class
== GFX8
);
1302 if (image
->planes
[0].surface
.dcc_slice_size
!= surf_level
->dcc_fast_clear_size
)
1310 radv_image_can_enable_cmask(struct radv_image
*image
)
1312 if (image
->planes
[0].surface
.bpe
> 8 && image
->info
.samples
== 1) {
1313 /* Do not enable CMASK for non-MSAA images (fast color clear)
1314 * because 128 bit formats are not supported, but FMASK might
1320 return radv_image_can_enable_dcc_or_cmask(image
) &&
1321 image
->info
.levels
== 1 &&
1322 image
->info
.depth
== 1 &&
1323 !image
->planes
[0].surface
.is_linear
;
1327 radv_image_can_enable_fmask(struct radv_image
*image
)
1329 return image
->info
.samples
> 1 &&
1330 image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
;
1334 radv_image_can_enable_htile(struct radv_image
*image
)
1336 return radv_image_has_htile(image
) &&
1337 image
->info
.levels
== 1 &&
1338 image
->info
.width
* image
->info
.height
>= 8 * 8;
1341 static void radv_image_disable_dcc(struct radv_image
*image
)
1343 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1344 image
->planes
[i
].surface
.dcc_size
= 0;
1347 static void radv_image_disable_htile(struct radv_image
*image
)
1349 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1350 image
->planes
[i
].surface
.htile_size
= 0;
1352 image
->tc_compatible_htile
= false;
1356 radv_image_create_layout(struct radv_device
*device
,
1357 struct radv_image_create_info create_info
,
1358 struct radv_image
*image
)
1360 /* Check that we did not initialize things earlier */
1361 assert(!image
->planes
[0].surface
.surf_size
);
1363 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1364 * common internal case. */
1365 create_info
.vk_info
= NULL
;
1367 struct ac_surf_info image_info
= image
->info
;
1368 VkResult result
= radv_patch_image_from_extra_info(device
, image
, &create_info
, &image_info
);
1369 if (result
!= VK_SUCCESS
)
1373 image
->alignment
= 1;
1374 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
1375 struct ac_surf_info info
= image_info
;
1378 const struct vk_format_description
*desc
= vk_format_description(image
->vk_format
);
1379 assert(info
.width
% desc
->width_divisor
== 0);
1380 assert(info
.height
% desc
->height_divisor
== 0);
1382 info
.width
/= desc
->width_divisor
;
1383 info
.height
/= desc
->height_divisor
;
1386 device
->ws
->surface_init(device
->ws
, &info
, &image
->planes
[plane
].surface
);
1388 image
->planes
[plane
].offset
= align(image
->size
, image
->planes
[plane
].surface
.surf_alignment
);
1389 image
->size
= image
->planes
[plane
].offset
+ image
->planes
[plane
].surface
.surf_size
;
1390 image
->alignment
= image
->planes
[plane
].surface
.surf_alignment
;
1392 image
->planes
[plane
].format
= vk_format_get_plane_format(image
->vk_format
, plane
);
1395 if (!create_info
.no_metadata_planes
) {
1396 /* Try to enable DCC first. */
1397 if (radv_image_can_enable_dcc(device
, image
)) {
1398 radv_image_alloc_dcc(image
);
1399 if (image
->info
.samples
> 1) {
1400 /* CMASK should be enabled because DCC fast
1401 * clear with MSAA needs it.
1403 assert(radv_image_can_enable_cmask(image
));
1404 radv_image_alloc_cmask(device
, image
);
1407 /* When DCC cannot be enabled, try CMASK. */
1408 radv_image_disable_dcc(image
);
1409 if (radv_image_can_enable_cmask(image
)) {
1410 radv_image_alloc_cmask(device
, image
);
1414 /* Try to enable FMASK for multisampled images. */
1415 if (radv_image_can_enable_fmask(image
)) {
1416 radv_image_alloc_fmask(device
, image
);
1418 if (radv_use_tc_compat_cmask_for_image(device
, image
))
1419 image
->tc_compatible_cmask
= true;
1421 /* Otherwise, try to enable HTILE for depth surfaces. */
1422 if (radv_image_can_enable_htile(image
) &&
1423 !(device
->instance
->debug_flags
& RADV_DEBUG_NO_HIZ
)) {
1424 if (!image
->planes
[0].surface
.tc_compatible_htile_allowed
)
1425 image
->tc_compatible_htile
= false;
1426 radv_image_alloc_htile(device
, image
);
1428 radv_image_disable_htile(image
);
1432 radv_image_disable_dcc(image
);
1433 radv_image_disable_htile(image
);
1436 assert(image
->planes
[0].surface
.surf_size
);
1441 radv_image_create(VkDevice _device
,
1442 const struct radv_image_create_info
*create_info
,
1443 const VkAllocationCallbacks
* alloc
,
1446 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1447 const VkImageCreateInfo
*pCreateInfo
= create_info
->vk_info
;
1448 struct radv_image
*image
= NULL
;
1449 VkFormat format
= radv_select_android_external_format(pCreateInfo
->pNext
,
1450 pCreateInfo
->format
);
1451 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO
);
1453 const unsigned plane_count
= vk_format_get_plane_count(format
);
1454 const size_t image_struct_size
= sizeof(*image
) + sizeof(struct radv_image_plane
) * plane_count
;
1456 radv_assert(pCreateInfo
->mipLevels
> 0);
1457 radv_assert(pCreateInfo
->arrayLayers
> 0);
1458 radv_assert(pCreateInfo
->samples
> 0);
1459 radv_assert(pCreateInfo
->extent
.width
> 0);
1460 radv_assert(pCreateInfo
->extent
.height
> 0);
1461 radv_assert(pCreateInfo
->extent
.depth
> 0);
1463 image
= vk_zalloc2(&device
->alloc
, alloc
, image_struct_size
, 8,
1464 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1466 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1468 image
->type
= pCreateInfo
->imageType
;
1469 image
->info
.width
= pCreateInfo
->extent
.width
;
1470 image
->info
.height
= pCreateInfo
->extent
.height
;
1471 image
->info
.depth
= pCreateInfo
->extent
.depth
;
1472 image
->info
.samples
= pCreateInfo
->samples
;
1473 image
->info
.storage_samples
= pCreateInfo
->samples
;
1474 image
->info
.array_size
= pCreateInfo
->arrayLayers
;
1475 image
->info
.levels
= pCreateInfo
->mipLevels
;
1476 image
->info
.num_channels
= vk_format_get_nr_components(format
);
1478 image
->vk_format
= format
;
1479 image
->tiling
= pCreateInfo
->tiling
;
1480 image
->usage
= pCreateInfo
->usage
;
1481 image
->flags
= pCreateInfo
->flags
;
1482 image
->plane_count
= plane_count
;
1484 image
->exclusive
= pCreateInfo
->sharingMode
== VK_SHARING_MODE_EXCLUSIVE
;
1485 if (pCreateInfo
->sharingMode
== VK_SHARING_MODE_CONCURRENT
) {
1486 for (uint32_t i
= 0; i
< pCreateInfo
->queueFamilyIndexCount
; ++i
)
1487 if (pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_EXTERNAL
||
1488 pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_FOREIGN_EXT
)
1489 image
->queue_family_mask
|= (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1491 image
->queue_family_mask
|= 1u << pCreateInfo
->pQueueFamilyIndices
[i
];
1494 const VkExternalMemoryImageCreateInfo
*external_info
=
1495 vk_find_struct_const(pCreateInfo
->pNext
,
1496 EXTERNAL_MEMORY_IMAGE_CREATE_INFO
) ;
1498 image
->shareable
= external_info
;
1499 if (!vk_format_is_depth_or_stencil(format
) && !image
->shareable
) {
1500 image
->info
.surf_index
= &device
->image_mrt_offset_counter
;
1503 image
->tc_compatible_htile
=
1504 radv_use_tc_compat_htile_for_image(device
, create_info
->vk_info
,
1507 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
1508 radv_init_surface(device
, image
, &image
->planes
[plane
].surface
, plane
, pCreateInfo
, format
);
1511 bool delay_layout
= external_info
&&
1512 (external_info
->handleTypes
& VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID
);
1515 *pImage
= radv_image_to_handle(image
);
1516 assert (!(image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
));
1520 ASSERTED VkResult result
= radv_image_create_layout(device
, *create_info
, image
);
1521 assert(result
== VK_SUCCESS
);
1523 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
) {
1524 image
->alignment
= MAX2(image
->alignment
, 4096);
1525 image
->size
= align64(image
->size
, image
->alignment
);
1528 image
->bo
= device
->ws
->buffer_create(device
->ws
, image
->size
, image
->alignment
,
1529 0, RADEON_FLAG_VIRTUAL
, RADV_BO_PRIORITY_VIRTUAL
);
1531 vk_free2(&device
->alloc
, alloc
, image
);
1532 return vk_error(device
->instance
, VK_ERROR_OUT_OF_DEVICE_MEMORY
);
1536 *pImage
= radv_image_to_handle(image
);
1542 radv_image_view_make_descriptor(struct radv_image_view
*iview
,
1543 struct radv_device
*device
,
1545 const VkComponentMapping
*components
,
1546 bool is_storage_image
, bool disable_compression
,
1547 unsigned plane_id
, unsigned descriptor_plane_id
)
1549 struct radv_image
*image
= iview
->image
;
1550 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1551 const struct vk_format_description
*format_desc
= vk_format_description(image
->vk_format
);
1552 bool is_stencil
= iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
;
1554 union radv_descriptor
*descriptor
;
1555 uint32_t hw_level
= 0;
1557 if (is_storage_image
) {
1558 descriptor
= &iview
->storage_descriptor
;
1560 descriptor
= &iview
->descriptor
;
1563 assert(vk_format_get_plane_count(vk_format
) == 1);
1564 assert(plane
->surface
.blk_w
% vk_format_get_blockwidth(plane
->format
) == 0);
1565 blk_w
= plane
->surface
.blk_w
/ vk_format_get_blockwidth(plane
->format
) * vk_format_get_blockwidth(vk_format
);
1567 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
1568 hw_level
= iview
->base_mip
;
1569 radv_make_texture_descriptor(device
, image
, is_storage_image
,
1573 hw_level
, hw_level
+ iview
->level_count
- 1,
1575 iview
->base_layer
+ iview
->layer_count
- 1,
1576 iview
->extent
.width
/ (plane_id
? format_desc
->width_divisor
: 1),
1577 iview
->extent
.height
/ (plane_id
? format_desc
->height_divisor
: 1),
1578 iview
->extent
.depth
,
1579 descriptor
->plane_descriptors
[descriptor_plane_id
],
1580 descriptor_plane_id
? NULL
: descriptor
->fmask_descriptor
);
1582 const struct legacy_surf_level
*base_level_info
= NULL
;
1583 if (device
->physical_device
->rad_info
.chip_class
<= GFX9
) {
1585 base_level_info
= &plane
->surface
.u
.legacy
.stencil_level
[iview
->base_mip
];
1587 base_level_info
= &plane
->surface
.u
.legacy
.level
[iview
->base_mip
];
1589 si_set_mutable_tex_desc_fields(device
, image
,
1594 blk_w
, is_stencil
, is_storage_image
,
1595 is_storage_image
|| disable_compression
,
1596 descriptor
->plane_descriptors
[descriptor_plane_id
]);
1600 radv_plane_from_aspect(VkImageAspectFlags mask
)
1603 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1605 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1613 radv_get_aspect_format(struct radv_image
*image
, VkImageAspectFlags mask
)
1616 case VK_IMAGE_ASPECT_PLANE_0_BIT
:
1617 return image
->planes
[0].format
;
1618 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1619 return image
->planes
[1].format
;
1620 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1621 return image
->planes
[2].format
;
1622 case VK_IMAGE_ASPECT_STENCIL_BIT
:
1623 return vk_format_stencil_only(image
->vk_format
);
1624 case VK_IMAGE_ASPECT_DEPTH_BIT
:
1625 return vk_format_depth_only(image
->vk_format
);
1626 case VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
:
1627 return vk_format_depth_only(image
->vk_format
);
1629 return image
->vk_format
;
1634 radv_image_view_init(struct radv_image_view
*iview
,
1635 struct radv_device
*device
,
1636 const VkImageViewCreateInfo
* pCreateInfo
,
1637 const struct radv_image_view_extra_create_info
* extra_create_info
)
1639 RADV_FROM_HANDLE(radv_image
, image
, pCreateInfo
->image
);
1640 const VkImageSubresourceRange
*range
= &pCreateInfo
->subresourceRange
;
1642 switch (image
->type
) {
1643 case VK_IMAGE_TYPE_1D
:
1644 case VK_IMAGE_TYPE_2D
:
1645 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1 <= image
->info
.array_size
);
1647 case VK_IMAGE_TYPE_3D
:
1648 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1
1649 <= radv_minify(image
->info
.depth
, range
->baseMipLevel
));
1652 unreachable("bad VkImageType");
1654 iview
->image
= image
;
1655 iview
->bo
= image
->bo
;
1656 iview
->type
= pCreateInfo
->viewType
;
1657 iview
->plane_id
= radv_plane_from_aspect(pCreateInfo
->subresourceRange
.aspectMask
);
1658 iview
->aspect_mask
= pCreateInfo
->subresourceRange
.aspectMask
;
1659 iview
->multiple_planes
= vk_format_get_plane_count(image
->vk_format
) > 1 && iview
->aspect_mask
== VK_IMAGE_ASPECT_COLOR_BIT
;
1661 iview
->vk_format
= pCreateInfo
->format
;
1663 /* If the image has an Android external format, pCreateInfo->format will be
1664 * VK_FORMAT_UNDEFINED. */
1665 if (iview
->vk_format
== VK_FORMAT_UNDEFINED
)
1666 iview
->vk_format
= image
->vk_format
;
1668 if (iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1669 iview
->vk_format
= vk_format_stencil_only(iview
->vk_format
);
1670 } else if (iview
->aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
) {
1671 iview
->vk_format
= vk_format_depth_only(iview
->vk_format
);
1674 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1675 iview
->extent
= (VkExtent3D
) {
1676 .width
= image
->info
.width
,
1677 .height
= image
->info
.height
,
1678 .depth
= image
->info
.depth
,
1681 iview
->extent
= (VkExtent3D
) {
1682 .width
= radv_minify(image
->info
.width
, range
->baseMipLevel
),
1683 .height
= radv_minify(image
->info
.height
, range
->baseMipLevel
),
1684 .depth
= radv_minify(image
->info
.depth
, range
->baseMipLevel
),
1688 if (iview
->vk_format
!= image
->planes
[iview
->plane_id
].format
) {
1689 unsigned view_bw
= vk_format_get_blockwidth(iview
->vk_format
);
1690 unsigned view_bh
= vk_format_get_blockheight(iview
->vk_format
);
1691 unsigned img_bw
= vk_format_get_blockwidth(image
->vk_format
);
1692 unsigned img_bh
= vk_format_get_blockheight(image
->vk_format
);
1694 iview
->extent
.width
= round_up_u32(iview
->extent
.width
* view_bw
, img_bw
);
1695 iview
->extent
.height
= round_up_u32(iview
->extent
.height
* view_bh
, img_bh
);
1697 /* Comment ported from amdvlk -
1698 * If we have the following image:
1699 * Uncompressed pixels Compressed block sizes (4x4)
1700 * mip0: 22 x 22 6 x 6
1701 * mip1: 11 x 11 3 x 3
1706 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1707 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1708 * divide-by-two integer math):
1714 * This means that mip2 will be missing texels.
1716 * Fix this by calculating the base mip's width and height, then convert that, and round it
1717 * back up to get the level 0 size.
1718 * Clamp the converted size between the original values, and next power of two, which
1719 * means we don't oversize the image.
1721 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
1722 vk_format_is_compressed(image
->vk_format
) &&
1723 !vk_format_is_compressed(iview
->vk_format
)) {
1724 unsigned lvl_width
= radv_minify(image
->info
.width
, range
->baseMipLevel
);
1725 unsigned lvl_height
= radv_minify(image
->info
.height
, range
->baseMipLevel
);
1727 lvl_width
= round_up_u32(lvl_width
* view_bw
, img_bw
);
1728 lvl_height
= round_up_u32(lvl_height
* view_bh
, img_bh
);
1730 lvl_width
<<= range
->baseMipLevel
;
1731 lvl_height
<<= range
->baseMipLevel
;
1733 iview
->extent
.width
= CLAMP(lvl_width
, iview
->extent
.width
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_pitch
);
1734 iview
->extent
.height
= CLAMP(lvl_height
, iview
->extent
.height
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_height
);
1738 iview
->base_layer
= range
->baseArrayLayer
;
1739 iview
->layer_count
= radv_get_layerCount(image
, range
);
1740 iview
->base_mip
= range
->baseMipLevel
;
1741 iview
->level_count
= radv_get_levelCount(image
, range
);
1743 bool disable_compression
= extra_create_info
? extra_create_info
->disable_compression
: false;
1744 for (unsigned i
= 0; i
< (iview
->multiple_planes
? vk_format_get_plane_count(image
->vk_format
) : 1); ++i
) {
1745 VkFormat format
= vk_format_get_plane_format(iview
->vk_format
, i
);
1746 radv_image_view_make_descriptor(iview
, device
, format
,
1747 &pCreateInfo
->components
,
1748 false, disable_compression
,
1749 iview
->plane_id
+ i
, i
);
1750 radv_image_view_make_descriptor(iview
, device
,
1751 format
, &pCreateInfo
->components
,
1752 true, disable_compression
,
1753 iview
->plane_id
+ i
, i
);
1757 bool radv_layout_is_htile_compressed(const struct radv_image
*image
,
1758 VkImageLayout layout
,
1759 bool in_render_loop
,
1760 unsigned queue_mask
)
1762 if (radv_image_is_tc_compat_htile(image
)) {
1763 if (layout
== VK_IMAGE_LAYOUT_GENERAL
&&
1765 !(image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)) {
1766 /* It should be safe to enable TC-compat HTILE with
1767 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1768 * loop and if the image doesn't have the storage bit
1769 * set. This improves performance for apps that use
1770 * GENERAL for the main depth pass because this allows
1771 * compression and this reduces the number of
1772 * decompressions from/to GENERAL.
1777 return layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1780 return radv_image_has_htile(image
) &&
1781 (layout
== VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
||
1782 layout
== VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR
||
1783 layout
== VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR
||
1784 (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1785 queue_mask
== (1u << RADV_QUEUE_GENERAL
)));
1788 bool radv_layout_can_fast_clear(const struct radv_image
*image
,
1789 VkImageLayout layout
,
1790 bool in_render_loop
,
1791 unsigned queue_mask
)
1793 return layout
== VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
;
1796 bool radv_layout_dcc_compressed(const struct radv_device
*device
,
1797 const struct radv_image
*image
,
1798 VkImageLayout layout
,
1799 bool in_render_loop
,
1800 unsigned queue_mask
)
1802 /* Don't compress compute transfer dst, as image stores are not supported. */
1803 if (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1804 (queue_mask
& (1u << RADV_QUEUE_COMPUTE
)))
1807 return radv_image_has_dcc(image
) && layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1811 unsigned radv_image_queue_family_mask(const struct radv_image
*image
, uint32_t family
, uint32_t queue_family
)
1813 if (!image
->exclusive
)
1814 return image
->queue_family_mask
;
1815 if (family
== VK_QUEUE_FAMILY_EXTERNAL
||
1816 family
== VK_QUEUE_FAMILY_FOREIGN_EXT
)
1817 return (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1818 if (family
== VK_QUEUE_FAMILY_IGNORED
)
1819 return 1u << queue_family
;
1820 return 1u << family
;
1824 radv_CreateImage(VkDevice device
,
1825 const VkImageCreateInfo
*pCreateInfo
,
1826 const VkAllocationCallbacks
*pAllocator
,
1830 const VkNativeBufferANDROID
*gralloc_info
=
1831 vk_find_struct_const(pCreateInfo
->pNext
, NATIVE_BUFFER_ANDROID
);
1834 return radv_image_from_gralloc(device
, pCreateInfo
, gralloc_info
,
1835 pAllocator
, pImage
);
1838 const struct wsi_image_create_info
*wsi_info
=
1839 vk_find_struct_const(pCreateInfo
->pNext
, WSI_IMAGE_CREATE_INFO_MESA
);
1840 bool scanout
= wsi_info
&& wsi_info
->scanout
;
1842 return radv_image_create(device
,
1843 &(struct radv_image_create_info
) {
1844 .vk_info
= pCreateInfo
,
1852 radv_DestroyImage(VkDevice _device
, VkImage _image
,
1853 const VkAllocationCallbacks
*pAllocator
)
1855 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1856 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1861 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
)
1862 device
->ws
->buffer_destroy(image
->bo
);
1864 if (image
->owned_memory
!= VK_NULL_HANDLE
)
1865 radv_FreeMemory(_device
, image
->owned_memory
, pAllocator
);
1867 vk_free2(&device
->alloc
, pAllocator
, image
);
1870 void radv_GetImageSubresourceLayout(
1873 const VkImageSubresource
* pSubresource
,
1874 VkSubresourceLayout
* pLayout
)
1876 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1877 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1878 int level
= pSubresource
->mipLevel
;
1879 int layer
= pSubresource
->arrayLayer
;
1881 unsigned plane_id
= radv_plane_from_aspect(pSubresource
->aspectMask
);
1883 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1884 struct radeon_surf
*surface
= &plane
->surface
;
1886 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1887 uint64_t level_offset
= surface
->is_linear
? surface
->u
.gfx9
.offset
[level
] : 0;
1889 pLayout
->offset
= plane
->offset
+ level_offset
+ surface
->u
.gfx9
.surf_slice_size
* layer
;
1890 if (image
->vk_format
== VK_FORMAT_R32G32B32_UINT
||
1891 image
->vk_format
== VK_FORMAT_R32G32B32_SINT
||
1892 image
->vk_format
== VK_FORMAT_R32G32B32_SFLOAT
) {
1893 /* Adjust the number of bytes between each row because
1894 * the pitch is actually the number of components per
1897 pLayout
->rowPitch
= surface
->u
.gfx9
.surf_pitch
* surface
->bpe
/ 3;
1899 uint32_t pitch
= surface
->is_linear
? surface
->u
.gfx9
.pitch
[level
] : surface
->u
.gfx9
.surf_pitch
;
1901 assert(util_is_power_of_two_nonzero(surface
->bpe
));
1902 pLayout
->rowPitch
= pitch
* surface
->bpe
;
1905 pLayout
->arrayPitch
= surface
->u
.gfx9
.surf_slice_size
;
1906 pLayout
->depthPitch
= surface
->u
.gfx9
.surf_slice_size
;
1907 pLayout
->size
= surface
->u
.gfx9
.surf_slice_size
;
1908 if (image
->type
== VK_IMAGE_TYPE_3D
)
1909 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
1911 pLayout
->offset
= plane
->offset
+ surface
->u
.legacy
.level
[level
].offset
+ (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4 * layer
;
1912 pLayout
->rowPitch
= surface
->u
.legacy
.level
[level
].nblk_x
* surface
->bpe
;
1913 pLayout
->arrayPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1914 pLayout
->depthPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1915 pLayout
->size
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1916 if (image
->type
== VK_IMAGE_TYPE_3D
)
1917 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
1923 radv_CreateImageView(VkDevice _device
,
1924 const VkImageViewCreateInfo
*pCreateInfo
,
1925 const VkAllocationCallbacks
*pAllocator
,
1928 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1929 struct radv_image_view
*view
;
1931 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1932 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1934 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1936 radv_image_view_init(view
, device
, pCreateInfo
, NULL
);
1938 *pView
= radv_image_view_to_handle(view
);
1944 radv_DestroyImageView(VkDevice _device
, VkImageView _iview
,
1945 const VkAllocationCallbacks
*pAllocator
)
1947 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1948 RADV_FROM_HANDLE(radv_image_view
, iview
, _iview
);
1952 vk_free2(&device
->alloc
, pAllocator
, iview
);
1955 void radv_buffer_view_init(struct radv_buffer_view
*view
,
1956 struct radv_device
*device
,
1957 const VkBufferViewCreateInfo
* pCreateInfo
)
1959 RADV_FROM_HANDLE(radv_buffer
, buffer
, pCreateInfo
->buffer
);
1961 view
->bo
= buffer
->bo
;
1962 view
->range
= pCreateInfo
->range
== VK_WHOLE_SIZE
?
1963 buffer
->size
- pCreateInfo
->offset
: pCreateInfo
->range
;
1964 view
->vk_format
= pCreateInfo
->format
;
1966 radv_make_buffer_descriptor(device
, buffer
, view
->vk_format
,
1967 pCreateInfo
->offset
, view
->range
, view
->state
);
1971 radv_CreateBufferView(VkDevice _device
,
1972 const VkBufferViewCreateInfo
*pCreateInfo
,
1973 const VkAllocationCallbacks
*pAllocator
,
1974 VkBufferView
*pView
)
1976 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1977 struct radv_buffer_view
*view
;
1979 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1980 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1982 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1984 radv_buffer_view_init(view
, device
, pCreateInfo
);
1986 *pView
= radv_buffer_view_to_handle(view
);
1992 radv_DestroyBufferView(VkDevice _device
, VkBufferView bufferView
,
1993 const VkAllocationCallbacks
*pAllocator
)
1995 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1996 RADV_FROM_HANDLE(radv_buffer_view
, view
, bufferView
);
2001 vk_free2(&device
->alloc
, pAllocator
, view
);