/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
32 #include "radv_radeon_winsys.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
38 radv_choose_tiling(struct radv_device
*device
,
39 const VkImageCreateInfo
*pCreateInfo
,
42 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
) {
43 assert(pCreateInfo
->samples
<= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
47 if (!vk_format_is_compressed(format
) &&
48 !vk_format_is_depth_or_stencil(format
)
49 && device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
50 /* this causes hangs in some VK CTS tests on GFX9. */
51 /* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo
->imageType
== VK_IMAGE_TYPE_1D
||
53 /* Only very thin and long 2D textures should benefit from
55 (pCreateInfo
->extent
.width
> 8 && pCreateInfo
->extent
.height
<= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
59 /* MSAA resources must be 2D tiled. */
60 if (pCreateInfo
->samples
> 1)
61 return RADEON_SURF_MODE_2D
;
63 return RADEON_SURF_MODE_2D
;
67 radv_use_tc_compat_htile_for_image(struct radv_device
*device
,
68 const VkImageCreateInfo
*pCreateInfo
,
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
75 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
78 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
81 if (pCreateInfo
->mipLevels
> 1)
84 /* Do not enable TC-compatible HTILE if the image isn't readable by a
85 * shader because no texture fetches will happen.
87 if (!(pCreateInfo
->usage
& (VK_IMAGE_USAGE_SAMPLED_BIT
|
88 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT
|
89 VK_IMAGE_USAGE_TRANSFER_SRC_BIT
)))
92 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
93 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
95 if (pCreateInfo
->samples
>= 2 &&
96 (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
97 (format
== VK_FORMAT_D32_SFLOAT
&&
98 device
->physical_device
->rad_info
.chip_class
== GFX10
)))
101 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
102 * supports 32-bit. Though, it's possible to enable TC-compat for
103 * 16-bit depth surfaces if no Z planes are compressed.
105 if (format
!= VK_FORMAT_D32_SFLOAT_S8_UINT
&&
106 format
!= VK_FORMAT_D32_SFLOAT
&&
107 format
!= VK_FORMAT_D16_UNORM
)
110 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
111 const struct VkImageFormatListCreateInfo
*format_list
=
112 (const struct VkImageFormatListCreateInfo
*)
113 vk_find_struct_const(pCreateInfo
->pNext
,
114 IMAGE_FORMAT_LIST_CREATE_INFO
);
116 /* We have to ignore the existence of the list if viewFormatCount = 0 */
117 if (format_list
&& format_list
->viewFormatCount
) {
118 /* compatibility is transitive, so we only need to check
119 * one format with everything else.
121 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
122 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
125 if (format
!= format_list
->pViewFormats
[i
])
137 radv_surface_has_scanout(struct radv_device
*device
, const struct radv_image_create_info
*info
)
139 if (info
->bo_metadata
) {
140 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
141 return info
->bo_metadata
->u
.gfx9
.scanout
;
143 return info
->bo_metadata
->u
.legacy
.scanout
;
146 return info
->scanout
;
150 radv_use_dcc_for_image(struct radv_device
*device
,
151 const struct radv_image
*image
,
152 const VkImageCreateInfo
*pCreateInfo
,
155 bool dcc_compatible_formats
;
158 /* DCC (Delta Color Compression) is only available for GFX8+. */
159 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
162 if (device
->instance
->debug_flags
& RADV_DEBUG_NO_DCC
)
165 if (image
->shareable
)
168 /* TODO: Enable DCC for storage images. */
169 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
172 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
175 if (vk_format_is_subsampled(format
) ||
176 vk_format_get_plane_count(format
) > 1)
179 /* TODO: Enable DCC for mipmaps on GFX9+. */
180 if ((pCreateInfo
->arrayLayers
> 1 || pCreateInfo
->mipLevels
> 1) &&
181 device
->physical_device
->rad_info
.chip_class
>= GFX9
)
184 /* Do not enable DCC for mipmapped arrays because performance is worse. */
185 if (pCreateInfo
->arrayLayers
> 1 && pCreateInfo
->mipLevels
> 1)
188 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
189 * 2x can be enabled with an option.
191 if (pCreateInfo
->samples
> 2 ||
192 (pCreateInfo
->samples
== 2 &&
193 !device
->physical_device
->dcc_msaa_allowed
))
196 /* Determine if the formats are DCC compatible. */
197 dcc_compatible_formats
=
198 radv_is_colorbuffer_format_supported(format
,
201 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
202 const struct VkImageFormatListCreateInfo
*format_list
=
203 (const struct VkImageFormatListCreateInfo
*)
204 vk_find_struct_const(pCreateInfo
->pNext
,
205 IMAGE_FORMAT_LIST_CREATE_INFO
);
207 /* We have to ignore the existence of the list if viewFormatCount = 0 */
208 if (format_list
&& format_list
->viewFormatCount
) {
209 /* compatibility is transitive, so we only need to check
210 * one format with everything else. */
211 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
212 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
215 if (!radv_dcc_formats_compatible(format
,
216 format_list
->pViewFormats
[i
]))
217 dcc_compatible_formats
= false;
220 dcc_compatible_formats
= false;
224 if (!dcc_compatible_formats
)
231 radv_use_tc_compat_cmask_for_image(struct radv_device
*device
,
232 struct radv_image
*image
)
234 if (!(device
->instance
->perftest_flags
& RADV_PERFTEST_TC_COMPAT_CMASK
))
237 /* TC-compat CMASK is only available for GFX8+. */
238 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
241 if (image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)
244 if (radv_image_has_dcc(image
))
247 if (!radv_image_has_cmask(image
))
253 static uint32_t si_get_bo_metadata_word1(const struct radv_device
*device
)
255 return (ATI_VENDOR_ID
<< 16) | device
->physical_device
->rad_info
.pci_id
;
259 radv_is_valid_opaque_metadata(const struct radv_device
*device
,
260 const struct radeon_bo_metadata
*md
)
262 if (md
->metadata
[0] != 1 ||
263 md
->metadata
[1] != si_get_bo_metadata_word1(device
))
266 if (md
->size_metadata
< 40)
273 radv_patch_surface_from_metadata(struct radv_device
*device
,
274 struct radeon_surf
*surface
,
275 const struct radeon_bo_metadata
*md
)
277 surface
->flags
= RADEON_SURF_CLR(surface
->flags
, MODE
);
279 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
280 if (md
->u
.gfx9
.swizzle_mode
> 0)
281 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
283 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
285 surface
->u
.gfx9
.surf
.swizzle_mode
= md
->u
.gfx9
.swizzle_mode
;
287 surface
->u
.legacy
.pipe_config
= md
->u
.legacy
.pipe_config
;
288 surface
->u
.legacy
.bankw
= md
->u
.legacy
.bankw
;
289 surface
->u
.legacy
.bankh
= md
->u
.legacy
.bankh
;
290 surface
->u
.legacy
.tile_split
= md
->u
.legacy
.tile_split
;
291 surface
->u
.legacy
.mtilea
= md
->u
.legacy
.mtilea
;
292 surface
->u
.legacy
.num_banks
= md
->u
.legacy
.num_banks
;
294 if (md
->u
.legacy
.macrotile
== RADEON_LAYOUT_TILED
)
295 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
296 else if (md
->u
.legacy
.microtile
== RADEON_LAYOUT_TILED
)
297 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_1D
, MODE
);
299 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
305 radv_patch_image_dimensions(struct radv_device
*device
,
306 struct radv_image
*image
,
307 const struct radv_image_create_info
*create_info
,
308 struct ac_surf_info
*image_info
)
310 unsigned width
= image
->info
.width
;
311 unsigned height
= image
->info
.height
;
314 * minigbm sometimes allocates bigger images which is going to result in
315 * weird strides and other properties. Lets be lenient where possible and
316 * fail it on GFX10 (as we cannot cope there).
318 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
320 if (create_info
->bo_metadata
&&
321 radv_is_valid_opaque_metadata(device
, create_info
->bo_metadata
)) {
322 const struct radeon_bo_metadata
*md
= create_info
->bo_metadata
;
324 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
325 width
= G_00A004_WIDTH_LO(md
->metadata
[3]) +
326 (G_00A008_WIDTH_HI(md
->metadata
[4]) << 2) + 1;
327 height
= S_00A008_HEIGHT(md
->metadata
[4]) + 1;
329 width
= G_008F18_WIDTH(md
->metadata
[4]) + 1;
330 height
= G_008F18_HEIGHT(md
->metadata
[4]) + 1;
334 if (image
->info
.width
== width
&& image
->info
.height
== height
)
337 if (width
< image
->info
.width
|| height
< image
->info
.height
) {
339 "The imported image has smaller dimensions than the internal\n"
340 "dimensions. Using it is going to fail badly, so we reject\n"
342 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
343 image
->info
.width
, image
->info
.height
, width
, height
);
344 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
345 } else if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
347 "Tried to import an image with inconsistent width on GFX10.\n"
348 "As GFX10 has no separate stride fields we cannot cope with\n"
349 "an inconsistency in width and will fail this import.\n"
350 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
351 image
->info
.width
, image
->info
.height
, width
, height
);
352 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
355 "Tried to import an image with inconsistent width on pre-GFX10.\n"
356 "As GFX10 has no separate stride fields we cannot cope with\n"
357 "an inconsistency and would fail on GFX10.\n"
358 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
359 image
->info
.width
, image
->info
.height
, width
, height
);
361 image_info
->width
= width
;
362 image_info
->height
= height
;
368 radv_patch_image_from_extra_info(struct radv_device
*device
,
369 struct radv_image
*image
,
370 const struct radv_image_create_info
*create_info
,
371 struct ac_surf_info
*image_info
)
373 VkResult result
= radv_patch_image_dimensions(device
, image
, create_info
, image_info
);
374 if (result
!= VK_SUCCESS
)
377 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
378 if (create_info
->bo_metadata
) {
379 radv_patch_surface_from_metadata(device
, &image
->planes
[plane
].surface
,
380 create_info
->bo_metadata
);
383 if (radv_surface_has_scanout(device
, create_info
)) {
384 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_SCANOUT
;
385 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_DISABLE_DCC
;
387 image
->info
.surf_index
= NULL
;
394 radv_init_surface(struct radv_device
*device
,
395 const struct radv_image
*image
,
396 struct radeon_surf
*surface
,
398 const VkImageCreateInfo
*pCreateInfo
,
399 VkFormat image_format
)
401 unsigned array_mode
= radv_choose_tiling(device
, pCreateInfo
, image_format
);
402 VkFormat format
= vk_format_get_plane_format(image_format
, plane_id
);
403 const struct vk_format_description
*desc
= vk_format_description(format
);
404 bool is_depth
, is_stencil
;
406 is_depth
= vk_format_has_depth(desc
);
407 is_stencil
= vk_format_has_stencil(desc
);
409 surface
->blk_w
= vk_format_get_blockwidth(format
);
410 surface
->blk_h
= vk_format_get_blockheight(format
);
412 surface
->bpe
= vk_format_get_blocksize(vk_format_depth_only(format
));
413 /* align byte per element on dword */
414 if (surface
->bpe
== 3) {
418 surface
->flags
= RADEON_SURF_SET(array_mode
, MODE
);
420 switch (pCreateInfo
->imageType
){
421 case VK_IMAGE_TYPE_1D
:
422 if (pCreateInfo
->arrayLayers
> 1)
423 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY
, TYPE
);
425 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D
, TYPE
);
427 case VK_IMAGE_TYPE_2D
:
428 if (pCreateInfo
->arrayLayers
> 1)
429 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY
, TYPE
);
431 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D
, TYPE
);
433 case VK_IMAGE_TYPE_3D
:
434 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_3D
, TYPE
);
437 unreachable("unhandled image type");
441 surface
->flags
|= RADEON_SURF_ZBUFFER
;
442 if (radv_use_tc_compat_htile_for_image(device
, pCreateInfo
, image_format
))
443 surface
->flags
|= RADEON_SURF_TC_COMPATIBLE_HTILE
;
447 surface
->flags
|= RADEON_SURF_SBUFFER
;
449 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
450 pCreateInfo
->imageType
== VK_IMAGE_TYPE_3D
&&
451 vk_format_get_blocksizebits(image_format
) == 128 &&
452 vk_format_is_compressed(image_format
))
453 surface
->flags
|= RADEON_SURF_NO_RENDER_TARGET
;
455 if (!radv_use_dcc_for_image(device
, image
, pCreateInfo
, image_format
))
456 surface
->flags
|= RADEON_SURF_DISABLE_DCC
;
461 static inline unsigned
462 si_tile_mode_index(const struct radv_image_plane
*plane
, unsigned level
, bool stencil
)
465 return plane
->surface
.u
.legacy
.stencil_tiling_index
[level
];
467 return plane
->surface
.u
.legacy
.tiling_index
[level
];
470 static unsigned radv_map_swizzle(unsigned swizzle
)
474 return V_008F0C_SQ_SEL_Y
;
476 return V_008F0C_SQ_SEL_Z
;
478 return V_008F0C_SQ_SEL_W
;
480 return V_008F0C_SQ_SEL_0
;
482 return V_008F0C_SQ_SEL_1
;
483 default: /* VK_SWIZZLE_X */
484 return V_008F0C_SQ_SEL_X
;
489 radv_make_buffer_descriptor(struct radv_device
*device
,
490 struct radv_buffer
*buffer
,
496 const struct vk_format_description
*desc
;
498 uint64_t gpu_address
= radv_buffer_get_va(buffer
->bo
);
499 uint64_t va
= gpu_address
+ buffer
->offset
;
500 unsigned num_format
, data_format
;
502 desc
= vk_format_description(vk_format
);
503 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
504 stride
= desc
->block
.bits
/ 8;
508 state
[1] = S_008F04_BASE_ADDRESS_HI(va
>> 32) |
509 S_008F04_STRIDE(stride
);
511 if (device
->physical_device
->rad_info
.chip_class
!= GFX8
&& stride
) {
516 state
[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc
->swizzle
[0])) |
517 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc
->swizzle
[1])) |
518 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc
->swizzle
[2])) |
519 S_008F0C_DST_SEL_W(radv_map_swizzle(desc
->swizzle
[3]));
521 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
522 const struct gfx10_format
*fmt
= gfx10_format_description(vk_format
);
524 /* OOB_SELECT chooses the out-of-bounds check:
525 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
526 * - 1: index >= NUM_RECORDS
527 * - 2: NUM_RECORDS == 0
528 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
529 * else: swizzle_address >= NUM_RECORDS
531 state
[3] |= S_008F0C_FORMAT(fmt
->img_format
) |
532 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET
) |
533 S_008F0C_RESOURCE_LEVEL(1);
535 num_format
= radv_translate_buffer_numformat(desc
, first_non_void
);
536 data_format
= radv_translate_buffer_dataformat(desc
, first_non_void
);
538 assert(data_format
!= V_008F0C_BUF_DATA_FORMAT_INVALID
);
539 assert(num_format
!= ~0);
541 state
[3] |= S_008F0C_NUM_FORMAT(num_format
) |
542 S_008F0C_DATA_FORMAT(data_format
);
547 si_set_mutable_tex_desc_fields(struct radv_device
*device
,
548 struct radv_image
*image
,
549 const struct legacy_surf_level
*base_level_info
,
551 unsigned base_level
, unsigned first_level
,
552 unsigned block_width
, bool is_stencil
,
553 bool is_storage_image
, bool disable_compression
,
556 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
557 uint64_t gpu_address
= image
->bo
? radv_buffer_get_va(image
->bo
) + image
->offset
: 0;
558 uint64_t va
= gpu_address
+ plane
->offset
;
559 enum chip_class chip_class
= device
->physical_device
->rad_info
.chip_class
;
560 uint64_t meta_va
= 0;
561 if (chip_class
>= GFX9
) {
563 va
+= plane
->surface
.u
.gfx9
.stencil_offset
;
565 va
+= plane
->surface
.u
.gfx9
.surf_offset
;
567 va
+= base_level_info
->offset
;
570 if (chip_class
>= GFX9
||
571 base_level_info
->mode
== RADEON_SURF_MODE_2D
)
572 state
[0] |= plane
->surface
.tile_swizzle
;
573 state
[1] &= C_008F14_BASE_ADDRESS_HI
;
574 state
[1] |= S_008F14_BASE_ADDRESS_HI(va
>> 40);
576 if (chip_class
>= GFX8
) {
577 state
[6] &= C_008F28_COMPRESSION_EN
;
579 if (!disable_compression
&& radv_dcc_enabled(image
, first_level
)) {
580 meta_va
= gpu_address
+ image
->dcc_offset
;
581 if (chip_class
<= GFX8
)
582 meta_va
+= base_level_info
->dcc_offset
;
584 unsigned dcc_tile_swizzle
= plane
->surface
.tile_swizzle
<< 8;
585 dcc_tile_swizzle
&= plane
->surface
.dcc_alignment
- 1;
586 meta_va
|= dcc_tile_swizzle
;
587 } else if (!disable_compression
&&
588 radv_image_is_tc_compat_htile(image
)) {
589 meta_va
= gpu_address
+ image
->htile_offset
;
593 state
[6] |= S_008F28_COMPRESSION_EN(1);
594 if (chip_class
<= GFX9
)
595 state
[7] = meta_va
>> 8;
599 if (chip_class
>= GFX10
) {
600 state
[3] &= C_00A00C_SW_MODE
;
603 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
605 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
608 state
[6] &= C_00A018_META_DATA_ADDRESS_LO
&
609 C_00A018_META_PIPE_ALIGNED
;
612 struct gfx9_surf_meta_flags meta
= {
617 if (image
->dcc_offset
)
618 meta
= plane
->surface
.u
.gfx9
.dcc
;
620 state
[6] |= S_00A018_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
621 S_00A018_META_DATA_ADDRESS_LO(meta_va
>> 8);
624 state
[7] = meta_va
>> 16;
625 } else if (chip_class
== GFX9
) {
626 state
[3] &= C_008F1C_SW_MODE
;
627 state
[4] &= C_008F20_PITCH
;
630 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
631 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.stencil
.epitch
);
633 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
634 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.surf
.epitch
);
637 state
[5] &= C_008F24_META_DATA_ADDRESS
&
638 C_008F24_META_PIPE_ALIGNED
&
639 C_008F24_META_RB_ALIGNED
;
641 struct gfx9_surf_meta_flags meta
= {
646 if (image
->dcc_offset
)
647 meta
= plane
->surface
.u
.gfx9
.dcc
;
649 state
[5] |= S_008F24_META_DATA_ADDRESS(meta_va
>> 40) |
650 S_008F24_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
651 S_008F24_META_RB_ALIGNED(meta
.rb_aligned
);
655 unsigned pitch
= base_level_info
->nblk_x
* block_width
;
656 unsigned index
= si_tile_mode_index(plane
, base_level
, is_stencil
);
658 state
[3] &= C_008F1C_TILING_INDEX
;
659 state
[3] |= S_008F1C_TILING_INDEX(index
);
660 state
[4] &= C_008F20_PITCH
;
661 state
[4] |= S_008F20_PITCH(pitch
- 1);
665 static unsigned radv_tex_dim(VkImageType image_type
, VkImageViewType view_type
,
666 unsigned nr_layers
, unsigned nr_samples
, bool is_storage_image
, bool gfx9
)
668 if (view_type
== VK_IMAGE_VIEW_TYPE_CUBE
|| view_type
== VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
)
669 return is_storage_image
? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_CUBE
;
671 /* GFX9 allocates 1D textures as 2D. */
672 if (gfx9
&& image_type
== VK_IMAGE_TYPE_1D
)
673 image_type
= VK_IMAGE_TYPE_2D
;
674 switch (image_type
) {
675 case VK_IMAGE_TYPE_1D
:
676 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY
: V_008F1C_SQ_RSRC_IMG_1D
;
677 case VK_IMAGE_TYPE_2D
:
679 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D_MSAA
;
681 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D
;
682 case VK_IMAGE_TYPE_3D
:
683 if (view_type
== VK_IMAGE_VIEW_TYPE_3D
)
684 return V_008F1C_SQ_RSRC_IMG_3D
;
686 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY
;
688 unreachable("illegal image type");
692 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle
[4])
694 unsigned bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
696 if (swizzle
[3] == VK_SWIZZLE_X
) {
697 /* For the pre-defined border color values (white, opaque
698 * black, transparent black), the only thing that matters is
699 * that the alpha channel winds up in the correct place
700 * (because the RGB channels are all the same) so either of
701 * these enumerations will work.
703 if (swizzle
[2] == VK_SWIZZLE_Y
)
704 bc_swizzle
= V_008F20_BC_SWIZZLE_WZYX
;
706 bc_swizzle
= V_008F20_BC_SWIZZLE_WXYZ
;
707 } else if (swizzle
[0] == VK_SWIZZLE_X
) {
708 if (swizzle
[1] == VK_SWIZZLE_Y
)
709 bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
711 bc_swizzle
= V_008F20_BC_SWIZZLE_XWYZ
;
712 } else if (swizzle
[1] == VK_SWIZZLE_X
) {
713 bc_swizzle
= V_008F20_BC_SWIZZLE_YXWZ
;
714 } else if (swizzle
[2] == VK_SWIZZLE_X
) {
715 bc_swizzle
= V_008F20_BC_SWIZZLE_ZYXW
;
721 bool vi_alpha_is_on_msb(struct radv_device
*device
, VkFormat format
)
723 const struct vk_format_description
*desc
= vk_format_description(format
);
725 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
&& desc
->nr_channels
== 1)
726 return desc
->swizzle
[3] == VK_SWIZZLE_X
;
728 return radv_translate_colorswap(format
, false) <= 1;
731 * Build the sampler view descriptor for a texture (GFX10).
734 gfx10_make_texture_descriptor(struct radv_device
*device
,
735 struct radv_image
*image
,
736 bool is_storage_image
,
737 VkImageViewType view_type
,
739 const VkComponentMapping
*mapping
,
740 unsigned first_level
, unsigned last_level
,
741 unsigned first_layer
, unsigned last_layer
,
742 unsigned width
, unsigned height
, unsigned depth
,
744 uint32_t *fmask_state
)
746 const struct vk_format_description
*desc
;
747 enum vk_swizzle swizzle
[4];
751 desc
= vk_format_description(vk_format
);
752 img_format
= gfx10_format_description(vk_format
)->img_format
;
754 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
755 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
756 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
758 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
761 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
762 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
763 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
765 depth
= image
->info
.array_size
;
766 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
767 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
768 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
769 depth
= image
->info
.array_size
;
770 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
771 depth
= image
->info
.array_size
/ 6;
774 state
[1] = S_00A004_FORMAT(img_format
) |
775 S_00A004_WIDTH_LO(width
- 1);
776 state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
777 S_00A008_HEIGHT(height
- 1) |
778 S_00A008_RESOURCE_LEVEL(1);
779 state
[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
780 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
781 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
782 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
783 S_00A00C_BASE_LEVEL(image
->info
.samples
> 1 ?
785 S_00A00C_LAST_LEVEL(image
->info
.samples
> 1 ?
786 util_logbase2(image
->info
.samples
) :
788 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle
)) |
790 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
791 * to know the total number of layers.
793 state
[4] = S_00A010_DEPTH(type
== V_008F1C_SQ_RSRC_IMG_3D
? depth
- 1 : last_layer
) |
794 S_00A010_BASE_ARRAY(first_layer
);
795 state
[5] = S_00A014_ARRAY_PITCH(0) |
796 S_00A014_MAX_MIP(image
->info
.samples
> 1 ?
797 util_logbase2(image
->info
.samples
) :
798 image
->info
.levels
- 1) |
799 S_00A014_PERF_MOD(4);
803 if (radv_dcc_enabled(image
, first_level
)) {
804 state
[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B
) |
805 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B
) |
806 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
809 /* Initialize the sampler view for FMASK. */
810 if (radv_image_has_fmask(image
)) {
811 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
815 assert(image
->plane_count
== 1);
817 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
819 switch (image
->info
.samples
) {
821 format
= V_008F0C_IMG_FORMAT_FMASK8_S2_F2
;
824 format
= V_008F0C_IMG_FORMAT_FMASK8_S4_F4
;
827 format
= V_008F0C_IMG_FORMAT_FMASK32_S8_F8
;
830 unreachable("invalid nr_samples");
833 fmask_state
[0] = (va
>> 8) | image
->planes
[0].surface
.fmask_tile_swizzle
;
834 fmask_state
[1] = S_00A004_BASE_ADDRESS_HI(va
>> 40) |
835 S_00A004_FORMAT(format
) |
836 S_00A004_WIDTH_LO(width
- 1);
837 fmask_state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
838 S_00A008_HEIGHT(height
- 1) |
839 S_00A008_RESOURCE_LEVEL(1);
840 fmask_state
[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
841 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
842 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
843 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
844 S_00A00C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
) |
845 S_00A00C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
846 fmask_state
[4] = S_00A010_DEPTH(last_layer
) |
847 S_00A010_BASE_ARRAY(first_layer
);
849 fmask_state
[6] = S_00A018_META_PIPE_ALIGNED(1);
851 } else if (fmask_state
)
852 memset(fmask_state
, 0, 8 * 4);
856 * Build the sampler view descriptor for a texture (SI-GFX9)
859 si_make_texture_descriptor(struct radv_device
*device
,
860 struct radv_image
*image
,
861 bool is_storage_image
,
862 VkImageViewType view_type
,
864 const VkComponentMapping
*mapping
,
865 unsigned first_level
, unsigned last_level
,
866 unsigned first_layer
, unsigned last_layer
,
867 unsigned width
, unsigned height
, unsigned depth
,
869 uint32_t *fmask_state
)
871 const struct vk_format_description
*desc
;
872 enum vk_swizzle swizzle
[4];
874 unsigned num_format
, data_format
, type
;
876 desc
= vk_format_description(vk_format
);
878 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
879 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
880 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
882 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
885 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
887 num_format
= radv_translate_tex_numformat(vk_format
, desc
, first_non_void
);
888 if (num_format
== ~0) {
892 data_format
= radv_translate_tex_dataformat(vk_format
, desc
, first_non_void
);
893 if (data_format
== ~0) {
897 /* S8 with either Z16 or Z32 HTILE need a special format. */
898 if (device
->physical_device
->rad_info
.chip_class
== GFX9
&&
899 vk_format
== VK_FORMAT_S8_UINT
&&
900 radv_image_is_tc_compat_htile(image
)) {
901 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
902 data_format
= V_008F14_IMG_DATA_FORMAT_S8_32
;
903 else if (image
->vk_format
== VK_FORMAT_D16_UNORM_S8_UINT
)
904 data_format
= V_008F14_IMG_DATA_FORMAT_S8_16
;
906 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
907 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
908 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
910 depth
= image
->info
.array_size
;
911 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
912 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
913 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
914 depth
= image
->info
.array_size
;
915 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
916 depth
= image
->info
.array_size
/ 6;
919 state
[1] = (S_008F14_DATA_FORMAT(data_format
) |
920 S_008F14_NUM_FORMAT(num_format
));
921 state
[2] = (S_008F18_WIDTH(width
- 1) |
922 S_008F18_HEIGHT(height
- 1) |
923 S_008F18_PERF_MOD(4));
924 state
[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
925 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
926 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
927 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
928 S_008F1C_BASE_LEVEL(image
->info
.samples
> 1 ?
930 S_008F1C_LAST_LEVEL(image
->info
.samples
> 1 ?
931 util_logbase2(image
->info
.samples
) :
933 S_008F1C_TYPE(type
));
935 state
[5] = S_008F24_BASE_ARRAY(first_layer
);
939 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
940 unsigned bc_swizzle
= gfx9_border_color_swizzle(swizzle
);
942 /* Depth is the last accessible layer on Gfx9.
943 * The hw doesn't need to know the total number of layers.
945 if (type
== V_008F1C_SQ_RSRC_IMG_3D
)
946 state
[4] |= S_008F20_DEPTH(depth
- 1);
948 state
[4] |= S_008F20_DEPTH(last_layer
);
950 state
[4] |= S_008F20_BC_SWIZZLE(bc_swizzle
);
951 state
[5] |= S_008F24_MAX_MIP(image
->info
.samples
> 1 ?
952 util_logbase2(image
->info
.samples
) :
953 image
->info
.levels
- 1);
955 state
[3] |= S_008F1C_POW2_PAD(image
->info
.levels
> 1);
956 state
[4] |= S_008F20_DEPTH(depth
- 1);
957 state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
959 if (image
->dcc_offset
) {
960 state
[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
962 /* The last dword is unused by hw. The shader uses it to clear
963 * bits in the first dword of sampler state.
965 if (device
->physical_device
->rad_info
.chip_class
<= GFX7
&& image
->info
.samples
<= 1) {
966 if (first_level
== last_level
)
967 state
[7] = C_008F30_MAX_ANISO_RATIO
;
969 state
[7] = 0xffffffff;
973 /* Initialize the sampler view for FMASK. */
974 if (radv_image_has_fmask(image
)) {
975 uint32_t fmask_format
, num_format
;
976 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
979 assert(image
->plane_count
== 1);
981 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
983 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
984 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK
;
985 switch (image
->info
.samples
) {
987 num_format
= V_008F14_IMG_FMASK_8_2_2
;
990 num_format
= V_008F14_IMG_FMASK_8_4_4
;
993 num_format
= V_008F14_IMG_FMASK_32_8_8
;
996 unreachable("invalid nr_samples");
999 switch (image
->info
.samples
) {
1001 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2
;
1004 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4
;
1007 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8
;
1011 fmask_format
= V_008F14_IMG_DATA_FORMAT_INVALID
;
1013 num_format
= V_008F14_IMG_NUM_FORMAT_UINT
;
1016 fmask_state
[0] = va
>> 8;
1017 fmask_state
[0] |= image
->planes
[0].surface
.fmask_tile_swizzle
;
1018 fmask_state
[1] = S_008F14_BASE_ADDRESS_HI(va
>> 40) |
1019 S_008F14_DATA_FORMAT(fmask_format
) |
1020 S_008F14_NUM_FORMAT(num_format
);
1021 fmask_state
[2] = S_008F18_WIDTH(width
- 1) |
1022 S_008F18_HEIGHT(height
- 1);
1023 fmask_state
[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
1024 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
1025 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
1026 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
1027 S_008F1C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
1029 fmask_state
[5] = S_008F24_BASE_ARRAY(first_layer
);
1033 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
1034 fmask_state
[3] |= S_008F1C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
);
1035 fmask_state
[4] |= S_008F20_DEPTH(last_layer
) |
1036 S_008F20_PITCH(image
->planes
[0].surface
.u
.gfx9
.fmask
.epitch
);
1037 fmask_state
[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1038 S_008F24_META_RB_ALIGNED(1);
1040 if (radv_image_is_tc_compat_cmask(image
)) {
1041 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1043 fmask_state
[5] |= S_008F24_META_DATA_ADDRESS(va
>> 40);
1044 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1045 fmask_state
[7] |= va
>> 8;
1048 fmask_state
[3] |= S_008F1C_TILING_INDEX(image
->planes
[0].surface
.u
.legacy
.fmask
.tiling_index
);
1049 fmask_state
[4] |= S_008F20_DEPTH(depth
- 1) |
1050 S_008F20_PITCH(image
->planes
[0].surface
.u
.legacy
.fmask
.pitch_in_pixels
- 1);
1051 fmask_state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
1053 if (radv_image_is_tc_compat_cmask(image
)) {
1054 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1056 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1057 fmask_state
[7] |= va
>> 8;
1060 } else if (fmask_state
)
1061 memset(fmask_state
, 0, 8 * 4);
1065 radv_make_texture_descriptor(struct radv_device
*device
,
1066 struct radv_image
*image
,
1067 bool is_storage_image
,
1068 VkImageViewType view_type
,
1070 const VkComponentMapping
*mapping
,
1071 unsigned first_level
, unsigned last_level
,
1072 unsigned first_layer
, unsigned last_layer
,
1073 unsigned width
, unsigned height
, unsigned depth
,
1075 uint32_t *fmask_state
)
1077 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
1078 gfx10_make_texture_descriptor(device
, image
, is_storage_image
,
1079 view_type
, vk_format
, mapping
,
1080 first_level
, last_level
,
1081 first_layer
, last_layer
,
1082 width
, height
, depth
,
1083 state
, fmask_state
);
1085 si_make_texture_descriptor(device
, image
, is_storage_image
,
1086 view_type
, vk_format
, mapping
,
1087 first_level
, last_level
,
1088 first_layer
, last_layer
,
1089 width
, height
, depth
,
1090 state
, fmask_state
);
1095 radv_query_opaque_metadata(struct radv_device
*device
,
1096 struct radv_image
*image
,
1097 struct radeon_bo_metadata
*md
)
1099 static const VkComponentMapping fixedmapping
;
1100 uint32_t desc
[8], i
;
1102 assert(image
->plane_count
== 1);
1104 /* Metadata image format format version 1:
1105 * [0] = 1 (metadata format identifier)
1106 * [1] = (VENDOR_ID << 16) | PCI_ID
1107 * [2:9] = image descriptor for the whole resource
1108 * [2] is always 0, because the base address is cleared
1109 * [9] is the DCC offset bits [39:8] from the beginning of
1111 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1113 md
->metadata
[0] = 1; /* metadata image format version 1 */
1115 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1116 md
->metadata
[1] = si_get_bo_metadata_word1(device
);
1119 radv_make_texture_descriptor(device
, image
, false,
1120 (VkImageViewType
)image
->type
, image
->vk_format
,
1121 &fixedmapping
, 0, image
->info
.levels
- 1, 0,
1122 image
->info
.array_size
- 1,
1123 image
->info
.width
, image
->info
.height
,
1127 si_set_mutable_tex_desc_fields(device
, image
, &image
->planes
[0].surface
.u
.legacy
.level
[0], 0, 0, 0,
1128 image
->planes
[0].surface
.blk_w
, false, false, false, desc
);
1130 /* Clear the base address and set the relative DCC offset. */
1132 desc
[1] &= C_008F14_BASE_ADDRESS_HI
;
1133 desc
[7] = image
->dcc_offset
>> 8;
1135 /* Dwords [2:9] contain the image descriptor. */
1136 memcpy(&md
->metadata
[2], desc
, sizeof(desc
));
1138 /* Dwords [10:..] contain the mipmap level offsets. */
1139 if (device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
1140 for (i
= 0; i
<= image
->info
.levels
- 1; i
++)
1141 md
->metadata
[10+i
] = image
->planes
[0].surface
.u
.legacy
.level
[i
].offset
>> 8;
1142 md
->size_metadata
= (11 + image
->info
.levels
- 1) * 4;
1144 md
->size_metadata
= 10 * 4;
1148 radv_init_metadata(struct radv_device
*device
,
1149 struct radv_image
*image
,
1150 struct radeon_bo_metadata
*metadata
)
1152 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
1154 memset(metadata
, 0, sizeof(*metadata
));
1156 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1157 metadata
->u
.gfx9
.swizzle_mode
= surface
->u
.gfx9
.surf
.swizzle_mode
;
1158 metadata
->u
.gfx9
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
1160 metadata
->u
.legacy
.microtile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_1D
?
1161 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
1162 metadata
->u
.legacy
.macrotile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_2D
?
1163 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
1164 metadata
->u
.legacy
.pipe_config
= surface
->u
.legacy
.pipe_config
;
1165 metadata
->u
.legacy
.bankw
= surface
->u
.legacy
.bankw
;
1166 metadata
->u
.legacy
.bankh
= surface
->u
.legacy
.bankh
;
1167 metadata
->u
.legacy
.tile_split
= surface
->u
.legacy
.tile_split
;
1168 metadata
->u
.legacy
.mtilea
= surface
->u
.legacy
.mtilea
;
1169 metadata
->u
.legacy
.num_banks
= surface
->u
.legacy
.num_banks
;
1170 metadata
->u
.legacy
.stride
= surface
->u
.legacy
.level
[0].nblk_x
* surface
->bpe
;
1171 metadata
->u
.legacy
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
1173 radv_query_opaque_metadata(device
, image
, metadata
);
1177 radv_image_override_offset_stride(struct radv_device
*device
,
1178 struct radv_image
*image
,
1179 uint64_t offset
, uint32_t stride
)
1181 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
1182 unsigned bpe
= vk_format_get_blocksizebits(image
->vk_format
) / 8;
1184 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1186 surface
->u
.gfx9
.surf_pitch
= stride
;
1187 surface
->u
.gfx9
.surf_slice_size
=
1188 (uint64_t)stride
* surface
->u
.gfx9
.surf_height
* bpe
;
1190 surface
->u
.gfx9
.surf_offset
= offset
;
1192 surface
->u
.legacy
.level
[0].nblk_x
= stride
;
1193 surface
->u
.legacy
.level
[0].slice_size_dw
=
1194 ((uint64_t)stride
* surface
->u
.legacy
.level
[0].nblk_y
* bpe
) / 4;
1197 for (unsigned i
= 0; i
< ARRAY_SIZE(surface
->u
.legacy
.level
); ++i
)
1198 surface
->u
.legacy
.level
[i
].offset
+= offset
;
1205 radv_image_alloc_fmask(struct radv_device
*device
,
1206 struct radv_image
*image
)
1208 unsigned fmask_alignment
= image
->planes
[0].surface
.fmask_alignment
;
1210 image
->fmask_offset
= align64(image
->size
, fmask_alignment
);
1211 image
->size
= image
->fmask_offset
+ image
->planes
[0].surface
.fmask_size
;
1212 image
->alignment
= MAX2(image
->alignment
, fmask_alignment
);
1216 radv_image_alloc_cmask(struct radv_device
*device
,
1217 struct radv_image
*image
)
1219 unsigned cmask_alignment
= image
->planes
[0].surface
.cmask_alignment
;
1220 unsigned cmask_size
= image
->planes
[0].surface
.cmask_size
;
1221 uint32_t clear_value_size
= 0;
1226 assert(cmask_alignment
);
1228 image
->cmask_offset
= align64(image
->size
, cmask_alignment
);
1229 /* + 8 for storing the clear values */
1230 if (!image
->clear_value_offset
) {
1231 image
->clear_value_offset
= image
->cmask_offset
+ cmask_size
;
1232 clear_value_size
= 8;
1234 image
->size
= image
->cmask_offset
+ cmask_size
+ clear_value_size
;
1235 image
->alignment
= MAX2(image
->alignment
, cmask_alignment
);
1239 radv_image_alloc_dcc(struct radv_image
*image
)
1241 assert(image
->plane_count
== 1);
1243 image
->dcc_offset
= align64(image
->size
, image
->planes
[0].surface
.dcc_alignment
);
1244 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1245 image
->clear_value_offset
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
;
1246 image
->fce_pred_offset
= image
->clear_value_offset
+ 8 * image
->info
.levels
;
1247 image
->dcc_pred_offset
= image
->clear_value_offset
+ 16 * image
->info
.levels
;
1248 image
->size
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
+ 24 * image
->info
.levels
;
1249 image
->alignment
= MAX2(image
->alignment
, image
->planes
[0].surface
.dcc_alignment
);
1253 radv_image_alloc_htile(struct radv_device
*device
, struct radv_image
*image
)
1255 image
->htile_offset
= align64(image
->size
, image
->planes
[0].surface
.htile_alignment
);
1257 /* + 8 for storing the clear values */
1258 image
->clear_value_offset
= image
->htile_offset
+ image
->planes
[0].surface
.htile_size
;
1259 image
->size
= image
->clear_value_offset
+ image
->info
.levels
* 8;
1260 if (radv_image_is_tc_compat_htile(image
) &&
1261 device
->physical_device
->rad_info
.has_tc_compat_zrange_bug
) {
1262 /* Metadata for the TC-compatible HTILE hardware bug which
1263 * have to be fixed by updating ZRANGE_PRECISION when doing
1264 * fast depth clears to 0.0f.
1266 image
->tc_compat_zrange_offset
= image
->size
;
1267 image
->size
= image
->tc_compat_zrange_offset
+ image
->info
.levels
* 4;
1269 image
->alignment
= align64(image
->alignment
, image
->planes
[0].surface
.htile_alignment
);
1273 radv_image_can_enable_dcc_or_cmask(struct radv_image
*image
)
1275 if (image
->info
.samples
<= 1 &&
1276 image
->info
.width
* image
->info
.height
<= 512 * 512) {
1277 /* Do not enable CMASK or DCC for small surfaces where the cost
1278 * of the eliminate pass can be higher than the benefit of fast
1279 * clear. RadeonSI does this, but the image threshold is
1285 return image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
&&
1286 (image
->exclusive
|| image
->queue_family_mask
== 1);
1290 radv_image_can_enable_dcc(struct radv_device
*device
, struct radv_image
*image
)
1292 if (!radv_image_can_enable_dcc_or_cmask(image
) ||
1293 !radv_image_has_dcc(image
))
1296 /* On GFX8, DCC layers can be interleaved and it's currently only
1297 * enabled if slice size is equal to the per slice fast clear size
1298 * because the driver assumes that portions of multiple layers are
1299 * contiguous during fast clears.
1301 if (image
->info
.array_size
> 1) {
1302 const struct legacy_surf_level
*surf_level
=
1303 &image
->planes
[0].surface
.u
.legacy
.level
[0];
1305 assert(device
->physical_device
->rad_info
.chip_class
== GFX8
);
1307 if (image
->planes
[0].surface
.dcc_slice_size
!= surf_level
->dcc_fast_clear_size
)
1315 radv_image_can_enable_cmask(struct radv_image
*image
)
1317 if (image
->planes
[0].surface
.bpe
> 8 && image
->info
.samples
== 1) {
1318 /* Do not enable CMASK for non-MSAA images (fast color clear)
1319 * because 128 bit formats are not supported, but FMASK might
1325 return radv_image_can_enable_dcc_or_cmask(image
) &&
1326 image
->info
.levels
== 1 &&
1327 image
->info
.depth
== 1 &&
1328 !image
->planes
[0].surface
.is_linear
;
1332 radv_image_can_enable_fmask(struct radv_image
*image
)
1334 return image
->info
.samples
> 1 &&
1335 image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
;
1339 radv_image_can_enable_htile(struct radv_image
*image
)
1341 return radv_image_has_htile(image
) &&
1342 image
->info
.levels
== 1 &&
1343 image
->info
.width
* image
->info
.height
>= 8 * 8;
1346 static void radv_image_disable_dcc(struct radv_image
*image
)
1348 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1349 image
->planes
[i
].surface
.dcc_size
= 0;
1352 static void radv_image_disable_htile(struct radv_image
*image
)
1354 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1355 image
->planes
[i
].surface
.htile_size
= 0;
1359 radv_image_create_layout(struct radv_device
*device
,
1360 struct radv_image_create_info create_info
,
1361 struct radv_image
*image
)
1363 /* Check that we did not initialize things earlier */
1364 assert(!image
->planes
[0].surface
.surf_size
);
1366 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1367 * common internal case. */
1368 create_info
.vk_info
= NULL
;
1370 struct ac_surf_info image_info
= image
->info
;
1371 VkResult result
= radv_patch_image_from_extra_info(device
, image
, &create_info
, &image_info
);
1372 if (result
!= VK_SUCCESS
)
1376 image
->alignment
= 1;
1377 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
1378 struct ac_surf_info info
= image_info
;
1381 const struct vk_format_description
*desc
= vk_format_description(image
->vk_format
);
1382 assert(info
.width
% desc
->width_divisor
== 0);
1383 assert(info
.height
% desc
->height_divisor
== 0);
1385 info
.width
/= desc
->width_divisor
;
1386 info
.height
/= desc
->height_divisor
;
1389 device
->ws
->surface_init(device
->ws
, &info
, &image
->planes
[plane
].surface
);
1391 image
->planes
[plane
].offset
= align(image
->size
, image
->planes
[plane
].surface
.surf_alignment
);
1392 image
->size
= image
->planes
[plane
].offset
+ image
->planes
[plane
].surface
.surf_size
;
1393 image
->alignment
= image
->planes
[plane
].surface
.surf_alignment
;
1395 image
->planes
[plane
].format
= vk_format_get_plane_format(image
->vk_format
, plane
);
1398 if (!create_info
.no_metadata_planes
) {
1399 /* Try to enable DCC first. */
1400 if (radv_image_can_enable_dcc(device
, image
)) {
1401 radv_image_alloc_dcc(image
);
1402 if (image
->info
.samples
> 1) {
1403 /* CMASK should be enabled because DCC fast
1404 * clear with MSAA needs it.
1406 assert(radv_image_can_enable_cmask(image
));
1407 radv_image_alloc_cmask(device
, image
);
1410 /* When DCC cannot be enabled, try CMASK. */
1411 radv_image_disable_dcc(image
);
1412 if (radv_image_can_enable_cmask(image
)) {
1413 radv_image_alloc_cmask(device
, image
);
1417 /* Try to enable FMASK for multisampled images. */
1418 if (radv_image_can_enable_fmask(image
)) {
1419 radv_image_alloc_fmask(device
, image
);
1421 if (radv_use_tc_compat_cmask_for_image(device
, image
))
1422 image
->tc_compatible_cmask
= true;
1424 /* Otherwise, try to enable HTILE for depth surfaces. */
1425 if (radv_image_can_enable_htile(image
) &&
1426 !(device
->instance
->debug_flags
& RADV_DEBUG_NO_HIZ
)) {
1427 image
->tc_compatible_htile
= image
->planes
[0].surface
.flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
;
1428 radv_image_alloc_htile(device
, image
);
1430 radv_image_disable_htile(image
);
1434 radv_image_disable_dcc(image
);
1435 radv_image_disable_htile(image
);
1438 assert(image
->planes
[0].surface
.surf_size
);
1443 radv_image_create(VkDevice _device
,
1444 const struct radv_image_create_info
*create_info
,
1445 const VkAllocationCallbacks
* alloc
,
1448 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1449 const VkImageCreateInfo
*pCreateInfo
= create_info
->vk_info
;
1450 struct radv_image
*image
= NULL
;
1451 VkFormat format
= radv_select_android_external_format(pCreateInfo
->pNext
,
1452 pCreateInfo
->format
);
1453 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO
);
1455 const unsigned plane_count
= vk_format_get_plane_count(format
);
1456 const size_t image_struct_size
= sizeof(*image
) + sizeof(struct radv_image_plane
) * plane_count
;
1458 radv_assert(pCreateInfo
->mipLevels
> 0);
1459 radv_assert(pCreateInfo
->arrayLayers
> 0);
1460 radv_assert(pCreateInfo
->samples
> 0);
1461 radv_assert(pCreateInfo
->extent
.width
> 0);
1462 radv_assert(pCreateInfo
->extent
.height
> 0);
1463 radv_assert(pCreateInfo
->extent
.depth
> 0);
1465 image
= vk_zalloc2(&device
->alloc
, alloc
, image_struct_size
, 8,
1466 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1468 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1470 image
->type
= pCreateInfo
->imageType
;
1471 image
->info
.width
= pCreateInfo
->extent
.width
;
1472 image
->info
.height
= pCreateInfo
->extent
.height
;
1473 image
->info
.depth
= pCreateInfo
->extent
.depth
;
1474 image
->info
.samples
= pCreateInfo
->samples
;
1475 image
->info
.storage_samples
= pCreateInfo
->samples
;
1476 image
->info
.array_size
= pCreateInfo
->arrayLayers
;
1477 image
->info
.levels
= pCreateInfo
->mipLevels
;
1478 image
->info
.num_channels
= vk_format_get_nr_components(format
);
1480 image
->vk_format
= format
;
1481 image
->tiling
= pCreateInfo
->tiling
;
1482 image
->usage
= pCreateInfo
->usage
;
1483 image
->flags
= pCreateInfo
->flags
;
1484 image
->plane_count
= plane_count
;
1486 image
->exclusive
= pCreateInfo
->sharingMode
== VK_SHARING_MODE_EXCLUSIVE
;
1487 if (pCreateInfo
->sharingMode
== VK_SHARING_MODE_CONCURRENT
) {
1488 for (uint32_t i
= 0; i
< pCreateInfo
->queueFamilyIndexCount
; ++i
)
1489 if (pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_EXTERNAL
||
1490 pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_FOREIGN_EXT
)
1491 image
->queue_family_mask
|= (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1493 image
->queue_family_mask
|= 1u << pCreateInfo
->pQueueFamilyIndices
[i
];
1496 const VkExternalMemoryImageCreateInfo
*external_info
=
1497 vk_find_struct_const(pCreateInfo
->pNext
,
1498 EXTERNAL_MEMORY_IMAGE_CREATE_INFO
) ;
1500 image
->shareable
= external_info
;
1501 if (!vk_format_is_depth_or_stencil(format
) && !image
->shareable
) {
1502 image
->info
.surf_index
= &device
->image_mrt_offset_counter
;
1505 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
1506 radv_init_surface(device
, image
, &image
->planes
[plane
].surface
, plane
, pCreateInfo
, format
);
1509 bool delay_layout
= external_info
&&
1510 (external_info
->handleTypes
& VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID
);
1513 *pImage
= radv_image_to_handle(image
);
1514 assert (!(image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
));
1518 ASSERTED VkResult result
= radv_image_create_layout(device
, *create_info
, image
);
1519 assert(result
== VK_SUCCESS
);
1521 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
) {
1522 image
->alignment
= MAX2(image
->alignment
, 4096);
1523 image
->size
= align64(image
->size
, image
->alignment
);
1526 image
->bo
= device
->ws
->buffer_create(device
->ws
, image
->size
, image
->alignment
,
1527 0, RADEON_FLAG_VIRTUAL
, RADV_BO_PRIORITY_VIRTUAL
);
1529 vk_free2(&device
->alloc
, alloc
, image
);
1530 return vk_error(device
->instance
, VK_ERROR_OUT_OF_DEVICE_MEMORY
);
1534 *pImage
= radv_image_to_handle(image
);
1540 radv_image_view_make_descriptor(struct radv_image_view
*iview
,
1541 struct radv_device
*device
,
1543 const VkComponentMapping
*components
,
1544 bool is_storage_image
, bool disable_compression
,
1545 unsigned plane_id
, unsigned descriptor_plane_id
)
1547 struct radv_image
*image
= iview
->image
;
1548 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1549 const struct vk_format_description
*format_desc
= vk_format_description(image
->vk_format
);
1550 bool is_stencil
= iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
;
1552 union radv_descriptor
*descriptor
;
1553 uint32_t hw_level
= 0;
1555 if (is_storage_image
) {
1556 descriptor
= &iview
->storage_descriptor
;
1558 descriptor
= &iview
->descriptor
;
1561 assert(vk_format_get_plane_count(vk_format
) == 1);
1562 assert(plane
->surface
.blk_w
% vk_format_get_blockwidth(plane
->format
) == 0);
1563 blk_w
= plane
->surface
.blk_w
/ vk_format_get_blockwidth(plane
->format
) * vk_format_get_blockwidth(vk_format
);
1565 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
1566 hw_level
= iview
->base_mip
;
1567 radv_make_texture_descriptor(device
, image
, is_storage_image
,
1571 hw_level
, hw_level
+ iview
->level_count
- 1,
1573 iview
->base_layer
+ iview
->layer_count
- 1,
1574 iview
->extent
.width
/ (plane_id
? format_desc
->width_divisor
: 1),
1575 iview
->extent
.height
/ (plane_id
? format_desc
->height_divisor
: 1),
1576 iview
->extent
.depth
,
1577 descriptor
->plane_descriptors
[descriptor_plane_id
],
1578 descriptor_plane_id
? NULL
: descriptor
->fmask_descriptor
);
1580 const struct legacy_surf_level
*base_level_info
= NULL
;
1581 if (device
->physical_device
->rad_info
.chip_class
<= GFX9
) {
1583 base_level_info
= &plane
->surface
.u
.legacy
.stencil_level
[iview
->base_mip
];
1585 base_level_info
= &plane
->surface
.u
.legacy
.level
[iview
->base_mip
];
1587 si_set_mutable_tex_desc_fields(device
, image
,
1592 blk_w
, is_stencil
, is_storage_image
,
1593 is_storage_image
|| disable_compression
,
1594 descriptor
->plane_descriptors
[descriptor_plane_id
]);
1598 radv_plane_from_aspect(VkImageAspectFlags mask
)
1601 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1603 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1611 radv_get_aspect_format(struct radv_image
*image
, VkImageAspectFlags mask
)
1614 case VK_IMAGE_ASPECT_PLANE_0_BIT
:
1615 return image
->planes
[0].format
;
1616 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1617 return image
->planes
[1].format
;
1618 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1619 return image
->planes
[2].format
;
1620 case VK_IMAGE_ASPECT_STENCIL_BIT
:
1621 return vk_format_stencil_only(image
->vk_format
);
1622 case VK_IMAGE_ASPECT_DEPTH_BIT
:
1623 return vk_format_depth_only(image
->vk_format
);
1624 case VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
:
1625 return vk_format_depth_only(image
->vk_format
);
1627 return image
->vk_format
;
1632 radv_image_view_init(struct radv_image_view
*iview
,
1633 struct radv_device
*device
,
1634 const VkImageViewCreateInfo
* pCreateInfo
,
1635 const struct radv_image_view_extra_create_info
* extra_create_info
)
1637 RADV_FROM_HANDLE(radv_image
, image
, pCreateInfo
->image
);
1638 const VkImageSubresourceRange
*range
= &pCreateInfo
->subresourceRange
;
1640 switch (image
->type
) {
1641 case VK_IMAGE_TYPE_1D
:
1642 case VK_IMAGE_TYPE_2D
:
1643 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1 <= image
->info
.array_size
);
1645 case VK_IMAGE_TYPE_3D
:
1646 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1
1647 <= radv_minify(image
->info
.depth
, range
->baseMipLevel
));
1650 unreachable("bad VkImageType");
1652 iview
->image
= image
;
1653 iview
->bo
= image
->bo
;
1654 iview
->type
= pCreateInfo
->viewType
;
1655 iview
->plane_id
= radv_plane_from_aspect(pCreateInfo
->subresourceRange
.aspectMask
);
1656 iview
->aspect_mask
= pCreateInfo
->subresourceRange
.aspectMask
;
1657 iview
->multiple_planes
= vk_format_get_plane_count(image
->vk_format
) > 1 && iview
->aspect_mask
== VK_IMAGE_ASPECT_COLOR_BIT
;
1659 iview
->vk_format
= pCreateInfo
->format
;
1661 /* If the image has an Android external format, pCreateInfo->format will be
1662 * VK_FORMAT_UNDEFINED. */
1663 if (iview
->vk_format
== VK_FORMAT_UNDEFINED
)
1664 iview
->vk_format
= image
->vk_format
;
1666 if (iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1667 iview
->vk_format
= vk_format_stencil_only(iview
->vk_format
);
1668 } else if (iview
->aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
) {
1669 iview
->vk_format
= vk_format_depth_only(iview
->vk_format
);
1672 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1673 iview
->extent
= (VkExtent3D
) {
1674 .width
= image
->info
.width
,
1675 .height
= image
->info
.height
,
1676 .depth
= image
->info
.depth
,
1679 iview
->extent
= (VkExtent3D
) {
1680 .width
= radv_minify(image
->info
.width
, range
->baseMipLevel
),
1681 .height
= radv_minify(image
->info
.height
, range
->baseMipLevel
),
1682 .depth
= radv_minify(image
->info
.depth
, range
->baseMipLevel
),
1686 if (iview
->vk_format
!= image
->planes
[iview
->plane_id
].format
) {
1687 unsigned view_bw
= vk_format_get_blockwidth(iview
->vk_format
);
1688 unsigned view_bh
= vk_format_get_blockheight(iview
->vk_format
);
1689 unsigned img_bw
= vk_format_get_blockwidth(image
->vk_format
);
1690 unsigned img_bh
= vk_format_get_blockheight(image
->vk_format
);
1692 iview
->extent
.width
= round_up_u32(iview
->extent
.width
* view_bw
, img_bw
);
1693 iview
->extent
.height
= round_up_u32(iview
->extent
.height
* view_bh
, img_bh
);
1695 /* Comment ported from amdvlk -
1696 * If we have the following image:
1697 * Uncompressed pixels Compressed block sizes (4x4)
1698 * mip0: 22 x 22 6 x 6
1699 * mip1: 11 x 11 3 x 3
1704 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1705 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1706 * divide-by-two integer math):
1712 * This means that mip2 will be missing texels.
1714 * Fix this by calculating the base mip's width and height, then convert that, and round it
1715 * back up to get the level 0 size.
1716 * Clamp the converted size between the original values, and next power of two, which
1717 * means we don't oversize the image.
1719 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
1720 vk_format_is_compressed(image
->vk_format
) &&
1721 !vk_format_is_compressed(iview
->vk_format
)) {
1722 unsigned lvl_width
= radv_minify(image
->info
.width
, range
->baseMipLevel
);
1723 unsigned lvl_height
= radv_minify(image
->info
.height
, range
->baseMipLevel
);
1725 lvl_width
= round_up_u32(lvl_width
* view_bw
, img_bw
);
1726 lvl_height
= round_up_u32(lvl_height
* view_bh
, img_bh
);
1728 lvl_width
<<= range
->baseMipLevel
;
1729 lvl_height
<<= range
->baseMipLevel
;
1731 iview
->extent
.width
= CLAMP(lvl_width
, iview
->extent
.width
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_pitch
);
1732 iview
->extent
.height
= CLAMP(lvl_height
, iview
->extent
.height
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_height
);
1736 iview
->base_layer
= range
->baseArrayLayer
;
1737 iview
->layer_count
= radv_get_layerCount(image
, range
);
1738 iview
->base_mip
= range
->baseMipLevel
;
1739 iview
->level_count
= radv_get_levelCount(image
, range
);
1741 bool disable_compression
= extra_create_info
? extra_create_info
->disable_compression
: false;
1742 for (unsigned i
= 0; i
< (iview
->multiple_planes
? vk_format_get_plane_count(image
->vk_format
) : 1); ++i
) {
1743 VkFormat format
= vk_format_get_plane_format(iview
->vk_format
, i
);
1744 radv_image_view_make_descriptor(iview
, device
, format
,
1745 &pCreateInfo
->components
,
1746 false, disable_compression
,
1747 iview
->plane_id
+ i
, i
);
1748 radv_image_view_make_descriptor(iview
, device
,
1749 format
, &pCreateInfo
->components
,
1750 true, disable_compression
,
1751 iview
->plane_id
+ i
, i
);
1755 bool radv_layout_is_htile_compressed(const struct radv_image
*image
,
1756 VkImageLayout layout
,
1757 bool in_render_loop
,
1758 unsigned queue_mask
)
1760 if (radv_image_is_tc_compat_htile(image
)) {
1761 if (layout
== VK_IMAGE_LAYOUT_GENERAL
&&
1763 !(image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)) {
1764 /* It should be safe to enable TC-compat HTILE with
1765 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1766 * loop and if the image doesn't have the storage bit
1767 * set. This improves performance for apps that use
1768 * GENERAL for the main depth pass because this allows
1769 * compression and this reduces the number of
1770 * decompressions from/to GENERAL.
1775 return layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1778 return radv_image_has_htile(image
) &&
1779 (layout
== VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
||
1780 layout
== VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR
||
1781 layout
== VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR
||
1782 (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1783 queue_mask
== (1u << RADV_QUEUE_GENERAL
)));
1786 bool radv_layout_can_fast_clear(const struct radv_image
*image
,
1787 VkImageLayout layout
,
1788 bool in_render_loop
,
1789 unsigned queue_mask
)
1791 return layout
== VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
;
1794 bool radv_layout_dcc_compressed(const struct radv_device
*device
,
1795 const struct radv_image
*image
,
1796 VkImageLayout layout
,
1797 bool in_render_loop
,
1798 unsigned queue_mask
)
1800 /* Don't compress compute transfer dst, as image stores are not supported. */
1801 if (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1802 (queue_mask
& (1u << RADV_QUEUE_COMPUTE
)))
1805 return radv_image_has_dcc(image
) && layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1809 unsigned radv_image_queue_family_mask(const struct radv_image
*image
, uint32_t family
, uint32_t queue_family
)
1811 if (!image
->exclusive
)
1812 return image
->queue_family_mask
;
1813 if (family
== VK_QUEUE_FAMILY_EXTERNAL
||
1814 family
== VK_QUEUE_FAMILY_FOREIGN_EXT
)
1815 return (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1816 if (family
== VK_QUEUE_FAMILY_IGNORED
)
1817 return 1u << queue_family
;
1818 return 1u << family
;
1822 radv_CreateImage(VkDevice device
,
1823 const VkImageCreateInfo
*pCreateInfo
,
1824 const VkAllocationCallbacks
*pAllocator
,
1828 const VkNativeBufferANDROID
*gralloc_info
=
1829 vk_find_struct_const(pCreateInfo
->pNext
, NATIVE_BUFFER_ANDROID
);
1832 return radv_image_from_gralloc(device
, pCreateInfo
, gralloc_info
,
1833 pAllocator
, pImage
);
1836 const struct wsi_image_create_info
*wsi_info
=
1837 vk_find_struct_const(pCreateInfo
->pNext
, WSI_IMAGE_CREATE_INFO_MESA
);
1838 bool scanout
= wsi_info
&& wsi_info
->scanout
;
1840 return radv_image_create(device
,
1841 &(struct radv_image_create_info
) {
1842 .vk_info
= pCreateInfo
,
1850 radv_DestroyImage(VkDevice _device
, VkImage _image
,
1851 const VkAllocationCallbacks
*pAllocator
)
1853 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1854 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1859 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
)
1860 device
->ws
->buffer_destroy(image
->bo
);
1862 if (image
->owned_memory
!= VK_NULL_HANDLE
)
1863 radv_FreeMemory(_device
, image
->owned_memory
, pAllocator
);
1865 vk_free2(&device
->alloc
, pAllocator
, image
);
1868 void radv_GetImageSubresourceLayout(
1871 const VkImageSubresource
* pSubresource
,
1872 VkSubresourceLayout
* pLayout
)
1874 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1875 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1876 int level
= pSubresource
->mipLevel
;
1877 int layer
= pSubresource
->arrayLayer
;
1879 unsigned plane_id
= radv_plane_from_aspect(pSubresource
->aspectMask
);
1881 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1882 struct radeon_surf
*surface
= &plane
->surface
;
1884 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1885 uint64_t level_offset
= surface
->is_linear
? surface
->u
.gfx9
.offset
[level
] : 0;
1887 pLayout
->offset
= plane
->offset
+ level_offset
+ surface
->u
.gfx9
.surf_slice_size
* layer
;
1888 if (image
->vk_format
== VK_FORMAT_R32G32B32_UINT
||
1889 image
->vk_format
== VK_FORMAT_R32G32B32_SINT
||
1890 image
->vk_format
== VK_FORMAT_R32G32B32_SFLOAT
) {
1891 /* Adjust the number of bytes between each row because
1892 * the pitch is actually the number of components per
1895 pLayout
->rowPitch
= surface
->u
.gfx9
.surf_pitch
* surface
->bpe
/ 3;
1897 uint32_t pitch
= surface
->is_linear
? surface
->u
.gfx9
.pitch
[level
] : surface
->u
.gfx9
.surf_pitch
;
1899 assert(util_is_power_of_two_nonzero(surface
->bpe
));
1900 pLayout
->rowPitch
= pitch
* surface
->bpe
;
1903 pLayout
->arrayPitch
= surface
->u
.gfx9
.surf_slice_size
;
1904 pLayout
->depthPitch
= surface
->u
.gfx9
.surf_slice_size
;
1905 pLayout
->size
= surface
->u
.gfx9
.surf_slice_size
;
1906 if (image
->type
== VK_IMAGE_TYPE_3D
)
1907 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
1909 pLayout
->offset
= plane
->offset
+ surface
->u
.legacy
.level
[level
].offset
+ (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4 * layer
;
1910 pLayout
->rowPitch
= surface
->u
.legacy
.level
[level
].nblk_x
* surface
->bpe
;
1911 pLayout
->arrayPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1912 pLayout
->depthPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1913 pLayout
->size
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1914 if (image
->type
== VK_IMAGE_TYPE_3D
)
1915 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
1921 radv_CreateImageView(VkDevice _device
,
1922 const VkImageViewCreateInfo
*pCreateInfo
,
1923 const VkAllocationCallbacks
*pAllocator
,
1926 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1927 struct radv_image_view
*view
;
1929 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1930 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1932 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1934 radv_image_view_init(view
, device
, pCreateInfo
, NULL
);
1936 *pView
= radv_image_view_to_handle(view
);
1942 radv_DestroyImageView(VkDevice _device
, VkImageView _iview
,
1943 const VkAllocationCallbacks
*pAllocator
)
1945 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1946 RADV_FROM_HANDLE(radv_image_view
, iview
, _iview
);
1950 vk_free2(&device
->alloc
, pAllocator
, iview
);
1953 void radv_buffer_view_init(struct radv_buffer_view
*view
,
1954 struct radv_device
*device
,
1955 const VkBufferViewCreateInfo
* pCreateInfo
)
1957 RADV_FROM_HANDLE(radv_buffer
, buffer
, pCreateInfo
->buffer
);
1959 view
->bo
= buffer
->bo
;
1960 view
->range
= pCreateInfo
->range
== VK_WHOLE_SIZE
?
1961 buffer
->size
- pCreateInfo
->offset
: pCreateInfo
->range
;
1962 view
->vk_format
= pCreateInfo
->format
;
1964 radv_make_buffer_descriptor(device
, buffer
, view
->vk_format
,
1965 pCreateInfo
->offset
, view
->range
, view
->state
);
1969 radv_CreateBufferView(VkDevice _device
,
1970 const VkBufferViewCreateInfo
*pCreateInfo
,
1971 const VkAllocationCallbacks
*pAllocator
,
1972 VkBufferView
*pView
)
1974 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1975 struct radv_buffer_view
*view
;
1977 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1978 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1980 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1982 radv_buffer_view_init(view
, device
, pCreateInfo
);
1984 *pView
= radv_buffer_view_to_handle(view
);
1990 radv_DestroyBufferView(VkDevice _device
, VkBufferView bufferView
,
1991 const VkAllocationCallbacks
*pAllocator
)
1993 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1994 RADV_FROM_HANDLE(radv_buffer_view
, view
, bufferView
);
1999 vk_free2(&device
->alloc
, pAllocator
, view
);