2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
32 #include "radv_radeon_winsys.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
38 #include "gfx10_format_table.h"
41 radv_choose_tiling(struct radv_device
*device
,
42 const VkImageCreateInfo
*pCreateInfo
,
45 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
) {
46 assert(pCreateInfo
->samples
<= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
50 if (!vk_format_is_compressed(format
) &&
51 !vk_format_is_depth_or_stencil(format
)
52 && device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
53 /* this causes hangs in some VK CTS tests on GFX9. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo
->imageType
== VK_IMAGE_TYPE_1D
||
56 /* Only very thin and long 2D textures should benefit from
58 (pCreateInfo
->extent
.width
> 8 && pCreateInfo
->extent
.height
<= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
62 /* MSAA resources must be 2D tiled. */
63 if (pCreateInfo
->samples
> 1)
64 return RADEON_SURF_MODE_2D
;
66 return RADEON_SURF_MODE_2D
;
70 radv_use_tc_compat_htile_for_image(struct radv_device
*device
,
71 const VkImageCreateInfo
*pCreateInfo
,
74 /* TC-compat HTILE is only available for GFX8+. */
75 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
78 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
81 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
84 if (pCreateInfo
->mipLevels
> 1)
87 /* Do not enable TC-compatible HTILE if the image isn't readable by a
88 * shader because no texture fetches will happen.
90 if (!(pCreateInfo
->usage
& (VK_IMAGE_USAGE_SAMPLED_BIT
|
91 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT
|
92 VK_IMAGE_USAGE_TRANSFER_SRC_BIT
)))
95 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
96 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
98 if (pCreateInfo
->samples
>= 2 &&
99 (format
== VK_FORMAT_D32_SFLOAT_S8_UINT
||
100 (format
== VK_FORMAT_D32_SFLOAT
&&
101 device
->physical_device
->rad_info
.chip_class
== GFX10
)))
104 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
105 * supports 32-bit. Though, it's possible to enable TC-compat for
106 * 16-bit depth surfaces if no Z planes are compressed.
108 if (format
!= VK_FORMAT_D32_SFLOAT_S8_UINT
&&
109 format
!= VK_FORMAT_D32_SFLOAT
&&
110 format
!= VK_FORMAT_D16_UNORM
)
113 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
114 const struct VkImageFormatListCreateInfo
*format_list
=
115 (const struct VkImageFormatListCreateInfo
*)
116 vk_find_struct_const(pCreateInfo
->pNext
,
117 IMAGE_FORMAT_LIST_CREATE_INFO
);
119 /* We have to ignore the existence of the list if viewFormatCount = 0 */
120 if (format_list
&& format_list
->viewFormatCount
) {
121 /* compatibility is transitive, so we only need to check
122 * one format with everything else.
124 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
125 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
128 if (format
!= format_list
->pViewFormats
[i
])
140 radv_surface_has_scanout(struct radv_device
*device
, const struct radv_image_create_info
*info
)
142 if (info
->bo_metadata
) {
143 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
144 return info
->bo_metadata
->u
.gfx9
.scanout
;
146 return info
->bo_metadata
->u
.legacy
.scanout
;
149 return info
->scanout
;
153 radv_use_dcc_for_image(struct radv_device
*device
,
154 const struct radv_image
*image
,
155 const VkImageCreateInfo
*pCreateInfo
,
158 bool dcc_compatible_formats
;
161 /* DCC (Delta Color Compression) is only available for GFX8+. */
162 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
165 if (device
->instance
->debug_flags
& RADV_DEBUG_NO_DCC
)
168 if (image
->shareable
)
171 /* TODO: Enable DCC for storage images. */
172 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
))
175 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
178 if (vk_format_is_subsampled(format
) ||
179 vk_format_get_plane_count(format
) > 1)
182 /* TODO: Enable DCC for mipmaps on GFX9+. */
183 if ((pCreateInfo
->arrayLayers
> 1 || pCreateInfo
->mipLevels
> 1) &&
184 device
->physical_device
->rad_info
.chip_class
>= GFX9
)
187 /* Do not enable DCC for mipmapped arrays because performance is worse. */
188 if (pCreateInfo
->arrayLayers
> 1 && pCreateInfo
->mipLevels
> 1)
191 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
192 * 2x can be enabled with an option.
194 if (pCreateInfo
->samples
> 2 ||
195 (pCreateInfo
->samples
== 2 &&
196 !device
->physical_device
->dcc_msaa_allowed
))
199 /* Determine if the formats are DCC compatible. */
200 dcc_compatible_formats
=
201 radv_is_colorbuffer_format_supported(format
,
204 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
205 const struct VkImageFormatListCreateInfo
*format_list
=
206 (const struct VkImageFormatListCreateInfo
*)
207 vk_find_struct_const(pCreateInfo
->pNext
,
208 IMAGE_FORMAT_LIST_CREATE_INFO
);
210 /* We have to ignore the existence of the list if viewFormatCount = 0 */
211 if (format_list
&& format_list
->viewFormatCount
) {
212 /* compatibility is transitive, so we only need to check
213 * one format with everything else. */
214 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
215 if (format_list
->pViewFormats
[i
] == VK_FORMAT_UNDEFINED
)
218 if (!radv_dcc_formats_compatible(format
,
219 format_list
->pViewFormats
[i
]))
220 dcc_compatible_formats
= false;
223 dcc_compatible_formats
= false;
227 if (!dcc_compatible_formats
)
234 radv_use_fmask_for_image(const struct radv_image
*image
)
236 return image
->info
.samples
> 1 &&
237 image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
;
241 radv_use_tc_compat_cmask_for_image(struct radv_device
*device
,
242 struct radv_image
*image
)
244 if (!(device
->instance
->perftest_flags
& RADV_PERFTEST_TC_COMPAT_CMASK
))
247 /* TC-compat CMASK is only available for GFX8+. */
248 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
251 if (image
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
)
254 if (radv_image_has_dcc(image
))
257 if (!radv_image_has_cmask(image
))
263 static uint32_t si_get_bo_metadata_word1(const struct radv_device
*device
)
265 return (ATI_VENDOR_ID
<< 16) | device
->physical_device
->rad_info
.pci_id
;
269 radv_is_valid_opaque_metadata(const struct radv_device
*device
,
270 const struct radeon_bo_metadata
*md
)
272 if (md
->metadata
[0] != 1 ||
273 md
->metadata
[1] != si_get_bo_metadata_word1(device
))
276 if (md
->size_metadata
< 40)
283 radv_patch_surface_from_metadata(struct radv_device
*device
,
284 struct radeon_surf
*surface
,
285 const struct radeon_bo_metadata
*md
)
287 surface
->flags
= RADEON_SURF_CLR(surface
->flags
, MODE
);
289 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
290 if (md
->u
.gfx9
.swizzle_mode
> 0)
291 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
293 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
295 surface
->u
.gfx9
.surf
.swizzle_mode
= md
->u
.gfx9
.swizzle_mode
;
297 surface
->u
.legacy
.pipe_config
= md
->u
.legacy
.pipe_config
;
298 surface
->u
.legacy
.bankw
= md
->u
.legacy
.bankw
;
299 surface
->u
.legacy
.bankh
= md
->u
.legacy
.bankh
;
300 surface
->u
.legacy
.tile_split
= md
->u
.legacy
.tile_split
;
301 surface
->u
.legacy
.mtilea
= md
->u
.legacy
.mtilea
;
302 surface
->u
.legacy
.num_banks
= md
->u
.legacy
.num_banks
;
304 if (md
->u
.legacy
.macrotile
== RADEON_LAYOUT_TILED
)
305 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
306 else if (md
->u
.legacy
.microtile
== RADEON_LAYOUT_TILED
)
307 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_1D
, MODE
);
309 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
315 radv_patch_image_dimensions(struct radv_device
*device
,
316 struct radv_image
*image
,
317 const struct radv_image_create_info
*create_info
,
318 struct ac_surf_info
*image_info
)
320 unsigned width
= image
->info
.width
;
321 unsigned height
= image
->info
.height
;
324 * minigbm sometimes allocates bigger images which is going to result in
325 * weird strides and other properties. Lets be lenient where possible and
326 * fail it on GFX10 (as we cannot cope there).
328 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
330 if (create_info
->bo_metadata
&&
331 radv_is_valid_opaque_metadata(device
, create_info
->bo_metadata
)) {
332 const struct radeon_bo_metadata
*md
= create_info
->bo_metadata
;
334 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
335 width
= G_00A004_WIDTH_LO(md
->metadata
[3]) +
336 (G_00A008_WIDTH_HI(md
->metadata
[4]) << 2) + 1;
337 height
= S_00A008_HEIGHT(md
->metadata
[4]) + 1;
339 width
= G_008F18_WIDTH(md
->metadata
[4]) + 1;
340 height
= G_008F18_HEIGHT(md
->metadata
[4]) + 1;
344 if (image
->info
.width
== width
&& image
->info
.height
== height
)
347 if (width
< image
->info
.width
|| height
< image
->info
.height
) {
349 "The imported image has smaller dimensions than the internal\n"
350 "dimensions. Using it is going to fail badly, so we reject\n"
352 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
353 image
->info
.width
, image
->info
.height
, width
, height
);
354 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
355 } else if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
357 "Tried to import an image with inconsistent width on GFX10.\n"
358 "As GFX10 has no separate stride fields we cannot cope with\n"
359 "an inconsistency in width and will fail this import.\n"
360 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
361 image
->info
.width
, image
->info
.height
, width
, height
);
362 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
365 "Tried to import an image with inconsistent width on pre-GFX10.\n"
366 "As GFX10 has no separate stride fields we cannot cope with\n"
367 "an inconsistency and would fail on GFX10.\n"
368 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
369 image
->info
.width
, image
->info
.height
, width
, height
);
371 image_info
->width
= width
;
372 image_info
->height
= height
;
378 radv_patch_image_from_extra_info(struct radv_device
*device
,
379 struct radv_image
*image
,
380 const struct radv_image_create_info
*create_info
,
381 struct ac_surf_info
*image_info
)
383 VkResult result
= radv_patch_image_dimensions(device
, image
, create_info
, image_info
);
384 if (result
!= VK_SUCCESS
)
387 for (unsigned plane
= 0; plane
< image
->plane_count
; ++plane
) {
388 if (create_info
->bo_metadata
) {
389 radv_patch_surface_from_metadata(device
, &image
->planes
[plane
].surface
,
390 create_info
->bo_metadata
);
393 if (radv_surface_has_scanout(device
, create_info
)) {
394 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_SCANOUT
;
395 image
->planes
[plane
].surface
.flags
|= RADEON_SURF_DISABLE_DCC
;
397 image
->info
.surf_index
= NULL
;
404 radv_init_surface(struct radv_device
*device
,
405 const struct radv_image
*image
,
406 struct radeon_surf
*surface
,
408 const VkImageCreateInfo
*pCreateInfo
,
409 VkFormat image_format
)
411 unsigned array_mode
= radv_choose_tiling(device
, pCreateInfo
, image_format
);
412 VkFormat format
= vk_format_get_plane_format(image_format
, plane_id
);
413 const struct vk_format_description
*desc
= vk_format_description(format
);
414 bool is_depth
, is_stencil
;
416 is_depth
= vk_format_has_depth(desc
);
417 is_stencil
= vk_format_has_stencil(desc
);
419 surface
->blk_w
= vk_format_get_blockwidth(format
);
420 surface
->blk_h
= vk_format_get_blockheight(format
);
422 surface
->bpe
= vk_format_get_blocksize(vk_format_depth_only(format
));
423 /* align byte per element on dword */
424 if (surface
->bpe
== 3) {
428 surface
->flags
= RADEON_SURF_SET(array_mode
, MODE
);
430 switch (pCreateInfo
->imageType
){
431 case VK_IMAGE_TYPE_1D
:
432 if (pCreateInfo
->arrayLayers
> 1)
433 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY
, TYPE
);
435 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D
, TYPE
);
437 case VK_IMAGE_TYPE_2D
:
438 if (pCreateInfo
->arrayLayers
> 1)
439 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY
, TYPE
);
441 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D
, TYPE
);
443 case VK_IMAGE_TYPE_3D
:
444 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_3D
, TYPE
);
447 unreachable("unhandled image type");
451 surface
->flags
|= RADEON_SURF_ZBUFFER
;
452 if (radv_use_tc_compat_htile_for_image(device
, pCreateInfo
, image_format
))
453 surface
->flags
|= RADEON_SURF_TC_COMPATIBLE_HTILE
;
457 surface
->flags
|= RADEON_SURF_SBUFFER
;
459 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
460 pCreateInfo
->imageType
== VK_IMAGE_TYPE_3D
&&
461 vk_format_get_blocksizebits(image_format
) == 128 &&
462 vk_format_is_compressed(image_format
))
463 surface
->flags
|= RADEON_SURF_NO_RENDER_TARGET
;
465 if (!radv_use_dcc_for_image(device
, image
, pCreateInfo
, image_format
))
466 surface
->flags
|= RADEON_SURF_DISABLE_DCC
;
468 if (!radv_use_fmask_for_image(image
))
469 surface
->flags
|= RADEON_SURF_NO_FMASK
;
474 static inline unsigned
475 si_tile_mode_index(const struct radv_image_plane
*plane
, unsigned level
, bool stencil
)
478 return plane
->surface
.u
.legacy
.stencil_tiling_index
[level
];
480 return plane
->surface
.u
.legacy
.tiling_index
[level
];
483 static unsigned radv_map_swizzle(unsigned swizzle
)
487 return V_008F0C_SQ_SEL_Y
;
489 return V_008F0C_SQ_SEL_Z
;
491 return V_008F0C_SQ_SEL_W
;
493 return V_008F0C_SQ_SEL_0
;
495 return V_008F0C_SQ_SEL_1
;
496 default: /* VK_SWIZZLE_X */
497 return V_008F0C_SQ_SEL_X
;
502 radv_make_buffer_descriptor(struct radv_device
*device
,
503 struct radv_buffer
*buffer
,
509 const struct vk_format_description
*desc
;
511 uint64_t gpu_address
= radv_buffer_get_va(buffer
->bo
);
512 uint64_t va
= gpu_address
+ buffer
->offset
;
513 unsigned num_format
, data_format
;
515 desc
= vk_format_description(vk_format
);
516 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
517 stride
= desc
->block
.bits
/ 8;
521 state
[1] = S_008F04_BASE_ADDRESS_HI(va
>> 32) |
522 S_008F04_STRIDE(stride
);
524 if (device
->physical_device
->rad_info
.chip_class
!= GFX8
&& stride
) {
529 state
[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc
->swizzle
[0])) |
530 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc
->swizzle
[1])) |
531 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc
->swizzle
[2])) |
532 S_008F0C_DST_SEL_W(radv_map_swizzle(desc
->swizzle
[3]));
534 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
535 const struct gfx10_format
*fmt
= &gfx10_format_table
[vk_format_to_pipe_format(vk_format
)];
537 /* OOB_SELECT chooses the out-of-bounds check:
538 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
539 * - 1: index >= NUM_RECORDS
540 * - 2: NUM_RECORDS == 0
541 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
542 * else: swizzle_address >= NUM_RECORDS
544 state
[3] |= S_008F0C_FORMAT(fmt
->img_format
) |
545 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET
) |
546 S_008F0C_RESOURCE_LEVEL(1);
548 num_format
= radv_translate_buffer_numformat(desc
, first_non_void
);
549 data_format
= radv_translate_buffer_dataformat(desc
, first_non_void
);
551 assert(data_format
!= V_008F0C_BUF_DATA_FORMAT_INVALID
);
552 assert(num_format
!= ~0);
554 state
[3] |= S_008F0C_NUM_FORMAT(num_format
) |
555 S_008F0C_DATA_FORMAT(data_format
);
560 si_set_mutable_tex_desc_fields(struct radv_device
*device
,
561 struct radv_image
*image
,
562 const struct legacy_surf_level
*base_level_info
,
564 unsigned base_level
, unsigned first_level
,
565 unsigned block_width
, bool is_stencil
,
566 bool is_storage_image
, bool disable_compression
,
569 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
570 uint64_t gpu_address
= image
->bo
? radv_buffer_get_va(image
->bo
) + image
->offset
: 0;
571 uint64_t va
= gpu_address
+ plane
->offset
;
572 enum chip_class chip_class
= device
->physical_device
->rad_info
.chip_class
;
573 uint64_t meta_va
= 0;
574 if (chip_class
>= GFX9
) {
576 va
+= plane
->surface
.u
.gfx9
.stencil_offset
;
578 va
+= plane
->surface
.u
.gfx9
.surf_offset
;
580 va
+= base_level_info
->offset
;
583 if (chip_class
>= GFX9
||
584 base_level_info
->mode
== RADEON_SURF_MODE_2D
)
585 state
[0] |= plane
->surface
.tile_swizzle
;
586 state
[1] &= C_008F14_BASE_ADDRESS_HI
;
587 state
[1] |= S_008F14_BASE_ADDRESS_HI(va
>> 40);
589 if (chip_class
>= GFX8
) {
590 state
[6] &= C_008F28_COMPRESSION_EN
;
592 if (!disable_compression
&& radv_dcc_enabled(image
, first_level
)) {
593 meta_va
= gpu_address
+ image
->dcc_offset
;
594 if (chip_class
<= GFX8
)
595 meta_va
+= base_level_info
->dcc_offset
;
597 unsigned dcc_tile_swizzle
= plane
->surface
.tile_swizzle
<< 8;
598 dcc_tile_swizzle
&= plane
->surface
.dcc_alignment
- 1;
599 meta_va
|= dcc_tile_swizzle
;
600 } else if (!disable_compression
&&
601 radv_image_is_tc_compat_htile(image
)) {
602 meta_va
= gpu_address
+ image
->htile_offset
;
606 state
[6] |= S_008F28_COMPRESSION_EN(1);
607 if (chip_class
<= GFX9
)
608 state
[7] = meta_va
>> 8;
612 if (chip_class
>= GFX10
) {
613 state
[3] &= C_00A00C_SW_MODE
;
616 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
618 state
[3] |= S_00A00C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
621 state
[6] &= C_00A018_META_DATA_ADDRESS_LO
&
622 C_00A018_META_PIPE_ALIGNED
;
625 struct gfx9_surf_meta_flags meta
= {
630 if (image
->dcc_offset
)
631 meta
= plane
->surface
.u
.gfx9
.dcc
;
633 state
[6] |= S_00A018_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
634 S_00A018_META_DATA_ADDRESS_LO(meta_va
>> 8);
637 state
[7] = meta_va
>> 16;
638 } else if (chip_class
== GFX9
) {
639 state
[3] &= C_008F1C_SW_MODE
;
640 state
[4] &= C_008F20_PITCH
;
643 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
644 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.stencil
.epitch
);
646 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
647 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.surf
.epitch
);
650 state
[5] &= C_008F24_META_DATA_ADDRESS
&
651 C_008F24_META_PIPE_ALIGNED
&
652 C_008F24_META_RB_ALIGNED
;
654 struct gfx9_surf_meta_flags meta
= {
659 if (image
->dcc_offset
)
660 meta
= plane
->surface
.u
.gfx9
.dcc
;
662 state
[5] |= S_008F24_META_DATA_ADDRESS(meta_va
>> 40) |
663 S_008F24_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
664 S_008F24_META_RB_ALIGNED(meta
.rb_aligned
);
668 unsigned pitch
= base_level_info
->nblk_x
* block_width
;
669 unsigned index
= si_tile_mode_index(plane
, base_level
, is_stencil
);
671 state
[3] &= C_008F1C_TILING_INDEX
;
672 state
[3] |= S_008F1C_TILING_INDEX(index
);
673 state
[4] &= C_008F20_PITCH
;
674 state
[4] |= S_008F20_PITCH(pitch
- 1);
678 static unsigned radv_tex_dim(VkImageType image_type
, VkImageViewType view_type
,
679 unsigned nr_layers
, unsigned nr_samples
, bool is_storage_image
, bool gfx9
)
681 if (view_type
== VK_IMAGE_VIEW_TYPE_CUBE
|| view_type
== VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
)
682 return is_storage_image
? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_CUBE
;
684 /* GFX9 allocates 1D textures as 2D. */
685 if (gfx9
&& image_type
== VK_IMAGE_TYPE_1D
)
686 image_type
= VK_IMAGE_TYPE_2D
;
687 switch (image_type
) {
688 case VK_IMAGE_TYPE_1D
:
689 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY
: V_008F1C_SQ_RSRC_IMG_1D
;
690 case VK_IMAGE_TYPE_2D
:
692 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D_MSAA
;
694 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D
;
695 case VK_IMAGE_TYPE_3D
:
696 if (view_type
== VK_IMAGE_VIEW_TYPE_3D
)
697 return V_008F1C_SQ_RSRC_IMG_3D
;
699 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY
;
701 unreachable("illegal image type");
705 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle
[4])
707 unsigned bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
709 if (swizzle
[3] == VK_SWIZZLE_X
) {
710 /* For the pre-defined border color values (white, opaque
711 * black, transparent black), the only thing that matters is
712 * that the alpha channel winds up in the correct place
713 * (because the RGB channels are all the same) so either of
714 * these enumerations will work.
716 if (swizzle
[2] == VK_SWIZZLE_Y
)
717 bc_swizzle
= V_008F20_BC_SWIZZLE_WZYX
;
719 bc_swizzle
= V_008F20_BC_SWIZZLE_WXYZ
;
720 } else if (swizzle
[0] == VK_SWIZZLE_X
) {
721 if (swizzle
[1] == VK_SWIZZLE_Y
)
722 bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
724 bc_swizzle
= V_008F20_BC_SWIZZLE_XWYZ
;
725 } else if (swizzle
[1] == VK_SWIZZLE_X
) {
726 bc_swizzle
= V_008F20_BC_SWIZZLE_YXWZ
;
727 } else if (swizzle
[2] == VK_SWIZZLE_X
) {
728 bc_swizzle
= V_008F20_BC_SWIZZLE_ZYXW
;
734 bool vi_alpha_is_on_msb(struct radv_device
*device
, VkFormat format
)
736 const struct vk_format_description
*desc
= vk_format_description(format
);
738 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
&& desc
->nr_channels
== 1)
739 return desc
->swizzle
[3] == VK_SWIZZLE_X
;
741 return radv_translate_colorswap(format
, false) <= 1;
744 * Build the sampler view descriptor for a texture (GFX10).
747 gfx10_make_texture_descriptor(struct radv_device
*device
,
748 struct radv_image
*image
,
749 bool is_storage_image
,
750 VkImageViewType view_type
,
752 const VkComponentMapping
*mapping
,
753 unsigned first_level
, unsigned last_level
,
754 unsigned first_layer
, unsigned last_layer
,
755 unsigned width
, unsigned height
, unsigned depth
,
757 uint32_t *fmask_state
)
759 const struct vk_format_description
*desc
;
760 enum vk_swizzle swizzle
[4];
764 desc
= vk_format_description(vk_format
);
765 img_format
= gfx10_format_table
[vk_format_to_pipe_format(vk_format
)].img_format
;
767 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
768 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
769 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
771 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
774 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
775 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
776 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
778 depth
= image
->info
.array_size
;
779 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
780 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
781 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
782 depth
= image
->info
.array_size
;
783 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
784 depth
= image
->info
.array_size
/ 6;
787 state
[1] = S_00A004_FORMAT(img_format
) |
788 S_00A004_WIDTH_LO(width
- 1);
789 state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
790 S_00A008_HEIGHT(height
- 1) |
791 S_00A008_RESOURCE_LEVEL(1);
792 state
[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
793 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
794 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
795 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
796 S_00A00C_BASE_LEVEL(image
->info
.samples
> 1 ?
798 S_00A00C_LAST_LEVEL(image
->info
.samples
> 1 ?
799 util_logbase2(image
->info
.samples
) :
801 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle
)) |
803 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
804 * to know the total number of layers.
806 state
[4] = S_00A010_DEPTH(type
== V_008F1C_SQ_RSRC_IMG_3D
? depth
- 1 : last_layer
) |
807 S_00A010_BASE_ARRAY(first_layer
);
808 state
[5] = S_00A014_ARRAY_PITCH(0) |
809 S_00A014_MAX_MIP(image
->info
.samples
> 1 ?
810 util_logbase2(image
->info
.samples
) :
811 image
->info
.levels
- 1) |
812 S_00A014_PERF_MOD(4);
816 if (radv_dcc_enabled(image
, first_level
)) {
817 state
[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B
) |
818 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B
) |
819 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
822 /* Initialize the sampler view for FMASK. */
823 if (radv_image_has_fmask(image
)) {
824 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
828 assert(image
->plane_count
== 1);
830 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
832 switch (image
->info
.samples
) {
834 format
= V_008F0C_IMG_FORMAT_FMASK8_S2_F2
;
837 format
= V_008F0C_IMG_FORMAT_FMASK8_S4_F4
;
840 format
= V_008F0C_IMG_FORMAT_FMASK32_S8_F8
;
843 unreachable("invalid nr_samples");
846 fmask_state
[0] = (va
>> 8) | image
->planes
[0].surface
.fmask_tile_swizzle
;
847 fmask_state
[1] = S_00A004_BASE_ADDRESS_HI(va
>> 40) |
848 S_00A004_FORMAT(format
) |
849 S_00A004_WIDTH_LO(width
- 1);
850 fmask_state
[2] = S_00A008_WIDTH_HI((width
- 1) >> 2) |
851 S_00A008_HEIGHT(height
- 1) |
852 S_00A008_RESOURCE_LEVEL(1);
853 fmask_state
[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
854 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
855 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
856 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
857 S_00A00C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
) |
858 S_00A00C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
859 fmask_state
[4] = S_00A010_DEPTH(last_layer
) |
860 S_00A010_BASE_ARRAY(first_layer
);
862 fmask_state
[6] = S_00A018_META_PIPE_ALIGNED(1);
864 } else if (fmask_state
)
865 memset(fmask_state
, 0, 8 * 4);
869 * Build the sampler view descriptor for a texture (SI-GFX9)
872 si_make_texture_descriptor(struct radv_device
*device
,
873 struct radv_image
*image
,
874 bool is_storage_image
,
875 VkImageViewType view_type
,
877 const VkComponentMapping
*mapping
,
878 unsigned first_level
, unsigned last_level
,
879 unsigned first_layer
, unsigned last_layer
,
880 unsigned width
, unsigned height
, unsigned depth
,
882 uint32_t *fmask_state
)
884 const struct vk_format_description
*desc
;
885 enum vk_swizzle swizzle
[4];
887 unsigned num_format
, data_format
, type
;
889 desc
= vk_format_description(vk_format
);
891 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
892 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
893 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
895 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
898 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
900 num_format
= radv_translate_tex_numformat(vk_format
, desc
, first_non_void
);
901 if (num_format
== ~0) {
905 data_format
= radv_translate_tex_dataformat(vk_format
, desc
, first_non_void
);
906 if (data_format
== ~0) {
910 /* S8 with either Z16 or Z32 HTILE need a special format. */
911 if (device
->physical_device
->rad_info
.chip_class
== GFX9
&&
912 vk_format
== VK_FORMAT_S8_UINT
&&
913 radv_image_is_tc_compat_htile(image
)) {
914 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
915 data_format
= V_008F14_IMG_DATA_FORMAT_S8_32
;
916 else if (image
->vk_format
== VK_FORMAT_D16_UNORM_S8_UINT
)
917 data_format
= V_008F14_IMG_DATA_FORMAT_S8_16
;
919 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
920 is_storage_image
, device
->physical_device
->rad_info
.chip_class
== GFX9
);
921 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
923 depth
= image
->info
.array_size
;
924 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
925 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
926 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
927 depth
= image
->info
.array_size
;
928 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
929 depth
= image
->info
.array_size
/ 6;
932 state
[1] = (S_008F14_DATA_FORMAT(data_format
) |
933 S_008F14_NUM_FORMAT(num_format
));
934 state
[2] = (S_008F18_WIDTH(width
- 1) |
935 S_008F18_HEIGHT(height
- 1) |
936 S_008F18_PERF_MOD(4));
937 state
[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
938 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
939 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
940 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
941 S_008F1C_BASE_LEVEL(image
->info
.samples
> 1 ?
943 S_008F1C_LAST_LEVEL(image
->info
.samples
> 1 ?
944 util_logbase2(image
->info
.samples
) :
946 S_008F1C_TYPE(type
));
948 state
[5] = S_008F24_BASE_ARRAY(first_layer
);
952 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
953 unsigned bc_swizzle
= gfx9_border_color_swizzle(swizzle
);
955 /* Depth is the last accessible layer on Gfx9.
956 * The hw doesn't need to know the total number of layers.
958 if (type
== V_008F1C_SQ_RSRC_IMG_3D
)
959 state
[4] |= S_008F20_DEPTH(depth
- 1);
961 state
[4] |= S_008F20_DEPTH(last_layer
);
963 state
[4] |= S_008F20_BC_SWIZZLE(bc_swizzle
);
964 state
[5] |= S_008F24_MAX_MIP(image
->info
.samples
> 1 ?
965 util_logbase2(image
->info
.samples
) :
966 image
->info
.levels
- 1);
968 state
[3] |= S_008F1C_POW2_PAD(image
->info
.levels
> 1);
969 state
[4] |= S_008F20_DEPTH(depth
- 1);
970 state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
972 if (image
->dcc_offset
) {
973 state
[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device
, vk_format
));
975 /* The last dword is unused by hw. The shader uses it to clear
976 * bits in the first dword of sampler state.
978 if (device
->physical_device
->rad_info
.chip_class
<= GFX7
&& image
->info
.samples
<= 1) {
979 if (first_level
== last_level
)
980 state
[7] = C_008F30_MAX_ANISO_RATIO
;
982 state
[7] = 0xffffffff;
986 /* Initialize the sampler view for FMASK. */
987 if (radv_image_has_fmask(image
)) {
988 uint32_t fmask_format
, num_format
;
989 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
992 assert(image
->plane_count
== 1);
994 va
= gpu_address
+ image
->offset
+ image
->fmask_offset
;
996 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
997 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK
;
998 switch (image
->info
.samples
) {
1000 num_format
= V_008F14_IMG_FMASK_8_2_2
;
1003 num_format
= V_008F14_IMG_FMASK_8_4_4
;
1006 num_format
= V_008F14_IMG_FMASK_32_8_8
;
1009 unreachable("invalid nr_samples");
1012 switch (image
->info
.samples
) {
1014 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2
;
1017 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4
;
1020 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8
;
1024 fmask_format
= V_008F14_IMG_DATA_FORMAT_INVALID
;
1026 num_format
= V_008F14_IMG_NUM_FORMAT_UINT
;
1029 fmask_state
[0] = va
>> 8;
1030 fmask_state
[0] |= image
->planes
[0].surface
.fmask_tile_swizzle
;
1031 fmask_state
[1] = S_008F14_BASE_ADDRESS_HI(va
>> 40) |
1032 S_008F14_DATA_FORMAT(fmask_format
) |
1033 S_008F14_NUM_FORMAT(num_format
);
1034 fmask_state
[2] = S_008F18_WIDTH(width
- 1) |
1035 S_008F18_HEIGHT(height
- 1);
1036 fmask_state
[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
1037 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
1038 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
1039 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
1040 S_008F1C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
1042 fmask_state
[5] = S_008F24_BASE_ARRAY(first_layer
);
1046 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
1047 fmask_state
[3] |= S_008F1C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
);
1048 fmask_state
[4] |= S_008F20_DEPTH(last_layer
) |
1049 S_008F20_PITCH(image
->planes
[0].surface
.u
.gfx9
.fmask
.epitch
);
1050 fmask_state
[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1051 S_008F24_META_RB_ALIGNED(1);
1053 if (radv_image_is_tc_compat_cmask(image
)) {
1054 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1056 fmask_state
[5] |= S_008F24_META_DATA_ADDRESS(va
>> 40);
1057 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1058 fmask_state
[7] |= va
>> 8;
1061 fmask_state
[3] |= S_008F1C_TILING_INDEX(image
->planes
[0].surface
.u
.legacy
.fmask
.tiling_index
);
1062 fmask_state
[4] |= S_008F20_DEPTH(depth
- 1) |
1063 S_008F20_PITCH(image
->planes
[0].surface
.u
.legacy
.fmask
.pitch_in_pixels
- 1);
1064 fmask_state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
1066 if (radv_image_is_tc_compat_cmask(image
)) {
1067 va
= gpu_address
+ image
->offset
+ image
->cmask_offset
;
1069 fmask_state
[6] |= S_008F28_COMPRESSION_EN(1);
1070 fmask_state
[7] |= va
>> 8;
1073 } else if (fmask_state
)
1074 memset(fmask_state
, 0, 8 * 4);
/**
 * Build a complete texture descriptor (and optional FMASK descriptor) for
 * the given image view parameters, dispatching to the generation-specific
 * implementation: GFX10+ uses the gfx10 path, everything older the SI path.
 *
 * NOTE(review): the extraction dropped two parameter lines here; the
 * `VkFormat vk_format` and `uint32_t *state` parameters are reconstructed
 * from the call sites visible in this file — verify against upstream.
 */
static void
radv_make_texture_descriptor(struct radv_device *device,
			     struct radv_image *image,
			     bool is_storage_image,
			     VkImageViewType view_type,
			     VkFormat vk_format,
			     const VkComponentMapping *mapping,
			     unsigned first_level, unsigned last_level,
			     unsigned first_layer, unsigned last_layer,
			     unsigned width, unsigned height, unsigned depth,
			     uint32_t *state,
			     uint32_t *fmask_state)
{
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		gfx10_make_texture_descriptor(device, image, is_storage_image,
					      view_type, vk_format, mapping,
					      first_level, last_level,
					      first_layer, last_layer,
					      width, height, depth,
					      state, fmask_state);
	} else {
		si_make_texture_descriptor(device, image, is_storage_image,
					   view_type, vk_format, mapping,
					   first_level, last_level,
					   first_layer, last_layer,
					   width, height, depth,
					   state, fmask_state);
	}
}
/**
 * Fill the opaque (driver-private) part of the BO metadata that is exported
 * with shareable images, so other processes/drivers can reconstruct how to
 * sample this image.
 *
 * Only single-plane images are supported here (asserted below).
 */
static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	/* Zero-initialized: identity swizzle for the exported descriptor. */
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	assert(image->plane_count == 1);

	/* Metadata image format format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	/* Build a full-resource descriptor covering every level/layer. */
	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
/**
 * Initialize the BO metadata for plane 0 of an image, used when exporting
 * the image to other processes/APIs.
 *
 * GFX9+ exports only the swizzle mode and scanout flag; older chips export
 * the full legacy tiling description (bank/tile parameters, stride, etc.).
 * The opaque driver-private part is appended by radv_query_opaque_metadata().
 */
void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->planes[0].surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
		metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	} else {
		/* Level 0's mode determines micro/macro tiling for the whole image. */
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}
	radv_query_opaque_metadata(device, image, metadata);
}
/**
 * Override the offset and per-row stride of plane 0's surface layout, used
 * when importing images whose layout was decided externally (e.g. gralloc,
 * DMA-buf with explicit stride).
 */
void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	ac_surface_override_offset_stride(&device->physical_device->rad_info,
					  &image->planes[0].surface,
					  image->info.levels, offset, stride);
}
/**
 * Append FMASK (MSAA fragment mask) storage to the image's memory layout:
 * place it after the current image size (suitably aligned) and grow
 * image->size/alignment accordingly.
 */
static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;

	image->fmask_offset = align64(image->size, fmask_alignment);
	image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
	image->alignment = MAX2(image->alignment, fmask_alignment);
}
/**
 * Append CMASK (fast-clear metadata) storage to the image's memory layout,
 * plus 8 bytes for the fast-clear value if no clear-value slot has been
 * reserved yet (DCC allocation may have reserved one already).
 *
 * NOTE(review): the extraction dropped the lines between the locals and the
 * alignment assert; an early `if (!cmask_size) return;` guard is
 * reconstructed there — verify against upstream.
 */
static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
	unsigned cmask_size = image->planes[0].surface.cmask_size;
	uint32_t clear_value_size = 0;

	if (!cmask_size)
		return;

	assert(cmask_alignment);

	image->cmask_offset = align64(image->size, cmask_alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask_offset + cmask_size;
		clear_value_size = 8;
	}
	image->size = image->cmask_offset + cmask_size + clear_value_size;
	image->alignment = MAX2(image->alignment, cmask_alignment);
}
/**
 * Append DCC (delta color compression) metadata to the image's memory
 * layout, followed by per-mip driver metadata: 8 bytes of clear value,
 * 8 bytes of FCE (fast-clear eliminate) predicate and 8 bytes of DCC
 * decompression predicate per level (24 bytes/level total).
 *
 * Only single-plane images can have DCC here (asserted).
 */
static void
radv_image_alloc_dcc(struct radv_image *image)
{
	assert(image->plane_count == 1);

	image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
	/* + 24 for storing the clear values + fce pred + dcc pred for each mip */
	image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
	image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
	image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
	image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
	image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}
/**
 * Append HTILE (depth/stencil compression) metadata to the image's memory
 * layout, plus 8 bytes of depth clear value per mip level, and — on chips
 * with the TC-compat zrange bug — 4 extra bytes per level used to patch
 * ZRANGE_PRECISION after fast clears to 0.0f.
 */
static void
radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
{
	image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
	image->size = image->clear_value_offset + image->info.levels * 8;
	if (radv_image_is_tc_compat_htile(image) &&
	    device->physical_device->rad_info.has_tc_compat_zrange_bug) {
		/* Metadata for the TC-compatible HTILE hardware bug which
		 * have to be fixed by updating ZRANGE_PRECISION when doing
		 * fast depth clears to 0.0f.
		 */
		image->tc_compat_zrange_offset = image->size;
		image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
	}
	image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
}
/**
 * Common precondition for enabling either DCC or CMASK: the image must be a
 * color attachment that is exclusively owned (or used by a single queue
 * family), and not a tiny single-sampled surface where the eliminate pass
 * would cost more than the fast clear saves.
 */
static bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}
/**
 * Decide whether DCC can be enabled for this image: the generic
 * DCC/CMASK preconditions must hold, the surface layout must have DCC, and
 * on GFX8 layered images are only allowed when DCC slices are contiguous
 * (slice size == per-slice fast clear size).
 */
static bool
radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
{
	if (!radv_image_can_enable_dcc_or_cmask(image) ||
	    !radv_image_has_dcc(image))
		return false;

	/* On GFX8, DCC layers can be interleaved and it's currently only
	 * enabled if slice size is equal to the per slice fast clear size
	 * because the driver assumes that portions of multiple layers are
	 * contiguous during fast clears.
	 */
	if (image->info.array_size > 1) {
		const struct legacy_surf_level *surf_level =
			&image->planes[0].surface.u.legacy.level[0];

		assert(device->physical_device->rad_info.chip_class == GFX8);

		if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
			return false;
	}

	return true;
}
/**
 * Decide whether CMASK can be enabled: single mip level, non-3D, tiled
 * surface satisfying the common DCC/CMASK preconditions. 128-bit formats
 * are excluded for single-sampled images because fast color clear doesn't
 * support them.
 */
static bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128 bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1 &&
	       !image->planes[0].surface.is_linear;
}
/**
 * Decide whether HTILE can be enabled: the surface layout must have HTILE,
 * the image must have a single mip level, and be at least 8x8 pixels
 * (smaller depth surfaces don't benefit from compression).
 */
static bool
radv_image_can_enable_htile(struct radv_image *image)
{
	return radv_image_has_htile(image) &&
	       image->info.levels == 1 &&
	       image->info.width * image->info.height >= 8 * 8;
}
1334 static void radv_image_disable_dcc(struct radv_image
*image
)
1336 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1337 image
->planes
[i
].surface
.dcc_size
= 0;
1340 static void radv_image_disable_htile(struct radv_image
*image
)
1342 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1343 image
->planes
[i
].surface
.htile_size
= 0;
/**
 * Compute the full memory layout of an image: per-plane surface layouts,
 * plane offsets, total size/alignment, and the optional compression
 * metadata (DCC, CMASK, FMASK, HTILE).
 *
 * NOTE(review): the extraction dropped several one-line guards in this
 * function (`image->size = 0;`, the `if (plane)` divisor guard, the outer
 * `if (!create_info.no_metadata_planes)` wrapper and its else-branch, and
 * the final `return VK_SUCCESS;`). They are reconstructed from the visible
 * control-flow — verify against upstream before relying on them.
 */
VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			/* Chroma planes of subsampled formats are smaller by the
			 * format's divisors. */
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		if (create_info.no_metadata_planes || image->plane_count > 1) {
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
			                                      RADEON_SURF_NO_FMASK |
			                                      RADEON_SURF_NO_HTILE;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		/* Planes are packed one after another, each at its required alignment. */
		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		image->alignment = image->planes[plane].surface.surf_alignment;

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	if (!create_info.no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(device, image)) {
			radv_image_alloc_dcc(image);
			if (image->info.samples > 1) {
				/* CMASK should be enabled because DCC fast
				 * clear with MSAA needs it.
				 */
				assert(radv_image_can_enable_cmask(image));
				radv_image_alloc_cmask(device, image);
			}
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			radv_image_disable_dcc(image);
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (image->planes[0].surface.fmask_size) {
			radv_image_alloc_fmask(device, image);

			if (radv_use_tc_compat_cmask_for_image(device, image))
				image->tc_compatible_cmask = true;
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
				radv_image_alloc_htile(device, image);
			} else {
				radv_image_disable_htile(image);
			}
		}
	} else {
		radv_image_disable_dcc(image);
		radv_image_disable_htile(image);
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
/**
 * Create a radv_image from a VkImageCreateInfo (plus driver-internal extra
 * info). Allocates the image struct with trailing per-plane storage,
 * records the creation parameters, initializes per-plane surfaces and —
 * unless layout is delayed for Android hardware buffers — computes the full
 * memory layout. Sparse images additionally get a virtual BO.
 *
 * NOTE(review): several one-line guards here (`if (!image)`, the
 * `if (delay_layout)` early-out, `if (!image->bo)`, `return VK_SUCCESS;`)
 * were dropped by the extraction and are reconstructed — verify against
 * upstream.
 */
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	/* Android external formats arrive through pNext with format UNDEFINED. */
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
	                                                      pCreateInfo->format);
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
		                     EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;

	image->shareable = external_info;
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	/* Android hardware buffers get their layout at bind time. */
	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);

		/* Sparse images are backed by a virtual BO; pages are bound later. */
		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
						      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->vk.alloc, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}
/**
 * Fill one plane's sampled-image or storage-image descriptor for an image
 * view, then patch in the mutable (address/tiling) fields.
 *
 * NOTE(review): the extraction dropped the `VkFormat vk_format` parameter
 * line, the `uint32_t blk_w` declaration, and most of the argument list of
 * the radv_make_texture_descriptor / si_set_mutable_tex_desc_fields calls;
 * they are reconstructed from the visible fragments — verify against
 * upstream.
 */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	/* Block width of the view format expressed in units of the plane format. */
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	/* GFX9+ addresses mips in hardware; older chips bake the base mip into
	 * the base address, so the descriptor level range starts at 0 there. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}
/**
 * Map a VkImageAspectFlags value to the plane index it addresses.
 * All non-planar aspects (color, depth, stencil) map to plane 0.
 *
 * NOTE(review): the case bodies and the default label were dropped by the
 * extraction; reconstructed from the visible case labels — verify against
 * upstream.
 */
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return 1;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return 2;
	default:
		return 0;
	}
}
/**
 * Return the per-aspect format of an image: the plane format for planar
 * aspects, the stencil-only/depth-only variant for stencil/depth aspects,
 * and the image's own format otherwise. Note that the combined
 * depth+stencil aspect intentionally returns the depth-only format.
 */
static VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_0_BIT:
		return image->planes[0].format;
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return image->planes[1].format;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return image->planes[2].format;
	case VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_stencil_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT:
		return vk_format_depth_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_depth_only(image->vk_format);
	default:
		return image->vk_format;
	}
}
/**
 * Initialize an image view: validate the subresource range, resolve the
 * view format (Android external formats, depth/stencil-only aspects),
 * compute the view extent (including the block-compressed-to-uncompressed
 * reinterpretation fixups for GFX9+), and build the sampled and storage
 * descriptors for each plane the view covers.
 */
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+ descriptors take the base-level extent; the hardware
		 * minifies per mip itself. */
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width  = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth  = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		/* The view reinterprets the image with a different block size;
		 * rescale the extent into view-format blocks. */
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		/* Sampled-image descriptor... */
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		/* ...and the matching storage-image descriptor. */
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}
/**
 * Return whether a depth/stencil image is HTILE-compressed in the given
 * layout. TC-compatible HTILE stays compressed in most layouts (including
 * GENERAL when outside a render loop and not usable as storage); plain
 * HTILE is only compressed in attachment layouts and GENERAL-queue
 * transfer-dst.
 *
 * NOTE(review): the `!in_render_loop &&` conjunct in the GENERAL check was
 * dropped by the extraction and is reconstructed from the comment below —
 * verify against upstream.
 */
bool radv_layout_is_htile_compressed(const struct radv_image *image,
				     VkImageLayout layout,
				     bool in_render_loop,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image)) {
		if (layout == VK_IMAGE_LAYOUT_GENERAL &&
		    !in_render_loop &&
		    !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
			/* It should be safe to enable TC-compat HTILE with
			 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
			 * loop and if the image doesn't have the storage bit
			 * set. This improves performance for apps that use
			 * GENERAL for the main depth pass because this allows
			 * compression and this reduces the number of
			 * decompressions from/to GENERAL.
			 */
			return true;
		}

		return layout != VK_IMAGE_LAYOUT_GENERAL;
	}

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
	        layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
	        layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
	        (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	         queue_mask == (1u << RADV_QUEUE_GENERAL)));
}
/**
 * Return whether fast (metadata-based) color clears are allowed in the
 * given layout. Only COLOR_ATTACHMENT_OPTIMAL qualifies; the image,
 * render-loop and queue-mask parameters are currently unused but kept for
 * interface symmetry with the other radv_layout_* helpers.
 */
bool radv_layout_can_fast_clear(const struct radv_image *image,
			        VkImageLayout layout,
			        bool in_render_loop,
			        unsigned queue_mask)
{
	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
/**
 * Return whether the image keeps its DCC compression in the given layout.
 * Compute-queue transfer destinations must be decompressed because image
 * stores don't go through DCC; GENERAL is always decompressed.
 */
bool radv_layout_dcc_compressed(const struct radv_device *device,
			        const struct radv_image *image,
			        VkImageLayout layout,
			        bool in_render_loop,
			        unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
		return false;

	return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}
/**
 * Compute the bitmask of queue families that may access the image for a
 * given (source family, current queue family) pair: concurrent images use
 * their stored mask, external/foreign families map to all families, and
 * IGNORED resolves to the current queue's family.
 */
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
	if (!image->exclusive)
		return image->queue_family_mask;
	if (family == VK_QUEUE_FAMILY_EXTERNAL ||
	    family == VK_QUEUE_FAMILY_FOREIGN_EXT)
		return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
	if (family == VK_QUEUE_FAMILY_IGNORED)
		return 1u << queue_family;
	return 1u << family;
}
/**
 * vkCreateImage entry point. Routes Android gralloc-backed images to the
 * gralloc import path and everything else to radv_image_create(), passing
 * along the WSI scanout hint if present.
 *
 * NOTE(review): the extraction dropped the `#ifdef ANDROID` guard lines,
 * the `if (gralloc_info)` check, the return type/`pImage` parameter line
 * and the `.scanout` initializer; reconstructed here — verify against
 * upstream.
 */
VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
					       pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}
1841 radv_DestroyImage(VkDevice _device
, VkImage _image
,
1842 const VkAllocationCallbacks
*pAllocator
)
1844 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1845 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1850 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
)
1851 device
->ws
->buffer_destroy(image
->bo
);
1853 if (image
->owned_memory
!= VK_NULL_HANDLE
)
1854 radv_FreeMemory(_device
, image
->owned_memory
, pAllocator
);
1856 vk_object_base_finish(&image
->base
);
1857 vk_free2(&device
->vk
.alloc
, pAllocator
, image
);
/**
 * vkGetImageSubresourceLayout entry point. Reports the host-visible layout
 * (offset, row/array/depth pitches, size) of one mip/layer of one plane.
 * GFX9+ uses the unified surface description (per-level offsets only exist
 * for linear surfaces); older chips use the per-level legacy description.
 */
void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
/**
 * vkCreateImageView entry point: allocate the view object, initialize it
 * via radv_image_view_init() (no extra create info for the public path)
 * and return the handle.
 */
VkResult
radv_CreateImageView(VkDevice _device,
		     const VkImageViewCreateInfo *pCreateInfo,
		     const VkAllocationCallbacks *pAllocator,
		     VkImageView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_image_view *view;

	view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (view == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &view->base,
			    VK_OBJECT_TYPE_IMAGE_VIEW);

	radv_image_view_init(view, device, pCreateInfo, NULL);

	*pView = radv_image_view_to_handle(view);

	return VK_SUCCESS;
}
1937 radv_DestroyImageView(VkDevice _device
, VkImageView _iview
,
1938 const VkAllocationCallbacks
*pAllocator
)
1940 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1941 RADV_FROM_HANDLE(radv_image_view
, iview
, _iview
);
1946 vk_object_base_finish(&iview
->base
);
1947 vk_free2(&device
->vk
.alloc
, pAllocator
, iview
);
/**
 * Initialize a buffer view: record the backing BO, resolve VK_WHOLE_SIZE
 * to the remaining buffer size, and build the typed buffer descriptor.
 */
void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

	view->bo = buffer->bo;
	/* VK_WHOLE_SIZE means "from offset to the end of the buffer". */
	view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
		buffer->size - pCreateInfo->offset : pCreateInfo->range;
	view->vk_format = pCreateInfo->format;

	radv_make_buffer_descriptor(device, buffer, view->vk_format,
				    pCreateInfo->offset, view->range, view->state);
}
/**
 * vkCreateBufferView entry point: allocate the view object, initialize it
 * via radv_buffer_view_init() and return the handle.
 */
VkResult
radv_CreateBufferView(VkDevice _device,
		      const VkBufferViewCreateInfo *pCreateInfo,
		      const VkAllocationCallbacks *pAllocator,
		      VkBufferView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer_view *view;

	view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!view)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &view->base,
			    VK_OBJECT_TYPE_BUFFER_VIEW);

	radv_buffer_view_init(view, device, pCreateInfo);

	*pView = radv_buffer_view_to_handle(view);

	return VK_SUCCESS;
}
1990 radv_DestroyBufferView(VkDevice _device
, VkBufferView bufferView
,
1991 const VkAllocationCallbacks
*pAllocator
)
1993 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1994 RADV_FROM_HANDLE(radv_buffer_view
, view
, bufferView
);
1999 vk_object_base_finish(&view
->base
);
2000 vk_free2(&device
->vk
.alloc
, pAllocator
, view
);