/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
32 #include "radv_radeon_winsys.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
38 radv_choose_tiling(struct radv_device
*device
,
39 const struct radv_image_create_info
*create_info
)
41 const VkImageCreateInfo
*pCreateInfo
= create_info
->vk_info
;
43 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
) {
44 assert(pCreateInfo
->samples
<= 1);
45 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
48 if (!vk_format_is_compressed(pCreateInfo
->format
) &&
49 !vk_format_is_depth_or_stencil(pCreateInfo
->format
)
50 && device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
51 /* this causes hangs in some VK CTS tests on GFX9. */
52 /* Textures with a very small height are recommended to be linear. */
53 if (pCreateInfo
->imageType
== VK_IMAGE_TYPE_1D
||
54 /* Only very thin and long 2D textures should benefit from
56 (pCreateInfo
->extent
.width
> 8 && pCreateInfo
->extent
.height
<= 2))
57 return RADEON_SURF_MODE_LINEAR_ALIGNED
;
60 /* MSAA resources must be 2D tiled. */
61 if (pCreateInfo
->samples
> 1)
62 return RADEON_SURF_MODE_2D
;
64 return RADEON_SURF_MODE_2D
;
68 radv_use_tc_compat_htile_for_image(struct radv_device
*device
,
69 const VkImageCreateInfo
*pCreateInfo
)
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
75 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
) ||
76 (pCreateInfo
->flags
& VK_IMAGE_CREATE_EXTENDED_USAGE_BIT
))
79 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
82 if (pCreateInfo
->mipLevels
> 1)
85 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
86 * tests - disable for now */
87 if (pCreateInfo
->samples
>= 2 &&
88 pCreateInfo
->format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
91 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
92 * supports 32-bit. Though, it's possible to enable TC-compat for
93 * 16-bit depth surfaces if no Z planes are compressed.
95 if (pCreateInfo
->format
!= VK_FORMAT_D32_SFLOAT_S8_UINT
&&
96 pCreateInfo
->format
!= VK_FORMAT_D32_SFLOAT
&&
97 pCreateInfo
->format
!= VK_FORMAT_D16_UNORM
)
100 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
101 const struct VkImageFormatListCreateInfoKHR
*format_list
=
102 (const struct VkImageFormatListCreateInfoKHR
*)
103 vk_find_struct_const(pCreateInfo
->pNext
,
104 IMAGE_FORMAT_LIST_CREATE_INFO_KHR
);
106 /* We have to ignore the existence of the list if viewFormatCount = 0 */
107 if (format_list
&& format_list
->viewFormatCount
) {
108 /* compatibility is transitive, so we only need to check
109 * one format with everything else.
111 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
112 if (pCreateInfo
->format
!= format_list
->pViewFormats
[i
])
124 radv_surface_has_scanout(struct radv_device
*device
, const struct radv_image_create_info
*info
)
129 if (!info
->bo_metadata
)
132 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
133 return info
->bo_metadata
->u
.gfx9
.swizzle_mode
== 0 || info
->bo_metadata
->u
.gfx9
.swizzle_mode
% 4 == 2;
135 return info
->bo_metadata
->u
.legacy
.scanout
;
140 radv_use_dcc_for_image(struct radv_device
*device
,
141 const struct radv_image
*image
,
142 const struct radv_image_create_info
*create_info
,
143 const VkImageCreateInfo
*pCreateInfo
)
145 bool dcc_compatible_formats
;
148 /* DCC (Delta Color Compression) is only available for GFX8+. */
149 if (device
->physical_device
->rad_info
.chip_class
< GFX8
)
152 if (device
->instance
->debug_flags
& RADV_DEBUG_NO_DCC
)
155 /* FIXME: DCC is broken for shareable images starting with GFX9 */
156 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
160 /* TODO: Enable DCC for storage images. */
161 if ((pCreateInfo
->usage
& VK_IMAGE_USAGE_STORAGE_BIT
) ||
162 (pCreateInfo
->flags
& VK_IMAGE_CREATE_EXTENDED_USAGE_BIT
))
165 if (pCreateInfo
->tiling
== VK_IMAGE_TILING_LINEAR
)
168 if (vk_format_is_subsampled(pCreateInfo
->format
) ||
169 vk_format_get_plane_count(pCreateInfo
->format
) > 1)
172 /* TODO: Enable DCC for mipmaps and array layers. */
173 if (pCreateInfo
->mipLevels
> 1 || pCreateInfo
->arrayLayers
> 1)
176 if (radv_surface_has_scanout(device
, create_info
))
179 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
180 * 2x can be enabled with an option.
182 if (pCreateInfo
->samples
> 2 ||
183 (pCreateInfo
->samples
== 2 &&
184 !device
->physical_device
->dcc_msaa_allowed
))
187 /* Determine if the formats are DCC compatible. */
188 dcc_compatible_formats
=
189 radv_is_colorbuffer_format_supported(pCreateInfo
->format
,
192 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
) {
193 const struct VkImageFormatListCreateInfoKHR
*format_list
=
194 (const struct VkImageFormatListCreateInfoKHR
*)
195 vk_find_struct_const(pCreateInfo
->pNext
,
196 IMAGE_FORMAT_LIST_CREATE_INFO_KHR
);
198 /* We have to ignore the existence of the list if viewFormatCount = 0 */
199 if (format_list
&& format_list
->viewFormatCount
) {
200 /* compatibility is transitive, so we only need to check
201 * one format with everything else. */
202 for (unsigned i
= 0; i
< format_list
->viewFormatCount
; ++i
) {
203 if (!radv_dcc_formats_compatible(pCreateInfo
->format
,
204 format_list
->pViewFormats
[i
]))
205 dcc_compatible_formats
= false;
208 dcc_compatible_formats
= false;
212 if (!dcc_compatible_formats
)
219 radv_prefill_surface_from_metadata(struct radv_device
*device
,
220 struct radeon_surf
*surface
,
221 const struct radv_image_create_info
*create_info
)
223 const struct radeon_bo_metadata
*md
= create_info
->bo_metadata
;
224 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
225 if (md
->u
.gfx9
.swizzle_mode
> 0)
226 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
228 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
230 surface
->u
.gfx9
.surf
.swizzle_mode
= md
->u
.gfx9
.swizzle_mode
;
232 surface
->u
.legacy
.pipe_config
= md
->u
.legacy
.pipe_config
;
233 surface
->u
.legacy
.bankw
= md
->u
.legacy
.bankw
;
234 surface
->u
.legacy
.bankh
= md
->u
.legacy
.bankh
;
235 surface
->u
.legacy
.tile_split
= md
->u
.legacy
.tile_split
;
236 surface
->u
.legacy
.mtilea
= md
->u
.legacy
.mtilea
;
237 surface
->u
.legacy
.num_banks
= md
->u
.legacy
.num_banks
;
239 if (md
->u
.legacy
.macrotile
== RADEON_LAYOUT_TILED
)
240 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_2D
, MODE
);
241 else if (md
->u
.legacy
.microtile
== RADEON_LAYOUT_TILED
)
242 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_1D
, MODE
);
244 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED
, MODE
);
250 radv_init_surface(struct radv_device
*device
,
251 const struct radv_image
*image
,
252 struct radeon_surf
*surface
,
254 const struct radv_image_create_info
*create_info
)
256 const VkImageCreateInfo
*pCreateInfo
= create_info
->vk_info
;
257 unsigned array_mode
= radv_choose_tiling(device
, create_info
);
258 VkFormat format
= vk_format_get_plane_format(pCreateInfo
->format
, plane_id
);
259 const struct vk_format_description
*desc
= vk_format_description(format
);
260 bool is_depth
, is_stencil
;
262 is_depth
= vk_format_has_depth(desc
);
263 is_stencil
= vk_format_has_stencil(desc
);
265 surface
->blk_w
= vk_format_get_blockwidth(format
);
266 surface
->blk_h
= vk_format_get_blockheight(format
);
268 surface
->bpe
= vk_format_get_blocksize(vk_format_depth_only(format
));
269 /* align byte per element on dword */
270 if (surface
->bpe
== 3) {
273 if (create_info
->bo_metadata
) {
274 radv_prefill_surface_from_metadata(device
, surface
, create_info
);
276 surface
->flags
= RADEON_SURF_SET(array_mode
, MODE
);
279 switch (pCreateInfo
->imageType
){
280 case VK_IMAGE_TYPE_1D
:
281 if (pCreateInfo
->arrayLayers
> 1)
282 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY
, TYPE
);
284 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_1D
, TYPE
);
286 case VK_IMAGE_TYPE_2D
:
287 if (pCreateInfo
->arrayLayers
> 1)
288 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY
, TYPE
);
290 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_2D
, TYPE
);
292 case VK_IMAGE_TYPE_3D
:
293 surface
->flags
|= RADEON_SURF_SET(RADEON_SURF_TYPE_3D
, TYPE
);
296 unreachable("unhandled image type");
300 surface
->flags
|= RADEON_SURF_ZBUFFER
;
301 if (radv_use_tc_compat_htile_for_image(device
, pCreateInfo
))
302 surface
->flags
|= RADEON_SURF_TC_COMPATIBLE_HTILE
;
306 surface
->flags
|= RADEON_SURF_SBUFFER
;
308 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
309 pCreateInfo
->imageType
== VK_IMAGE_TYPE_3D
&&
310 vk_format_get_blocksizebits(pCreateInfo
->format
) == 128 &&
311 vk_format_is_compressed(pCreateInfo
->format
))
312 surface
->flags
|= RADEON_SURF_NO_RENDER_TARGET
;
314 surface
->flags
|= RADEON_SURF_OPTIMIZE_FOR_SPACE
;
316 if (!radv_use_dcc_for_image(device
, image
, create_info
, pCreateInfo
))
317 surface
->flags
|= RADEON_SURF_DISABLE_DCC
;
319 if (radv_surface_has_scanout(device
, create_info
))
320 surface
->flags
|= RADEON_SURF_SCANOUT
;
325 static uint32_t si_get_bo_metadata_word1(struct radv_device
*device
)
327 return (ATI_VENDOR_ID
<< 16) | device
->physical_device
->rad_info
.pci_id
;
330 static inline unsigned
331 si_tile_mode_index(const struct radv_image_plane
*plane
, unsigned level
, bool stencil
)
334 return plane
->surface
.u
.legacy
.stencil_tiling_index
[level
];
336 return plane
->surface
.u
.legacy
.tiling_index
[level
];
339 static unsigned radv_map_swizzle(unsigned swizzle
)
343 return V_008F0C_SQ_SEL_Y
;
345 return V_008F0C_SQ_SEL_Z
;
347 return V_008F0C_SQ_SEL_W
;
349 return V_008F0C_SQ_SEL_0
;
351 return V_008F0C_SQ_SEL_1
;
352 default: /* VK_SWIZZLE_X */
353 return V_008F0C_SQ_SEL_X
;
358 radv_make_buffer_descriptor(struct radv_device
*device
,
359 struct radv_buffer
*buffer
,
365 const struct vk_format_description
*desc
;
367 uint64_t gpu_address
= radv_buffer_get_va(buffer
->bo
);
368 uint64_t va
= gpu_address
+ buffer
->offset
;
369 unsigned num_format
, data_format
;
371 desc
= vk_format_description(vk_format
);
372 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
373 stride
= desc
->block
.bits
/ 8;
375 num_format
= radv_translate_buffer_numformat(desc
, first_non_void
);
376 data_format
= radv_translate_buffer_dataformat(desc
, first_non_void
);
380 state
[1] = S_008F04_BASE_ADDRESS_HI(va
>> 32) |
381 S_008F04_STRIDE(stride
);
383 if (device
->physical_device
->rad_info
.chip_class
!= GFX8
&& stride
) {
388 state
[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc
->swizzle
[0])) |
389 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc
->swizzle
[1])) |
390 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc
->swizzle
[2])) |
391 S_008F0C_DST_SEL_W(radv_map_swizzle(desc
->swizzle
[3])) |
392 S_008F0C_NUM_FORMAT(num_format
) |
393 S_008F0C_DATA_FORMAT(data_format
);
397 si_set_mutable_tex_desc_fields(struct radv_device
*device
,
398 struct radv_image
*image
,
399 const struct legacy_surf_level
*base_level_info
,
401 unsigned base_level
, unsigned first_level
,
402 unsigned block_width
, bool is_stencil
,
403 bool is_storage_image
, uint32_t *state
)
405 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
406 uint64_t gpu_address
= image
->bo
? radv_buffer_get_va(image
->bo
) + image
->offset
: 0;
407 uint64_t va
= gpu_address
+ plane
->offset
;
408 enum chip_class chip_class
= device
->physical_device
->rad_info
.chip_class
;
409 uint64_t meta_va
= 0;
410 if (chip_class
>= GFX9
) {
412 va
+= plane
->surface
.u
.gfx9
.stencil_offset
;
414 va
+= plane
->surface
.u
.gfx9
.surf_offset
;
416 va
+= base_level_info
->offset
;
419 if (chip_class
>= GFX9
||
420 base_level_info
->mode
== RADEON_SURF_MODE_2D
)
421 state
[0] |= plane
->surface
.tile_swizzle
;
422 state
[1] &= C_008F14_BASE_ADDRESS_HI
;
423 state
[1] |= S_008F14_BASE_ADDRESS_HI(va
>> 40);
425 if (chip_class
>= GFX8
) {
426 state
[6] &= C_008F28_COMPRESSION_EN
;
428 if (!is_storage_image
&& radv_dcc_enabled(image
, first_level
)) {
429 meta_va
= gpu_address
+ image
->dcc_offset
;
430 if (chip_class
<= GFX8
)
431 meta_va
+= base_level_info
->dcc_offset
;
432 } else if (!is_storage_image
&&
433 radv_image_is_tc_compat_htile(image
)) {
434 meta_va
= gpu_address
+ image
->htile_offset
;
438 state
[6] |= S_008F28_COMPRESSION_EN(1);
439 state
[7] = meta_va
>> 8;
440 state
[7] |= plane
->surface
.tile_swizzle
;
444 if (chip_class
>= GFX9
) {
445 state
[3] &= C_008F1C_SW_MODE
;
446 state
[4] &= C_008F20_PITCH
;
449 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.stencil
.swizzle_mode
);
450 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.stencil
.epitch
);
452 state
[3] |= S_008F1C_SW_MODE(plane
->surface
.u
.gfx9
.surf
.swizzle_mode
);
453 state
[4] |= S_008F20_PITCH(plane
->surface
.u
.gfx9
.surf
.epitch
);
456 state
[5] &= C_008F24_META_DATA_ADDRESS
&
457 C_008F24_META_PIPE_ALIGNED
&
458 C_008F24_META_RB_ALIGNED
;
460 struct gfx9_surf_meta_flags meta
;
462 if (image
->dcc_offset
)
463 meta
= plane
->surface
.u
.gfx9
.dcc
;
465 meta
= plane
->surface
.u
.gfx9
.htile
;
467 state
[5] |= S_008F24_META_DATA_ADDRESS(meta_va
>> 40) |
468 S_008F24_META_PIPE_ALIGNED(meta
.pipe_aligned
) |
469 S_008F24_META_RB_ALIGNED(meta
.rb_aligned
);
473 unsigned pitch
= base_level_info
->nblk_x
* block_width
;
474 unsigned index
= si_tile_mode_index(plane
, base_level
, is_stencil
);
476 state
[3] &= C_008F1C_TILING_INDEX
;
477 state
[3] |= S_008F1C_TILING_INDEX(index
);
478 state
[4] &= C_008F20_PITCH
;
479 state
[4] |= S_008F20_PITCH(pitch
- 1);
483 static unsigned radv_tex_dim(VkImageType image_type
, VkImageViewType view_type
,
484 unsigned nr_layers
, unsigned nr_samples
, bool is_storage_image
, bool gfx9
)
486 if (view_type
== VK_IMAGE_VIEW_TYPE_CUBE
|| view_type
== VK_IMAGE_VIEW_TYPE_CUBE_ARRAY
)
487 return is_storage_image
? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_CUBE
;
489 /* GFX9 allocates 1D textures as 2D. */
490 if (gfx9
&& image_type
== VK_IMAGE_TYPE_1D
)
491 image_type
= VK_IMAGE_TYPE_2D
;
492 switch (image_type
) {
493 case VK_IMAGE_TYPE_1D
:
494 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY
: V_008F1C_SQ_RSRC_IMG_1D
;
495 case VK_IMAGE_TYPE_2D
:
497 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D_MSAA
;
499 return nr_layers
> 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY
: V_008F1C_SQ_RSRC_IMG_2D
;
500 case VK_IMAGE_TYPE_3D
:
501 if (view_type
== VK_IMAGE_VIEW_TYPE_3D
)
502 return V_008F1C_SQ_RSRC_IMG_3D
;
504 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY
;
506 unreachable("illegal image type");
510 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle
[4])
512 unsigned bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
514 if (swizzle
[3] == VK_SWIZZLE_X
) {
515 /* For the pre-defined border color values (white, opaque
516 * black, transparent black), the only thing that matters is
517 * that the alpha channel winds up in the correct place
518 * (because the RGB channels are all the same) so either of
519 * these enumerations will work.
521 if (swizzle
[2] == VK_SWIZZLE_Y
)
522 bc_swizzle
= V_008F20_BC_SWIZZLE_WZYX
;
524 bc_swizzle
= V_008F20_BC_SWIZZLE_WXYZ
;
525 } else if (swizzle
[0] == VK_SWIZZLE_X
) {
526 if (swizzle
[1] == VK_SWIZZLE_Y
)
527 bc_swizzle
= V_008F20_BC_SWIZZLE_XYZW
;
529 bc_swizzle
= V_008F20_BC_SWIZZLE_XWYZ
;
530 } else if (swizzle
[1] == VK_SWIZZLE_X
) {
531 bc_swizzle
= V_008F20_BC_SWIZZLE_YXWZ
;
532 } else if (swizzle
[2] == VK_SWIZZLE_X
) {
533 bc_swizzle
= V_008F20_BC_SWIZZLE_ZYXW
;
540 * Build the sampler view descriptor for a texture.
543 si_make_texture_descriptor(struct radv_device
*device
,
544 struct radv_image
*image
,
545 bool is_storage_image
,
546 VkImageViewType view_type
,
548 const VkComponentMapping
*mapping
,
549 unsigned first_level
, unsigned last_level
,
550 unsigned first_layer
, unsigned last_layer
,
551 unsigned width
, unsigned height
, unsigned depth
,
553 uint32_t *fmask_state
)
555 const struct vk_format_description
*desc
;
556 enum vk_swizzle swizzle
[4];
558 unsigned num_format
, data_format
, type
;
560 desc
= vk_format_description(vk_format
);
562 if (desc
->colorspace
== VK_FORMAT_COLORSPACE_ZS
) {
563 const unsigned char swizzle_xxxx
[4] = {0, 0, 0, 0};
564 vk_format_compose_swizzles(mapping
, swizzle_xxxx
, swizzle
);
566 vk_format_compose_swizzles(mapping
, desc
->swizzle
, swizzle
);
569 first_non_void
= vk_format_get_first_non_void_channel(vk_format
);
571 num_format
= radv_translate_tex_numformat(vk_format
, desc
, first_non_void
);
572 if (num_format
== ~0) {
576 data_format
= radv_translate_tex_dataformat(vk_format
, desc
, first_non_void
);
577 if (data_format
== ~0) {
581 /* S8 with either Z16 or Z32 HTILE need a special format. */
582 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
583 vk_format
== VK_FORMAT_S8_UINT
&&
584 radv_image_is_tc_compat_htile(image
)) {
585 if (image
->vk_format
== VK_FORMAT_D32_SFLOAT_S8_UINT
)
586 data_format
= V_008F14_IMG_DATA_FORMAT_S8_32
;
587 else if (image
->vk_format
== VK_FORMAT_D16_UNORM_S8_UINT
)
588 data_format
= V_008F14_IMG_DATA_FORMAT_S8_16
;
590 type
= radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, image
->info
.samples
,
591 is_storage_image
, device
->physical_device
->rad_info
.chip_class
>= GFX9
);
592 if (type
== V_008F1C_SQ_RSRC_IMG_1D_ARRAY
) {
594 depth
= image
->info
.array_size
;
595 } else if (type
== V_008F1C_SQ_RSRC_IMG_2D_ARRAY
||
596 type
== V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY
) {
597 if (view_type
!= VK_IMAGE_VIEW_TYPE_3D
)
598 depth
= image
->info
.array_size
;
599 } else if (type
== V_008F1C_SQ_RSRC_IMG_CUBE
)
600 depth
= image
->info
.array_size
/ 6;
603 state
[1] = (S_008F14_DATA_FORMAT(data_format
) |
604 S_008F14_NUM_FORMAT(num_format
));
605 state
[2] = (S_008F18_WIDTH(width
- 1) |
606 S_008F18_HEIGHT(height
- 1) |
607 S_008F18_PERF_MOD(4));
608 state
[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle
[0])) |
609 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle
[1])) |
610 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle
[2])) |
611 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle
[3])) |
612 S_008F1C_BASE_LEVEL(image
->info
.samples
> 1 ?
614 S_008F1C_LAST_LEVEL(image
->info
.samples
> 1 ?
615 util_logbase2(image
->info
.samples
) :
617 S_008F1C_TYPE(type
));
619 state
[5] = S_008F24_BASE_ARRAY(first_layer
);
623 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
624 unsigned bc_swizzle
= gfx9_border_color_swizzle(swizzle
);
626 /* Depth is the last accessible layer on Gfx9.
627 * The hw doesn't need to know the total number of layers.
629 if (type
== V_008F1C_SQ_RSRC_IMG_3D
)
630 state
[4] |= S_008F20_DEPTH(depth
- 1);
632 state
[4] |= S_008F20_DEPTH(last_layer
);
634 state
[4] |= S_008F20_BC_SWIZZLE(bc_swizzle
);
635 state
[5] |= S_008F24_MAX_MIP(image
->info
.samples
> 1 ?
636 util_logbase2(image
->info
.samples
) :
637 image
->info
.levels
- 1);
639 state
[3] |= S_008F1C_POW2_PAD(image
->info
.levels
> 1);
640 state
[4] |= S_008F20_DEPTH(depth
- 1);
641 state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
643 if (image
->dcc_offset
) {
644 unsigned swap
= radv_translate_colorswap(vk_format
, FALSE
);
646 state
[6] = S_008F28_ALPHA_IS_ON_MSB(swap
<= 1);
648 /* The last dword is unused by hw. The shader uses it to clear
649 * bits in the first dword of sampler state.
651 if (device
->physical_device
->rad_info
.chip_class
<= GFX7
&& image
->info
.samples
<= 1) {
652 if (first_level
== last_level
)
653 state
[7] = C_008F30_MAX_ANISO_RATIO
;
655 state
[7] = 0xffffffff;
659 /* Initialize the sampler view for FMASK. */
660 if (radv_image_has_fmask(image
)) {
661 uint32_t fmask_format
, num_format
;
662 uint64_t gpu_address
= radv_buffer_get_va(image
->bo
);
665 assert(image
->plane_count
== 1);
667 va
= gpu_address
+ image
->offset
+ image
->fmask
.offset
;
669 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
670 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK
;
671 switch (image
->info
.samples
) {
673 num_format
= V_008F14_IMG_FMASK_8_2_2
;
676 num_format
= V_008F14_IMG_FMASK_8_4_4
;
679 num_format
= V_008F14_IMG_FMASK_32_8_8
;
682 unreachable("invalid nr_samples");
685 switch (image
->info
.samples
) {
687 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2
;
690 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4
;
693 fmask_format
= V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8
;
697 fmask_format
= V_008F14_IMG_DATA_FORMAT_INVALID
;
699 num_format
= V_008F14_IMG_NUM_FORMAT_UINT
;
702 fmask_state
[0] = va
>> 8;
703 fmask_state
[0] |= image
->fmask
.tile_swizzle
;
704 fmask_state
[1] = S_008F14_BASE_ADDRESS_HI(va
>> 40) |
705 S_008F14_DATA_FORMAT(fmask_format
) |
706 S_008F14_NUM_FORMAT(num_format
);
707 fmask_state
[2] = S_008F18_WIDTH(width
- 1) |
708 S_008F18_HEIGHT(height
- 1);
709 fmask_state
[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X
) |
710 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X
) |
711 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X
) |
712 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X
) |
713 S_008F1C_TYPE(radv_tex_dim(image
->type
, view_type
, image
->info
.array_size
, 0, false, false));
715 fmask_state
[5] = S_008F24_BASE_ARRAY(first_layer
);
719 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
720 fmask_state
[3] |= S_008F1C_SW_MODE(image
->planes
[0].surface
.u
.gfx9
.fmask
.swizzle_mode
);
721 fmask_state
[4] |= S_008F20_DEPTH(last_layer
) |
722 S_008F20_PITCH(image
->planes
[0].surface
.u
.gfx9
.fmask
.epitch
);
723 fmask_state
[5] |= S_008F24_META_PIPE_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.pipe_aligned
) |
724 S_008F24_META_RB_ALIGNED(image
->planes
[0].surface
.u
.gfx9
.cmask
.rb_aligned
);
726 fmask_state
[3] |= S_008F1C_TILING_INDEX(image
->fmask
.tile_mode_index
);
727 fmask_state
[4] |= S_008F20_DEPTH(depth
- 1) |
728 S_008F20_PITCH(image
->fmask
.pitch_in_pixels
- 1);
729 fmask_state
[5] |= S_008F24_LAST_ARRAY(last_layer
);
731 } else if (fmask_state
)
732 memset(fmask_state
, 0, 8 * 4);
736 radv_query_opaque_metadata(struct radv_device
*device
,
737 struct radv_image
*image
,
738 struct radeon_bo_metadata
*md
)
740 static const VkComponentMapping fixedmapping
;
743 assert(image
->plane_count
== 1);
745 /* Metadata image format format version 1:
746 * [0] = 1 (metadata format identifier)
747 * [1] = (VENDOR_ID << 16) | PCI_ID
748 * [2:9] = image descriptor for the whole resource
749 * [2] is always 0, because the base address is cleared
750 * [9] is the DCC offset bits [39:8] from the beginning of
752 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
754 md
->metadata
[0] = 1; /* metadata image format version 1 */
756 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
757 md
->metadata
[1] = si_get_bo_metadata_word1(device
);
760 si_make_texture_descriptor(device
, image
, false,
761 (VkImageViewType
)image
->type
, image
->vk_format
,
762 &fixedmapping
, 0, image
->info
.levels
- 1, 0,
763 image
->info
.array_size
- 1,
764 image
->info
.width
, image
->info
.height
,
768 si_set_mutable_tex_desc_fields(device
, image
, &image
->planes
[0].surface
.u
.legacy
.level
[0], 0, 0, 0,
769 image
->planes
[0].surface
.blk_w
, false, false, desc
);
771 /* Clear the base address and set the relative DCC offset. */
773 desc
[1] &= C_008F14_BASE_ADDRESS_HI
;
774 desc
[7] = image
->dcc_offset
>> 8;
776 /* Dwords [2:9] contain the image descriptor. */
777 memcpy(&md
->metadata
[2], desc
, sizeof(desc
));
779 /* Dwords [10:..] contain the mipmap level offsets. */
780 if (device
->physical_device
->rad_info
.chip_class
<= GFX8
) {
781 for (i
= 0; i
<= image
->info
.levels
- 1; i
++)
782 md
->metadata
[10+i
] = image
->planes
[0].surface
.u
.legacy
.level
[i
].offset
>> 8;
783 md
->size_metadata
= (11 + image
->info
.levels
- 1) * 4;
788 radv_init_metadata(struct radv_device
*device
,
789 struct radv_image
*image
,
790 struct radeon_bo_metadata
*metadata
)
792 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
794 memset(metadata
, 0, sizeof(*metadata
));
796 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
797 metadata
->u
.gfx9
.swizzle_mode
= surface
->u
.gfx9
.surf
.swizzle_mode
;
799 metadata
->u
.legacy
.microtile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_1D
?
800 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
801 metadata
->u
.legacy
.macrotile
= surface
->u
.legacy
.level
[0].mode
>= RADEON_SURF_MODE_2D
?
802 RADEON_LAYOUT_TILED
: RADEON_LAYOUT_LINEAR
;
803 metadata
->u
.legacy
.pipe_config
= surface
->u
.legacy
.pipe_config
;
804 metadata
->u
.legacy
.bankw
= surface
->u
.legacy
.bankw
;
805 metadata
->u
.legacy
.bankh
= surface
->u
.legacy
.bankh
;
806 metadata
->u
.legacy
.tile_split
= surface
->u
.legacy
.tile_split
;
807 metadata
->u
.legacy
.mtilea
= surface
->u
.legacy
.mtilea
;
808 metadata
->u
.legacy
.num_banks
= surface
->u
.legacy
.num_banks
;
809 metadata
->u
.legacy
.stride
= surface
->u
.legacy
.level
[0].nblk_x
* surface
->bpe
;
810 metadata
->u
.legacy
.scanout
= (surface
->flags
& RADEON_SURF_SCANOUT
) != 0;
812 radv_query_opaque_metadata(device
, image
, metadata
);
816 radv_image_override_offset_stride(struct radv_device
*device
,
817 struct radv_image
*image
,
818 uint64_t offset
, uint32_t stride
)
820 struct radeon_surf
*surface
= &image
->planes
[0].surface
;
821 unsigned bpe
= vk_format_get_blocksizebits(image
->vk_format
) / 8;
823 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
825 surface
->u
.gfx9
.surf_pitch
= stride
;
826 surface
->u
.gfx9
.surf_slice_size
=
827 (uint64_t)stride
* surface
->u
.gfx9
.surf_height
* bpe
;
829 surface
->u
.gfx9
.surf_offset
= offset
;
831 surface
->u
.legacy
.level
[0].nblk_x
= stride
;
832 surface
->u
.legacy
.level
[0].slice_size_dw
=
833 ((uint64_t)stride
* surface
->u
.legacy
.level
[0].nblk_y
* bpe
) / 4;
836 for (unsigned i
= 0; i
< ARRAY_SIZE(surface
->u
.legacy
.level
); ++i
)
837 surface
->u
.legacy
.level
[i
].offset
+= offset
;
843 /* The number of samples can be specified independently of the texture. */
845 radv_image_get_fmask_info(struct radv_device
*device
,
846 struct radv_image
*image
,
848 struct radv_fmask_info
*out
)
850 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
851 out
->alignment
= image
->planes
[0].surface
.fmask_alignment
;
852 out
->size
= image
->planes
[0].surface
.fmask_size
;
853 out
->tile_swizzle
= image
->planes
[0].surface
.fmask_tile_swizzle
;
857 out
->slice_tile_max
= image
->planes
[0].surface
.u
.legacy
.fmask
.slice_tile_max
;
858 out
->tile_mode_index
= image
->planes
[0].surface
.u
.legacy
.fmask
.tiling_index
;
859 out
->pitch_in_pixels
= image
->planes
[0].surface
.u
.legacy
.fmask
.pitch_in_pixels
;
860 out
->bank_height
= image
->planes
[0].surface
.u
.legacy
.fmask
.bankh
;
861 out
->tile_swizzle
= image
->planes
[0].surface
.fmask_tile_swizzle
;
862 out
->alignment
= image
->planes
[0].surface
.fmask_alignment
;
863 out
->size
= image
->planes
[0].surface
.fmask_size
;
865 assert(!out
->tile_swizzle
|| !image
->shareable
);
869 radv_image_alloc_fmask(struct radv_device
*device
,
870 struct radv_image
*image
)
872 radv_image_get_fmask_info(device
, image
, image
->info
.samples
, &image
->fmask
);
874 image
->fmask
.offset
= align64(image
->size
, image
->fmask
.alignment
);
875 image
->size
= image
->fmask
.offset
+ image
->fmask
.size
;
876 image
->alignment
= MAX2(image
->alignment
, image
->fmask
.alignment
);
880 radv_image_get_cmask_info(struct radv_device
*device
,
881 struct radv_image
*image
,
882 struct radv_cmask_info
*out
)
884 unsigned pipe_interleave_bytes
= device
->physical_device
->rad_info
.pipe_interleave_bytes
;
885 unsigned num_pipes
= device
->physical_device
->rad_info
.num_tile_pipes
;
886 unsigned cl_width
, cl_height
;
888 assert(image
->plane_count
== 1);
890 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
891 out
->alignment
= image
->planes
[0].surface
.cmask_alignment
;
892 out
->size
= image
->planes
[0].surface
.cmask_size
;
909 case 16: /* Hawaii */
918 unsigned base_align
= num_pipes
* pipe_interleave_bytes
;
920 unsigned width
= align(image
->planes
[0].surface
.u
.legacy
.level
[0].nblk_x
, cl_width
*8);
921 unsigned height
= align(image
->planes
[0].surface
.u
.legacy
.level
[0].nblk_y
, cl_height
*8);
922 unsigned slice_elements
= (width
* height
) / (8*8);
924 /* Each element of CMASK is a nibble. */
925 unsigned slice_bytes
= slice_elements
/ 2;
927 out
->slice_tile_max
= (width
* height
) / (128*128);
928 if (out
->slice_tile_max
)
929 out
->slice_tile_max
-= 1;
931 out
->alignment
= MAX2(256, base_align
);
932 out
->size
= (image
->type
== VK_IMAGE_TYPE_3D
? image
->info
.depth
: image
->info
.array_size
) *
933 align(slice_bytes
, base_align
);
937 radv_image_alloc_cmask(struct radv_device
*device
,
938 struct radv_image
*image
)
940 uint32_t clear_value_size
= 0;
941 radv_image_get_cmask_info(device
, image
, &image
->cmask
);
943 image
->cmask
.offset
= align64(image
->size
, image
->cmask
.alignment
);
944 /* + 8 for storing the clear values */
945 if (!image
->clear_value_offset
) {
946 image
->clear_value_offset
= image
->cmask
.offset
+ image
->cmask
.size
;
947 clear_value_size
= 8;
949 image
->size
= image
->cmask
.offset
+ image
->cmask
.size
+ clear_value_size
;
950 image
->alignment
= MAX2(image
->alignment
, image
->cmask
.alignment
);
954 radv_image_alloc_dcc(struct radv_image
*image
)
956 assert(image
->plane_count
== 1);
958 image
->dcc_offset
= align64(image
->size
, image
->planes
[0].surface
.dcc_alignment
);
959 /* + 16 for storing the clear values + dcc pred */
960 image
->clear_value_offset
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
;
961 image
->fce_pred_offset
= image
->clear_value_offset
+ 8;
962 image
->dcc_pred_offset
= image
->clear_value_offset
+ 16;
963 image
->size
= image
->dcc_offset
+ image
->planes
[0].surface
.dcc_size
+ 24;
964 image
->alignment
= MAX2(image
->alignment
, image
->planes
[0].surface
.dcc_alignment
);
968 radv_image_alloc_htile(struct radv_image
*image
)
970 image
->htile_offset
= align64(image
->size
, image
->planes
[0].surface
.htile_alignment
);
972 /* + 8 for storing the clear values */
973 image
->clear_value_offset
= image
->htile_offset
+ image
->planes
[0].surface
.htile_size
;
974 image
->size
= image
->clear_value_offset
+ 8;
975 if (radv_image_is_tc_compat_htile(image
)) {
976 /* Metadata for the TC-compatible HTILE hardware bug which
977 * have to be fixed by updating ZRANGE_PRECISION when doing
978 * fast depth clears to 0.0f.
980 image
->tc_compat_zrange_offset
= image
->clear_value_offset
+ 8;
981 image
->size
= image
->clear_value_offset
+ 16;
983 image
->alignment
= align64(image
->alignment
, image
->planes
[0].surface
.htile_alignment
);
987 radv_image_can_enable_dcc_or_cmask(struct radv_image
*image
)
989 if (image
->info
.samples
<= 1 &&
990 image
->info
.width
* image
->info
.height
<= 512 * 512) {
991 /* Do not enable CMASK or DCC for small surfaces where the cost
992 * of the eliminate pass can be higher than the benefit of fast
993 * clear. RadeonSI does this, but the image threshold is
999 return image
->usage
& VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
&&
1000 (image
->exclusive
|| image
->queue_family_mask
== 1);
/* DCC requires the shared DCC/CMASK preconditions plus a DCC surface. */
static bool
radv_image_can_enable_dcc(struct radv_image *image)
{
	if (!radv_image_can_enable_dcc_or_cmask(image))
		return false;

	return radv_image_has_dcc(image);
}
1011 radv_image_can_enable_cmask(struct radv_image
*image
)
1013 if (image
->planes
[0].surface
.bpe
> 8 && image
->info
.samples
== 1) {
1014 /* Do not enable CMASK for non-MSAA images (fast color clear)
1015 * because 128 bit formats are not supported, but FMASK might
1021 return radv_image_can_enable_dcc_or_cmask(image
) &&
1022 image
->info
.levels
== 1 &&
1023 image
->info
.depth
== 1 &&
1024 !image
->planes
[0].surface
.is_linear
;
1028 radv_image_can_enable_fmask(struct radv_image
*image
)
1030 return image
->info
.samples
> 1 && vk_format_is_color(image
->vk_format
);
1034 radv_image_can_enable_htile(struct radv_image
*image
)
1036 return radv_image_has_htile(image
) &&
1037 image
->info
.levels
== 1 &&
1038 image
->info
.width
* image
->info
.height
>= 8 * 8;
1041 static void radv_image_disable_dcc(struct radv_image
*image
)
1043 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1044 image
->planes
[i
].surface
.dcc_size
= 0;
1047 static void radv_image_disable_htile(struct radv_image
*image
)
1049 for (unsigned i
= 0; i
< image
->plane_count
; ++i
)
1050 image
->planes
[i
].surface
.htile_size
= 0;
/* Create a radv_image: copy the Vulkan creation parameters, compute the
 * per-plane surface layouts via the winsys, and append metadata surfaces
 * (DCC / CMASK / FMASK / HTILE) where supported. Several stripped lines
 * (e.g. the "if (!image)" guard before the OUT_OF_HOST_MEMORY return and
 * the else branches of the metadata selection) are not visible in this
 * extract.
 */
1054 radv_image_create(VkDevice _device
,
1055 const struct radv_image_create_info
*create_info
,
1056 const VkAllocationCallbacks
* alloc
,
/* Unwrap the device handle and fetch the Vulkan-level create info. */
1059 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1060 const VkImageCreateInfo
*pCreateInfo
= create_info
->vk_info
;
1061 struct radv_image
*image
= NULL
;
1062 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO
);
/* Multi-planar formats get one radv_image_plane appended per plane. */
1064 const unsigned plane_count
= vk_format_get_plane_count(pCreateInfo
->format
);
1065 const size_t image_struct_size
= sizeof(*image
) + sizeof(struct radv_image_plane
) * plane_count
;
/* Sanity-check application-provided parameters. */
1067 radv_assert(pCreateInfo
->mipLevels
> 0);
1068 radv_assert(pCreateInfo
->arrayLayers
> 0);
1069 radv_assert(pCreateInfo
->samples
> 0);
1070 radv_assert(pCreateInfo
->extent
.width
> 0);
1071 radv_assert(pCreateInfo
->extent
.height
> 0);
1072 radv_assert(pCreateInfo
->extent
.depth
> 0);
/* Zero-initialized allocation, so all offsets/sizes start at 0. */
1074 image
= vk_zalloc2(&device
->alloc
, alloc
, image_struct_size
, 8,
1075 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1077 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
/* Copy the creation parameters into the driver image. */
1079 image
->type
= pCreateInfo
->imageType
;
1080 image
->info
.width
= pCreateInfo
->extent
.width
;
1081 image
->info
.height
= pCreateInfo
->extent
.height
;
1082 image
->info
.depth
= pCreateInfo
->extent
.depth
;
1083 image
->info
.samples
= pCreateInfo
->samples
;
1084 image
->info
.storage_samples
= pCreateInfo
->samples
;
1085 image
->info
.array_size
= pCreateInfo
->arrayLayers
;
1086 image
->info
.levels
= pCreateInfo
->mipLevels
;
1087 image
->info
.num_channels
= vk_format_get_nr_components(pCreateInfo
->format
);
1089 image
->vk_format
= pCreateInfo
->format
;
1090 image
->tiling
= pCreateInfo
->tiling
;
1091 image
->usage
= pCreateInfo
->usage
;
1092 image
->flags
= pCreateInfo
->flags
;
/* For concurrent sharing, build the mask of queue families that may
 * access the image; VK_QUEUE_FAMILY_EXTERNAL opens it to all families.
 */
1094 image
->exclusive
= pCreateInfo
->sharingMode
== VK_SHARING_MODE_EXCLUSIVE
;
1095 if (pCreateInfo
->sharingMode
== VK_SHARING_MODE_CONCURRENT
) {
1096 for (uint32_t i
= 0; i
< pCreateInfo
->queueFamilyIndexCount
; ++i
)
1097 if (pCreateInfo
->pQueueFamilyIndices
[i
] == VK_QUEUE_FAMILY_EXTERNAL
)
1098 image
->queue_family_mask
|= (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1100 image
->queue_family_mask
|= 1u << pCreateInfo
->pQueueFamilyIndices
[i
];
/* Shareable iff external-memory create info is chained. */
1103 image
->shareable
= vk_find_struct_const(pCreateInfo
->pNext
,
1104 EXTERNAL_MEMORY_IMAGE_CREATE_INFO
) != NULL
;
/* NOTE(review): surf_index points at a per-device counter; presumably
 * used by surface initialization for color surfaces -- confirm against
 * radv_init_surface.
 */
1105 if (!vk_format_is_depth_or_stencil(pCreateInfo
->format
) &&
1106 !radv_surface_has_scanout(device
, create_info
) && !image
->shareable
) {
1107 image
->info
.surf_index
= &device
->image_mrt_offset_counter
;
1110 image
->plane_count
= plane_count
;
/* Lay out each plane back-to-back, honoring surface alignment. */
1112 image
->alignment
= 1;
1113 for (unsigned plane
= 0; plane
< plane_count
; ++plane
) {
1114 struct ac_surf_info info
= image
->info
;
1115 radv_init_surface(device
, image
, &image
->planes
[plane
].surface
, plane
, create_info
);
/* Subsampled formats: plane dimensions divide the image dimensions. */
1118 const struct vk_format_description
*desc
= vk_format_description(pCreateInfo
->format
);
1119 assert(info
.width
% desc
->width_divisor
== 0);
1120 assert(info
.height
% desc
->height_divisor
== 0);
1122 info
.width
/= desc
->width_divisor
;
1123 info
.height
/= desc
->height_divisor
;
1126 device
->ws
->surface_init(device
->ws
, &info
, &image
->planes
[plane
].surface
);
1128 image
->planes
[plane
].offset
= align(image
->size
, image
->planes
[plane
].surface
.surf_alignment
);
1129 image
->size
= image
->planes
[plane
].offset
+ image
->planes
[plane
].surface
.surf_size
;
1130 image
->alignment
= image
->planes
[plane
].surface
.surf_alignment
;
1132 image
->planes
[plane
].format
= vk_format_get_plane_format(image
->vk_format
, plane
);
/* Metadata surfaces: prefer DCC, fall back to CMASK; FMASK for MSAA,
 * otherwise HTILE for depth (unless disabled via RADV_DEBUG_NO_HIZ).
 */
1135 if (!create_info
->no_metadata_planes
) {
1136 /* Try to enable DCC first. */
1137 if (radv_image_can_enable_dcc(image
)) {
1138 radv_image_alloc_dcc(image
);
1139 if (image
->info
.samples
> 1) {
1140 /* CMASK should be enabled because DCC fast
1141 * clear with MSAA needs it.
1143 assert(radv_image_can_enable_cmask(image
));
1144 radv_image_alloc_cmask(device
, image
);
1147 /* When DCC cannot be enabled, try CMASK. */
1148 radv_image_disable_dcc(image
);
1149 if (radv_image_can_enable_cmask(image
)) {
1150 radv_image_alloc_cmask(device
, image
);
1154 /* Try to enable FMASK for multisampled images. */
1155 if (radv_image_can_enable_fmask(image
)) {
1156 radv_image_alloc_fmask(device
, image
);
1158 /* Otherwise, try to enable HTILE for depth surfaces. */
1159 if (radv_image_can_enable_htile(image
) &&
1160 !(device
->instance
->debug_flags
& RADV_DEBUG_NO_HIZ
)) {
1161 image
->tc_compatible_htile
= image
->planes
[0].surface
.flags
& RADEON_SURF_TC_COMPATIBLE_HTILE
;
1162 radv_image_alloc_htile(image
);
1164 radv_image_disable_htile(image
);
/* no_metadata_planes: strip all metadata surfaces. */
1168 radv_image_disable_dcc(image
);
1169 radv_image_disable_htile(image
);
/* Sparse binding: page-align the reservation and back it with a
 * virtual BO covering the whole image.
 */
1172 if (pCreateInfo
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
) {
1173 image
->alignment
= MAX2(image
->alignment
, 4096);
1174 image
->size
= align64(image
->size
, image
->alignment
);
1177 image
->bo
= device
->ws
->buffer_create(device
->ws
, image
->size
, image
->alignment
,
1178 0, RADEON_FLAG_VIRTUAL
, RADV_BO_PRIORITY_VIRTUAL
);
1180 vk_free2(&device
->alloc
, alloc
, image
);
1181 return vk_error(device
->instance
, VK_ERROR_OUT_OF_DEVICE_MEMORY
);
1185 *pImage
= radv_image_to_handle(image
);
/* Fill the texture descriptor (and, for plane 0, the FMASK descriptor)
 * for one plane of an image view; used for both sampled and storage
 * descriptors. Some argument lines and the stencil if/else are stripped
 * from this extract.
 */
1191 radv_image_view_make_descriptor(struct radv_image_view
*iview
,
1192 struct radv_device
*device
,
1194 const VkComponentMapping
*components
,
1195 bool is_storage_image
, unsigned plane_id
,
1196 unsigned descriptor_plane_id
)
1198 struct radv_image
*image
= iview
->image
;
1199 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1200 const struct vk_format_description
*format_desc
= vk_format_description(image
->vk_format
);
1201 bool is_stencil
= iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
;
1203 union radv_descriptor
*descriptor
;
1204 uint32_t hw_level
= 0;
/* Storage and sampled views are written to separate descriptors. */
1206 if (is_storage_image
) {
1207 descriptor
= &iview
->storage_descriptor
;
1209 descriptor
= &iview
->descriptor
;
/* Scale the plane's block width into units of the view format. */
1212 assert(vk_format_get_plane_count(vk_format
) == 1);
1213 assert(plane
->surface
.blk_w
% vk_format_get_blockwidth(plane
->format
) == 0);
1214 blk_w
= plane
->surface
.blk_w
/ vk_format_get_blockwidth(plane
->format
) * vk_format_get_blockwidth(vk_format
);
/* On GFX9+ the base mip is programmed directly into the descriptor;
 * older chips keep 0 here and patch per-level fields below.
 */
1216 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
)
1217 hw_level
= iview
->base_mip
;
1218 si_make_texture_descriptor(device
, image
, is_storage_image
,
1222 hw_level
, hw_level
+ iview
->level_count
- 1,
1224 iview
->base_layer
+ iview
->layer_count
- 1,
/* Chroma planes may be subsampled; divide by the format divisors. */
1225 iview
->extent
.width
/ (plane_id
? format_desc
->width_divisor
: 1),
1226 iview
->extent
.height
/ (plane_id
? format_desc
->height_divisor
: 1),
1227 iview
->extent
.depth
,
1228 descriptor
->plane_descriptors
[descriptor_plane_id
],
/* Only descriptor plane 0 carries an FMASK descriptor. */
1229 descriptor_plane_id
? NULL
: descriptor
->fmask_descriptor
);
/* Select the legacy per-level info used to patch mutable descriptor
 * fields; stencil has a separate level array.
 */
1231 const struct legacy_surf_level
*base_level_info
= NULL
;
1232 if (device
->physical_device
->rad_info
.chip_class
<= GFX9
) {
1234 base_level_info
= &plane
->surface
.u
.legacy
.stencil_level
[iview
->base_mip
];
1236 base_level_info
= &plane
->surface
.u
.legacy
.level
[iview
->base_mip
];
1238 si_set_mutable_tex_desc_fields(device
, image
,
1243 blk_w
, is_stencil
, is_storage_image
, descriptor
->plane_descriptors
[descriptor_plane_id
]);
/* Map a VkImageAspectFlags value to a plane index. Only the PLANE_1 and
 * PLANE_2 case labels survive in this extract; the switch header and the
 * return values are not visible, so the exact mapping cannot be stated
 * here -- presumably other aspects select plane 0 (TODO confirm against
 * the full source).
 */
1247 radv_plane_from_aspect(VkImageAspectFlags mask
)
1250 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1252 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1260 radv_get_aspect_format(struct radv_image
*image
, VkImageAspectFlags mask
)
1263 case VK_IMAGE_ASPECT_PLANE_0_BIT
:
1264 return image
->planes
[0].format
;
1265 case VK_IMAGE_ASPECT_PLANE_1_BIT
:
1266 return image
->planes
[1].format
;
1267 case VK_IMAGE_ASPECT_PLANE_2_BIT
:
1268 return image
->planes
[2].format
;
1269 case VK_IMAGE_ASPECT_STENCIL_BIT
:
1270 return vk_format_stencil_only(image
->vk_format
);
1271 case VK_IMAGE_ASPECT_DEPTH_BIT
:
1272 return vk_format_depth_only(image
->vk_format
);
1273 case VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT
:
1274 return vk_format_depth_only(image
->vk_format
);
1276 return image
->vk_format
;
/* Initialize an image view: validate the subresource range, derive the
 * view extent (including the GFX9 compressed-format mip workaround), and
 * build sampled + storage descriptors for each plane. Several stripped
 * lines (breaks, default label, closing braces) are not visible in this
 * extract.
 */
1281 radv_image_view_init(struct radv_image_view
*iview
,
1282 struct radv_device
*device
,
1283 const VkImageViewCreateInfo
* pCreateInfo
)
1285 RADV_FROM_HANDLE(radv_image
, image
, pCreateInfo
->image
);
1286 const VkImageSubresourceRange
*range
= &pCreateInfo
->subresourceRange
;
/* Validate the layer range against the image type: array size for
 * 1D/2D, minified depth for 3D.
 */
1288 switch (image
->type
) {
1289 case VK_IMAGE_TYPE_1D
:
1290 case VK_IMAGE_TYPE_2D
:
1291 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1 <= image
->info
.array_size
);
1293 case VK_IMAGE_TYPE_3D
:
1294 assert(range
->baseArrayLayer
+ radv_get_layerCount(image
, range
) - 1
1295 <= radv_minify(image
->info
.depth
, range
->baseMipLevel
));
1298 unreachable("bad VkImageType");
/* Copy view parameters. */
1300 iview
->image
= image
;
1301 iview
->bo
= image
->bo
;
1302 iview
->type
= pCreateInfo
->viewType
;
1303 iview
->plane_id
= radv_plane_from_aspect(pCreateInfo
->subresourceRange
.aspectMask
);
1304 iview
->aspect_mask
= pCreateInfo
->subresourceRange
.aspectMask
;
1305 iview
->multiple_planes
= vk_format_get_plane_count(image
->vk_format
) > 1 && iview
->aspect_mask
== VK_IMAGE_ASPECT_COLOR_BIT
;
1306 iview
->vk_format
= pCreateInfo
->format
;
/* Single-aspect views of depth/stencil use the matching
 * single-aspect format.
 */
1308 if (iview
->aspect_mask
== VK_IMAGE_ASPECT_STENCIL_BIT
) {
1309 iview
->vk_format
= vk_format_stencil_only(iview
->vk_format
);
1310 } else if (iview
->aspect_mask
== VK_IMAGE_ASPECT_DEPTH_BIT
) {
1311 iview
->vk_format
= vk_format_depth_only(iview
->vk_format
);
/* GFX9+ descriptors take the base-level size; older chips take the
 * minified size of the base mip.
 */
1314 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1315 iview
->extent
= (VkExtent3D
) {
1316 .width
= image
->info
.width
,
1317 .height
= image
->info
.height
,
1318 .depth
= image
->info
.depth
,
1321 iview
->extent
= (VkExtent3D
) {
1322 .width
= radv_minify(image
->info
.width
, range
->baseMipLevel
),
1323 .height
= radv_minify(image
->info
.height
, range
->baseMipLevel
),
1324 .depth
= radv_minify(image
->info
.depth
, range
->baseMipLevel
),
/* Block-incompatible view format: rescale the extent from view
 * blocks to image blocks.
 */
1328 if (iview
->vk_format
!= image
->planes
[iview
->plane_id
].format
) {
1329 unsigned view_bw
= vk_format_get_blockwidth(iview
->vk_format
);
1330 unsigned view_bh
= vk_format_get_blockheight(iview
->vk_format
);
1331 unsigned img_bw
= vk_format_get_blockwidth(image
->vk_format
);
1332 unsigned img_bh
= vk_format_get_blockheight(image
->vk_format
);
1334 iview
->extent
.width
= round_up_u32(iview
->extent
.width
* view_bw
, img_bw
);
1335 iview
->extent
.height
= round_up_u32(iview
->extent
.height
* view_bh
, img_bh
);
1337 /* Comment ported from amdvlk -
1338 * If we have the following image:
1339 * Uncompressed pixels Compressed block sizes (4x4)
1340 * mip0: 22 x 22 6 x 6
1341 * mip1: 11 x 11 3 x 3
1346 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1347 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1348 * divide-by-two integer math):
1354 * This means that mip2 will be missing texels.
1356 * Fix this by calculating the base mip's width and height, then convert that, and round it
1357 * back up to get the level 0 size.
1358 * Clamp the converted size between the original values, and next power of two, which
1359 * means we don't oversize the image.
1361 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
1362 vk_format_is_compressed(image
->vk_format
) &&
1363 !vk_format_is_compressed(iview
->vk_format
)) {
1364 unsigned lvl_width
= radv_minify(image
->info
.width
, range
->baseMipLevel
);
1365 unsigned lvl_height
= radv_minify(image
->info
.height
, range
->baseMipLevel
);
1367 lvl_width
= round_up_u32(lvl_width
* view_bw
, img_bw
);
1368 lvl_height
= round_up_u32(lvl_height
* view_bh
, img_bh
);
/* Scale the converted base-mip size back up to a level-0 size. */
1370 lvl_width
<<= range
->baseMipLevel
;
1371 lvl_height
<<= range
->baseMipLevel
;
1373 iview
->extent
.width
= CLAMP(lvl_width
, iview
->extent
.width
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_pitch
);
1374 iview
->extent
.height
= CLAMP(lvl_height
, iview
->extent
.height
, iview
->image
->planes
[0].surface
.u
.gfx9
.surf_height
);
/* Subresource window within the image. */
1378 iview
->base_layer
= range
->baseArrayLayer
;
1379 iview
->layer_count
= radv_get_layerCount(image
, range
);
1380 iview
->base_mip
= range
->baseMipLevel
;
1381 iview
->level_count
= radv_get_levelCount(image
, range
);
/* Build the sampled (false) and storage (true) descriptors for each
 * plane of the view.
 */
1383 for (unsigned i
= 0; i
< (iview
->multiple_planes
? vk_format_get_plane_count(image
->vk_format
) : 1); ++i
) {
1384 VkFormat format
= vk_format_get_plane_format(iview
->vk_format
, i
);
1385 radv_image_view_make_descriptor(iview
, device
, format
, &pCreateInfo
->components
, false, iview
->plane_id
+ i
, i
);
1386 radv_image_view_make_descriptor(iview
, device
, format
, &pCreateInfo
->components
, true, iview
->plane_id
+ i
, i
);
1390 bool radv_layout_has_htile(const struct radv_image
*image
,
1391 VkImageLayout layout
,
1392 unsigned queue_mask
)
1394 if (radv_image_is_tc_compat_htile(image
))
1395 return layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1397 return radv_image_has_htile(image
) &&
1398 (layout
== VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
||
1399 (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1400 queue_mask
== (1u << RADV_QUEUE_GENERAL
)));
1403 bool radv_layout_is_htile_compressed(const struct radv_image
*image
,
1404 VkImageLayout layout
,
1405 unsigned queue_mask
)
1407 if (radv_image_is_tc_compat_htile(image
))
1408 return layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1410 return radv_image_has_htile(image
) &&
1411 (layout
== VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
||
1412 (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1413 queue_mask
== (1u << RADV_QUEUE_GENERAL
)));
1416 bool radv_layout_can_fast_clear(const struct radv_image
*image
,
1417 VkImageLayout layout
,
1418 unsigned queue_mask
)
1420 return layout
== VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
;
1423 bool radv_layout_dcc_compressed(const struct radv_image
*image
,
1424 VkImageLayout layout
,
1425 unsigned queue_mask
)
1427 /* Don't compress compute transfer dst, as image stores are not supported. */
1428 if (layout
== VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
&&
1429 (queue_mask
& (1u << RADV_QUEUE_COMPUTE
)))
1432 return radv_image_has_dcc(image
) && layout
!= VK_IMAGE_LAYOUT_GENERAL
;
1436 unsigned radv_image_queue_family_mask(const struct radv_image
*image
, uint32_t family
, uint32_t queue_family
)
1438 if (!image
->exclusive
)
1439 return image
->queue_family_mask
;
1440 if (family
== VK_QUEUE_FAMILY_EXTERNAL
)
1441 return (1u << RADV_MAX_QUEUE_FAMILIES
) - 1u;
1442 if (family
== VK_QUEUE_FAMILY_IGNORED
)
1443 return 1u << queue_family
;
1444 return 1u << family
;
/* Vulkan entry point: route Android gralloc-backed images to the gralloc
 * path, otherwise forward to radv_image_create, propagating the WSI
 * scanout flag. The gralloc conditional and the trailing initializer
 * fields are stripped from this extract.
 */
1448 radv_CreateImage(VkDevice device
,
1449 const VkImageCreateInfo
*pCreateInfo
,
1450 const VkAllocationCallbacks
*pAllocator
,
/* Android: images created from a native buffer bypass radv_image_create. */
1454 const VkNativeBufferANDROID
*gralloc_info
=
1455 vk_find_struct_const(pCreateInfo
->pNext
, NATIVE_BUFFER_ANDROID
);
1458 return radv_image_from_gralloc(device
, pCreateInfo
, gralloc_info
,
1459 pAllocator
, pImage
);
/* WSI swapchain images request a scanout-capable layout. */
1462 const struct wsi_image_create_info
*wsi_info
=
1463 vk_find_struct_const(pCreateInfo
->pNext
, WSI_IMAGE_CREATE_INFO_MESA
);
1464 bool scanout
= wsi_info
&& wsi_info
->scanout
;
1466 return radv_image_create(device
,
1467 &(struct radv_image_create_info
) {
1468 .vk_info
= pCreateInfo
,
/* Vulkan entry point: destroy an image, releasing the sparse virtual BO
 * and any memory the image owns. The "if (!image) return;" null-handle
 * guard is stripped from this extract.
 */
1476 radv_DestroyImage(VkDevice _device
, VkImage _image
,
1477 const VkAllocationCallbacks
*pAllocator
)
1479 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1480 RADV_FROM_HANDLE(radv_image
, image
, _image
);
/* Sparse images own the virtual BO created in radv_image_create. */
1485 if (image
->flags
& VK_IMAGE_CREATE_SPARSE_BINDING_BIT
)
1486 device
->ws
->buffer_destroy(image
->bo
);
/* owned_memory is freed together with the image -- presumably set by
 * the gralloc path; confirm against radv_image_from_gralloc.
 */
1488 if (image
->owned_memory
!= VK_NULL_HANDLE
)
1489 radv_FreeMemory(_device
, image
->owned_memory
, pAllocator
);
1491 vk_free2(&device
->alloc
, pAllocator
, image
);
/* Vulkan entry point: report the memory layout (offset, pitches, size) of
 * one mip level / array layer of an image plane. GFX9+ and legacy chips
 * store per-level layout differently. Parameter lines and the else
 * keywords are stripped from this extract.
 */
1494 void radv_GetImageSubresourceLayout(
1497 const VkImageSubresource
* pSubresource
,
1498 VkSubresourceLayout
* pLayout
)
1500 RADV_FROM_HANDLE(radv_image
, image
, _image
);
1501 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1502 int level
= pSubresource
->mipLevel
;
1503 int layer
= pSubresource
->arrayLayer
;
/* The queried aspect selects the plane whose surface is reported. */
1505 unsigned plane_id
= radv_plane_from_aspect(pSubresource
->aspectMask
);
1507 struct radv_image_plane
*plane
= &image
->planes
[plane_id
];
1508 struct radeon_surf
*surface
= &plane
->surface
;
/* GFX9+: per-level offsets plus a uniform slice size. */
1510 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
1511 pLayout
->offset
= plane
->offset
+ surface
->u
.gfx9
.offset
[level
] + surface
->u
.gfx9
.surf_slice_size
* layer
;
1512 if (image
->vk_format
== VK_FORMAT_R32G32B32_UINT
||
1513 image
->vk_format
== VK_FORMAT_R32G32B32_SINT
||
1514 image
->vk_format
== VK_FORMAT_R32G32B32_SFLOAT
) {
1515 /* Adjust the number of bytes between each row because
1516 * the pitch is actually the number of components per
1519 pLayout
->rowPitch
= surface
->u
.gfx9
.surf_pitch
* surface
->bpe
/ 3;
1521 assert(util_is_power_of_two_nonzero(surface
->bpe
));
1522 pLayout
->rowPitch
= surface
->u
.gfx9
.surf_pitch
* surface
->bpe
;
1525 pLayout
->arrayPitch
= surface
->u
.gfx9
.surf_slice_size
;
1526 pLayout
->depthPitch
= surface
->u
.gfx9
.surf_slice_size
;
1527 pLayout
->size
= surface
->u
.gfx9
.surf_slice_size
;
/* 3D images: total size covers all (minified) depth slices. */
1528 if (image
->type
== VK_IMAGE_TYPE_3D
)
1529 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
/* Legacy chips: per-level slice size stored in dwords. */
1531 pLayout
->offset
= plane
->offset
+ surface
->u
.legacy
.level
[level
].offset
+ (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4 * layer
;
1532 pLayout
->rowPitch
= surface
->u
.legacy
.level
[level
].nblk_x
* surface
->bpe
;
1533 pLayout
->arrayPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1534 pLayout
->depthPitch
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1535 pLayout
->size
= (uint64_t)surface
->u
.legacy
.level
[level
].slice_size_dw
* 4;
1536 if (image
->type
== VK_IMAGE_TYPE_3D
)
1537 pLayout
->size
*= u_minify(image
->info
.depth
, level
);
/* Vulkan entry point: allocate and initialize a radv_image_view. The
 * allocation-failure conditional and the success return are stripped
 * from this extract.
 */
1543 radv_CreateImageView(VkDevice _device
,
1544 const VkImageViewCreateInfo
*pCreateInfo
,
1545 const VkAllocationCallbacks
*pAllocator
,
1548 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1549 struct radv_image_view
*view
;
1551 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1552 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1554 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
/* Fill the view state and descriptors. */
1556 radv_image_view_init(view
, device
, pCreateInfo
);
1558 *pView
= radv_image_view_to_handle(view
);
/* Vulkan entry point: free an image view. The null-handle guard is
 * stripped from this extract.
 */
1564 radv_DestroyImageView(VkDevice _device
, VkImageView _iview
,
1565 const VkAllocationCallbacks
*pAllocator
)
1567 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1568 RADV_FROM_HANDLE(radv_image_view
, iview
, _iview
);
1572 vk_free2(&device
->alloc
, pAllocator
, iview
);
1575 void radv_buffer_view_init(struct radv_buffer_view
*view
,
1576 struct radv_device
*device
,
1577 const VkBufferViewCreateInfo
* pCreateInfo
)
1579 RADV_FROM_HANDLE(radv_buffer
, buffer
, pCreateInfo
->buffer
);
1581 view
->bo
= buffer
->bo
;
1582 view
->range
= pCreateInfo
->range
== VK_WHOLE_SIZE
?
1583 buffer
->size
- pCreateInfo
->offset
: pCreateInfo
->range
;
1584 view
->vk_format
= pCreateInfo
->format
;
1586 radv_make_buffer_descriptor(device
, buffer
, view
->vk_format
,
1587 pCreateInfo
->offset
, view
->range
, view
->state
);
/* Vulkan entry point: allocate and initialize a radv_buffer_view. The
 * allocation-failure conditional and the success return are stripped
 * from this extract.
 */
1591 radv_CreateBufferView(VkDevice _device
,
1592 const VkBufferViewCreateInfo
*pCreateInfo
,
1593 const VkAllocationCallbacks
*pAllocator
,
1594 VkBufferView
*pView
)
1596 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1597 struct radv_buffer_view
*view
;
1599 view
= vk_alloc2(&device
->alloc
, pAllocator
, sizeof(*view
), 8,
1600 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
1602 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
/* Fill the view state and descriptor. */
1604 radv_buffer_view_init(view
, device
, pCreateInfo
);
1606 *pView
= radv_buffer_view_to_handle(view
);
/* Vulkan entry point: free a buffer view. The null-handle guard is
 * stripped from this extract.
 */
1612 radv_DestroyBufferView(VkDevice _device
, VkBufferView bufferView
,
1613 const VkAllocationCallbacks
*pAllocator
)
1615 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1616 RADV_FROM_HANDLE(radv_buffer_view
, view
, bufferView
);
1621 vk_free2(&device
->alloc
, pAllocator
, view
);