radv: update VK_KHR_image_format_list for Vulkan 1.2
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36
37 static unsigned
38 radv_choose_tiling(struct radv_device *device,
39 const VkImageCreateInfo *pCreateInfo,
40 VkFormat format)
41 {
42 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
43 assert(pCreateInfo->samples <= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED;
45 }
46
47 if (!vk_format_is_compressed(format) &&
48 !vk_format_is_depth_or_stencil(format)
49 && device->physical_device->rad_info.chip_class <= GFX8) {
50 /* this causes hangs in some VK CTS tests on GFX9. */
51 /* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
53 /* Only very thin and long 2D textures should benefit from
54 * linear_aligned. */
55 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED;
57 }
58
59 /* MSAA resources must be 2D tiled. */
60 if (pCreateInfo->samples > 1)
61 return RADEON_SURF_MODE_2D;
62
63 return RADEON_SURF_MODE_2D;
64 }
65
66 static bool
67 radv_use_tc_compat_htile_for_image(struct radv_device *device,
68 const VkImageCreateInfo *pCreateInfo,
69 VkFormat format)
70 {
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device->physical_device->rad_info.chip_class < GFX8)
73 return false;
74
75 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
76 (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
77 return false;
78
79 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
80 return false;
81
82 if (pCreateInfo->mipLevels > 1)
83 return false;
84
85 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
86 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
87 */
88 if (pCreateInfo->samples >= 2 &&
89 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
90 (format == VK_FORMAT_D32_SFLOAT &&
91 device->physical_device->rad_info.chip_class == GFX10)))
92 return false;
93
94 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
95 * supports 32-bit. Though, it's possible to enable TC-compat for
96 * 16-bit depth surfaces if no Z planes are compressed.
97 */
98 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
99 format != VK_FORMAT_D32_SFLOAT &&
100 format != VK_FORMAT_D16_UNORM)
101 return false;
102
103 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
104 const struct VkImageFormatListCreateInfo *format_list =
105 (const struct VkImageFormatListCreateInfo *)
106 vk_find_struct_const(pCreateInfo->pNext,
107 IMAGE_FORMAT_LIST_CREATE_INFO);
108
109 /* We have to ignore the existence of the list if viewFormatCount = 0 */
110 if (format_list && format_list->viewFormatCount) {
111 /* compatibility is transitive, so we only need to check
112 * one format with everything else.
113 */
114 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
115 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
116 continue;
117
118 if (format != format_list->pViewFormats[i])
119 return false;
120 }
121 } else {
122 return false;
123 }
124 }
125
126 return true;
127 }
128
129 static bool
130 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
131 {
132 if (info->bo_metadata) {
133 if (device->physical_device->rad_info.chip_class >= GFX9)
134 return info->bo_metadata->u.gfx9.scanout;
135 else
136 return info->bo_metadata->u.legacy.scanout;
137 }
138
139 return info->scanout;
140 }
141
142 static bool
143 radv_use_dcc_for_image(struct radv_device *device,
144 const struct radv_image *image,
145 const VkImageCreateInfo *pCreateInfo,
146 VkFormat format)
147 {
148 bool dcc_compatible_formats;
149 bool blendable;
150
151 /* DCC (Delta Color Compression) is only available for GFX8+. */
152 if (device->physical_device->rad_info.chip_class < GFX8)
153 return false;
154
155 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
156 return false;
157
158 if (image->shareable)
159 return false;
160
161 /* TODO: Enable DCC for storage images. */
162 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
163 (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
164 return false;
165
166 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
167 return false;
168
169 if (vk_format_is_subsampled(format) ||
170 vk_format_get_plane_count(format) > 1)
171 return false;
172
173 /* TODO: Enable DCC for mipmaps on GFX9+. */
174 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
175 device->physical_device->rad_info.chip_class >= GFX9)
176 return false;
177
178 /* Do not enable DCC for mipmapped arrays because performance is worse. */
179 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
180 return false;
181
182 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
183 * 2x can be enabled with an option.
184 */
185 if (pCreateInfo->samples > 2 ||
186 (pCreateInfo->samples == 2 &&
187 !device->physical_device->dcc_msaa_allowed))
188 return false;
189
190 /* Determine if the formats are DCC compatible. */
191 dcc_compatible_formats =
192 radv_is_colorbuffer_format_supported(format,
193 &blendable);
194
195 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
196 const struct VkImageFormatListCreateInfo *format_list =
197 (const struct VkImageFormatListCreateInfo *)
198 vk_find_struct_const(pCreateInfo->pNext,
199 IMAGE_FORMAT_LIST_CREATE_INFO);
200
201 /* We have to ignore the existence of the list if viewFormatCount = 0 */
202 if (format_list && format_list->viewFormatCount) {
203 /* compatibility is transitive, so we only need to check
204 * one format with everything else. */
205 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
206 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
207 continue;
208
209 if (!radv_dcc_formats_compatible(format,
210 format_list->pViewFormats[i]))
211 dcc_compatible_formats = false;
212 }
213 } else {
214 dcc_compatible_formats = false;
215 }
216 }
217
218 if (!dcc_compatible_formats)
219 return false;
220
221 return true;
222 }
223
224 static bool
225 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
226 struct radv_image *image)
227 {
228 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
229 return false;
230
231 /* TC-compat CMASK is only available for GFX8+. */
232 if (device->physical_device->rad_info.chip_class < GFX8)
233 return false;
234
235 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
236 return false;
237
238 if (radv_image_has_dcc(image))
239 return false;
240
241 if (!radv_image_has_cmask(image))
242 return false;
243
244 return true;
245 }
246
247 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
248 {
249 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
250 }
251
252 static bool
253 radv_is_valid_opaque_metadata(const struct radv_device *device,
254 const struct radeon_bo_metadata *md)
255 {
256 if (md->metadata[0] != 1 ||
257 md->metadata[1] != si_get_bo_metadata_word1(device))
258 return false;
259
260 if (md->size_metadata < 40)
261 return false;
262
263 return true;
264 }
265
266 static void
267 radv_patch_surface_from_metadata(struct radv_device *device,
268 struct radeon_surf *surface,
269 const struct radeon_bo_metadata *md)
270 {
271 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
272
273 if (device->physical_device->rad_info.chip_class >= GFX9) {
274 if (md->u.gfx9.swizzle_mode > 0)
275 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
276 else
277 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
278
279 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
280 } else {
281 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
282 surface->u.legacy.bankw = md->u.legacy.bankw;
283 surface->u.legacy.bankh = md->u.legacy.bankh;
284 surface->u.legacy.tile_split = md->u.legacy.tile_split;
285 surface->u.legacy.mtilea = md->u.legacy.mtilea;
286 surface->u.legacy.num_banks = md->u.legacy.num_banks;
287
288 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
289 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
290 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
291 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
292 else
293 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
294
295 }
296 }
297
298 static VkResult
299 radv_patch_image_dimensions(struct radv_device *device,
300 struct radv_image *image,
301 const struct radv_image_create_info *create_info,
302 struct ac_surf_info *image_info)
303 {
304 unsigned width = image->info.width;
305 unsigned height = image->info.height;
306
307 /*
308 * minigbm sometimes allocates bigger images which is going to result in
309 * weird strides and other properties. Lets be lenient where possible and
310 * fail it on GFX10 (as we cannot cope there).
311 *
312 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
313 */
314 if (create_info->bo_metadata &&
315 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
316 const struct radeon_bo_metadata *md = create_info->bo_metadata;
317
318 if (device->physical_device->rad_info.chip_class >= GFX10) {
319 width = G_00A004_WIDTH_LO(md->metadata[3]) +
320 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
321 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
322 } else {
323 width = G_008F18_WIDTH(md->metadata[4]) + 1;
324 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
325 }
326 }
327
328 if (image->info.width == width && image->info.height == height)
329 return VK_SUCCESS;
330
331 if (width < image->info.width || height < image->info.height) {
332 fprintf(stderr,
333 "The imported image has smaller dimensions than the internal\n"
334 "dimensions. Using it is going to fail badly, so we reject\n"
335 "this import.\n"
336 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
337 image->info.width, image->info.height, width, height);
338 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
339 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
340 fprintf(stderr,
341 "Tried to import an image with inconsistent width on GFX10.\n"
342 "As GFX10 has no separate stride fields we cannot cope with\n"
343 "an inconsistency in width and will fail this import.\n"
344 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
345 image->info.width, image->info.height, width, height);
346 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
347 } else {
348 fprintf(stderr,
349 "Tried to import an image with inconsistent width on pre-GFX10.\n"
350 "As GFX10 has no separate stride fields we cannot cope with\n"
351 "an inconsistency and would fail on GFX10.\n"
352 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
353 image->info.width, image->info.height, width, height);
354 }
355 image_info->width = width;
356 image_info->height = height;
357
358 return VK_SUCCESS;
359 }
360
361 static VkResult
362 radv_patch_image_from_extra_info(struct radv_device *device,
363 struct radv_image *image,
364 const struct radv_image_create_info *create_info,
365 struct ac_surf_info *image_info)
366 {
367 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
368 if (result != VK_SUCCESS)
369 return result;
370
371 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
372 if (create_info->bo_metadata) {
373 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
374 create_info->bo_metadata);
375 }
376
377 if (radv_surface_has_scanout(device, create_info)) {
378 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
379 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
380
381 image->info.surf_index = NULL;
382 }
383 }
384 return VK_SUCCESS;
385 }
386
/* Initialize the radeon_surf for one plane of an image: block dimensions,
 * bytes-per-element and all the RADEON_SURF_* flags (mode, type, Z/S,
 * TC-compat HTILE, DCC) derived from the Vulkan create info.
 * Returns 0 on success.
 */
static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo,
		  VkFormat image_format)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	/* For multi-plane (YCbCr) formats, work on this plane's sub-format. */
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	/* bpe is taken from the depth-only part: combined Z/S surfaces are
	 * sized by the depth component, stencil is tracked separately. */
	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* align byte per element on dword */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	/* Translate the Vulkan image type (+ array-ness) to the surface TYPE. */
	switch (pCreateInfo->imageType){
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	/* NOTE(review): presumably a hw/addrlib limitation for 128-bit
	 * compressed 3D textures on GFX9+ — render target is disallowed. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	return 0;
}
456
457 static inline unsigned
458 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
459 {
460 if (stencil)
461 return plane->surface.u.legacy.stencil_tiling_index[level];
462 else
463 return plane->surface.u.legacy.tiling_index[level];
464 }
465
466 static unsigned radv_map_swizzle(unsigned swizzle)
467 {
468 switch (swizzle) {
469 case VK_SWIZZLE_Y:
470 return V_008F0C_SQ_SEL_Y;
471 case VK_SWIZZLE_Z:
472 return V_008F0C_SQ_SEL_Z;
473 case VK_SWIZZLE_W:
474 return V_008F0C_SQ_SEL_W;
475 case VK_SWIZZLE_0:
476 return V_008F0C_SQ_SEL_0;
477 case VK_SWIZZLE_1:
478 return V_008F0C_SQ_SEL_1;
479 default: /* VK_SWIZZLE_X */
480 return V_008F0C_SQ_SEL_X;
481 }
482 }
483
/* Build a 4-dword typed buffer descriptor (V#) for the given buffer range.
 *
 * state[0..1]: 48-bit base address + stride.
 * state[2]:    NUM_RECORDS.
 * state[3]:    dst swizzle selects + format encoding (per-generation layout).
 */
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	/* Element stride in bytes, from the format's block size. */
	stride = desc->block.bits / 8;

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	/* NOTE(review): on GFX8 NUM_RECORDS is kept in bytes, on other
	 * generations it is in units of stride — hence the division is
	 * skipped only for GFX8. Verify against the chip ISA docs. */
	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		/* GFX10 uses a single unified img_format field. */
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		/* Pre-GFX10 splits the format into num_format + data_format. */
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}
541
/* Patch the address/tiling/compression fields of an already-built image
 * descriptor (T#). These are the fields that depend on where the image is
 * bound and on the selected mip/plane, so they are (re)written here rather
 * than at view-creation time.
 *
 * base_level_info: per-level layout, only used pre-GFX9 (legacy tiling).
 * disable_compression: force DCC/HTILE metadata off for this descriptor.
 * state: the 8-dword descriptor to patch in place.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	/* Compute the surface VA: GFX9+ keeps one offset per aspect,
	 * pre-GFX9 uses the per-mip-level offset. */
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	/* Dwords 0-1: 48-bit base address (256-byte aligned, hence >> 8),
	 * with the tile swizzle ORed into the low bits where applicable. */
	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		/* Pick the metadata (DCC or TC-compat HTILE) address, if any. */
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			/* Fold the tile swizzle into the DCC address low bits. */
			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		/* GFX10 layout: SW_MODE in dword 3, metadata address split
		 * between dwords 6 and 7. */
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			/* dcc_offset != 0 tells us which metadata meta_va
			 * points at (set in the GFX8+ branch above). */
			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;
			else
				meta = plane->surface.u.gfx9.htile;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		/* GFX9 layout: SW_MODE + epitch, metadata address in dword 5. */
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;
			else
				meta = plane->surface.u.gfx9.htile;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}
658
659 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
660 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
661 {
662 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
663 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
664
665 /* GFX9 allocates 1D textures as 2D. */
666 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
667 image_type = VK_IMAGE_TYPE_2D;
668 switch (image_type) {
669 case VK_IMAGE_TYPE_1D:
670 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
671 case VK_IMAGE_TYPE_2D:
672 if (nr_samples > 1)
673 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
674 else
675 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
676 case VK_IMAGE_TYPE_3D:
677 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
678 return V_008F1C_SQ_RSRC_IMG_3D;
679 else
680 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
681 default:
682 unreachable("illegal image type");
683 }
684 }
685
686 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
687 {
688 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
689
690 if (swizzle[3] == VK_SWIZZLE_X) {
691 /* For the pre-defined border color values (white, opaque
692 * black, transparent black), the only thing that matters is
693 * that the alpha channel winds up in the correct place
694 * (because the RGB channels are all the same) so either of
695 * these enumerations will work.
696 */
697 if (swizzle[2] == VK_SWIZZLE_Y)
698 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
699 else
700 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
701 } else if (swizzle[0] == VK_SWIZZLE_X) {
702 if (swizzle[1] == VK_SWIZZLE_Y)
703 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
704 else
705 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
706 } else if (swizzle[1] == VK_SWIZZLE_X) {
707 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
708 } else if (swizzle[2] == VK_SWIZZLE_X) {
709 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
710 }
711
712 return bc_swizzle;
713 }
714
715 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
716 {
717 const struct vk_format_description *desc = vk_format_description(format);
718
719 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
720 return desc->swizzle[3] == VK_SWIZZLE_X;
721
722 return radv_translate_colorswap(format, false) <= 1;
723 }
/**
 * Build the sampler view descriptor for a texture (GFX10).
 *
 * Fills the 8-dword image descriptor (state) and, when the image has FMASK,
 * the 8-dword FMASK descriptor (fmask_state; zeroed otherwise). Levels and
 * layers are inclusive ranges; width/height/depth are the view's mip-0 size.
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format].img_format;

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		/* Depth/stencil reads replicate the single read channel. */
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	/* Normalize depth to the layer count for arrayed/cube resource types. */
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	/* MSAA images encode the sample count in the level fields instead. */
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
				       0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
				       util_logbase2(image->info.samples) :
				       last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(0) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		/* FMASK only exists for single-plane color images. */
		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->fmask_offset;

		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
		fmask_state[7] = 0;
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}
848
849 /**
850 * Build the sampler view descriptor for a texture (SI-GFX9)
851 */
852 static void
853 si_make_texture_descriptor(struct radv_device *device,
854 struct radv_image *image,
855 bool is_storage_image,
856 VkImageViewType view_type,
857 VkFormat vk_format,
858 const VkComponentMapping *mapping,
859 unsigned first_level, unsigned last_level,
860 unsigned first_layer, unsigned last_layer,
861 unsigned width, unsigned height, unsigned depth,
862 uint32_t *state,
863 uint32_t *fmask_state)
864 {
865 const struct vk_format_description *desc;
866 enum vk_swizzle swizzle[4];
867 int first_non_void;
868 unsigned num_format, data_format, type;
869
870 desc = vk_format_description(vk_format);
871
872 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
873 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
874 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
875 } else {
876 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
877 }
878
879 first_non_void = vk_format_get_first_non_void_channel(vk_format);
880
881 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
882 if (num_format == ~0) {
883 num_format = 0;
884 }
885
886 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
887 if (data_format == ~0) {
888 data_format = 0;
889 }
890
891 /* S8 with either Z16 or Z32 HTILE need a special format. */
892 if (device->physical_device->rad_info.chip_class == GFX9 &&
893 vk_format == VK_FORMAT_S8_UINT &&
894 radv_image_is_tc_compat_htile(image)) {
895 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
896 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
897 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
898 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
899 }
900 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
901 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
902 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
903 height = 1;
904 depth = image->info.array_size;
905 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
906 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
907 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
908 depth = image->info.array_size;
909 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
910 depth = image->info.array_size / 6;
911
912 state[0] = 0;
913 state[1] = (S_008F14_DATA_FORMAT(data_format) |
914 S_008F14_NUM_FORMAT(num_format));
915 state[2] = (S_008F18_WIDTH(width - 1) |
916 S_008F18_HEIGHT(height - 1) |
917 S_008F18_PERF_MOD(4));
918 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
919 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
920 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
921 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
922 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
923 0 : first_level) |
924 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
925 util_logbase2(image->info.samples) :
926 last_level) |
927 S_008F1C_TYPE(type));
928 state[4] = 0;
929 state[5] = S_008F24_BASE_ARRAY(first_layer);
930 state[6] = 0;
931 state[7] = 0;
932
933 if (device->physical_device->rad_info.chip_class == GFX9) {
934 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
935
936 /* Depth is the last accessible layer on Gfx9.
937 * The hw doesn't need to know the total number of layers.
938 */
939 if (type == V_008F1C_SQ_RSRC_IMG_3D)
940 state[4] |= S_008F20_DEPTH(depth - 1);
941 else
942 state[4] |= S_008F20_DEPTH(last_layer);
943
944 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
945 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
946 util_logbase2(image->info.samples) :
947 image->info.levels - 1);
948 } else {
949 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
950 state[4] |= S_008F20_DEPTH(depth - 1);
951 state[5] |= S_008F24_LAST_ARRAY(last_layer);
952 }
953 if (image->dcc_offset) {
954 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
955 } else {
956 /* The last dword is unused by hw. The shader uses it to clear
957 * bits in the first dword of sampler state.
958 */
959 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
960 if (first_level == last_level)
961 state[7] = C_008F30_MAX_ANISO_RATIO;
962 else
963 state[7] = 0xffffffff;
964 }
965 }
966
967 /* Initialize the sampler view for FMASK. */
968 if (radv_image_has_fmask(image)) {
969 uint32_t fmask_format, num_format;
970 uint64_t gpu_address = radv_buffer_get_va(image->bo);
971 uint64_t va;
972
973 assert(image->plane_count == 1);
974
975 va = gpu_address + image->offset + image->fmask_offset;
976
977 if (device->physical_device->rad_info.chip_class == GFX9) {
978 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
979 switch (image->info.samples) {
980 case 2:
981 num_format = V_008F14_IMG_FMASK_8_2_2;
982 break;
983 case 4:
984 num_format = V_008F14_IMG_FMASK_8_4_4;
985 break;
986 case 8:
987 num_format = V_008F14_IMG_FMASK_32_8_8;
988 break;
989 default:
990 unreachable("invalid nr_samples");
991 }
992 } else {
993 switch (image->info.samples) {
994 case 2:
995 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
996 break;
997 case 4:
998 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
999 break;
1000 case 8:
1001 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1002 break;
1003 default:
1004 assert(0);
1005 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1006 }
1007 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1008 }
1009
1010 fmask_state[0] = va >> 8;
1011 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1012 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1013 S_008F14_DATA_FORMAT(fmask_format) |
1014 S_008F14_NUM_FORMAT(num_format);
1015 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1016 S_008F18_HEIGHT(height - 1);
1017 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1018 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1019 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1020 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1021 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1022 fmask_state[4] = 0;
1023 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1024 fmask_state[6] = 0;
1025 fmask_state[7] = 0;
1026
1027 if (device->physical_device->rad_info.chip_class == GFX9) {
1028 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1029 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1030 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1031 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
1032 S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
1033
1034 if (radv_image_is_tc_compat_cmask(image)) {
1035 va = gpu_address + image->offset + image->cmask_offset;
1036
1037 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1038 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1039 fmask_state[7] |= va >> 8;
1040 }
1041 } else {
1042 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1043 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1044 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1045 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1046
1047 if (radv_image_is_tc_compat_cmask(image)) {
1048 va = gpu_address + image->offset + image->cmask_offset;
1049
1050 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1051 fmask_state[7] |= va >> 8;
1052 }
1053 }
1054 } else if (fmask_state)
1055 memset(fmask_state, 0, 8 * 4);
1056 }
1057
1058 static void
1059 radv_make_texture_descriptor(struct radv_device *device,
1060 struct radv_image *image,
1061 bool is_storage_image,
1062 VkImageViewType view_type,
1063 VkFormat vk_format,
1064 const VkComponentMapping *mapping,
1065 unsigned first_level, unsigned last_level,
1066 unsigned first_layer, unsigned last_layer,
1067 unsigned width, unsigned height, unsigned depth,
1068 uint32_t *state,
1069 uint32_t *fmask_state)
1070 {
1071 if (device->physical_device->rad_info.chip_class >= GFX10) {
1072 gfx10_make_texture_descriptor(device, image, is_storage_image,
1073 view_type, vk_format, mapping,
1074 first_level, last_level,
1075 first_layer, last_layer,
1076 width, height, depth,
1077 state, fmask_state);
1078 } else {
1079 si_make_texture_descriptor(device, image, is_storage_image,
1080 view_type, vk_format, mapping,
1081 first_level, last_level,
1082 first_layer, last_layer,
1083 width, height, depth,
1084 state, fmask_state);
1085 }
1086 }
1087
/* Fill md->metadata with the opaque, driver-defined words that accompany a
 * shared image: a format version, the PCI id, an image descriptor with the
 * base address cleared and, on pre-GFX9 chips, the per-level offsets.
 */
static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	/* Only single-plane images can be exported with opaque metadata. */
	assert(image->plane_count == 1);

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);


	/* Build a descriptor covering the whole resource (all levels/layers). */
	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
1140
/* Populate the winsys BO metadata (tiling description plus the opaque
 * driver words) that travels with an exported/shared image. Only plane 0
 * is described.
 */
void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->planes[0].surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+ only needs the swizzle mode and the scanout flag. */
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
		metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	} else {
		/* Legacy chips export the full tiling configuration. */
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}
	radv_query_opaque_metadata(device, image, metadata);
}
1169
/* Override the computed surface layout with an externally-imposed offset and
 * row stride (used for imported images whose layout is dictated by another
 * process/driver). On GFX9+ a stride of 0 leaves the computed pitch alone.
 */
void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	struct radeon_surf *surface = &image->planes[0].surface;
	unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (stride) {
			surface->u.gfx9.surf_pitch = stride;
			/* Keep the slice size consistent with the new pitch. */
			surface->u.gfx9.surf_slice_size =
				(uint64_t)stride * surface->u.gfx9.surf_height * bpe;
		}
		surface->u.gfx9.surf_offset = offset;
	} else {
		surface->u.legacy.level[0].nblk_x = stride;
		surface->u.legacy.level[0].slice_size_dw =
			((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;

		if (offset) {
			/* Legacy level offsets are absolute, so shift all of them. */
			for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
				surface->u.legacy.level[i].offset += offset;
		}

	}
}
1197
1198 static void
1199 radv_image_alloc_fmask(struct radv_device *device,
1200 struct radv_image *image)
1201 {
1202 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1203
1204 image->fmask_offset = align64(image->size, fmask_alignment);
1205 image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
1206 image->alignment = MAX2(image->alignment, fmask_alignment);
1207 }
1208
/* Append CMASK metadata (fast color clears) to the image, followed by 8
 * bytes of fast-clear value storage if no earlier allocator (DCC) has
 * already reserved it.
 */
static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
	unsigned cmask_size = image->planes[0].surface.cmask_size;
	uint32_t clear_value_size = 0;

	/* No CMASK was computed for this surface. */
	if (!cmask_size)
		return;

	assert(cmask_alignment);

	image->cmask_offset = align64(image->size, cmask_alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask_offset + cmask_size;
		clear_value_size = 8;
	}
	image->size = image->cmask_offset + cmask_size + clear_value_size;
	image->alignment = MAX2(image->alignment, cmask_alignment);
}
1231
/* Append DCC metadata to the image, plus per-mip driver state: 8 bytes of
 * clear value, 8 bytes of FCE predicate and 8 bytes of DCC predicate for
 * each level (24 bytes per level in total).
 */
static void
radv_image_alloc_dcc(struct radv_image *image)
{
	assert(image->plane_count == 1);

	image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
	/* + 24 for storing the clear values + fce pred + dcc pred for each mip */
	image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
	image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
	image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
	image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
	image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}
1245
1246 static void
1247 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1248 {
1249 image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1250
1251 /* + 8 for storing the clear values */
1252 image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
1253 image->size = image->clear_value_offset + image->info.levels * 8;
1254 if (radv_image_is_tc_compat_htile(image) &&
1255 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1256 /* Metadata for the TC-compatible HTILE hardware bug which
1257 * have to be fixed by updating ZRANGE_PRECISION when doing
1258 * fast depth clears to 0.0f.
1259 */
1260 image->tc_compat_zrange_offset = image->size;
1261 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1262 }
1263 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1264 }
1265
1266 static inline bool
1267 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
1268 {
1269 if (image->info.samples <= 1 &&
1270 image->info.width * image->info.height <= 512 * 512) {
1271 /* Do not enable CMASK or DCC for small surfaces where the cost
1272 * of the eliminate pass can be higher than the benefit of fast
1273 * clear. RadeonSI does this, but the image threshold is
1274 * different.
1275 */
1276 return false;
1277 }
1278
1279 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
1280 (image->exclusive || image->queue_family_mask == 1);
1281 }
1282
1283 static inline bool
1284 radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
1285 {
1286 if (!radv_image_can_enable_dcc_or_cmask(image) ||
1287 !radv_image_has_dcc(image))
1288 return false;
1289
1290 /* On GFX8, DCC layers can be interleaved and it's currently only
1291 * enabled if slice size is equal to the per slice fast clear size
1292 * because the driver assumes that portions of multiple layers are
1293 * contiguous during fast clears.
1294 */
1295 if (image->info.array_size > 1) {
1296 const struct legacy_surf_level *surf_level =
1297 &image->planes[0].surface.u.legacy.level[0];
1298
1299 assert(device->physical_device->rad_info.chip_class == GFX8);
1300
1301 if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
1302 return false;
1303 }
1304
1305 return true;
1306 }
1307
1308 static inline bool
1309 radv_image_can_enable_cmask(struct radv_image *image)
1310 {
1311 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1312 /* Do not enable CMASK for non-MSAA images (fast color clear)
1313 * because 128 bit formats are not supported, but FMASK might
1314 * still be used.
1315 */
1316 return false;
1317 }
1318
1319 return radv_image_can_enable_dcc_or_cmask(image) &&
1320 image->info.levels == 1 &&
1321 image->info.depth == 1 &&
1322 !image->planes[0].surface.is_linear;
1323 }
1324
1325 static inline bool
1326 radv_image_can_enable_fmask(struct radv_image *image)
1327 {
1328 return image->info.samples > 1 && vk_format_is_color(image->vk_format);
1329 }
1330
1331 static inline bool
1332 radv_image_can_enable_htile(struct radv_image *image)
1333 {
1334 return radv_image_has_htile(image) &&
1335 image->info.levels == 1 &&
1336 image->info.width * image->info.height >= 8 * 8;
1337 }
1338
1339 static void radv_image_disable_dcc(struct radv_image *image)
1340 {
1341 for (unsigned i = 0; i < image->plane_count; ++i)
1342 image->planes[i].surface.dcc_size = 0;
1343 }
1344
1345 static void radv_image_disable_htile(struct radv_image *image)
1346 {
1347 for (unsigned i = 0; i < image->plane_count; ++i)
1348 image->planes[i].surface.htile_size = 0;
1349 }
1350
/* Compute the full memory layout of an image: per-plane surfaces first,
 * then optional metadata (DCC/CMASK/FMASK/HTILE) appended at the end.
 * Fills image->size, image->alignment and all metadata offsets.
 */
VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			/* Chroma planes of multi-planar formats are subsampled. */
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		/* Planes are packed back-to-back, each at its own alignment. */
		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		image->alignment = image->planes[plane].surface.surf_alignment;

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	if (!create_info.no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(device, image)) {
			radv_image_alloc_dcc(image);
			if (image->info.samples > 1) {
				/* CMASK should be enabled because DCC fast
				 * clear with MSAA needs it.
				 */
				assert(radv_image_can_enable_cmask(image));
				radv_image_alloc_cmask(device, image);
			}
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			radv_image_disable_dcc(image);
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);

			if (radv_use_tc_compat_cmask_for_image(device, image))
				image->tc_compatible_cmask = true;
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
				radv_image_alloc_htile(device, image);
			} else {
				radv_image_disable_htile(image);
			}
		}
	} else {
		radv_image_disable_dcc(image);
		radv_image_disable_htile(image);
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
1434
/* Create a radv_image from a VkImageCreateInfo (plus radv-specific extra
 * info). For Android-hardware-buffer external images the layout computation
 * is delayed until the backing buffer (and thus the real format) is known.
 * Sparse images additionally get a virtual BO allocated here.
 */
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	/* Android external formats override pCreateInfo->format. */
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
							      pCreateInfo->format);
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	/* The plane array is allocated inline, right after the image struct. */
	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		/* External/foreign queue families grant access to all families. */
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
		                     EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;

	image->shareable = external_info;
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	/* AHB-backed images cannot be laid out until import time. */
	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		/* Sparse images are backed by a virtual address range only. */
		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
		                                      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->alloc, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}
1531
/* Build one plane's sampled-image or storage-image descriptor for an image
 * view, writing into iview->descriptor or iview->storage_descriptor.
 */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	/* Block width in units of the view format's blocks. */
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	/* GFX9+ addresses mips via the descriptor's base level; older chips
	 * bake the base mip into the address instead (hw_level stays 0).
	 */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}
1589
1590 static unsigned
1591 radv_plane_from_aspect(VkImageAspectFlags mask)
1592 {
1593 switch(mask) {
1594 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1595 return 1;
1596 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1597 return 2;
1598 default:
1599 return 0;
1600 }
1601 }
1602
1603 VkFormat
1604 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1605 {
1606 switch(mask) {
1607 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1608 return image->planes[0].format;
1609 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1610 return image->planes[1].format;
1611 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1612 return image->planes[2].format;
1613 case VK_IMAGE_ASPECT_STENCIL_BIT:
1614 return vk_format_stencil_only(image->vk_format);
1615 case VK_IMAGE_ASPECT_DEPTH_BIT:
1616 return vk_format_depth_only(image->vk_format);
1617 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1618 return vk_format_depth_only(image->vk_format);
1619 default:
1620 return image->vk_format;
1621 }
1622 }
1623
/* Initialize an image view: validate the subresource range, compute the view
 * extent (including the compressed-format-viewed-as-uncompressed fixups) and
 * build the sampled + storage descriptors for every selected plane.
 */
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		/* 3D views address slices of the selected mip's depth. */
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	/* Single-aspect depth/stencil views use the corresponding sub-format. */
	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+ descriptors always carry the base level's dimensions. */
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		/* Rescale the extent between the image's and the view's block sizes. */
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *      mip2:        5 x  5                   2 x 2
		 *      mip3:        2 x  2                   1 x 1
		 *      mip4:        1 x  1                   1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 *      mip0:  6x6
		 *      mip1:  3x3
		 *      mip2:  1x1
		 *      mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	/* Build one sampled and one storage descriptor per selected plane. */
	bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}
1747
1748 bool radv_layout_has_htile(const struct radv_image *image,
1749 VkImageLayout layout,
1750 bool in_render_loop,
1751 unsigned queue_mask)
1752 {
1753 if (radv_image_is_tc_compat_htile(image))
1754 return layout != VK_IMAGE_LAYOUT_GENERAL;
1755
1756 return radv_image_has_htile(image) &&
1757 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1758 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1759 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1760 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1761 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1762 }
1763
/* Whether depth/stencil data is kept HTILE-compressed in the given layout.
 * NOTE(review): the body is currently identical to radv_layout_has_htile;
 * the two entry points exist because their semantics may diverge.
 */
bool radv_layout_is_htile_compressed(const struct radv_image *image,
				     VkImageLayout layout,
				     bool in_render_loop,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image))
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
		layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
		(layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		 queue_mask == (1u << RADV_QUEUE_GENERAL)));
}
1779
1780 bool radv_layout_can_fast_clear(const struct radv_image *image,
1781 VkImageLayout layout,
1782 bool in_render_loop,
1783 unsigned queue_mask)
1784 {
1785 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1786 }
1787
1788 bool radv_layout_dcc_compressed(const struct radv_device *device,
1789 const struct radv_image *image,
1790 VkImageLayout layout,
1791 bool in_render_loop,
1792 unsigned queue_mask)
1793 {
1794 /* Don't compress compute transfer dst, as image stores are not supported. */
1795 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1796 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1797 return false;
1798
1799 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1800 }
1801
1802
1803 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1804 {
1805 if (!image->exclusive)
1806 return image->queue_family_mask;
1807 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1808 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1809 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1810 if (family == VK_QUEUE_FAMILY_IGNORED)
1811 return 1u << queue_family;
1812 return 1u << family;
1813 }
1814
1815 VkResult
1816 radv_CreateImage(VkDevice device,
1817 const VkImageCreateInfo *pCreateInfo,
1818 const VkAllocationCallbacks *pAllocator,
1819 VkImage *pImage)
1820 {
1821 #ifdef ANDROID
1822 const VkNativeBufferANDROID *gralloc_info =
1823 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1824
1825 if (gralloc_info)
1826 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1827 pAllocator, pImage);
1828 #endif
1829
1830 const struct wsi_image_create_info *wsi_info =
1831 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1832 bool scanout = wsi_info && wsi_info->scanout;
1833
1834 return radv_image_create(device,
1835 &(struct radv_image_create_info) {
1836 .vk_info = pCreateInfo,
1837 .scanout = scanout,
1838 },
1839 pAllocator,
1840 pImage);
1841 }
1842
1843 void
1844 radv_DestroyImage(VkDevice _device, VkImage _image,
1845 const VkAllocationCallbacks *pAllocator)
1846 {
1847 RADV_FROM_HANDLE(radv_device, device, _device);
1848 RADV_FROM_HANDLE(radv_image, image, _image);
1849
1850 if (!image)
1851 return;
1852
1853 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1854 device->ws->buffer_destroy(image->bo);
1855
1856 if (image->owned_memory != VK_NULL_HANDLE)
1857 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1858
1859 vk_free2(&device->alloc, pAllocator, image);
1860 }
1861
/* vkGetImageSubresourceLayout: report the offset and row/array/depth
 * pitches of one mip level / array layer of the requested image plane.
 */
void radv_GetImageSubresourceLayout(
	VkDevice _device,
	VkImage _image,
	const VkImageSubresource* pSubresource,
	VkSubresourceLayout* pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	/* Multi-planar formats (e.g. YCbCr) store each plane separately;
	 * map the requested aspect to its plane index. */
	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+: linear surfaces carry a per-level byte offset in
		 * u.gfx9.offset[]; tiled surfaces have no per-level offset
		 * in this table, hence 0. */
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			/* Linear surfaces track a per-level pitch; tiled ones
			 * use the whole-surface pitch. */
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		/* For 3D images, size covers every depth slice of this level. */
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		/* Pre-GFX9: per-level layout lives in u.legacy.level[];
		 * slice_size_dw is counted in dwords, hence the "* 4". */
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
1912
1913
1914 VkResult
1915 radv_CreateImageView(VkDevice _device,
1916 const VkImageViewCreateInfo *pCreateInfo,
1917 const VkAllocationCallbacks *pAllocator,
1918 VkImageView *pView)
1919 {
1920 RADV_FROM_HANDLE(radv_device, device, _device);
1921 struct radv_image_view *view;
1922
1923 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1924 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1925 if (view == NULL)
1926 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1927
1928 radv_image_view_init(view, device, pCreateInfo, NULL);
1929
1930 *pView = radv_image_view_to_handle(view);
1931
1932 return VK_SUCCESS;
1933 }
1934
1935 void
1936 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1937 const VkAllocationCallbacks *pAllocator)
1938 {
1939 RADV_FROM_HANDLE(radv_device, device, _device);
1940 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1941
1942 if (!iview)
1943 return;
1944 vk_free2(&device->alloc, pAllocator, iview);
1945 }
1946
1947 void radv_buffer_view_init(struct radv_buffer_view *view,
1948 struct radv_device *device,
1949 const VkBufferViewCreateInfo* pCreateInfo)
1950 {
1951 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1952
1953 view->bo = buffer->bo;
1954 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1955 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1956 view->vk_format = pCreateInfo->format;
1957
1958 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1959 pCreateInfo->offset, view->range, view->state);
1960 }
1961
1962 VkResult
1963 radv_CreateBufferView(VkDevice _device,
1964 const VkBufferViewCreateInfo *pCreateInfo,
1965 const VkAllocationCallbacks *pAllocator,
1966 VkBufferView *pView)
1967 {
1968 RADV_FROM_HANDLE(radv_device, device, _device);
1969 struct radv_buffer_view *view;
1970
1971 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1972 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1973 if (!view)
1974 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1975
1976 radv_buffer_view_init(view, device, pCreateInfo);
1977
1978 *pView = radv_buffer_view_to_handle(view);
1979
1980 return VK_SUCCESS;
1981 }
1982
1983 void
1984 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1985 const VkAllocationCallbacks *pAllocator)
1986 {
1987 RADV_FROM_HANDLE(radv_device, device, _device);
1988 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1989
1990 if (!view)
1991 return;
1992
1993 vk_free2(&device->alloc, pAllocator, view);
1994 }