radv: fix DCC fast clear code for intensity formats
[mesa.git] / src / amd / vulkan / radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "util/debug.h"
#include "util/u_atomic.h"

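/* Pick the surface (array) mode for a new image. Linear is only used when
 * explicitly requested, or when it is likely cheaper than tiling (tiny,
 * 1D-like surfaces on GFX8 and older); everything else is 2D tiled.
 */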
static unsigned
radv_choose_tiling(struct radv_device *device,
		   const VkImageCreateInfo *pCreateInfo,
		   VkFormat format)
{
	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
		assert(pCreateInfo->samples <= 1);
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	if (!vk_format_is_compressed(format) &&
	    !vk_format_is_depth_or_stencil(format) &&
	    device->physical_device->rad_info.chip_class <= GFX8) {
		/* Skipped on GFX9 because going linear here causes hangs in
		 * some VK CTS tests. */
		/* Textures with a very small height are recommended to be linear. */
		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
		    /* Only very thin and long 2D textures should benefit from
		     * linear_aligned. */
		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	/* Everything else, including MSAA resources, must be 2D tiled. */
	return RADEON_SURF_MODE_2D;
}

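/* TC-compatible HTILE keeps the depth metadata in a layout the texture unit
 * understands, so depth can be sampled without a decompress pass. The checks
 * below mirror the hardware/driver restrictions on when that layout is
 * usable.
 */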
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
				   const VkImageCreateInfo *pCreateInfo,
				   VkFormat format)
{
	/* TC-compat HTILE is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
	 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
	 */
	if (pCreateInfo->samples >= 2 &&
	    (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
	     (format == VK_FORMAT_D32_SFLOAT &&
	      device->physical_device->rad_info.chip_class == GFX10)))
		return false;

	/* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
	 * supports 32-bit. Though, it's possible to enable TC-compat for
	 * 16-bit depth surfaces if no Z planes are compressed.
	 */
	if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
	    format != VK_FORMAT_D32_SFLOAT &&
	    format != VK_FORMAT_D16_UNORM)
		return false;

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfoKHR *format_list =
			(const struct VkImageFormatListCreateInfoKHR *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else.
			 */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (format != format_list->pViewFormats[i])
					return false;
			}
		} else {
			return false;
		}
	}

	return true;
}

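/* Note: on GFX9+ the swizzle modes are grouped in fours (_Z/_S/_D/_R), so
 * swizzle_mode % 4 == 2 selects the displayable _D variants; mode 0 is
 * linear. Both are treated as scanout-capable below.
 */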
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
	if (info->scanout)
		return true;

	if (!info->bo_metadata)
		return false;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		return info->bo_metadata->u.gfx9.swizzle_mode == 0 || info->bo_metadata->u.gfx9.swizzle_mode % 4 == 2;
	} else {
		return info->bo_metadata->u.legacy.scanout;
	}
}

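/* DCC (Delta Color Compression) provides lossless bandwidth compression for
 * color surfaces, but only when every way the image can be accessed is
 * DCC-aware. The checks below conservatively disable it otherwise.
 */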
static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image *image,
		       const VkImageCreateInfo *pCreateInfo,
		       VkFormat format)
{
	bool dcc_compatible_formats;
	bool blendable;

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	if (image->shareable)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (vk_format_is_subsampled(format) ||
	    vk_format_get_plane_count(format) > 1)
		return false;

	/* TODO: Enable DCC for mipmaps on GFX9+. */
	if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
	    device->physical_device->rad_info.chip_class >= GFX9)
		return false;

	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
	 * 2x can be enabled with an option.
	 */
	if (pCreateInfo->samples > 2 ||
	    (pCreateInfo->samples == 2 &&
	     !device->physical_device->dcc_msaa_allowed))
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(format,
						     &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfoKHR *format_list =
			(const struct VkImageFormatListCreateInfoKHR *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (!radv_dcc_formats_compatible(format,
								 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}

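/* TC-compatible CMASK lets the texture unit read fast-cleared MSAA color
 * directly, avoiding a fast-clear-eliminate pass. As implemented here it is
 * mutually exclusive with DCC and gated behind a perftest flag.
 */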
static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
				   struct radv_image *image)
{
	if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
		return false;

	/* TC-compat CMASK is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
		return false;

	if (radv_image_has_dcc(image))
		return false;

	if (!radv_image_has_cmask(image))
		return false;

	return true;
}

static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device,
			      const struct radeon_bo_metadata *md)
{
	if (md->metadata[0] != 1 ||
	    md->metadata[1] != si_get_bo_metadata_word1(device))
		return false;

	if (md->size_metadata < 40)
		return false;

	return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
	}
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device,
			    struct radv_image *image,
			    const struct radv_image_create_info *create_info,
			    struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;

	/*
	 * minigbm sometimes allocates bigger images, which is going to result
	 * in weird strides and other properties. Let's be lenient where
	 * possible and fail it on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		if (device->physical_device->rad_info.chip_class >= GFX10) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
			        (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = G_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
			"The imported image has smaller dimensions than the internal\n"
			"dimensions. Using it is going to fail badly, so we reject\n"
			"this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (device->physical_device->rad_info.chip_class >= GFX10) {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency in width and will fail this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on pre-GFX10.\n"
			"Pre-GFX10 surfaces have separate stride fields, so we cope by\n"
			"overriding the dimensions, but the same import would fail on GFX10.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
	}
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device,
				 struct radv_image *image,
				 const struct radv_image_create_info *create_info,
				 struct ac_surf_info *image_info)
{
	VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
	if (result != VK_SUCCESS)
		return result;

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		if (create_info->bo_metadata) {
			radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
							 create_info->bo_metadata);
		}

		if (radv_surface_has_scanout(device, create_info)) {
			image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

			image->info.surf_index = NULL;
		}
	}
	return VK_SUCCESS;
}

static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo,
		  VkFormat image_format)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* align the bytes per element to a dword (3-byte formats are padded to 4) */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	switch (pCreateInfo->imageType) {
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	return 0;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
	if (stencil)
		return plane->surface.u.legacy.stencil_tiling_index[level];
	else
		return plane->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case VK_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case VK_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case VK_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case VK_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case VK_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* VK_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

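/* Build a 4-dword buffer resource descriptor:
 *   dword 0: base address bits [31:0]
 *   dword 1: base address bits [47:32] | stride (bytes)
 *   dword 2: NUM_RECORDS (bytes on GFX8, elements elsewhere when strided)
 *   dword 3: dst swizzle + data/num format (or img_format + OOB rules on GFX10)
 *
 * Worked example (a sketch, not driver output): an R32G32B32A32_SFLOAT texel
 * buffer has stride 16, so a 256-byte range yields NUM_RECORDS = 256 on GFX8
 * but 256 / 16 = 16 records on the other generations handled below.
 */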
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;

	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);

	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(0) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}

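/* Patch the mutable fields (base address, tiling/swizzle mode and metadata
 * address) of an already-built image descriptor. These depend on the bound
 * memory and the selected base level, so they are (re)written per view.
 */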
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;

	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;
			else
				meta = plane->surface.u.gfx9.htile;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;
			else
				meta = plane->surface.u.gfx9.htile;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}

static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;

	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (nr_samples > 1)
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		else
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		if (view_type == VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_3D;
		else
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}

static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == VK_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == VK_SWIZZLE_X) {
		if (swizzle[1] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}

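/* For DCC fast clears the hardware needs to know whether the alpha component
 * ends up in the most significant bits of the clear value. Single-channel
 * (intensity-like) formats on GFX10 read alpha from the one existing channel,
 * so check the format swizzle directly instead of relying on the colorswap
 * heuristic; this is what the "fix DCC fast clear code for intensity formats"
 * change is about.
 */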
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
	const struct vk_format_description *desc = vk_format_description(format);

	if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
		return desc->swizzle[3] == VK_SWIZZLE_X;

	return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format].img_format;

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D)) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->fmask_offset;

		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
		fmask_state[7] = 0;
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9).
 */
static void
si_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
			   bool is_storage_image,
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;

	desc = vk_format_description(vk_format);

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	first_non_void = vk_format_get_first_non_void_channel(vk_format);

	num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
	if (num_format == ~0) {
		num_format = 0;
	}

	data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	/* S8 with either Z16 or Z32 HTILE needs a special format. */
	if (device->physical_device->rad_info.chip_class == GFX9 &&
	    vk_format == VK_FORMAT_S8_UINT &&
	    radv_image_is_tc_compat_htile(image)) {
		if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
		else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT(data_format) |
		    S_008F14_NUM_FORMAT(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		    S_008F1C_TYPE(type));
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
	state[6] = 0;
	state[7] = 0;

	if (device->physical_device->rad_info.chip_class == GFX9) {
		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

		/* Depth is the last accessible layer on Gfx9.
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
					     util_logbase2(image->info.samples) :
					     image->info.levels - 1);
	} else {
		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}

	if (image->dcc_offset) {
		state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint32_t fmask_format, num_format;
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->fmask_offset;

		if (device->physical_device->rad_info.chip_class == GFX9) {
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
			switch (image->info.samples) {
			case 2:
				num_format = V_008F14_IMG_FMASK_8_2_2;
				break;
			case 4:
				num_format = V_008F14_IMG_FMASK_8_4_4;
				break;
			case 8:
				num_format = V_008F14_IMG_FMASK_32_8_8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
		} else {
			switch (image->info.samples) {
			case 2:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
				break;
			case 4:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
				break;
			case 8:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
				break;
			default:
				assert(0);
				fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
			}
			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
		}

		fmask_state[0] = va >> 8;
		fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				 S_008F14_DATA_FORMAT(fmask_format) |
				 S_008F14_NUM_FORMAT(num_format);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
				 S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = 0;
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;

		if (device->physical_device->rad_info.chip_class == GFX9) {
			fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
					  S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
					  S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);

			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->cmask_offset;

				fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
				fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
				fmask_state[7] |= va >> 8;
			}
		} else {
			fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
					  S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->cmask_offset;

				fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
				fmask_state[7] |= va >> 8;
			}
		}
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

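/* Chip dispatch: GFX10 introduced a new image descriptor layout, so the two
 * builders above cannot be shared.
 */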
static void
radv_make_texture_descriptor(struct radv_device *device,
			     struct radv_image *image,
			     bool is_storage_image,
			     VkImageViewType view_type,
			     VkFormat vk_format,
			     const VkComponentMapping *mapping,
			     unsigned first_level, unsigned last_level,
			     unsigned first_layer, unsigned last_layer,
			     unsigned width, unsigned height, unsigned depth,
			     uint32_t *state,
			     uint32_t *fmask_state)
{
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		gfx10_make_texture_descriptor(device, image, is_storage_image,
					      view_type, vk_format, mapping,
					      first_level, last_level,
					      first_layer, last_layer,
					      width, height, depth,
					      state, fmask_state);
	} else {
		si_make_texture_descriptor(device, image, is_storage_image,
					   view_type, vk_format, mapping,
					   first_level, last_level,
					   first_layer, last_layer,
					   width, height, depth,
					   state, fmask_state);
	}
}

static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	assert(image->plane_count == 1);

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}

void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->planes[0].surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
	} else {
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}
	radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	struct radeon_surf *surface = &image->planes[0].surface;
	unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (stride) {
			surface->u.gfx9.surf_pitch = stride;
			surface->u.gfx9.surf_slice_size =
				(uint64_t)stride * surface->u.gfx9.surf_height * bpe;
		}
		surface->u.gfx9.surf_offset = offset;
	} else {
		surface->u.legacy.level[0].nblk_x = stride;
		surface->u.legacy.level[0].slice_size_dw =
			((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;

		if (offset) {
			for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
				surface->u.legacy.level[i].offset += offset;
		}
	}
}

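/* Each metadata surface (FMASK/CMASK/DCC/HTILE) is appended to the image at
 * an offset aligned for that surface, growing image->size and image->alignment
 * as it goes; a few extra bytes per mip are reserved after CMASK/DCC/HTILE
 * for the fast-clear values and predicates mentioned in the comments below.
 */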
static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;

	image->fmask_offset = align64(image->size, fmask_alignment);
	image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
	image->alignment = MAX2(image->alignment, fmask_alignment);
}

static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
	unsigned cmask_size = image->planes[0].surface.cmask_size;
	uint32_t clear_value_size = 0;

	if (!cmask_size)
		return;

	assert(cmask_alignment);

	image->cmask_offset = align64(image->size, cmask_alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask_offset + cmask_size;
		clear_value_size = 8;
	}
	image->size = image->cmask_offset + cmask_size + clear_value_size;
	image->alignment = MAX2(image->alignment, cmask_alignment);
}

static void
radv_image_alloc_dcc(struct radv_image *image)
{
	assert(image->plane_count == 1);

	image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
	/* + 24 for storing the clear values + fce pred + dcc pred for each mip */
	image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
	image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
	image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
	image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
	image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}

static void
radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
{
	image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
	image->size = image->clear_value_offset + image->info.levels * 8;
	if (radv_image_is_tc_compat_htile(image) &&
	    device->physical_device->rad_info.has_tc_compat_zrange_bug) {
		/* Metadata for the TC-compatible HTILE hardware bug which
		 * has to be fixed by updating ZRANGE_PRECISION when doing
		 * fast depth clears to 0.0f.
		 */
		image->tc_compat_zrange_offset = image->size;
		image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
	}
	image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
}

static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}

static inline bool
radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
{
	if (!radv_image_can_enable_dcc_or_cmask(image) ||
	    !radv_image_has_dcc(image))
		return false;

	/* On GFX8, DCC layers can be interleaved and it's currently only
	 * enabled if slice size is equal to the per slice fast clear size
	 * because the driver assumes that portions of multiple layers are
	 * contiguous during fast clears.
	 */
	if (image->info.array_size > 1) {
		const struct legacy_surf_level *surf_level =
			&image->planes[0].surface.u.legacy.level[0];

		assert(device->physical_device->rad_info.chip_class == GFX8);

		if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
			return false;
	}

	return true;
}

static inline bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128 bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1 &&
	       !image->planes[0].surface.is_linear;
}

static inline bool
radv_image_can_enable_fmask(struct radv_image *image)
{
	return image->info.samples > 1 && vk_format_is_color(image->vk_format);
}

static inline bool
radv_image_can_enable_htile(struct radv_image *image)
{
	return radv_image_has_htile(image) &&
	       image->info.levels == 1 &&
	       image->info.width * image->info.height >= 8 * 8;
}

static void radv_image_disable_dcc(struct radv_image *image)
{
	for (unsigned i = 0; i < image->plane_count; ++i)
		image->planes[i].surface.dcc_size = 0;
}

static void radv_image_disable_htile(struct radv_image *image)
{
	for (unsigned i = 0; i < image->plane_count; ++i)
		image->planes[i].surface.htile_size = 0;
}

VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		/* Keep the strictest alignment across all planes. */
		image->alignment = MAX2(image->alignment, image->planes[plane].surface.surf_alignment);

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	if (!create_info.no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(device, image)) {
			radv_image_alloc_dcc(image);
			if (image->info.samples > 1) {
				/* CMASK should be enabled because DCC fast
				 * clear with MSAA needs it.
				 */
				assert(radv_image_can_enable_cmask(image));
				radv_image_alloc_cmask(device, image);
			}
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			radv_image_disable_dcc(image);
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);

			if (radv_use_tc_compat_cmask_for_image(device, image))
				image->tc_compatible_cmask = true;
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
				radv_image_alloc_htile(device, image);
			} else {
				radv_image_disable_htile(image);
			}
		}
	} else {
		radv_image_disable_dcc(image);
		radv_image_disable_htile(image);
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}

VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
							      pCreateInfo->format);

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
				     EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

	image->shareable = external_info != NULL;
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
						      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->alloc, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}

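/* Fill in both the immutable descriptor dwords (via
 * radv_make_texture_descriptor) and the mutable address/tiling dwords (via
 * si_set_mutable_tex_desc_fields) for one plane of an image view.
 */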
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}

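/* Translate an aspect mask to a plane index; color, depth, stencil and
 * PLANE_0 aspects all map to plane 0. */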
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return 1;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return 2;
	default:
		return 0;
	}
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_0_BIT:
		return image->planes[0].format;
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return image->planes[1].format;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return image->planes[2].format;
	case VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_stencil_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT:
		return vk_format_depth_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_depth_only(image->vk_format);
	default:
		return image->vk_format;
	}
}

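/* Initialize an image view and build the sampled + storage descriptors for
 * each of its planes. */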
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format
	 * will be VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *          Uncompressed pixels   Compressed block sizes (4x4)
		 *   mip0:       22 x 22                 6 x 6
		 *   mip1:       11 x 11                 3 x 3
		 *   mip2:        5 x  5                 2 x 2
		 *   mip3:        2 x  2                 1 x 1
		 *   mip4:        1 x  1                 1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH
		 * and HEIGHT of the base level and the HW is calculating the
		 * degradation of the block sizes down the mip-chain as
		 * follows (straight-up divide-by-two integer math):
		 *   mip0:  6x6
		 *   mip1:  3x3
		 *   mip2:  1x1
		 *   mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by computing the size of the view's base mip level
		 * in view texels, then scaling it back up (<< baseMipLevel)
		 * to get an adjusted level-0 size. Clamp the adjusted size
		 * between the original level-0 value and the padded surface
		 * dimensions, so we don't oversize the image.
		 */
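		/* For the 22x22 example above with baseMipLevel = 2:
		 * radv_minify(22, 2) = 5 view texels, round_up_u32(5, 4) = 2
		 * blocks, and 2 << 2 = 8, so the descriptor is programmed
		 * with a width of 8 instead of 6. The HW then derives
		 * 8 >> 2 = 2 for level 2, matching the real 2x2 block
		 * dimensions, and the CLAMP below keeps the result within
		 * the padded surf_pitch. */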
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}

bool radv_layout_has_htile(const struct radv_image *image,
			   VkImageLayout layout,
			   bool in_render_loop,
			   unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image))
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		(layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		 queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

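/* Note: currently identical to radv_layout_has_htile, since HTILE is kept
 * compressed in exactly the layouts where it is considered enabled. */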
bool radv_layout_is_htile_compressed(const struct radv_image *image,
				     VkImageLayout layout,
				     bool in_render_loop,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image))
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		(layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		 queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

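/* Fast clears are only allowed while the image is in
 * COLOR_ATTACHMENT_OPTIMAL; in_render_loop and queue_mask are currently
 * ignored. */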
bool radv_layout_can_fast_clear(const struct radv_image *image,
				VkImageLayout layout,
				bool in_render_loop,
				unsigned queue_mask)
{
	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

bool radv_layout_dcc_compressed(const struct radv_device *device,
				const struct radv_image *image,
				VkImageLayout layout,
				bool in_render_loop,
				unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
		return false;

	return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

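/* Return the mask of queue families that may access the image. For
 * exclusive images, EXTERNAL/FOREIGN sharing maps to all families, and
 * VK_QUEUE_FAMILY_IGNORED resolves to the caller's own queue family. */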
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
	if (!image->exclusive)
		return image->queue_family_mask;
	if (family == VK_QUEUE_FAMILY_EXTERNAL ||
	    family == VK_QUEUE_FAMILY_FOREIGN_EXT)
		return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
	if (family == VK_QUEUE_FAMILY_IGNORED)
		return 1u << queue_family;
	return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
					       pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
		  const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (!image)
		return;

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(image->bo);

	if (image->owned_memory != VK_NULL_HANDLE)
		radv_FreeMemory(_device, image->owned_memory, pAllocator);

	vk_free2(&device->alloc, pAllocator, image);
}

void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		pLayout->offset = plane->offset + surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
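			/* i.e. surf_pitch here counts 4-byte components
			 * (three per texel) rather than texels, while bpe is
			 * the 12-byte texel size, so the byte pitch comes out
			 * as surf_pitch * (bpe / 3) = surf_pitch * 4. */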
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
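
/* A minimal usage sketch (not driver code): with a LINEAR image, an
 * application would typically combine this entry point with vkMapMemory to
 * address individual texels. 'dev', 'image', 'map', 'x', 'y' and 'bpp' are
 * assumed to be provided by the application:
 *
 *	VkImageSubresource sub = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 };
 *	VkSubresourceLayout layout;
 *	vkGetImageSubresourceLayout(dev, image, &sub, &layout);
 *	uint8_t *texel = (uint8_t *)map + layout.offset +
 *			 y * layout.rowPitch + x * bpp;
 */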

VkResult
radv_CreateImageView(VkDevice _device,
		     const VkImageViewCreateInfo *pCreateInfo,
		     const VkAllocationCallbacks *pAllocator,
		     VkImageView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_image_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (view == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_image_view_init(view, device, pCreateInfo, NULL);

	*pView = radv_image_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
		      const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image_view, iview, _iview);

	if (!iview)
		return;
	vk_free2(&device->alloc, pAllocator, iview);
}

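/* Texel buffer views: resolve VK_WHOLE_SIZE against the buffer size and
 * build the typed buffer descriptor directly into view->state. */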
void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

	view->bo = buffer->bo;
	view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
		buffer->size - pCreateInfo->offset : pCreateInfo->range;
	view->vk_format = pCreateInfo->format;

	radv_make_buffer_descriptor(device, buffer, view->vk_format,
				    pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
		      const VkBufferViewCreateInfo *pCreateInfo,
		      const VkAllocationCallbacks *pAllocator,
		      VkBufferView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!view)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_buffer_view_init(view, device, pCreateInfo);

	*pView = radv_buffer_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
		       const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

	if (!view)
		return;

	vk_free2(&device->alloc, pAllocator, view);
}