radv: Unset vk_info in radv_image_create_layout.
src/amd/vulkan/radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "util/debug.h"
#include "util/u_atomic.h"

static unsigned
radv_choose_tiling(struct radv_device *device,
		   const VkImageCreateInfo *pCreateInfo)
{
	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
		assert(pCreateInfo->samples <= 1);
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	if (!vk_format_is_compressed(pCreateInfo->format) &&
	    !vk_format_is_depth_or_stencil(pCreateInfo->format) &&
	    device->physical_device->rad_info.chip_class <= GFX8) {
		/* This causes hangs in some VK CTS tests on GFX9. */
		/* Textures with a very small height are recommended to be linear. */
		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
		    /* Only very thin and long 2D textures should benefit from
		     * linear_aligned. */
		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	/* MSAA resources must be 2D tiled; all remaining cases default to
	 * 2D tiling as well. */
	return RADEON_SURF_MODE_2D;
}
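/* Illustrative summary of the selection above (not exhaustive): a 4x MSAA
 * color attachment always ends up RADEON_SURF_MODE_2D, while e.g. a 1024x1
 * VK_FORMAT_R8_UNORM optimally-tiled texture on GFX8 is allocated
 * RADEON_SURF_MODE_LINEAR_ALIGNED because it is long and thin. */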

static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
				   const VkImageCreateInfo *pCreateInfo)
{
	/* TC-compat HTILE is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: For some reason TC-compat with 2/4/8 samples breaks some CTS
	 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
	 */
	if (pCreateInfo->samples >= 2 &&
	    (pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
	     (pCreateInfo->format == VK_FORMAT_D32_SFLOAT &&
	      device->physical_device->rad_info.chip_class == GFX10)))
		return false;

	/* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
	 * supports 32-bit. However, it's possible to enable TC-compat for
	 * 16-bit depth surfaces if no Z planes are compressed.
	 */
	if (pCreateInfo->format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
	    pCreateInfo->format != VK_FORMAT_D32_SFLOAT &&
	    pCreateInfo->format != VK_FORMAT_D16_UNORM)
		return false;

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfoKHR *format_list =
			(const struct VkImageFormatListCreateInfoKHR *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* Compatibility is transitive, so we only need to check
			 * one format with everything else.
			 */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (pCreateInfo->format != format_list->pViewFormats[i])
					return false;
			}
		} else {
			return false;
		}
	}

	return true;
}
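/* Illustrative application-side sketch (not part of the driver): a
 * mutable-format depth image stays eligible for TC-compat HTILE if its
 * format list only names the image format itself:
 *
 *	VkFormat view_formats[] = { VK_FORMAT_D32_SFLOAT };
 *	VkImageFormatListCreateInfoKHR format_list = {
 *		.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
 *		.viewFormatCount = 1,
 *		.pViewFormats = view_formats,
 *	};
 *	// chain &format_list into VkImageCreateInfo::pNext
 *
 * Without such a list, MUTABLE_FORMAT_BIT makes the check above bail. */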

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
	if (info->scanout)
		return true;

	if (!info->bo_metadata)
		return false;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		return info->bo_metadata->u.gfx9.swizzle_mode == 0 || info->bo_metadata->u.gfx9.swizzle_mode % 4 == 2;
	} else {
		return info->bo_metadata->u.legacy.scanout;
	}
}
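/* Note on the GFX9 check above: in the addrlib swizzle-mode enumeration,
 * mode 0 is linear and, as far as we know, modes with mode % 4 == 2 are the
 * display-friendly *_D variants, which is why both are treated as scanout. */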

static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image *image,
		       const VkImageCreateInfo *pCreateInfo)
{
	bool dcc_compatible_formats;
	bool blendable;

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	if (image->shareable)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (vk_format_is_subsampled(pCreateInfo->format) ||
	    vk_format_get_plane_count(pCreateInfo->format) > 1)
		return false;

	/* TODO: Enable DCC for mipmaps on GFX9+. */
	if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
	    device->physical_device->rad_info.chip_class >= GFX9)
		return false;

	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
	 * 2x can be enabled with an option.
	 */
	if (pCreateInfo->samples > 2 ||
	    (pCreateInfo->samples == 2 &&
	     !device->physical_device->dcc_msaa_allowed))
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(pCreateInfo->format,
						     &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfoKHR *format_list =
			(const struct VkImageFormatListCreateInfoKHR *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* Compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (!radv_dcc_formats_compatible(pCreateInfo->format,
								 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
				   struct radv_image *image)
{
	if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
		return false;

	/* TC-compat CMASK is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
		return false;

	if (radv_image_has_dcc(image))
		return false;

	if (!radv_image_has_cmask(image))
		return false;

	return true;
}

static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device,
			      const struct radeon_bo_metadata *md)
{
	if (md->metadata[0] != 1 ||
	    md->metadata[1] != si_get_bo_metadata_word1(device))
		return false;

	if (md->size_metadata < 40)
		return false;

	return true;
}
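/* The 40-byte minimum corresponds to the fixed part of the layout written by
 * radv_query_opaque_metadata() below: 10 dwords, i.e. the version word, the
 * vendor/PCI-ID word and the 8-dword image descriptor. */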

static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
	}
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device,
			    struct radv_image *image,
			    const struct radv_image_create_info *create_info,
			    struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;

	/*
	 * minigbm sometimes allocates bigger images, which results in weird
	 * strides and other properties. Let's be lenient where possible and
	 * fail the import on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		if (device->physical_device->rad_info.chip_class >= GFX10) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
			        (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = G_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
			"The imported image has smaller dimensions than the internal\n"
			"dimensions. Using it is going to fail badly, so we reject\n"
			"this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (device->physical_device->rad_info.chip_class >= GFX10) {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency in width and will fail this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on pre-GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency and would fail on GFX10.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
	}
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device,
				 struct radv_image *image,
				 const struct radv_image_create_info *create_info,
				 struct ac_surf_info *image_info)
{
	VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
	if (result != VK_SUCCESS)
		return result;

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		if (create_info->bo_metadata) {
			radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
							 create_info->bo_metadata);
		}

		if (radv_surface_has_scanout(device, create_info)) {
			image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

			image->info.surf_index = NULL;
		}
	}
	return VK_SUCCESS;
}

static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo);
	VkFormat format = vk_format_get_plane_format(pCreateInfo->format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* Align bytes per element to a dword. */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}
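	/* For example, a 24-bit three-channel format (bpe == 3) gets padded
	 * to 4 bytes per element here so element addresses stay
	 * dword-aligned. */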

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	switch (pCreateInfo->imageType) {
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(pCreateInfo->format) == 128 &&
	    vk_format_is_compressed(pCreateInfo->format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	return 0;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
	if (stencil)
		return plane->surface.u.legacy.stencil_tiling_index[level];
	else
		return plane->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case VK_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case VK_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case VK_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case VK_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case VK_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* VK_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);

	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}
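	/* Illustrative: for a 4096-byte buffer of VK_FORMAT_R32G32B32A32_SFLOAT
	 * (stride 16), NUM_RECORDS below is 256 records on everything except
	 * GFX8, which counts the range in bytes and keeps 4096. */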

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(0) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}

static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;
			else
				meta = plane->surface.u.gfx9.htile;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;
			else
				meta = plane->surface.u.gfx9.htile;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}

static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;
	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (nr_samples > 1)
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		else
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		if (view_type == VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_3D;
		else
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}

static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == VK_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == VK_SWIZZLE_X) {
		if (swizzle[1] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}

static bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
	const struct vk_format_description *desc = vk_format_description(format);

	if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
		return desc->swizzle[3] == VK_SWIZZLE_X;

	return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format].img_format;

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
				       0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
				       util_logbase2(image->info.samples) :
				       last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D)) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->fmask_offset;

		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
		fmask_state[7] = 0;
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9)
 */
static void
si_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
			   bool is_storage_image,
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;

	desc = vk_format_description(vk_format);

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	first_non_void = vk_format_get_first_non_void_channel(vk_format);

	num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
	if (num_format == ~0) {
		num_format = 0;
	}

	data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	/* S8 with either Z16 or Z32 HTILE needs a special format. */
	if (device->physical_device->rad_info.chip_class == GFX9 &&
	    vk_format == VK_FORMAT_S8_UINT &&
	    radv_image_is_tc_compat_htile(image)) {
		if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
		else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
	}
	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT(data_format) |
		    S_008F14_NUM_FORMAT(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		    S_008F1C_TYPE(type));
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
	state[6] = 0;
	state[7] = 0;

	if (device->physical_device->rad_info.chip_class == GFX9) {
		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

		/* Depth is the last accessible layer on Gfx9.
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
					     util_logbase2(image->info.samples) :
					     image->info.levels - 1);
	} else {
		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}
	if (image->dcc_offset) {
		state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint32_t fmask_format, num_format;
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->fmask_offset;

		if (device->physical_device->rad_info.chip_class == GFX9) {
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
			switch (image->info.samples) {
			case 2:
				num_format = V_008F14_IMG_FMASK_8_2_2;
				break;
			case 4:
				num_format = V_008F14_IMG_FMASK_8_4_4;
				break;
			case 8:
				num_format = V_008F14_IMG_FMASK_32_8_8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
		} else {
			switch (image->info.samples) {
			case 2:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
				break;
			case 4:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
				break;
			case 8:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
				break;
			default:
				assert(0);
				fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
			}
			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
		}

		fmask_state[0] = va >> 8;
		fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				 S_008F14_DATA_FORMAT(fmask_format) |
				 S_008F14_NUM_FORMAT(num_format);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
				 S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = 0;
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;

		if (device->physical_device->rad_info.chip_class == GFX9) {
			fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
					  S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
					  S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);

			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->cmask_offset;

				fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
				fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
				fmask_state[7] |= va >> 8;
			}
		} else {
			fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
					  S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->cmask_offset;

				fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
				fmask_state[7] |= va >> 8;
			}
		}
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

static void
radv_make_texture_descriptor(struct radv_device *device,
			     struct radv_image *image,
			     bool is_storage_image,
			     VkImageViewType view_type,
			     VkFormat vk_format,
			     const VkComponentMapping *mapping,
			     unsigned first_level, unsigned last_level,
			     unsigned first_layer, unsigned last_layer,
			     unsigned width, unsigned height, unsigned depth,
			     uint32_t *state,
			     uint32_t *fmask_state)
{
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		gfx10_make_texture_descriptor(device, image, is_storage_image,
					      view_type, vk_format, mapping,
					      first_level, last_level,
					      first_layer, last_layer,
					      width, height, depth,
					      state, fmask_state);
	} else {
		si_make_texture_descriptor(device, image, is_storage_image,
					   view_type, vk_format, mapping,
					   first_level, last_level,
					   first_layer, last_layer,
					   width, height, depth,
					   state, fmask_state);
	}
}

static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	assert(image->plane_count == 1);

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
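/* Worked example: for a 3-level GFX8 image the metadata spans dwords [0..12],
 * so size_metadata = (11 + 3 - 1) * 4 = 52 bytes; on GFX9+ only the fixed
 * 10 dwords (40 bytes) are written. */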

void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->planes[0].surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
	} else {
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}
	radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	struct radeon_surf *surface = &image->planes[0].surface;
	unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (stride) {
			surface->u.gfx9.surf_pitch = stride;
			surface->u.gfx9.surf_slice_size =
				(uint64_t)stride * surface->u.gfx9.surf_height * bpe;
		}
		surface->u.gfx9.surf_offset = offset;
	} else {
		surface->u.legacy.level[0].nblk_x = stride;
		surface->u.legacy.level[0].slice_size_dw =
			((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;

		if (offset) {
			for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
				surface->u.legacy.level[i].offset += offset;
		}
	}
}

static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;

	image->fmask_offset = align64(image->size, fmask_alignment);
	image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
	image->alignment = MAX2(image->alignment, fmask_alignment);
}

static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
	unsigned cmask_size = image->planes[0].surface.cmask_size;
	uint32_t clear_value_size = 0;

	if (!cmask_size)
		return;

	assert(cmask_alignment);

	image->cmask_offset = align64(image->size, cmask_alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask_offset + cmask_size;
		clear_value_size = 8;
	}
	image->size = image->cmask_offset + cmask_size + clear_value_size;
	image->alignment = MAX2(image->alignment, cmask_alignment);
}

static void
radv_image_alloc_dcc(struct radv_image *image)
{
	assert(image->plane_count == 1);

	image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
	/* + 24 for storing the clear values + fce pred + dcc pred for each mip */
	image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
	image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
	image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
	image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
	image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}
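/* Resulting per-mip metadata layout right after the DCC buffer:
 *	clear_value_offset:	8 bytes * levels (fast-clear colors)
 *	fce_pred_offset:	8 bytes * levels (fast-clear-eliminate predicates)
 *	dcc_pred_offset:	8 bytes * levels (DCC decompress predicates)
 * which is where the "+ 24 for each mip" in the size comes from. */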

static void
radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
{
	image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
	image->size = image->clear_value_offset + image->info.levels * 8;
	if (radv_image_is_tc_compat_htile(image) &&
	    device->physical_device->rad_info.has_tc_compat_zrange_bug) {
		/* Metadata for the TC-compatible HTILE hardware bug which
		 * has to be worked around by updating ZRANGE_PRECISION when
		 * doing fast depth clears to 0.0f.
		 */
		image->tc_compat_zrange_offset = image->size;
		image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
	}
	image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
}
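/* With the zrange bug present, each level gets 8 bytes of clear value plus
 * 4 bytes of ZRANGE_PRECISION workaround data appended after HTILE; since
 * radv_image_can_enable_htile() currently requires levels == 1, that is 12
 * extra bytes in practice. */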

static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}

static inline bool
radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
{
	if (!radv_image_can_enable_dcc_or_cmask(image) ||
	    !radv_image_has_dcc(image))
		return false;

	/* On GFX8, DCC layers can be interleaved and it's currently only
	 * enabled if slice size is equal to the per slice fast clear size
	 * because the driver assumes that portions of multiple layers are
	 * contiguous during fast clears.
	 */
	if (image->info.array_size > 1) {
		const struct legacy_surf_level *surf_level =
			&image->planes[0].surface.u.legacy.level[0];

		assert(device->physical_device->rad_info.chip_class == GFX8);

		if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
			return false;
	}

	return true;
}

static inline bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128 bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1 &&
	       !image->planes[0].surface.is_linear;
}

static inline bool
radv_image_can_enable_fmask(struct radv_image *image)
{
	return image->info.samples > 1 && vk_format_is_color(image->vk_format);
}

static inline bool
radv_image_can_enable_htile(struct radv_image *image)
{
	return radv_image_has_htile(image) &&
	       image->info.levels == 1 &&
	       image->info.width * image->info.height >= 8 * 8;
}

static void radv_image_disable_dcc(struct radv_image *image)
{
	for (unsigned i = 0; i < image->plane_count; ++i)
		image->planes[i].surface.dcc_size = 0;
}

static void radv_image_disable_htile(struct radv_image *image)
{
	for (unsigned i = 0; i < image->plane_count; ++i)
		image->planes[i].surface.htile_size = 0;
}

static VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so that any accidental use of it below
	 * is caught on the common internal path too, not just in the delayed
	 * case where no pCreateInfo is available. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		image->alignment = image->planes[plane].surface.surf_alignment;

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	if (!create_info.no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(device, image)) {
			radv_image_alloc_dcc(image);
			if (image->info.samples > 1) {
				/* CMASK should be enabled because DCC fast
				 * clear with MSAA needs it.
				 */
				assert(radv_image_can_enable_cmask(image));
				radv_image_alloc_cmask(device, image);
			}
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			radv_image_disable_dcc(image);
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);

			if (radv_use_tc_compat_cmask_for_image(device, image))
				image->tc_compatible_cmask = true;
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
				radv_image_alloc_htile(device, image);
			} else {
				radv_image_disable_htile(image);
			}
		}
	} else {
		radv_image_disable_dcc(image);
		radv_image_disable_htile(image);
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
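/* Illustrative sketch (not code from this file): a caller that computes the
 * layout in a delayed fashion, e.g. once externally allocated memory is
 * known, would only have the radv_image_create_info around and no
 * VkImageCreateInfo:
 *
 *	struct radv_image_create_info info = {0};
 *	info.no_metadata_planes = true;	// hypothetical caller choice
 *	radv_image_create_layout(device, info, image);
 *
 * Unsetting vk_info above guarantees this function never depends on it. */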

VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	const unsigned plane_count = vk_format_get_plane_count(pCreateInfo->format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(pCreateInfo->format);

	image->vk_format = pCreateInfo->format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}
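	/* Example: concurrent sharing between queue families {0, 2} yields
	 * queue_family_mask == 0b101, while any external/foreign family
	 * conservatively sets all RADV_MAX_QUEUE_FAMILIES bits. */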
1484
1485 image->shareable = vk_find_struct_const(pCreateInfo->pNext,
1486 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL;
1487 if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !image->shareable) {
1488 image->info.surf_index = &device->image_mrt_offset_counter;
1489 }
1490
1491 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1492 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo);
1493 }
1494
1495 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1496 assert(result == VK_SUCCESS);
1497
1498 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1499 image->alignment = MAX2(image->alignment, 4096);
1500 image->size = align64(image->size, image->alignment);
1501 image->offset = 0;
1502
1503 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1504 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1505 if (!image->bo) {
1506 vk_free2(&device->alloc, alloc, image);
1507 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1508 }
1509 }
1510
1511 *pImage = radv_image_to_handle(image);
1512
1513 return VK_SUCCESS;
1514 }
1515
1516 static void
1517 radv_image_view_make_descriptor(struct radv_image_view *iview,
1518 struct radv_device *device,
1519 VkFormat vk_format,
1520 const VkComponentMapping *components,
1521 bool is_storage_image, bool disable_compression,
1522 unsigned plane_id, unsigned descriptor_plane_id)
1523 {
1524 struct radv_image *image = iview->image;
1525 struct radv_image_plane *plane = &image->planes[plane_id];
1526 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1527 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1528 uint32_t blk_w;
1529 union radv_descriptor *descriptor;
1530 uint32_t hw_level = 0;
1531
1532 if (is_storage_image) {
1533 descriptor = &iview->storage_descriptor;
1534 } else {
1535 descriptor = &iview->descriptor;
1536 }
1537
1538 assert(vk_format_get_plane_count(vk_format) == 1);
1539 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1540 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1541
1542 if (device->physical_device->rad_info.chip_class >= GFX9)
1543 hw_level = iview->base_mip;
1544 radv_make_texture_descriptor(device, image, is_storage_image,
1545 iview->type,
1546 vk_format,
1547 components,
1548 hw_level, hw_level + iview->level_count - 1,
1549 iview->base_layer,
1550 iview->base_layer + iview->layer_count - 1,
1551 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1552 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1553 iview->extent.depth,
1554 descriptor->plane_descriptors[descriptor_plane_id],
1555 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1556
1557 const struct legacy_surf_level *base_level_info = NULL;
1558 if (device->physical_device->rad_info.chip_class <= GFX9) {
1559 if (is_stencil)
1560 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1561 else
1562 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1563 }
1564 si_set_mutable_tex_desc_fields(device, image,
1565 base_level_info,
1566 plane_id,
1567 iview->base_mip,
1568 iview->base_mip,
1569 blk_w, is_stencil, is_storage_image,
1570 is_storage_image || disable_compression,
1571 descriptor->plane_descriptors[descriptor_plane_id]);
1572 }
1573
1574 static unsigned
1575 radv_plane_from_aspect(VkImageAspectFlags mask)
1576 {
1577 switch(mask) {
1578 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1579 return 1;
1580 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1581 return 2;
1582 default:
1583 return 0;
1584 }
1585 }
1586
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
    switch(mask) {
    case VK_IMAGE_ASPECT_PLANE_0_BIT:
        return image->planes[0].format;
    case VK_IMAGE_ASPECT_PLANE_1_BIT:
        return image->planes[1].format;
    case VK_IMAGE_ASPECT_PLANE_2_BIT:
        return image->planes[2].format;
    case VK_IMAGE_ASPECT_STENCIL_BIT:
        return vk_format_stencil_only(image->vk_format);
    case VK_IMAGE_ASPECT_DEPTH_BIT:
        return vk_format_depth_only(image->vk_format);
    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
        return vk_format_depth_only(image->vk_format);
    default:
        return image->vk_format;
    }
}

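/* Initialize an image view: validate the subresource range, derive the
 * view format and extent, and build both the sampled and storage
 * descriptors for every plane the view covers.
 */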
void
radv_image_view_init(struct radv_image_view *iview,
                     struct radv_device *device,
                     const VkImageViewCreateInfo* pCreateInfo,
                     const struct radv_image_view_extra_create_info* extra_create_info)
{
    RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

    switch (image->type) {
    case VK_IMAGE_TYPE_1D:
    case VK_IMAGE_TYPE_2D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
        break;
    case VK_IMAGE_TYPE_3D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
               <= radv_minify(image->info.depth, range->baseMipLevel));
        break;
    default:
        unreachable("bad VkImageType");
    }
    iview->image = image;
    iview->bo = image->bo;
    iview->type = pCreateInfo->viewType;
    iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
    iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
    iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
    iview->vk_format = pCreateInfo->format;

    if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
        iview->vk_format = vk_format_stencil_only(iview->vk_format);
    } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
        iview->vk_format = vk_format_depth_only(iview->vk_format);
    }

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        iview->extent = (VkExtent3D) {
            .width = image->info.width,
            .height = image->info.height,
            .depth = image->info.depth,
        };
    } else {
        iview->extent = (VkExtent3D) {
            .width = radv_minify(image->info.width, range->baseMipLevel),
            .height = radv_minify(image->info.height, range->baseMipLevel),
            .depth = radv_minify(image->info.depth, range->baseMipLevel),
        };
    }

    if (iview->vk_format != image->planes[iview->plane_id].format) {
        unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
        unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
        unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
        unsigned img_bh = vk_format_get_blockheight(image->vk_format);

        iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
        iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

        /* Comment ported from amdvlk -
         * If we have the following image:
         *           Uncompressed pixels   Compressed block sizes (4x4)
         *     mip0:     22 x 22               6 x 6
         *     mip1:     11 x 11               3 x 3
         *     mip2:      5 x  5               2 x 2
         *     mip3:      2 x  2               1 x 1
         *     mip4:      1 x  1               1 x 1
         *
         * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
         * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
         * divide-by-two integer math):
         *     mip0:  6x6
         *     mip1:  3x3
         *     mip2:  1x1
         *     mip3:  1x1
         *
         * This means that mip2 will be missing texels.
         *
         * Fix this by calculating the base mip's width and height, then convert that, and round it
         * back up to get the level 0 size.
         * Clamp the converted size between the original values, and next power of two, which
         * means we don't oversize the image.
         */
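        /* A hypothetical worked example for the fix-up below, using the
         * 22x22 image from the comment above (4x4 blocks) viewed as an
         * uncompressed format with baseMipLevel = 2:
         *   minify(22, 2) = 5 texels, round_up(5, 4) = 2 blocks,
         *   2 << 2 = 8, so the programmed base width is CLAMP(8, 6, pitch).
         * The HW then derives 8 >> 2 = 2 for mip2, matching the real 2x2
         * block size instead of the 1x1 it would get from a base size of 6.
         */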
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            vk_format_is_compressed(image->vk_format) &&
            !vk_format_is_compressed(iview->vk_format)) {
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
        }
    }

    iview->base_layer = range->baseArrayLayer;
    iview->layer_count = radv_get_layerCount(image, range);
    iview->base_mip = range->baseMipLevel;
    iview->level_count = radv_get_levelCount(image, range);

    bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
    for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
        VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
        radv_image_view_make_descriptor(iview, device, format,
                                        &pCreateInfo->components,
                                        false, disable_compression,
                                        iview->plane_id + i, i);
        radv_image_view_make_descriptor(iview, device,
                                        format, &pCreateInfo->components,
                                        true, disable_compression,
                                        iview->plane_id + i, i);
    }
}

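/* Report whether HTILE metadata is valid to use in the given layout.
 * TC-compatible HTILE stays valid in every layout except GENERAL; otherwise
 * it is only kept for depth/stencil rendering, and for transfer writes when
 * the general queue is the sole user.
 */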
bool radv_layout_has_htile(const struct radv_image *image,
                           VkImageLayout layout,
                           bool in_render_loop,
                           unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image))
        return layout != VK_IMAGE_LAYOUT_GENERAL;

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

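/* Report whether the depth surface stays HTILE-compressed in the given
 * layout. With the current rules this matches radv_layout_has_htile()
 * exactly.
 */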
bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     bool in_render_loop,
                                     unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image))
        return layout != VK_IMAGE_LAYOUT_GENERAL;

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

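/* Fast clears are only allowed while the image is in the color-attachment
 * layout, regardless of queue or render-loop state.
 */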
bool radv_layout_can_fast_clear(const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

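/* Report whether DCC stays compressed in the given layout. DCC is never
 * kept in the GENERAL layout, and compute transfer destinations are
 * excluded for the reason given in the comment below.
 */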
bool radv_layout_dcc_compressed(const struct radv_device *device,
                                const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    /* Don't compress compute transfer dst, as image stores are not supported. */
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
        (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
        return false;

    return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

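/* Compute the bitmask of queue families that may access the image.
 * Concurrent-sharing images keep their full mask; external/foreign queues
 * conservatively map to all families; an exclusive image otherwise belongs
 * to the single family it is used on.
 */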
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
    if (!image->exclusive)
        return image->queue_family_mask;
    if (family == VK_QUEUE_FAMILY_EXTERNAL ||
        family == VK_QUEUE_FAMILY_FOREIGN_EXT)
        return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
    if (family == VK_QUEUE_FAMILY_IGNORED)
        return 1u << queue_family;
    return 1u << family;
}

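/* vkCreateImage entry point. Android gralloc buffers take a dedicated
 * creation path; WSI swapchain images only add a scanout flag to the
 * common radv_image_create() path.
 */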
VkResult
radv_CreateImage(VkDevice device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#ifdef ANDROID
    const VkNativeBufferANDROID *gralloc_info =
        vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

    if (gralloc_info)
        return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
                                       pAllocator, pImage);
#endif

    const struct wsi_image_create_info *wsi_info =
        vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
    bool scanout = wsi_info && wsi_info->scanout;

    return radv_image_create(device,
                             &(struct radv_image_create_info) {
                                 .vk_info = pCreateInfo,
                                 .scanout = scanout,
                             },
                             pAllocator,
                             pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image, image, _image);

    if (!image)
        return;

    if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
        device->ws->buffer_destroy(image->bo);

    if (image->owned_memory != VK_NULL_HANDLE)
        radv_FreeMemory(_device, image->owned_memory, pAllocator);

    vk_free2(&device->alloc, pAllocator, image);
}

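/* vkGetImageSubresourceLayout. GFX9+ reads the layout from the unified
 * surface description; older chips use the per-level legacy tables, where
 * slice sizes are stored in dwords and converted to bytes here.
 */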
void radv_GetImageSubresourceLayout(
    VkDevice                                    _device,
    VkImage                                     _image,
    const VkImageSubresource*                   pSubresource,
    VkSubresourceLayout*                        pLayout)
{
    RADV_FROM_HANDLE(radv_image, image, _image);
    RADV_FROM_HANDLE(radv_device, device, _device);
    int level = pSubresource->mipLevel;
    int layer = pSubresource->arrayLayer;

    unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

    struct radv_image_plane *plane = &image->planes[plane_id];
    struct radeon_surf *surface = &plane->surface;

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        pLayout->offset = plane->offset + surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
        if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
            /* Adjust the row pitch: for these 3-component formats the
             * hardware pitch is counted in components per row rather
             * than in texels.
             */
            pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
        } else {
            assert(util_is_power_of_two_nonzero(surface->bpe));
            pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
        }

        pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
        pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
        pLayout->size = surface->u.gfx9.surf_slice_size;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    } else {
        pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
        pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
        pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    }
}

VkResult
radv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_image_view *view;

    view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (view == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    radv_image_view_init(view, device, pCreateInfo, NULL);

    *pView = radv_image_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
                      const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image_view, iview, _iview);

    if (!iview)
        return;
    vk_free2(&device->alloc, pAllocator, iview);
}

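/* Build the texel-buffer descriptor for a buffer view. VK_WHOLE_SIZE is
 * resolved against the buffer size before the descriptor is packed.
 */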
void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
                           const VkBufferViewCreateInfo* pCreateInfo)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

    view->bo = buffer->bo;
    view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
        buffer->size - pCreateInfo->offset : pCreateInfo->range;
    view->vk_format = pCreateInfo->format;

    radv_make_buffer_descriptor(device, buffer, view->vk_format,
                                pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_buffer_view *view;

    view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!view)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    radv_buffer_view_init(view, device, pCreateInfo);

    *pView = radv_buffer_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

    if (!view)
        return;

    vk_free2(&device->alloc, pAllocator, view);
}