ac/surface: replace RADEON_SURF_OPTIMIZE_FOR_SPACE with !FORCE_SWIZZLE_MODE
src/amd/vulkan/radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36
37 static unsigned
38 radv_choose_tiling(struct radv_device *device,
39 const VkImageCreateInfo *pCreateInfo,
40 VkFormat format)
41 {
42 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
43 assert(pCreateInfo->samples <= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED;
45 }
46
47 	if (!vk_format_is_compressed(format) &&
48 	    !vk_format_is_depth_or_stencil(format) &&
49 	    device->physical_device->rad_info.chip_class <= GFX8) {
50 		/* Linear-aligned here causes hangs in some VK CTS tests on GFX9, hence the GFX8 check above. */
51 		/* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
53 /* Only very thin and long 2D textures should benefit from
54 * linear_aligned. */
55 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED;
57 }
58
59 	/* MSAA resources must be 2D tiled, and 2D tiling is also the
60 	 * best default for everything else that reaches this point.
61 	 */
62 
63 	return RADEON_SURF_MODE_2D;
64 }
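/* A minimal illustration of the decision above (not driver code: the helper
 * below and its expectations are assumptions for documentation only, and it
 * assumes a GFX8 device).
 */
#if 0
static void example_choose_tiling(struct radv_device *device)
{
	VkImageCreateInfo info = {
		.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
		.imageType = VK_IMAGE_TYPE_2D,
		.format = VK_FORMAT_R8G8B8A8_UNORM,
		.extent = { .width = 1024, .height = 1, .depth = 1 },
		.mipLevels = 1,
		.arrayLayers = 1,
		.samples = VK_SAMPLE_COUNT_1_BIT,
		.tiling = VK_IMAGE_TILING_OPTIMAL,
	};

	/* Thin-and-long 2D texture (width > 8, height <= 2) on GFX8:
	 * the linear fast path above is taken.
	 */
	assert(radv_choose_tiling(device, &info, info.format) ==
	       RADEON_SURF_MODE_LINEAR_ALIGNED);

	/* Multisampled images always end up 2D tiled. */
	info.samples = VK_SAMPLE_COUNT_4_BIT;
	assert(radv_choose_tiling(device, &info, info.format) ==
	       RADEON_SURF_MODE_2D);
}
#endif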
65
66 static bool
67 radv_use_tc_compat_htile_for_image(struct radv_device *device,
68 const VkImageCreateInfo *pCreateInfo,
69 VkFormat format)
70 {
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device->physical_device->rad_info.chip_class < GFX8)
73 return false;
74
75 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
76 return false;
77
78 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
79 return false;
80
81 if (pCreateInfo->mipLevels > 1)
82 return false;
83
84 /* Do not enable TC-compatible HTILE if the image isn't readable by a
85 * shader because no texture fetches will happen.
86 */
87 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
88 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
89 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
90 return false;
91
92 	/* FIXME: for some reason TC-compat with 2/4/8 samples breaks some CTS
93 	 * tests - disable for now. On GFX10, D32_SFLOAT is affected as well.
94 */
95 if (pCreateInfo->samples >= 2 &&
96 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
97 (format == VK_FORMAT_D32_SFLOAT &&
98 device->physical_device->rad_info.chip_class == GFX10)))
99 return false;
100
101 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
102 	 * supports 32-bit. However, TC-compat can still be enabled for
103 * 16-bit depth surfaces if no Z planes are compressed.
104 */
105 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
106 format != VK_FORMAT_D32_SFLOAT &&
107 format != VK_FORMAT_D16_UNORM)
108 return false;
109
110 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
111 const struct VkImageFormatListCreateInfo *format_list =
112 (const struct VkImageFormatListCreateInfo *)
113 vk_find_struct_const(pCreateInfo->pNext,
114 IMAGE_FORMAT_LIST_CREATE_INFO);
115
116 /* We have to ignore the existence of the list if viewFormatCount = 0 */
117 if (format_list && format_list->viewFormatCount) {
118 /* compatibility is transitive, so we only need to check
119 * one format with everything else.
120 */
121 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
122 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
123 continue;
124
125 if (format != format_list->pViewFormats[i])
126 return false;
127 }
128 } else {
129 return false;
130 }
131 }
132
133 return true;
134 }
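/* Worked example for the mutable-format check above (values are
 * hypothetical): an image created with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
 * keeps TC-compat HTILE only if every entry of the chained
 * VkImageFormatListCreateInfo equals the image format (VK_FORMAT_UNDEFINED
 * entries are skipped). Omitting the list, or passing viewFormatCount == 0,
 * disables it.
 */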
135
136 static bool
137 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
138 {
139 if (info->bo_metadata) {
140 if (device->physical_device->rad_info.chip_class >= GFX9)
141 return info->bo_metadata->u.gfx9.scanout;
142 else
143 return info->bo_metadata->u.legacy.scanout;
144 }
145
146 return info->scanout;
147 }
148
149 static bool
150 radv_use_dcc_for_image(struct radv_device *device,
151 const struct radv_image *image,
152 const VkImageCreateInfo *pCreateInfo,
153 VkFormat format)
154 {
155 bool dcc_compatible_formats;
156 bool blendable;
157
158 /* DCC (Delta Color Compression) is only available for GFX8+. */
159 if (device->physical_device->rad_info.chip_class < GFX8)
160 return false;
161
162 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
163 return false;
164
165 if (image->shareable)
166 return false;
167
168 /* TODO: Enable DCC for storage images. */
169 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
170 return false;
171
172 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
173 return false;
174
175 if (vk_format_is_subsampled(format) ||
176 vk_format_get_plane_count(format) > 1)
177 return false;
178
179 /* TODO: Enable DCC for mipmaps on GFX9+. */
180 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
181 device->physical_device->rad_info.chip_class >= GFX9)
182 return false;
183
184 /* Do not enable DCC for mipmapped arrays because performance is worse. */
185 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
186 return false;
187
188 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
189 * 2x can be enabled with an option.
190 */
191 if (pCreateInfo->samples > 2 ||
192 (pCreateInfo->samples == 2 &&
193 !device->physical_device->dcc_msaa_allowed))
194 return false;
195
196 /* Determine if the formats are DCC compatible. */
197 dcc_compatible_formats =
198 radv_is_colorbuffer_format_supported(format,
199 &blendable);
200
201 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
202 const struct VkImageFormatListCreateInfo *format_list =
203 (const struct VkImageFormatListCreateInfo *)
204 vk_find_struct_const(pCreateInfo->pNext,
205 IMAGE_FORMAT_LIST_CREATE_INFO);
206
207 /* We have to ignore the existence of the list if viewFormatCount = 0 */
208 if (format_list && format_list->viewFormatCount) {
209 /* compatibility is transitive, so we only need to check
210 * one format with everything else. */
211 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
212 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
213 continue;
214
215 if (!radv_dcc_formats_compatible(format,
216 format_list->pViewFormats[i]))
217 dcc_compatible_formats = false;
218 }
219 } else {
220 dcc_compatible_formats = false;
221 }
222 }
223
224 if (!dcc_compatible_formats)
225 return false;
226
227 return true;
228 }
229
230 static bool
231 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
232 struct radv_image *image)
233 {
234 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
235 return false;
236
237 /* TC-compat CMASK is only available for GFX8+. */
238 if (device->physical_device->rad_info.chip_class < GFX8)
239 return false;
240
241 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
242 return false;
243
244 if (radv_image_has_dcc(image))
245 return false;
246
247 if (!radv_image_has_cmask(image))
248 return false;
249
250 return true;
251 }
252
253 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
254 {
255 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
256 }
257
258 static bool
259 radv_is_valid_opaque_metadata(const struct radv_device *device,
260 const struct radeon_bo_metadata *md)
261 {
262 if (md->metadata[0] != 1 ||
263 md->metadata[1] != si_get_bo_metadata_word1(device))
264 return false;
265
266 if (md->size_metadata < 40)
267 return false;
268
269 return true;
270 }
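/* Minimal sketch of an exporter filling the two words validated above;
 * the helper name is hypothetical and only for illustration.
 */
#if 0
static void example_fill_metadata_header(const struct radv_device *device,
                                         struct radeon_bo_metadata *md)
{
	md->metadata[0] = 1; /* metadata format version */
	md->metadata[1] = si_get_bo_metadata_word1(device); /* vendor/PCI id */
	md->size_metadata = 40; /* smallest size the check above accepts */
}
#endif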
271
272 static void
273 radv_patch_surface_from_metadata(struct radv_device *device,
274 struct radeon_surf *surface,
275 const struct radeon_bo_metadata *md)
276 {
277 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
278
279 if (device->physical_device->rad_info.chip_class >= GFX9) {
280 if (md->u.gfx9.swizzle_mode > 0)
281 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
282 else
283 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
284
285 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
286 } else {
287 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
288 surface->u.legacy.bankw = md->u.legacy.bankw;
289 surface->u.legacy.bankh = md->u.legacy.bankh;
290 surface->u.legacy.tile_split = md->u.legacy.tile_split;
291 surface->u.legacy.mtilea = md->u.legacy.mtilea;
292 surface->u.legacy.num_banks = md->u.legacy.num_banks;
293
294 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
295 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
296 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
297 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
298 else
299 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
300
301 }
302 }
303
304 static VkResult
305 radv_patch_image_dimensions(struct radv_device *device,
306 struct radv_image *image,
307 const struct radv_image_create_info *create_info,
308 struct ac_surf_info *image_info)
309 {
310 unsigned width = image->info.width;
311 unsigned height = image->info.height;
312
313 /*
314 	 * minigbm sometimes allocates bigger images, which results in
315 	 * weird strides and other properties. Let's be lenient where possible
316 	 * and fail on GFX10 (where we cannot cope).
317 *
318 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
319 */
320 if (create_info->bo_metadata &&
321 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
322 const struct radeon_bo_metadata *md = create_info->bo_metadata;
323
324 if (device->physical_device->rad_info.chip_class >= GFX10) {
325 width = G_00A004_WIDTH_LO(md->metadata[3]) +
326 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
327 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
328 } else {
329 width = G_008F18_WIDTH(md->metadata[4]) + 1;
330 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
331 }
332 }
333
334 if (image->info.width == width && image->info.height == height)
335 return VK_SUCCESS;
336
337 if (width < image->info.width || height < image->info.height) {
338 fprintf(stderr,
339 "The imported image has smaller dimensions than the internal\n"
340 "dimensions. Using it is going to fail badly, so we reject\n"
341 "this import.\n"
342 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
343 image->info.width, image->info.height, width, height);
344 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
345 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
346 fprintf(stderr,
347 "Tried to import an image with inconsistent width on GFX10.\n"
348 "As GFX10 has no separate stride fields we cannot cope with\n"
349 "an inconsistency in width and will fail this import.\n"
350 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
351 image->info.width, image->info.height, width, height);
352 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
353 } else {
354 fprintf(stderr,
355 "Tried to import an image with inconsistent width on pre-GFX10.\n"
356 "As GFX10 has no separate stride fields we cannot cope with\n"
357 "an inconsistency and would fail on GFX10.\n"
358 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
359 image->info.width, image->info.height, width, height);
360 }
361 image_info->width = width;
362 image_info->height = height;
363
364 return VK_SUCCESS;
365 }
366
367 static VkResult
368 radv_patch_image_from_extra_info(struct radv_device *device,
369 struct radv_image *image,
370 const struct radv_image_create_info *create_info,
371 struct ac_surf_info *image_info)
372 {
373 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
374 if (result != VK_SUCCESS)
375 return result;
376
377 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
378 if (create_info->bo_metadata) {
379 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
380 create_info->bo_metadata);
381 }
382
383 if (radv_surface_has_scanout(device, create_info)) {
384 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
385 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
386
387 image->info.surf_index = NULL;
388 }
389 }
390 return VK_SUCCESS;
391 }
392
393 static int
394 radv_init_surface(struct radv_device *device,
395 const struct radv_image *image,
396 struct radeon_surf *surface,
397 unsigned plane_id,
398 const VkImageCreateInfo *pCreateInfo,
399 VkFormat image_format)
400 {
401 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
402 VkFormat format = vk_format_get_plane_format(image_format, plane_id);
403 const struct vk_format_description *desc = vk_format_description(format);
404 bool is_depth, is_stencil;
405
406 is_depth = vk_format_has_depth(desc);
407 is_stencil = vk_format_has_stencil(desc);
408
409 surface->blk_w = vk_format_get_blockwidth(format);
410 surface->blk_h = vk_format_get_blockheight(format);
411
412 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
413 	/* align bytes per element to a dword */
414 if (surface->bpe == 3) {
415 surface->bpe = 4;
416 }
417
418 surface->flags = RADEON_SURF_SET(array_mode, MODE);
419
420 	switch (pCreateInfo->imageType) {
421 case VK_IMAGE_TYPE_1D:
422 if (pCreateInfo->arrayLayers > 1)
423 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
424 else
425 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
426 break;
427 case VK_IMAGE_TYPE_2D:
428 if (pCreateInfo->arrayLayers > 1)
429 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
430 else
431 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
432 break;
433 case VK_IMAGE_TYPE_3D:
434 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
435 break;
436 default:
437 unreachable("unhandled image type");
438 }
439
440 if (is_depth)
441 surface->flags |= RADEON_SURF_ZBUFFER;
442
443 if (is_stencil)
444 surface->flags |= RADEON_SURF_SBUFFER;
445
446 if (device->physical_device->rad_info.chip_class >= GFX9 &&
447 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
448 vk_format_get_blocksizebits(image_format) == 128 &&
449 vk_format_is_compressed(image_format))
450 surface->flags |= RADEON_SURF_NO_RENDER_TARGET;
451
452 if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
453 surface->flags |= RADEON_SURF_DISABLE_DCC;
454
455 return 0;
456 }
457
458 static inline unsigned
459 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
460 {
461 if (stencil)
462 return plane->surface.u.legacy.stencil_tiling_index[level];
463 else
464 return plane->surface.u.legacy.tiling_index[level];
465 }
466
467 static unsigned radv_map_swizzle(unsigned swizzle)
468 {
469 switch (swizzle) {
470 case VK_SWIZZLE_Y:
471 return V_008F0C_SQ_SEL_Y;
472 case VK_SWIZZLE_Z:
473 return V_008F0C_SQ_SEL_Z;
474 case VK_SWIZZLE_W:
475 return V_008F0C_SQ_SEL_W;
476 case VK_SWIZZLE_0:
477 return V_008F0C_SQ_SEL_0;
478 case VK_SWIZZLE_1:
479 return V_008F0C_SQ_SEL_1;
480 default: /* VK_SWIZZLE_X */
481 return V_008F0C_SQ_SEL_X;
482 }
483 }
484
485 static void
486 radv_make_buffer_descriptor(struct radv_device *device,
487 struct radv_buffer *buffer,
488 VkFormat vk_format,
489 unsigned offset,
490 unsigned range,
491 uint32_t *state)
492 {
493 const struct vk_format_description *desc;
494 unsigned stride;
495 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
496 uint64_t va = gpu_address + buffer->offset;
497 unsigned num_format, data_format;
498 int first_non_void;
499 desc = vk_format_description(vk_format);
500 first_non_void = vk_format_get_first_non_void_channel(vk_format);
501 stride = desc->block.bits / 8;
502
503 va += offset;
504 state[0] = va;
505 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
506 S_008F04_STRIDE(stride);
507
508 if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
509 range /= stride;
510 }
511
512 state[2] = range;
513 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
514 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
515 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
516 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
517
518 if (device->physical_device->rad_info.chip_class >= GFX10) {
519 const struct gfx10_format *fmt = &gfx10_format_table[vk_format];
520
521 /* OOB_SELECT chooses the out-of-bounds check:
522 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
523 * - 1: index >= NUM_RECORDS
524 * - 2: NUM_RECORDS == 0
525 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
526 * else: swizzle_address >= NUM_RECORDS
527 */
528 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
529 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
530 S_008F0C_RESOURCE_LEVEL(1);
531 } else {
532 num_format = radv_translate_buffer_numformat(desc, first_non_void);
533 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
534
535 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
536 assert(num_format != ~0);
537
538 state[3] |= S_008F0C_NUM_FORMAT(num_format) |
539 S_008F0C_DATA_FORMAT(data_format);
540 }
541 }
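/* Hedged usage sketch (not part of the driver; the helper name and sizes
 * below are assumptions): building the 4-dword descriptor for a tightly
 * packed R32G32B32A32_SFLOAT buffer. Note that on non-GFX8 chips the
 * function above converts the byte range into a record count by dividing
 * by the 16-byte stride.
 */
#if 0
static void example_make_buffer_descriptor(struct radv_device *device,
                                           struct radv_buffer *buffer)
{
	uint32_t desc[4];

	radv_make_buffer_descriptor(device, buffer,
	                            VK_FORMAT_R32G32B32A32_SFLOAT,
	                            0 /* offset */,
	                            64 * 16 /* range: 64 records */,
	                            desc);
}
#endif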
542
543 static void
544 si_set_mutable_tex_desc_fields(struct radv_device *device,
545 struct radv_image *image,
546 const struct legacy_surf_level *base_level_info,
547 unsigned plane_id,
548 unsigned base_level, unsigned first_level,
549 unsigned block_width, bool is_stencil,
550 bool is_storage_image, bool disable_compression,
551 uint32_t *state)
552 {
553 struct radv_image_plane *plane = &image->planes[plane_id];
554 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
555 uint64_t va = gpu_address + plane->offset;
556 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
557 uint64_t meta_va = 0;
558 if (chip_class >= GFX9) {
559 if (is_stencil)
560 va += plane->surface.u.gfx9.stencil_offset;
561 else
562 va += plane->surface.u.gfx9.surf_offset;
563 } else
564 va += base_level_info->offset;
565
566 state[0] = va >> 8;
567 if (chip_class >= GFX9 ||
568 base_level_info->mode == RADEON_SURF_MODE_2D)
569 state[0] |= plane->surface.tile_swizzle;
570 state[1] &= C_008F14_BASE_ADDRESS_HI;
571 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
572
573 if (chip_class >= GFX8) {
574 state[6] &= C_008F28_COMPRESSION_EN;
575 state[7] = 0;
576 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
577 meta_va = gpu_address + image->dcc_offset;
578 if (chip_class <= GFX8)
579 meta_va += base_level_info->dcc_offset;
580
581 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
582 dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
583 meta_va |= dcc_tile_swizzle;
584 } else if (!disable_compression &&
585 radv_image_is_tc_compat_htile(image)) {
586 meta_va = gpu_address + image->htile_offset;
587 }
588
589 if (meta_va) {
590 state[6] |= S_008F28_COMPRESSION_EN(1);
591 if (chip_class <= GFX9)
592 state[7] = meta_va >> 8;
593 }
594 }
595
596 if (chip_class >= GFX10) {
597 state[3] &= C_00A00C_SW_MODE;
598
599 if (is_stencil) {
600 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
601 } else {
602 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
603 }
604
605 state[6] &= C_00A018_META_DATA_ADDRESS_LO &
606 C_00A018_META_PIPE_ALIGNED;
607
608 if (meta_va) {
609 struct gfx9_surf_meta_flags meta;
610
611 if (image->dcc_offset)
612 meta = plane->surface.u.gfx9.dcc;
613 else
614 meta = plane->surface.u.gfx9.htile;
615
616 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
617 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
618 }
619
620 state[7] = meta_va >> 16;
621 } else if (chip_class == GFX9) {
622 state[3] &= C_008F1C_SW_MODE;
623 state[4] &= C_008F20_PITCH;
624
625 if (is_stencil) {
626 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
627 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
628 } else {
629 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
630 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
631 }
632
633 state[5] &= C_008F24_META_DATA_ADDRESS &
634 C_008F24_META_PIPE_ALIGNED &
635 C_008F24_META_RB_ALIGNED;
636 if (meta_va) {
637 struct gfx9_surf_meta_flags meta;
638
639 if (image->dcc_offset)
640 meta = plane->surface.u.gfx9.dcc;
641 else
642 meta = plane->surface.u.gfx9.htile;
643
644 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
645 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
646 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
647 }
648 } else {
649 /* GFX6-GFX8 */
650 unsigned pitch = base_level_info->nblk_x * block_width;
651 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
652
653 state[3] &= C_008F1C_TILING_INDEX;
654 state[3] |= S_008F1C_TILING_INDEX(index);
655 state[4] &= C_008F20_PITCH;
656 state[4] |= S_008F20_PITCH(pitch - 1);
657 }
658 }
659
660 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
661 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
662 {
663 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
664 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
665
666 /* GFX9 allocates 1D textures as 2D. */
667 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
668 image_type = VK_IMAGE_TYPE_2D;
669 switch (image_type) {
670 case VK_IMAGE_TYPE_1D:
671 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
672 case VK_IMAGE_TYPE_2D:
673 if (nr_samples > 1)
674 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
675 else
676 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
677 case VK_IMAGE_TYPE_3D:
678 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
679 return V_008F1C_SQ_RSRC_IMG_3D;
680 else
681 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
682 default:
683 unreachable("illegal image type");
684 }
685 }
686
687 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
688 {
689 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
690
691 if (swizzle[3] == VK_SWIZZLE_X) {
692 /* For the pre-defined border color values (white, opaque
693 * black, transparent black), the only thing that matters is
694 * that the alpha channel winds up in the correct place
695 * (because the RGB channels are all the same) so either of
696 * these enumerations will work.
697 */
698 if (swizzle[2] == VK_SWIZZLE_Y)
699 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
700 else
701 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
702 } else if (swizzle[0] == VK_SWIZZLE_X) {
703 if (swizzle[1] == VK_SWIZZLE_Y)
704 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
705 else
706 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
707 } else if (swizzle[1] == VK_SWIZZLE_X) {
708 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
709 } else if (swizzle[2] == VK_SWIZZLE_X) {
710 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
711 }
712
713 return bc_swizzle;
714 }
715
716 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
717 {
718 const struct vk_format_description *desc = vk_format_description(format);
719
720 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
721 return desc->swizzle[3] == VK_SWIZZLE_X;
722
723 return radv_translate_colorswap(format, false) <= 1;
724 }
725 /**
726 * Build the sampler view descriptor for a texture (GFX10).
727 */
728 static void
729 gfx10_make_texture_descriptor(struct radv_device *device,
730 struct radv_image *image,
731 bool is_storage_image,
732 VkImageViewType view_type,
733 VkFormat vk_format,
734 const VkComponentMapping *mapping,
735 unsigned first_level, unsigned last_level,
736 unsigned first_layer, unsigned last_layer,
737 unsigned width, unsigned height, unsigned depth,
738 uint32_t *state,
739 uint32_t *fmask_state)
740 {
741 const struct vk_format_description *desc;
742 enum vk_swizzle swizzle[4];
743 unsigned img_format;
744 unsigned type;
745
746 desc = vk_format_description(vk_format);
747 img_format = gfx10_format_table[vk_format].img_format;
748
749 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
750 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
751 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
752 } else {
753 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
754 }
755
756 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
757 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
758 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
759 height = 1;
760 depth = image->info.array_size;
761 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
762 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
763 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
764 depth = image->info.array_size;
765 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
766 depth = image->info.array_size / 6;
767
768 state[0] = 0;
769 state[1] = S_00A004_FORMAT(img_format) |
770 S_00A004_WIDTH_LO(width - 1);
771 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
772 S_00A008_HEIGHT(height - 1) |
773 S_00A008_RESOURCE_LEVEL(1);
774 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
775 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
776 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
777 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
778 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
779 0 : first_level) |
780 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
781 util_logbase2(image->info.samples) :
782 last_level) |
783 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
784 S_00A00C_TYPE(type);
785 	/* Depth is the last accessible layer on GFX9+. The hw doesn't need
786 * to know the total number of layers.
787 */
788 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
789 S_00A010_BASE_ARRAY(first_layer);
790 state[5] = S_00A014_ARRAY_PITCH(0) |
791 S_00A014_MAX_MIP(image->info.samples > 1 ?
792 util_logbase2(image->info.samples) :
793 image->info.levels - 1) |
794 S_00A014_PERF_MOD(4);
795 state[6] = 0;
796 state[7] = 0;
797
798 if (radv_dcc_enabled(image, first_level)) {
799 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
800 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
801 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
802 }
803
804 /* Initialize the sampler view for FMASK. */
805 if (radv_image_has_fmask(image)) {
806 uint64_t gpu_address = radv_buffer_get_va(image->bo);
807 uint32_t format;
808 uint64_t va;
809
810 assert(image->plane_count == 1);
811
812 va = gpu_address + image->offset + image->fmask_offset;
813
814 switch (image->info.samples) {
815 case 2:
816 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
817 break;
818 case 4:
819 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
820 break;
821 case 8:
822 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
823 break;
824 default:
825 unreachable("invalid nr_samples");
826 }
827
828 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
829 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
830 S_00A004_FORMAT(format) |
831 S_00A004_WIDTH_LO(width - 1);
832 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
833 S_00A008_HEIGHT(height - 1) |
834 S_00A008_RESOURCE_LEVEL(1);
835 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
836 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
837 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
838 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
839 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
840 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
841 fmask_state[4] = S_00A010_DEPTH(last_layer) |
842 S_00A010_BASE_ARRAY(first_layer);
843 fmask_state[5] = 0;
844 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
845 fmask_state[7] = 0;
846 } else if (fmask_state)
847 memset(fmask_state, 0, 8 * 4);
848 }
849
850 /**
851 * Build the sampler view descriptor for a texture (SI-GFX9)
852 */
853 static void
854 si_make_texture_descriptor(struct radv_device *device,
855 struct radv_image *image,
856 bool is_storage_image,
857 VkImageViewType view_type,
858 VkFormat vk_format,
859 const VkComponentMapping *mapping,
860 unsigned first_level, unsigned last_level,
861 unsigned first_layer, unsigned last_layer,
862 unsigned width, unsigned height, unsigned depth,
863 uint32_t *state,
864 uint32_t *fmask_state)
865 {
866 const struct vk_format_description *desc;
867 enum vk_swizzle swizzle[4];
868 int first_non_void;
869 unsigned num_format, data_format, type;
870
871 desc = vk_format_description(vk_format);
872
873 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
874 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
875 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
876 } else {
877 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
878 }
879
880 first_non_void = vk_format_get_first_non_void_channel(vk_format);
881
882 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
883 if (num_format == ~0) {
884 num_format = 0;
885 }
886
887 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
888 if (data_format == ~0) {
889 data_format = 0;
890 }
891
892 /* S8 with either Z16 or Z32 HTILE need a special format. */
893 if (device->physical_device->rad_info.chip_class == GFX9 &&
894 vk_format == VK_FORMAT_S8_UINT &&
895 radv_image_is_tc_compat_htile(image)) {
896 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
897 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
898 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
899 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
900 }
901 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
902 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
903 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
904 height = 1;
905 depth = image->info.array_size;
906 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
907 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
908 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
909 depth = image->info.array_size;
910 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
911 depth = image->info.array_size / 6;
912
913 state[0] = 0;
914 state[1] = (S_008F14_DATA_FORMAT(data_format) |
915 S_008F14_NUM_FORMAT(num_format));
916 state[2] = (S_008F18_WIDTH(width - 1) |
917 S_008F18_HEIGHT(height - 1) |
918 S_008F18_PERF_MOD(4));
919 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
920 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
921 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
922 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
923 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
924 0 : first_level) |
925 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
926 util_logbase2(image->info.samples) :
927 last_level) |
928 S_008F1C_TYPE(type));
929 state[4] = 0;
930 state[5] = S_008F24_BASE_ARRAY(first_layer);
931 state[6] = 0;
932 state[7] = 0;
933
934 if (device->physical_device->rad_info.chip_class == GFX9) {
935 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
936
937 /* Depth is the last accessible layer on Gfx9.
938 * The hw doesn't need to know the total number of layers.
939 */
940 if (type == V_008F1C_SQ_RSRC_IMG_3D)
941 state[4] |= S_008F20_DEPTH(depth - 1);
942 else
943 state[4] |= S_008F20_DEPTH(last_layer);
944
945 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
946 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
947 util_logbase2(image->info.samples) :
948 image->info.levels - 1);
949 } else {
950 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
951 state[4] |= S_008F20_DEPTH(depth - 1);
952 state[5] |= S_008F24_LAST_ARRAY(last_layer);
953 }
954 if (image->dcc_offset) {
955 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
956 } else {
957 /* The last dword is unused by hw. The shader uses it to clear
958 * bits in the first dword of sampler state.
959 */
960 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
961 if (first_level == last_level)
962 state[7] = C_008F30_MAX_ANISO_RATIO;
963 else
964 state[7] = 0xffffffff;
965 }
966 }
967
968 /* Initialize the sampler view for FMASK. */
969 if (radv_image_has_fmask(image)) {
970 uint32_t fmask_format, num_format;
971 uint64_t gpu_address = radv_buffer_get_va(image->bo);
972 uint64_t va;
973
974 assert(image->plane_count == 1);
975
976 va = gpu_address + image->offset + image->fmask_offset;
977
978 if (device->physical_device->rad_info.chip_class == GFX9) {
979 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
980 switch (image->info.samples) {
981 case 2:
982 num_format = V_008F14_IMG_FMASK_8_2_2;
983 break;
984 case 4:
985 num_format = V_008F14_IMG_FMASK_8_4_4;
986 break;
987 case 8:
988 num_format = V_008F14_IMG_FMASK_32_8_8;
989 break;
990 default:
991 unreachable("invalid nr_samples");
992 }
993 } else {
994 switch (image->info.samples) {
995 case 2:
996 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
997 break;
998 case 4:
999 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1000 break;
1001 case 8:
1002 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1003 break;
1004 default:
1005 assert(0);
1006 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1007 }
1008 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1009 }
1010
1011 fmask_state[0] = va >> 8;
1012 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1013 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1014 S_008F14_DATA_FORMAT(fmask_format) |
1015 S_008F14_NUM_FORMAT(num_format);
1016 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1017 S_008F18_HEIGHT(height - 1);
1018 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1019 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1020 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1021 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1022 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1023 fmask_state[4] = 0;
1024 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1025 fmask_state[6] = 0;
1026 fmask_state[7] = 0;
1027
1028 if (device->physical_device->rad_info.chip_class == GFX9) {
1029 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1030 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1031 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1032 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
1033 S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
1034
1035 if (radv_image_is_tc_compat_cmask(image)) {
1036 va = gpu_address + image->offset + image->cmask_offset;
1037
1038 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1039 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1040 fmask_state[7] |= va >> 8;
1041 }
1042 } else {
1043 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1044 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1045 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1046 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1047
1048 if (radv_image_is_tc_compat_cmask(image)) {
1049 va = gpu_address + image->offset + image->cmask_offset;
1050
1051 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1052 fmask_state[7] |= va >> 8;
1053 }
1054 }
1055 } else if (fmask_state)
1056 memset(fmask_state, 0, 8 * 4);
1057 }
1058
1059 static void
1060 radv_make_texture_descriptor(struct radv_device *device,
1061 struct radv_image *image,
1062 bool is_storage_image,
1063 VkImageViewType view_type,
1064 VkFormat vk_format,
1065 const VkComponentMapping *mapping,
1066 unsigned first_level, unsigned last_level,
1067 unsigned first_layer, unsigned last_layer,
1068 unsigned width, unsigned height, unsigned depth,
1069 uint32_t *state,
1070 uint32_t *fmask_state)
1071 {
1072 if (device->physical_device->rad_info.chip_class >= GFX10) {
1073 gfx10_make_texture_descriptor(device, image, is_storage_image,
1074 view_type, vk_format, mapping,
1075 first_level, last_level,
1076 first_layer, last_layer,
1077 width, height, depth,
1078 state, fmask_state);
1079 } else {
1080 si_make_texture_descriptor(device, image, is_storage_image,
1081 view_type, vk_format, mapping,
1082 first_level, last_level,
1083 first_layer, last_layer,
1084 width, height, depth,
1085 state, fmask_state);
1086 }
1087 }
1088
1089 static void
1090 radv_query_opaque_metadata(struct radv_device *device,
1091 struct radv_image *image,
1092 struct radeon_bo_metadata *md)
1093 {
1094 static const VkComponentMapping fixedmapping;
1095 uint32_t desc[8], i;
1096
1097 assert(image->plane_count == 1);
1098
1099 	/* Metadata image format, version 1:
1100 * [0] = 1 (metadata format identifier)
1101 * [1] = (VENDOR_ID << 16) | PCI_ID
1102 * [2:9] = image descriptor for the whole resource
1103 * [2] is always 0, because the base address is cleared
1104 	 * [9] holds the DCC offset bits [39:8] from the beginning of
1105 * the buffer
1106 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1107 */
1108 md->metadata[0] = 1; /* metadata image format version 1 */
1109
1110 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1111 md->metadata[1] = si_get_bo_metadata_word1(device);
1112
1113
1114 radv_make_texture_descriptor(device, image, false,
1115 (VkImageViewType)image->type, image->vk_format,
1116 &fixedmapping, 0, image->info.levels - 1, 0,
1117 image->info.array_size - 1,
1118 image->info.width, image->info.height,
1119 image->info.depth,
1120 desc, NULL);
1121
1122 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1123 image->planes[0].surface.blk_w, false, false, false, desc);
1124
1125 /* Clear the base address and set the relative DCC offset. */
1126 desc[0] = 0;
1127 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1128 desc[7] = image->dcc_offset >> 8;
1129
1130 /* Dwords [2:9] contain the image descriptor. */
1131 memcpy(&md->metadata[2], desc, sizeof(desc));
1132
1133 /* Dwords [10:..] contain the mipmap level offsets. */
1134 if (device->physical_device->rad_info.chip_class <= GFX8) {
1135 for (i = 0; i <= image->info.levels - 1; i++)
1136 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1137 md->size_metadata = (11 + image->info.levels - 1) * 4;
1138 } else
1139 md->size_metadata = 10 * 4;
1140 }
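/* Worked example of the metadata produced above for a GFX8 image with
 * 3 mip levels (addresses are hypothetical):
 *
 *   metadata[0]      = 1                              format version
 *   metadata[1]      = (ATI_VENDOR_ID << 16) | pci_id
 *   metadata[2..9]   = image descriptor, base address cleared
 *   metadata[9]      = dcc_offset >> 8
 *   metadata[10..12] = level[0..2].offset >> 8
 *   size_metadata    = (11 + 3 - 1) * 4 = 52 bytes
 */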
1141
1142 void
1143 radv_init_metadata(struct radv_device *device,
1144 struct radv_image *image,
1145 struct radeon_bo_metadata *metadata)
1146 {
1147 struct radeon_surf *surface = &image->planes[0].surface;
1148
1149 memset(metadata, 0, sizeof(*metadata));
1150
1151 if (device->physical_device->rad_info.chip_class >= GFX9) {
1152 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1153 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1154 } else {
1155 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1156 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1157 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1158 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1159 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1160 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1161 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1162 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1163 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1164 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1165 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1166 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1167 }
1168 radv_query_opaque_metadata(device, image, metadata);
1169 }
1170
1171 void
1172 radv_image_override_offset_stride(struct radv_device *device,
1173 struct radv_image *image,
1174 uint64_t offset, uint32_t stride)
1175 {
1176 struct radeon_surf *surface = &image->planes[0].surface;
1177 unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;
1178
1179 if (device->physical_device->rad_info.chip_class >= GFX9) {
1180 if (stride) {
1181 surface->u.gfx9.surf_pitch = stride;
1182 surface->u.gfx9.surf_slice_size =
1183 (uint64_t)stride * surface->u.gfx9.surf_height * bpe;
1184 }
1185 surface->u.gfx9.surf_offset = offset;
1186 } else {
1187 surface->u.legacy.level[0].nblk_x = stride;
1188 surface->u.legacy.level[0].slice_size_dw =
1189 ((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;
1190
1191 if (offset) {
1192 for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
1193 surface->u.legacy.level[i].offset += offset;
1194 }
1195
1196 }
1197 }
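/* Worked example for the override above (hypothetical numbers): with
 * bpe = 4 and stride = 1024 texels, GFX9+ gets
 *   surf_pitch      = 1024
 *   surf_slice_size = 1024 * surf_height * 4 bytes
 * while GFX6-GFX8 store the stride in level[0].nblk_x and keep the slice
 * size in dwords, i.e. (1024 * nblk_y * 4) / 4.
 */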
1198
1199 static void
1200 radv_image_alloc_fmask(struct radv_device *device,
1201 struct radv_image *image)
1202 {
1203 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1204
1205 image->fmask_offset = align64(image->size, fmask_alignment);
1206 image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
1207 image->alignment = MAX2(image->alignment, fmask_alignment);
1208 }
1209
1210 static void
1211 radv_image_alloc_cmask(struct radv_device *device,
1212 struct radv_image *image)
1213 {
1214 unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
1215 unsigned cmask_size = image->planes[0].surface.cmask_size;
1216 uint32_t clear_value_size = 0;
1217
1218 if (!cmask_size)
1219 return;
1220
1221 assert(cmask_alignment);
1222
1223 image->cmask_offset = align64(image->size, cmask_alignment);
1224 /* + 8 for storing the clear values */
1225 if (!image->clear_value_offset) {
1226 image->clear_value_offset = image->cmask_offset + cmask_size;
1227 clear_value_size = 8;
1228 }
1229 image->size = image->cmask_offset + cmask_size + clear_value_size;
1230 image->alignment = MAX2(image->alignment, cmask_alignment);
1231 }
1232
1233 static void
1234 radv_image_alloc_dcc(struct radv_image *image)
1235 {
1236 assert(image->plane_count == 1);
1237
1238 image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
1239 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1240 image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
1241 image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
1242 image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
1243 image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
1244 image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
1245 }
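/* Layout illustration for the allocation above, assuming levels == 2
 * (each mip gets 8 bytes of clear value, 8 of FCE predicate and 8 of
 * DCC predicate, hence the "+ 24 ... for each mip"):
 *
 *   dcc_offset         = align64(size, dcc_alignment)
 *   clear_value_offset = dcc_offset + dcc_size
 *   fce_pred_offset    = clear_value_offset + 8 * 2
 *   dcc_pred_offset    = clear_value_offset + 16 * 2
 *   size               = dcc_offset + dcc_size + 24 * 2
 */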
1246
1247 static void
1248 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1249 {
1250 image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1251
1252 /* + 8 for storing the clear values */
1253 image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
1254 image->size = image->clear_value_offset + image->info.levels * 8;
1255 if (radv_image_is_tc_compat_htile(image) &&
1256 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1257 		/* Metadata for the TC-compatible HTILE hardware bug, which
1258 		 * has to be worked around by updating ZRANGE_PRECISION when doing
1259 * fast depth clears to 0.0f.
1260 */
1261 image->tc_compat_zrange_offset = image->size;
1262 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1263 }
1264 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1265 }
1266
1267 static inline bool
1268 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
1269 {
1270 if (image->info.samples <= 1 &&
1271 image->info.width * image->info.height <= 512 * 512) {
1272 /* Do not enable CMASK or DCC for small surfaces where the cost
1273 * of the eliminate pass can be higher than the benefit of fast
1274 * clear. RadeonSI does this, but the image threshold is
1275 * different.
1276 */
1277 return false;
1278 }
1279
1280 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
1281 (image->exclusive || image->queue_family_mask == 1);
1282 }
1283
1284 static inline bool
1285 radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
1286 {
1287 if (!radv_image_can_enable_dcc_or_cmask(image) ||
1288 !radv_image_has_dcc(image))
1289 return false;
1290
1291 /* On GFX8, DCC layers can be interleaved and it's currently only
1292 * enabled if slice size is equal to the per slice fast clear size
1293 * because the driver assumes that portions of multiple layers are
1294 * contiguous during fast clears.
1295 */
1296 if (image->info.array_size > 1) {
1297 const struct legacy_surf_level *surf_level =
1298 &image->planes[0].surface.u.legacy.level[0];
1299
1300 assert(device->physical_device->rad_info.chip_class == GFX8);
1301
1302 if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
1303 return false;
1304 }
1305
1306 return true;
1307 }
1308
1309 static inline bool
1310 radv_image_can_enable_cmask(struct radv_image *image)
1311 {
1312 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1313 		/* Do not enable CMASK (fast color clear) for non-MSAA images
1314 		 * with 128-bit formats because those are not supported; FMASK
1315 		 * might still be used.
1316 */
1317 return false;
1318 }
1319
1320 return radv_image_can_enable_dcc_or_cmask(image) &&
1321 image->info.levels == 1 &&
1322 image->info.depth == 1 &&
1323 !image->planes[0].surface.is_linear;
1324 }
1325
1326 static inline bool
1327 radv_image_can_enable_fmask(struct radv_image *image)
1328 {
1329 return image->info.samples > 1 &&
1330 image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
1331 }
1332
1333 static inline bool
1334 radv_image_can_enable_htile(struct radv_image *image)
1335 {
1336 return radv_image_has_htile(image) &&
1337 image->info.levels == 1 &&
1338 image->info.width * image->info.height >= 8 * 8;
1339 }
1340
1341 static void radv_image_disable_dcc(struct radv_image *image)
1342 {
1343 for (unsigned i = 0; i < image->plane_count; ++i)
1344 image->planes[i].surface.dcc_size = 0;
1345 }
1346
1347 static void radv_image_disable_htile(struct radv_image *image)
1348 {
1349 for (unsigned i = 0; i < image->plane_count; ++i)
1350 image->planes[i].surface.htile_size = 0;
1351
1352 image->tc_compatible_htile = false;
1353 }
1354
1355 VkResult
1356 radv_image_create_layout(struct radv_device *device,
1357 struct radv_image_create_info create_info,
1358 struct radv_image *image)
1359 {
1360 /* Check that we did not initialize things earlier */
1361 assert(!image->planes[0].surface.surf_size);
1362
1363 	/* Clear the pCreateInfo pointer so that issues in the delayed-layout
1364 	 * case are caught when we test the common internal path. */
1365 create_info.vk_info = NULL;
1366
1367 struct ac_surf_info image_info = image->info;
1368 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1369 if (result != VK_SUCCESS)
1370 return result;
1371
1372 image->size = 0;
1373 image->alignment = 1;
1374 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1375 struct ac_surf_info info = image_info;
1376
1377 if (plane) {
1378 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1379 assert(info.width % desc->width_divisor == 0);
1380 assert(info.height % desc->height_divisor == 0);
1381
1382 info.width /= desc->width_divisor;
1383 info.height /= desc->height_divisor;
1384 }
1385
1386 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1387
1388 image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
1389 image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
1390 image->alignment = image->planes[plane].surface.surf_alignment;
1391
1392 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1393 }
1394
1395 if (!create_info.no_metadata_planes) {
1396 /* Try to enable DCC first. */
1397 if (radv_image_can_enable_dcc(device, image)) {
1398 radv_image_alloc_dcc(image);
1399 if (image->info.samples > 1) {
1400 /* CMASK should be enabled because DCC fast
1401 * clear with MSAA needs it.
1402 */
1403 assert(radv_image_can_enable_cmask(image));
1404 radv_image_alloc_cmask(device, image);
1405 }
1406 } else {
1407 /* When DCC cannot be enabled, try CMASK. */
1408 radv_image_disable_dcc(image);
1409 if (radv_image_can_enable_cmask(image)) {
1410 radv_image_alloc_cmask(device, image);
1411 }
1412 }
1413
1414 /* Try to enable FMASK for multisampled images. */
1415 if (radv_image_can_enable_fmask(image)) {
1416 radv_image_alloc_fmask(device, image);
1417
1418 if (radv_use_tc_compat_cmask_for_image(device, image))
1419 image->tc_compatible_cmask = true;
1420 } else {
1421 /* Otherwise, try to enable HTILE for depth surfaces. */
1422 if (radv_image_can_enable_htile(image) &&
1423 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
1424 if (!image->planes[0].surface.tc_compatible_htile_allowed)
1425 image->tc_compatible_htile = false;
1426 radv_image_alloc_htile(device, image);
1427 } else {
1428 radv_image_disable_htile(image);
1429 }
1430 }
1431 } else {
1432 radv_image_disable_dcc(image);
1433 radv_image_disable_htile(image);
1434 }
1435
1436 assert(image->planes[0].surface.surf_size);
1437 return VK_SUCCESS;
1438 }
1439
1440 VkResult
1441 radv_image_create(VkDevice _device,
1442 const struct radv_image_create_info *create_info,
1443 const VkAllocationCallbacks* alloc,
1444 VkImage *pImage)
1445 {
1446 RADV_FROM_HANDLE(radv_device, device, _device);
1447 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1448 struct radv_image *image = NULL;
1449 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1450 pCreateInfo->format);
1451 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1452
1453 const unsigned plane_count = vk_format_get_plane_count(format);
1454 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1455
1456 radv_assert(pCreateInfo->mipLevels > 0);
1457 radv_assert(pCreateInfo->arrayLayers > 0);
1458 radv_assert(pCreateInfo->samples > 0);
1459 radv_assert(pCreateInfo->extent.width > 0);
1460 radv_assert(pCreateInfo->extent.height > 0);
1461 radv_assert(pCreateInfo->extent.depth > 0);
1462
1463 image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
1464 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1465 if (!image)
1466 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1467
1468 image->type = pCreateInfo->imageType;
1469 image->info.width = pCreateInfo->extent.width;
1470 image->info.height = pCreateInfo->extent.height;
1471 image->info.depth = pCreateInfo->extent.depth;
1472 image->info.samples = pCreateInfo->samples;
1473 image->info.storage_samples = pCreateInfo->samples;
1474 image->info.array_size = pCreateInfo->arrayLayers;
1475 image->info.levels = pCreateInfo->mipLevels;
1476 image->info.num_channels = vk_format_get_nr_components(format);
1477
1478 image->vk_format = format;
1479 image->tiling = pCreateInfo->tiling;
1480 image->usage = pCreateInfo->usage;
1481 image->flags = pCreateInfo->flags;
1482 image->plane_count = plane_count;
1483
1484 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1485 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1486 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1487 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1488 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1489 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1490 else
1491 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1492 }
1493
1494 const VkExternalMemoryImageCreateInfo *external_info =
1495 vk_find_struct_const(pCreateInfo->pNext,
1496 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
1497
1498 	image->shareable = external_info != NULL;
1499 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1500 image->info.surf_index = &device->image_mrt_offset_counter;
1501 }
1502
1503 image->tc_compatible_htile =
1504 radv_use_tc_compat_htile_for_image(device, create_info->vk_info,
1505 image->vk_format);
1506
1507 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1508 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
1509 }
1510
1511 bool delay_layout = external_info &&
1512 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1513
1514 if (delay_layout) {
1515 *pImage = radv_image_to_handle(image);
1516 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1517 return VK_SUCCESS;
1518 }
1519
1520 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1521 assert(result == VK_SUCCESS);
1522
1523 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1524 image->alignment = MAX2(image->alignment, 4096);
1525 image->size = align64(image->size, image->alignment);
1526 image->offset = 0;
1527
1528 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1529 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1530 if (!image->bo) {
1531 vk_free2(&device->alloc, alloc, image);
1532 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1533 }
1534 }
1535
1536 *pImage = radv_image_to_handle(image);
1537
1538 return VK_SUCCESS;
1539 }
1540
1541 static void
1542 radv_image_view_make_descriptor(struct radv_image_view *iview,
1543 struct radv_device *device,
1544 VkFormat vk_format,
1545 const VkComponentMapping *components,
1546 bool is_storage_image, bool disable_compression,
1547 unsigned plane_id, unsigned descriptor_plane_id)
1548 {
1549 struct radv_image *image = iview->image;
1550 struct radv_image_plane *plane = &image->planes[plane_id];
1551 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1552 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1553 uint32_t blk_w;
1554 union radv_descriptor *descriptor;
1555 uint32_t hw_level = 0;
1556
1557 if (is_storage_image) {
1558 descriptor = &iview->storage_descriptor;
1559 } else {
1560 descriptor = &iview->descriptor;
1561 }
1562
1563 assert(vk_format_get_plane_count(vk_format) == 1);
1564 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1565 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1566
1567 if (device->physical_device->rad_info.chip_class >= GFX9)
1568 hw_level = iview->base_mip;
1569 radv_make_texture_descriptor(device, image, is_storage_image,
1570 iview->type,
1571 vk_format,
1572 components,
1573 hw_level, hw_level + iview->level_count - 1,
1574 iview->base_layer,
1575 iview->base_layer + iview->layer_count - 1,
1576 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1577 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1578 iview->extent.depth,
1579 descriptor->plane_descriptors[descriptor_plane_id],
1580 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1581
1582 const struct legacy_surf_level *base_level_info = NULL;
1583 if (device->physical_device->rad_info.chip_class <= GFX9) {
1584 if (is_stencil)
1585 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1586 else
1587 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1588 }
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}

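/* Map a plane aspect to its plane index; all other aspects use plane 0. */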
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
	switch (mask) {
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return 1;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return 2;
	default:
		return 0;
	}
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
	switch (mask) {
	case VK_IMAGE_ASPECT_PLANE_0_BIT:
		return image->planes[0].format;
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return image->planes[1].format;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return image->planes[2].format;
	case VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_stencil_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT:
		return vk_format_depth_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_depth_only(image->vk_format);
	default:
		return image->vk_format;
	}
}

void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

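	/* GFX9+ descriptors are programmed with the level 0 size and the HW
	 * minifies per mip itself, so store the base extent; older chips take
	 * the already-minified size of the base mip level.
	 */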
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width = radv_minify(image->info.width, range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth = radv_minify(image->info.depth, range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *          Uncompressed pixels   Compressed block sizes (4x4)
		 *     mip0:     22 x 22                6 x 6
		 *     mip1:     11 x 11                3 x 3
		 *     mip2:      5 x  5                2 x 2
		 *     mip3:      2 x  2                1 x 1
		 *     mip4:      1 x  1                1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH
		 * and HEIGHT of the base level and the HW computes the block
		 * sizes down the mip-chain with straight-up divide-by-two
		 * integer math:
		 *     mip0:    6x6
		 *     mip1:    3x3
		 *     mip2:    1x1
		 *     mip3:    1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by computing the size of the base mip level in the
		 * view format's block units and scaling that back up to a
		 * level 0 size. Clamp the result between the computed extent
		 * and the padded surface size, so we don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

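	/* Create both a sampled and a storage variant of the descriptor for
	 * each plane covered by the view; the storage variant disables
	 * compression since image stores generally can't write through the
	 * compression metadata on these generations.
	 */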
	bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}

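/* Return true if HTILE stays compressed in the given layout, i.e. no depth
 * decompression is needed before accessing the image from that layout.
 */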
bool radv_layout_is_htile_compressed(const struct radv_image *image,
				     VkImageLayout layout,
				     bool in_render_loop,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image)) {
		if (layout == VK_IMAGE_LAYOUT_GENERAL &&
		    !in_render_loop &&
		    !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
			/* It should be safe to enable TC-compat HTILE with
			 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
			 * loop and if the image doesn't have the storage bit
			 * set. This improves performance for apps that use
			 * GENERAL for the main depth pass because it allows
			 * compression and reduces the number of
			 * decompressions from/to GENERAL.
			 */
			return true;
		}

		return layout != VK_IMAGE_LAYOUT_GENERAL;
	}

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
		layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
		(layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
		 queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

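/* Fast clears are only supported in COLOR_ATTACHMENT_OPTIMAL; the remaining
 * parameters are currently unused but keep the signature consistent with the
 * other layout helpers.
 */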
bool radv_layout_can_fast_clear(const struct radv_image *image,
				VkImageLayout layout,
				bool in_render_loop,
				unsigned queue_mask)
{
	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

bool radv_layout_dcc_compressed(const struct radv_device *device,
				const struct radv_image *image,
				VkImageLayout layout,
				bool in_render_loop,
				unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
		return false;

	return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

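/* Compute the mask of queue families that may access the image, given a
 * barrier's source/destination family and the family of the queue the
 * command buffer is submitted to.
 */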
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
	if (!image->exclusive)
		return image->queue_family_mask;
	if (family == VK_QUEUE_FAMILY_EXTERNAL ||
	    family == VK_QUEUE_FAMILY_FOREIGN_EXT)
		return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
	if (family == VK_QUEUE_FAMILY_IGNORED)
		return 1u << queue_family;
	return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
					       pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
		  const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (!image)
		return;

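	/* Only sparse images own their BO at this point; for everything else
	 * the BO belongs to the VkDeviceMemory it was bound to.
	 */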
	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(image->bo);

	if (image->owned_memory != VK_NULL_HANDLE)
		radv_FreeMemory(_device, image->owned_memory, pAllocator);

	vk_free2(&device->alloc, pAllocator, image);
}

void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
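		/* Mip levels of tiled surfaces are addressed by the HW on
		 * GFX9, so only linear surfaces carry meaningful per-level
		 * offsets here.
		 */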
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}

VkResult
radv_CreateImageView(VkDevice _device,
		     const VkImageViewCreateInfo *pCreateInfo,
		     const VkAllocationCallbacks *pAllocator,
		     VkImageView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_image_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (view == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_image_view_init(view, device, pCreateInfo, NULL);

	*pView = radv_image_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
		      const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image_view, iview, _iview);

	if (!iview)
		return;
	vk_free2(&device->alloc, pAllocator, iview);
}

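/* Initialize a buffer view: resolve VK_WHOLE_SIZE to the remaining buffer
 * size and build the texel buffer descriptor.
 */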
void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

	view->bo = buffer->bo;
	view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
		buffer->size - pCreateInfo->offset : pCreateInfo->range;
	view->vk_format = pCreateInfo->format;

	radv_make_buffer_descriptor(device, buffer, view->vk_format,
				    pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
		      const VkBufferViewCreateInfo *pCreateInfo,
		      const VkAllocationCallbacks *pAllocator,
		      VkBufferView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!view)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_buffer_view_init(view, device, pCreateInfo);

	*pView = radv_buffer_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
		       const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

	if (!view)
		return;

	vk_free2(&device->alloc, pAllocator, view);
}