radv: only enable TC-compat HTILE for images readable by a shader
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36
37 static unsigned
38 radv_choose_tiling(struct radv_device *device,
39 const VkImageCreateInfo *pCreateInfo,
40 VkFormat format)
41 {
42 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
43 assert(pCreateInfo->samples <= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED;
45 }
46
47 if (!vk_format_is_compressed(format) &&
48 !vk_format_is_depth_or_stencil(format)
49 && device->physical_device->rad_info.chip_class <= GFX8) {
50 /* this causes hangs in some VK CTS tests on GFX9. */
51 /* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
53 /* Only very thin and long 2D textures should benefit from
54 * linear_aligned. */
55 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED;
57 }
58
59 /* MSAA resources must be 2D tiled. */
60 if (pCreateInfo->samples > 1)
61 return RADEON_SURF_MODE_2D;
62
63 return RADEON_SURF_MODE_2D;
64 }
65
66 static bool
67 radv_use_tc_compat_htile_for_image(struct radv_device *device,
68 const VkImageCreateInfo *pCreateInfo,
69 VkFormat format)
70 {
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device->physical_device->rad_info.chip_class < GFX8)
73 return false;
74
75 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
76 return false;
77
78 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
79 return false;
80
81 if (pCreateInfo->mipLevels > 1)
82 return false;
83
84 /* Do not enable TC-compatible HTILE if the image isn't readable by a
85 * shader because no texture fetches will happen.
86 */
87 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
88 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
89 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
90 return false;
91
92 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
93 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
94 */
95 if (pCreateInfo->samples >= 2 &&
96 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
97 (format == VK_FORMAT_D32_SFLOAT &&
98 device->physical_device->rad_info.chip_class == GFX10)))
99 return false;
100
101 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
102 * supports 32-bit. Though, it's possible to enable TC-compat for
103 * 16-bit depth surfaces if no Z planes are compressed.
104 */
105 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
106 format != VK_FORMAT_D32_SFLOAT &&
107 format != VK_FORMAT_D16_UNORM)
108 return false;
109
110 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
111 const struct VkImageFormatListCreateInfo *format_list =
112 (const struct VkImageFormatListCreateInfo *)
113 vk_find_struct_const(pCreateInfo->pNext,
114 IMAGE_FORMAT_LIST_CREATE_INFO);
115
116 /* We have to ignore the existence of the list if viewFormatCount = 0 */
117 if (format_list && format_list->viewFormatCount) {
118 /* compatibility is transitive, so we only need to check
119 * one format with everything else.
120 */
121 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
122 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
123 continue;
124
125 if (format != format_list->pViewFormats[i])
126 return false;
127 }
128 } else {
129 return false;
130 }
131 }
132
133 return true;
134 }
135
136 static bool
137 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
138 {
139 if (info->bo_metadata) {
140 if (device->physical_device->rad_info.chip_class >= GFX9)
141 return info->bo_metadata->u.gfx9.scanout;
142 else
143 return info->bo_metadata->u.legacy.scanout;
144 }
145
146 return info->scanout;
147 }
148
149 static bool
150 radv_use_dcc_for_image(struct radv_device *device,
151 const struct radv_image *image,
152 const VkImageCreateInfo *pCreateInfo,
153 VkFormat format)
154 {
155 bool dcc_compatible_formats;
156 bool blendable;
157
158 /* DCC (Delta Color Compression) is only available for GFX8+. */
159 if (device->physical_device->rad_info.chip_class < GFX8)
160 return false;
161
162 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
163 return false;
164
165 if (image->shareable)
166 return false;
167
168 /* TODO: Enable DCC for storage images. */
169 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
170 return false;
171
172 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
173 return false;
174
175 if (vk_format_is_subsampled(format) ||
176 vk_format_get_plane_count(format) > 1)
177 return false;
178
179 /* TODO: Enable DCC for mipmaps on GFX9+. */
180 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
181 device->physical_device->rad_info.chip_class >= GFX9)
182 return false;
183
184 /* Do not enable DCC for mipmapped arrays because performance is worse. */
185 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
186 return false;
187
188 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
189 * 2x can be enabled with an option.
190 */
191 if (pCreateInfo->samples > 2 ||
192 (pCreateInfo->samples == 2 &&
193 !device->physical_device->dcc_msaa_allowed))
194 return false;
195
196 /* Determine if the formats are DCC compatible. */
197 dcc_compatible_formats =
198 radv_is_colorbuffer_format_supported(format,
199 &blendable);
200
201 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
202 const struct VkImageFormatListCreateInfo *format_list =
203 (const struct VkImageFormatListCreateInfo *)
204 vk_find_struct_const(pCreateInfo->pNext,
205 IMAGE_FORMAT_LIST_CREATE_INFO);
206
207 /* We have to ignore the existence of the list if viewFormatCount = 0 */
208 if (format_list && format_list->viewFormatCount) {
209 /* compatibility is transitive, so we only need to check
210 * one format with everything else. */
211 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
212 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
213 continue;
214
215 if (!radv_dcc_formats_compatible(format,
216 format_list->pViewFormats[i]))
217 dcc_compatible_formats = false;
218 }
219 } else {
220 dcc_compatible_formats = false;
221 }
222 }
223
224 if (!dcc_compatible_formats)
225 return false;
226
227 return true;
228 }
229
230 static bool
231 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
232 struct radv_image *image)
233 {
234 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
235 return false;
236
237 /* TC-compat CMASK is only available for GFX8+. */
238 if (device->physical_device->rad_info.chip_class < GFX8)
239 return false;
240
241 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
242 return false;
243
244 if (radv_image_has_dcc(image))
245 return false;
246
247 if (!radv_image_has_cmask(image))
248 return false;
249
250 return true;
251 }
252
253 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
254 {
255 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
256 }
257
258 static bool
259 radv_is_valid_opaque_metadata(const struct radv_device *device,
260 const struct radeon_bo_metadata *md)
261 {
262 if (md->metadata[0] != 1 ||
263 md->metadata[1] != si_get_bo_metadata_word1(device))
264 return false;
265
266 if (md->size_metadata < 40)
267 return false;
268
269 return true;
270 }
271
272 static void
273 radv_patch_surface_from_metadata(struct radv_device *device,
274 struct radeon_surf *surface,
275 const struct radeon_bo_metadata *md)
276 {
277 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
278
279 if (device->physical_device->rad_info.chip_class >= GFX9) {
280 if (md->u.gfx9.swizzle_mode > 0)
281 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
282 else
283 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
284
285 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
286 } else {
287 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
288 surface->u.legacy.bankw = md->u.legacy.bankw;
289 surface->u.legacy.bankh = md->u.legacy.bankh;
290 surface->u.legacy.tile_split = md->u.legacy.tile_split;
291 surface->u.legacy.mtilea = md->u.legacy.mtilea;
292 surface->u.legacy.num_banks = md->u.legacy.num_banks;
293
294 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
295 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
296 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
297 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
298 else
299 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
300
301 }
302 }
303
304 static VkResult
305 radv_patch_image_dimensions(struct radv_device *device,
306 struct radv_image *image,
307 const struct radv_image_create_info *create_info,
308 struct ac_surf_info *image_info)
309 {
310 unsigned width = image->info.width;
311 unsigned height = image->info.height;
312
313 /*
314 * minigbm sometimes allocates bigger images which is going to result in
315 * weird strides and other properties. Lets be lenient where possible and
316 * fail it on GFX10 (as we cannot cope there).
317 *
318 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
319 */
320 if (create_info->bo_metadata &&
321 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
322 const struct radeon_bo_metadata *md = create_info->bo_metadata;
323
324 if (device->physical_device->rad_info.chip_class >= GFX10) {
325 width = G_00A004_WIDTH_LO(md->metadata[3]) +
326 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
327 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
328 } else {
329 width = G_008F18_WIDTH(md->metadata[4]) + 1;
330 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
331 }
332 }
333
334 if (image->info.width == width && image->info.height == height)
335 return VK_SUCCESS;
336
337 if (width < image->info.width || height < image->info.height) {
338 fprintf(stderr,
339 "The imported image has smaller dimensions than the internal\n"
340 "dimensions. Using it is going to fail badly, so we reject\n"
341 "this import.\n"
342 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
343 image->info.width, image->info.height, width, height);
344 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
345 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
346 fprintf(stderr,
347 "Tried to import an image with inconsistent width on GFX10.\n"
348 "As GFX10 has no separate stride fields we cannot cope with\n"
349 "an inconsistency in width and will fail this import.\n"
350 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
351 image->info.width, image->info.height, width, height);
352 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
353 } else {
354 fprintf(stderr,
355 "Tried to import an image with inconsistent width on pre-GFX10.\n"
356 "As GFX10 has no separate stride fields we cannot cope with\n"
357 "an inconsistency and would fail on GFX10.\n"
358 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
359 image->info.width, image->info.height, width, height);
360 }
361 image_info->width = width;
362 image_info->height = height;
363
364 return VK_SUCCESS;
365 }
366
367 static VkResult
368 radv_patch_image_from_extra_info(struct radv_device *device,
369 struct radv_image *image,
370 const struct radv_image_create_info *create_info,
371 struct ac_surf_info *image_info)
372 {
373 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
374 if (result != VK_SUCCESS)
375 return result;
376
377 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
378 if (create_info->bo_metadata) {
379 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
380 create_info->bo_metadata);
381 }
382
383 if (radv_surface_has_scanout(device, create_info)) {
384 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
385 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
386
387 image->info.surf_index = NULL;
388 }
389 }
390 return VK_SUCCESS;
391 }
392
393 static int
394 radv_init_surface(struct radv_device *device,
395 const struct radv_image *image,
396 struct radeon_surf *surface,
397 unsigned plane_id,
398 const VkImageCreateInfo *pCreateInfo,
399 VkFormat image_format)
400 {
401 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
402 VkFormat format = vk_format_get_plane_format(image_format, plane_id);
403 const struct vk_format_description *desc = vk_format_description(format);
404 bool is_depth, is_stencil;
405
406 is_depth = vk_format_has_depth(desc);
407 is_stencil = vk_format_has_stencil(desc);
408
409 surface->blk_w = vk_format_get_blockwidth(format);
410 surface->blk_h = vk_format_get_blockheight(format);
411
412 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
413 /* align byte per element on dword */
414 if (surface->bpe == 3) {
415 surface->bpe = 4;
416 }
417
418 surface->flags = RADEON_SURF_SET(array_mode, MODE);
419
420 switch (pCreateInfo->imageType){
421 case VK_IMAGE_TYPE_1D:
422 if (pCreateInfo->arrayLayers > 1)
423 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
424 else
425 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
426 break;
427 case VK_IMAGE_TYPE_2D:
428 if (pCreateInfo->arrayLayers > 1)
429 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
430 else
431 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
432 break;
433 case VK_IMAGE_TYPE_3D:
434 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
435 break;
436 default:
437 unreachable("unhandled image type");
438 }
439
440 if (is_depth) {
441 surface->flags |= RADEON_SURF_ZBUFFER;
442 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
443 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
444 }
445
446 if (is_stencil)
447 surface->flags |= RADEON_SURF_SBUFFER;
448
449 if (device->physical_device->rad_info.chip_class >= GFX9 &&
450 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
451 vk_format_get_blocksizebits(image_format) == 128 &&
452 vk_format_is_compressed(image_format))
453 surface->flags |= RADEON_SURF_NO_RENDER_TARGET;
454
455 surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
456
457 if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
458 surface->flags |= RADEON_SURF_DISABLE_DCC;
459
460 return 0;
461 }
462
463 static inline unsigned
464 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
465 {
466 if (stencil)
467 return plane->surface.u.legacy.stencil_tiling_index[level];
468 else
469 return plane->surface.u.legacy.tiling_index[level];
470 }
471
472 static unsigned radv_map_swizzle(unsigned swizzle)
473 {
474 switch (swizzle) {
475 case VK_SWIZZLE_Y:
476 return V_008F0C_SQ_SEL_Y;
477 case VK_SWIZZLE_Z:
478 return V_008F0C_SQ_SEL_Z;
479 case VK_SWIZZLE_W:
480 return V_008F0C_SQ_SEL_W;
481 case VK_SWIZZLE_0:
482 return V_008F0C_SQ_SEL_0;
483 case VK_SWIZZLE_1:
484 return V_008F0C_SQ_SEL_1;
485 default: /* VK_SWIZZLE_X */
486 return V_008F0C_SQ_SEL_X;
487 }
488 }
489
490 static void
491 radv_make_buffer_descriptor(struct radv_device *device,
492 struct radv_buffer *buffer,
493 VkFormat vk_format,
494 unsigned offset,
495 unsigned range,
496 uint32_t *state)
497 {
498 const struct vk_format_description *desc;
499 unsigned stride;
500 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
501 uint64_t va = gpu_address + buffer->offset;
502 unsigned num_format, data_format;
503 int first_non_void;
504 desc = vk_format_description(vk_format);
505 first_non_void = vk_format_get_first_non_void_channel(vk_format);
506 stride = desc->block.bits / 8;
507
508 va += offset;
509 state[0] = va;
510 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
511 S_008F04_STRIDE(stride);
512
513 if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
514 range /= stride;
515 }
516
517 state[2] = range;
518 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
519 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
520 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
521 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
522
523 if (device->physical_device->rad_info.chip_class >= GFX10) {
524 const struct gfx10_format *fmt = &gfx10_format_table[vk_format];
525
526 /* OOB_SELECT chooses the out-of-bounds check:
527 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
528 * - 1: index >= NUM_RECORDS
529 * - 2: NUM_RECORDS == 0
530 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
531 * else: swizzle_address >= NUM_RECORDS
532 */
533 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
534 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
535 S_008F0C_RESOURCE_LEVEL(1);
536 } else {
537 num_format = radv_translate_buffer_numformat(desc, first_non_void);
538 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
539
540 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
541 assert(num_format != ~0);
542
543 state[3] |= S_008F0C_NUM_FORMAT(num_format) |
544 S_008F0C_DATA_FORMAT(data_format);
545 }
546 }
547
548 static void
549 si_set_mutable_tex_desc_fields(struct radv_device *device,
550 struct radv_image *image,
551 const struct legacy_surf_level *base_level_info,
552 unsigned plane_id,
553 unsigned base_level, unsigned first_level,
554 unsigned block_width, bool is_stencil,
555 bool is_storage_image, bool disable_compression,
556 uint32_t *state)
557 {
558 struct radv_image_plane *plane = &image->planes[plane_id];
559 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
560 uint64_t va = gpu_address + plane->offset;
561 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
562 uint64_t meta_va = 0;
563 if (chip_class >= GFX9) {
564 if (is_stencil)
565 va += plane->surface.u.gfx9.stencil_offset;
566 else
567 va += plane->surface.u.gfx9.surf_offset;
568 } else
569 va += base_level_info->offset;
570
571 state[0] = va >> 8;
572 if (chip_class >= GFX9 ||
573 base_level_info->mode == RADEON_SURF_MODE_2D)
574 state[0] |= plane->surface.tile_swizzle;
575 state[1] &= C_008F14_BASE_ADDRESS_HI;
576 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
577
578 if (chip_class >= GFX8) {
579 state[6] &= C_008F28_COMPRESSION_EN;
580 state[7] = 0;
581 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
582 meta_va = gpu_address + image->dcc_offset;
583 if (chip_class <= GFX8)
584 meta_va += base_level_info->dcc_offset;
585
586 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
587 dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
588 meta_va |= dcc_tile_swizzle;
589 } else if (!disable_compression &&
590 radv_image_is_tc_compat_htile(image)) {
591 meta_va = gpu_address + image->htile_offset;
592 }
593
594 if (meta_va) {
595 state[6] |= S_008F28_COMPRESSION_EN(1);
596 if (chip_class <= GFX9)
597 state[7] = meta_va >> 8;
598 }
599 }
600
601 if (chip_class >= GFX10) {
602 state[3] &= C_00A00C_SW_MODE;
603
604 if (is_stencil) {
605 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
606 } else {
607 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
608 }
609
610 state[6] &= C_00A018_META_DATA_ADDRESS_LO &
611 C_00A018_META_PIPE_ALIGNED;
612
613 if (meta_va) {
614 struct gfx9_surf_meta_flags meta;
615
616 if (image->dcc_offset)
617 meta = plane->surface.u.gfx9.dcc;
618 else
619 meta = plane->surface.u.gfx9.htile;
620
621 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
622 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
623 }
624
625 state[7] = meta_va >> 16;
626 } else if (chip_class == GFX9) {
627 state[3] &= C_008F1C_SW_MODE;
628 state[4] &= C_008F20_PITCH;
629
630 if (is_stencil) {
631 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
632 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
633 } else {
634 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
635 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
636 }
637
638 state[5] &= C_008F24_META_DATA_ADDRESS &
639 C_008F24_META_PIPE_ALIGNED &
640 C_008F24_META_RB_ALIGNED;
641 if (meta_va) {
642 struct gfx9_surf_meta_flags meta;
643
644 if (image->dcc_offset)
645 meta = plane->surface.u.gfx9.dcc;
646 else
647 meta = plane->surface.u.gfx9.htile;
648
649 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
650 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
651 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
652 }
653 } else {
654 /* GFX6-GFX8 */
655 unsigned pitch = base_level_info->nblk_x * block_width;
656 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
657
658 state[3] &= C_008F1C_TILING_INDEX;
659 state[3] |= S_008F1C_TILING_INDEX(index);
660 state[4] &= C_008F20_PITCH;
661 state[4] |= S_008F20_PITCH(pitch - 1);
662 }
663 }
664
665 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
666 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
667 {
668 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
669 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
670
671 /* GFX9 allocates 1D textures as 2D. */
672 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
673 image_type = VK_IMAGE_TYPE_2D;
674 switch (image_type) {
675 case VK_IMAGE_TYPE_1D:
676 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
677 case VK_IMAGE_TYPE_2D:
678 if (nr_samples > 1)
679 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
680 else
681 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
682 case VK_IMAGE_TYPE_3D:
683 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
684 return V_008F1C_SQ_RSRC_IMG_3D;
685 else
686 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
687 default:
688 unreachable("illegal image type");
689 }
690 }
691
692 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
693 {
694 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
695
696 if (swizzle[3] == VK_SWIZZLE_X) {
697 /* For the pre-defined border color values (white, opaque
698 * black, transparent black), the only thing that matters is
699 * that the alpha channel winds up in the correct place
700 * (because the RGB channels are all the same) so either of
701 * these enumerations will work.
702 */
703 if (swizzle[2] == VK_SWIZZLE_Y)
704 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
705 else
706 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
707 } else if (swizzle[0] == VK_SWIZZLE_X) {
708 if (swizzle[1] == VK_SWIZZLE_Y)
709 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
710 else
711 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
712 } else if (swizzle[1] == VK_SWIZZLE_X) {
713 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
714 } else if (swizzle[2] == VK_SWIZZLE_X) {
715 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
716 }
717
718 return bc_swizzle;
719 }
720
721 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
722 {
723 const struct vk_format_description *desc = vk_format_description(format);
724
725 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
726 return desc->swizzle[3] == VK_SWIZZLE_X;
727
728 return radv_translate_colorswap(format, false) <= 1;
729 }
730 /**
731 * Build the sampler view descriptor for a texture (GFX10).
732 */
733 static void
734 gfx10_make_texture_descriptor(struct radv_device *device,
735 struct radv_image *image,
736 bool is_storage_image,
737 VkImageViewType view_type,
738 VkFormat vk_format,
739 const VkComponentMapping *mapping,
740 unsigned first_level, unsigned last_level,
741 unsigned first_layer, unsigned last_layer,
742 unsigned width, unsigned height, unsigned depth,
743 uint32_t *state,
744 uint32_t *fmask_state)
745 {
746 const struct vk_format_description *desc;
747 enum vk_swizzle swizzle[4];
748 unsigned img_format;
749 unsigned type;
750
751 desc = vk_format_description(vk_format);
752 img_format = gfx10_format_table[vk_format].img_format;
753
754 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
755 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
756 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
757 } else {
758 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
759 }
760
761 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
762 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
763 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
764 height = 1;
765 depth = image->info.array_size;
766 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
767 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
768 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
769 depth = image->info.array_size;
770 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
771 depth = image->info.array_size / 6;
772
773 state[0] = 0;
774 state[1] = S_00A004_FORMAT(img_format) |
775 S_00A004_WIDTH_LO(width - 1);
776 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
777 S_00A008_HEIGHT(height - 1) |
778 S_00A008_RESOURCE_LEVEL(1);
779 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
780 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
781 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
782 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
783 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
784 0 : first_level) |
785 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
786 util_logbase2(image->info.samples) :
787 last_level) |
788 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
789 S_00A00C_TYPE(type);
790 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
791 * to know the total number of layers.
792 */
793 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
794 S_00A010_BASE_ARRAY(first_layer);
795 state[5] = S_00A014_ARRAY_PITCH(0) |
796 S_00A014_MAX_MIP(image->info.samples > 1 ?
797 util_logbase2(image->info.samples) :
798 image->info.levels - 1) |
799 S_00A014_PERF_MOD(4);
800 state[6] = 0;
801 state[7] = 0;
802
803 if (radv_dcc_enabled(image, first_level)) {
804 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
805 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
806 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
807 }
808
809 /* Initialize the sampler view for FMASK. */
810 if (radv_image_has_fmask(image)) {
811 uint64_t gpu_address = radv_buffer_get_va(image->bo);
812 uint32_t format;
813 uint64_t va;
814
815 assert(image->plane_count == 1);
816
817 va = gpu_address + image->offset + image->fmask_offset;
818
819 switch (image->info.samples) {
820 case 2:
821 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
822 break;
823 case 4:
824 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
825 break;
826 case 8:
827 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
828 break;
829 default:
830 unreachable("invalid nr_samples");
831 }
832
833 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
834 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
835 S_00A004_FORMAT(format) |
836 S_00A004_WIDTH_LO(width - 1);
837 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
838 S_00A008_HEIGHT(height - 1) |
839 S_00A008_RESOURCE_LEVEL(1);
840 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
841 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
842 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
843 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
844 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
845 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
846 fmask_state[4] = S_00A010_DEPTH(last_layer) |
847 S_00A010_BASE_ARRAY(first_layer);
848 fmask_state[5] = 0;
849 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
850 fmask_state[7] = 0;
851 } else if (fmask_state)
852 memset(fmask_state, 0, 8 * 4);
853 }
854
855 /**
856 * Build the sampler view descriptor for a texture (SI-GFX9)
857 */
858 static void
859 si_make_texture_descriptor(struct radv_device *device,
860 struct radv_image *image,
861 bool is_storage_image,
862 VkImageViewType view_type,
863 VkFormat vk_format,
864 const VkComponentMapping *mapping,
865 unsigned first_level, unsigned last_level,
866 unsigned first_layer, unsigned last_layer,
867 unsigned width, unsigned height, unsigned depth,
868 uint32_t *state,
869 uint32_t *fmask_state)
870 {
871 const struct vk_format_description *desc;
872 enum vk_swizzle swizzle[4];
873 int first_non_void;
874 unsigned num_format, data_format, type;
875
876 desc = vk_format_description(vk_format);
877
878 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
879 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
880 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
881 } else {
882 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
883 }
884
885 first_non_void = vk_format_get_first_non_void_channel(vk_format);
886
887 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
888 if (num_format == ~0) {
889 num_format = 0;
890 }
891
892 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
893 if (data_format == ~0) {
894 data_format = 0;
895 }
896
897 /* S8 with either Z16 or Z32 HTILE need a special format. */
898 if (device->physical_device->rad_info.chip_class == GFX9 &&
899 vk_format == VK_FORMAT_S8_UINT &&
900 radv_image_is_tc_compat_htile(image)) {
901 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
902 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
903 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
904 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
905 }
906 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
907 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
908 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
909 height = 1;
910 depth = image->info.array_size;
911 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
912 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
913 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
914 depth = image->info.array_size;
915 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
916 depth = image->info.array_size / 6;
917
918 state[0] = 0;
919 state[1] = (S_008F14_DATA_FORMAT(data_format) |
920 S_008F14_NUM_FORMAT(num_format));
921 state[2] = (S_008F18_WIDTH(width - 1) |
922 S_008F18_HEIGHT(height - 1) |
923 S_008F18_PERF_MOD(4));
924 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
925 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
926 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
927 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
928 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
929 0 : first_level) |
930 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
931 util_logbase2(image->info.samples) :
932 last_level) |
933 S_008F1C_TYPE(type));
934 state[4] = 0;
935 state[5] = S_008F24_BASE_ARRAY(first_layer);
936 state[6] = 0;
937 state[7] = 0;
938
939 if (device->physical_device->rad_info.chip_class == GFX9) {
940 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
941
942 /* Depth is the last accessible layer on Gfx9.
943 * The hw doesn't need to know the total number of layers.
944 */
945 if (type == V_008F1C_SQ_RSRC_IMG_3D)
946 state[4] |= S_008F20_DEPTH(depth - 1);
947 else
948 state[4] |= S_008F20_DEPTH(last_layer);
949
950 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
951 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
952 util_logbase2(image->info.samples) :
953 image->info.levels - 1);
954 } else {
955 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
956 state[4] |= S_008F20_DEPTH(depth - 1);
957 state[5] |= S_008F24_LAST_ARRAY(last_layer);
958 }
959 if (image->dcc_offset) {
960 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
961 } else {
962 /* The last dword is unused by hw. The shader uses it to clear
963 * bits in the first dword of sampler state.
964 */
965 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
966 if (first_level == last_level)
967 state[7] = C_008F30_MAX_ANISO_RATIO;
968 else
969 state[7] = 0xffffffff;
970 }
971 }
972
973 /* Initialize the sampler view for FMASK. */
974 if (radv_image_has_fmask(image)) {
975 uint32_t fmask_format, num_format;
976 uint64_t gpu_address = radv_buffer_get_va(image->bo);
977 uint64_t va;
978
979 assert(image->plane_count == 1);
980
981 va = gpu_address + image->offset + image->fmask_offset;
982
983 if (device->physical_device->rad_info.chip_class == GFX9) {
984 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
985 switch (image->info.samples) {
986 case 2:
987 num_format = V_008F14_IMG_FMASK_8_2_2;
988 break;
989 case 4:
990 num_format = V_008F14_IMG_FMASK_8_4_4;
991 break;
992 case 8:
993 num_format = V_008F14_IMG_FMASK_32_8_8;
994 break;
995 default:
996 unreachable("invalid nr_samples");
997 }
998 } else {
999 switch (image->info.samples) {
1000 case 2:
1001 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1002 break;
1003 case 4:
1004 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1005 break;
1006 case 8:
1007 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1008 break;
1009 default:
1010 assert(0);
1011 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1012 }
1013 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1014 }
1015
1016 fmask_state[0] = va >> 8;
1017 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1018 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1019 S_008F14_DATA_FORMAT(fmask_format) |
1020 S_008F14_NUM_FORMAT(num_format);
1021 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1022 S_008F18_HEIGHT(height - 1);
1023 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1024 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1025 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1026 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1027 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1028 fmask_state[4] = 0;
1029 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1030 fmask_state[6] = 0;
1031 fmask_state[7] = 0;
1032
1033 if (device->physical_device->rad_info.chip_class == GFX9) {
1034 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1035 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1036 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1037 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
1038 S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
1039
1040 if (radv_image_is_tc_compat_cmask(image)) {
1041 va = gpu_address + image->offset + image->cmask_offset;
1042
1043 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1044 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1045 fmask_state[7] |= va >> 8;
1046 }
1047 } else {
1048 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1049 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1050 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1051 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1052
1053 if (radv_image_is_tc_compat_cmask(image)) {
1054 va = gpu_address + image->offset + image->cmask_offset;
1055
1056 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1057 fmask_state[7] |= va >> 8;
1058 }
1059 }
1060 } else if (fmask_state)
1061 memset(fmask_state, 0, 8 * 4);
1062 }
1063
1064 static void
1065 radv_make_texture_descriptor(struct radv_device *device,
1066 struct radv_image *image,
1067 bool is_storage_image,
1068 VkImageViewType view_type,
1069 VkFormat vk_format,
1070 const VkComponentMapping *mapping,
1071 unsigned first_level, unsigned last_level,
1072 unsigned first_layer, unsigned last_layer,
1073 unsigned width, unsigned height, unsigned depth,
1074 uint32_t *state,
1075 uint32_t *fmask_state)
1076 {
1077 if (device->physical_device->rad_info.chip_class >= GFX10) {
1078 gfx10_make_texture_descriptor(device, image, is_storage_image,
1079 view_type, vk_format, mapping,
1080 first_level, last_level,
1081 first_layer, last_layer,
1082 width, height, depth,
1083 state, fmask_state);
1084 } else {
1085 si_make_texture_descriptor(device, image, is_storage_image,
1086 view_type, vk_format, mapping,
1087 first_level, last_level,
1088 first_layer, last_layer,
1089 width, height, depth,
1090 state, fmask_state);
1091 }
1092 }
1093
1094 static void
1095 radv_query_opaque_metadata(struct radv_device *device,
1096 struct radv_image *image,
1097 struct radeon_bo_metadata *md)
1098 {
1099 static const VkComponentMapping fixedmapping;
1100 uint32_t desc[8], i;
1101
1102 assert(image->plane_count == 1);
1103
1104 /* Metadata image format format version 1:
1105 * [0] = 1 (metadata format identifier)
1106 * [1] = (VENDOR_ID << 16) | PCI_ID
1107 * [2:9] = image descriptor for the whole resource
1108 * [2] is always 0, because the base address is cleared
1109 * [9] is the DCC offset bits [39:8] from the beginning of
1110 * the buffer
1111 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1112 */
1113 md->metadata[0] = 1; /* metadata image format version 1 */
1114
1115 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1116 md->metadata[1] = si_get_bo_metadata_word1(device);
1117
1118
1119 radv_make_texture_descriptor(device, image, false,
1120 (VkImageViewType)image->type, image->vk_format,
1121 &fixedmapping, 0, image->info.levels - 1, 0,
1122 image->info.array_size - 1,
1123 image->info.width, image->info.height,
1124 image->info.depth,
1125 desc, NULL);
1126
1127 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1128 image->planes[0].surface.blk_w, false, false, false, desc);
1129
1130 /* Clear the base address and set the relative DCC offset. */
1131 desc[0] = 0;
1132 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1133 desc[7] = image->dcc_offset >> 8;
1134
1135 /* Dwords [2:9] contain the image descriptor. */
1136 memcpy(&md->metadata[2], desc, sizeof(desc));
1137
1138 /* Dwords [10:..] contain the mipmap level offsets. */
1139 if (device->physical_device->rad_info.chip_class <= GFX8) {
1140 for (i = 0; i <= image->info.levels - 1; i++)
1141 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1142 md->size_metadata = (11 + image->info.levels - 1) * 4;
1143 } else
1144 md->size_metadata = 10 * 4;
1145 }
1146
1147 void
1148 radv_init_metadata(struct radv_device *device,
1149 struct radv_image *image,
1150 struct radeon_bo_metadata *metadata)
1151 {
1152 struct radeon_surf *surface = &image->planes[0].surface;
1153
1154 memset(metadata, 0, sizeof(*metadata));
1155
1156 if (device->physical_device->rad_info.chip_class >= GFX9) {
1157 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1158 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1159 } else {
1160 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1161 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1162 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1163 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1164 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1165 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1166 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1167 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1168 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1169 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1170 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1171 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1172 }
1173 radv_query_opaque_metadata(device, image, metadata);
1174 }
1175
1176 void
1177 radv_image_override_offset_stride(struct radv_device *device,
1178 struct radv_image *image,
1179 uint64_t offset, uint32_t stride)
1180 {
1181 struct radeon_surf *surface = &image->planes[0].surface;
1182 unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;
1183
1184 if (device->physical_device->rad_info.chip_class >= GFX9) {
1185 if (stride) {
1186 surface->u.gfx9.surf_pitch = stride;
1187 surface->u.gfx9.surf_slice_size =
1188 (uint64_t)stride * surface->u.gfx9.surf_height * bpe;
1189 }
1190 surface->u.gfx9.surf_offset = offset;
1191 } else {
1192 surface->u.legacy.level[0].nblk_x = stride;
1193 surface->u.legacy.level[0].slice_size_dw =
1194 ((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;
1195
1196 if (offset) {
1197 for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
1198 surface->u.legacy.level[i].offset += offset;
1199 }
1200
1201 }
1202 }
1203
1204 static void
1205 radv_image_alloc_fmask(struct radv_device *device,
1206 struct radv_image *image)
1207 {
1208 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1209
1210 image->fmask_offset = align64(image->size, fmask_alignment);
1211 image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
1212 image->alignment = MAX2(image->alignment, fmask_alignment);
1213 }
1214
1215 static void
1216 radv_image_alloc_cmask(struct radv_device *device,
1217 struct radv_image *image)
1218 {
1219 unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
1220 unsigned cmask_size = image->planes[0].surface.cmask_size;
1221 uint32_t clear_value_size = 0;
1222
1223 if (!cmask_size)
1224 return;
1225
1226 assert(cmask_alignment);
1227
1228 image->cmask_offset = align64(image->size, cmask_alignment);
1229 /* + 8 for storing the clear values */
1230 if (!image->clear_value_offset) {
1231 image->clear_value_offset = image->cmask_offset + cmask_size;
1232 clear_value_size = 8;
1233 }
1234 image->size = image->cmask_offset + cmask_size + clear_value_size;
1235 image->alignment = MAX2(image->alignment, cmask_alignment);
1236 }
1237
1238 static void
1239 radv_image_alloc_dcc(struct radv_image *image)
1240 {
1241 assert(image->plane_count == 1);
1242
1243 image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
1244 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1245 image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
1246 image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
1247 image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
1248 image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
1249 image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
1250 }
1251
1252 static void
1253 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1254 {
1255 image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1256
1257 /* + 8 for storing the clear values */
1258 image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
1259 image->size = image->clear_value_offset + image->info.levels * 8;
1260 if (radv_image_is_tc_compat_htile(image) &&
1261 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1262 /* Metadata for the TC-compatible HTILE hardware bug which
1263 * have to be fixed by updating ZRANGE_PRECISION when doing
1264 * fast depth clears to 0.0f.
1265 */
1266 image->tc_compat_zrange_offset = image->size;
1267 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1268 }
1269 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1270 }
1271
1272 static inline bool
1273 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
1274 {
1275 if (image->info.samples <= 1 &&
1276 image->info.width * image->info.height <= 512 * 512) {
1277 /* Do not enable CMASK or DCC for small surfaces where the cost
1278 * of the eliminate pass can be higher than the benefit of fast
1279 * clear. RadeonSI does this, but the image threshold is
1280 * different.
1281 */
1282 return false;
1283 }
1284
1285 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
1286 (image->exclusive || image->queue_family_mask == 1);
1287 }
1288
1289 static inline bool
1290 radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
1291 {
1292 if (!radv_image_can_enable_dcc_or_cmask(image) ||
1293 !radv_image_has_dcc(image))
1294 return false;
1295
1296 /* On GFX8, DCC layers can be interleaved and it's currently only
1297 * enabled if slice size is equal to the per slice fast clear size
1298 * because the driver assumes that portions of multiple layers are
1299 * contiguous during fast clears.
1300 */
1301 if (image->info.array_size > 1) {
1302 const struct legacy_surf_level *surf_level =
1303 &image->planes[0].surface.u.legacy.level[0];
1304
1305 assert(device->physical_device->rad_info.chip_class == GFX8);
1306
1307 if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
1308 return false;
1309 }
1310
1311 return true;
1312 }
1313
1314 static inline bool
1315 radv_image_can_enable_cmask(struct radv_image *image)
1316 {
1317 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1318 /* Do not enable CMASK for non-MSAA images (fast color clear)
1319 * because 128 bit formats are not supported, but FMASK might
1320 * still be used.
1321 */
1322 return false;
1323 }
1324
1325 return radv_image_can_enable_dcc_or_cmask(image) &&
1326 image->info.levels == 1 &&
1327 image->info.depth == 1 &&
1328 !image->planes[0].surface.is_linear;
1329 }
1330
1331 static inline bool
1332 radv_image_can_enable_fmask(struct radv_image *image)
1333 {
1334 return image->info.samples > 1 && vk_format_is_color(image->vk_format);
1335 }
1336
1337 static inline bool
1338 radv_image_can_enable_htile(struct radv_image *image)
1339 {
1340 return radv_image_has_htile(image) &&
1341 image->info.levels == 1 &&
1342 image->info.width * image->info.height >= 8 * 8;
1343 }
1344
1345 static void radv_image_disable_dcc(struct radv_image *image)
1346 {
1347 for (unsigned i = 0; i < image->plane_count; ++i)
1348 image->planes[i].surface.dcc_size = 0;
1349 }
1350
1351 static void radv_image_disable_htile(struct radv_image *image)
1352 {
1353 for (unsigned i = 0; i < image->plane_count; ++i)
1354 image->planes[i].surface.htile_size = 0;
1355 }
1356
1357 VkResult
1358 radv_image_create_layout(struct radv_device *device,
1359 struct radv_image_create_info create_info,
1360 struct radv_image *image)
1361 {
1362 /* Check that we did not initialize things earlier */
1363 assert(!image->planes[0].surface.surf_size);
1364
1365 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1366 * common internal case. */
1367 create_info.vk_info = NULL;
1368
1369 struct ac_surf_info image_info = image->info;
1370 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1371 if (result != VK_SUCCESS)
1372 return result;
1373
1374 image->size = 0;
1375 image->alignment = 1;
1376 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1377 struct ac_surf_info info = image_info;
1378
1379 if (plane) {
1380 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1381 assert(info.width % desc->width_divisor == 0);
1382 assert(info.height % desc->height_divisor == 0);
1383
1384 info.width /= desc->width_divisor;
1385 info.height /= desc->height_divisor;
1386 }
1387
1388 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1389
1390 image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
1391 image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
1392 image->alignment = image->planes[plane].surface.surf_alignment;
1393
1394 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1395 }
1396
1397 if (!create_info.no_metadata_planes) {
1398 /* Try to enable DCC first. */
1399 if (radv_image_can_enable_dcc(device, image)) {
1400 radv_image_alloc_dcc(image);
1401 if (image->info.samples > 1) {
1402 /* CMASK should be enabled because DCC fast
1403 * clear with MSAA needs it.
1404 */
1405 assert(radv_image_can_enable_cmask(image));
1406 radv_image_alloc_cmask(device, image);
1407 }
1408 } else {
1409 /* When DCC cannot be enabled, try CMASK. */
1410 radv_image_disable_dcc(image);
1411 if (radv_image_can_enable_cmask(image)) {
1412 radv_image_alloc_cmask(device, image);
1413 }
1414 }
1415
1416 /* Try to enable FMASK for multisampled images. */
1417 if (radv_image_can_enable_fmask(image)) {
1418 radv_image_alloc_fmask(device, image);
1419
1420 if (radv_use_tc_compat_cmask_for_image(device, image))
1421 image->tc_compatible_cmask = true;
1422 } else {
1423 /* Otherwise, try to enable HTILE for depth surfaces. */
1424 if (radv_image_can_enable_htile(image) &&
1425 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
1426 image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
1427 radv_image_alloc_htile(device, image);
1428 } else {
1429 radv_image_disable_htile(image);
1430 }
1431 }
1432 } else {
1433 radv_image_disable_dcc(image);
1434 radv_image_disable_htile(image);
1435 }
1436
1437 assert(image->planes[0].surface.surf_size);
1438 return VK_SUCCESS;
1439 }
1440
1441 VkResult
1442 radv_image_create(VkDevice _device,
1443 const struct radv_image_create_info *create_info,
1444 const VkAllocationCallbacks* alloc,
1445 VkImage *pImage)
1446 {
1447 RADV_FROM_HANDLE(radv_device, device, _device);
1448 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1449 struct radv_image *image = NULL;
1450 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1451 pCreateInfo->format);
1452 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1453
1454 const unsigned plane_count = vk_format_get_plane_count(format);
1455 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1456
1457 radv_assert(pCreateInfo->mipLevels > 0);
1458 radv_assert(pCreateInfo->arrayLayers > 0);
1459 radv_assert(pCreateInfo->samples > 0);
1460 radv_assert(pCreateInfo->extent.width > 0);
1461 radv_assert(pCreateInfo->extent.height > 0);
1462 radv_assert(pCreateInfo->extent.depth > 0);
1463
1464 image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
1465 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1466 if (!image)
1467 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1468
1469 image->type = pCreateInfo->imageType;
1470 image->info.width = pCreateInfo->extent.width;
1471 image->info.height = pCreateInfo->extent.height;
1472 image->info.depth = pCreateInfo->extent.depth;
1473 image->info.samples = pCreateInfo->samples;
1474 image->info.storage_samples = pCreateInfo->samples;
1475 image->info.array_size = pCreateInfo->arrayLayers;
1476 image->info.levels = pCreateInfo->mipLevels;
1477 image->info.num_channels = vk_format_get_nr_components(format);
1478
1479 image->vk_format = format;
1480 image->tiling = pCreateInfo->tiling;
1481 image->usage = pCreateInfo->usage;
1482 image->flags = pCreateInfo->flags;
1483 image->plane_count = plane_count;
1484
1485 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1486 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1487 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1488 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1489 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1490 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1491 else
1492 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1493 }
1494
1495 const VkExternalMemoryImageCreateInfo *external_info =
1496 vk_find_struct_const(pCreateInfo->pNext,
1497 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
1498
1499 image->shareable = external_info;
1500 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1501 image->info.surf_index = &device->image_mrt_offset_counter;
1502 }
1503
1504 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1505 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
1506 }
1507
1508 bool delay_layout = external_info &&
1509 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1510
1511 if (delay_layout) {
1512 *pImage = radv_image_to_handle(image);
1513 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1514 return VK_SUCCESS;
1515 }
1516
1517 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1518 assert(result == VK_SUCCESS);
1519
1520 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1521 image->alignment = MAX2(image->alignment, 4096);
1522 image->size = align64(image->size, image->alignment);
1523 image->offset = 0;
1524
1525 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1526 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1527 if (!image->bo) {
1528 vk_free2(&device->alloc, alloc, image);
1529 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1530 }
1531 }
1532
1533 *pImage = radv_image_to_handle(image);
1534
1535 return VK_SUCCESS;
1536 }
1537
1538 static void
1539 radv_image_view_make_descriptor(struct radv_image_view *iview,
1540 struct radv_device *device,
1541 VkFormat vk_format,
1542 const VkComponentMapping *components,
1543 bool is_storage_image, bool disable_compression,
1544 unsigned plane_id, unsigned descriptor_plane_id)
1545 {
1546 struct radv_image *image = iview->image;
1547 struct radv_image_plane *plane = &image->planes[plane_id];
1548 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1549 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1550 uint32_t blk_w;
1551 union radv_descriptor *descriptor;
1552 uint32_t hw_level = 0;
1553
1554 if (is_storage_image) {
1555 descriptor = &iview->storage_descriptor;
1556 } else {
1557 descriptor = &iview->descriptor;
1558 }
1559
1560 assert(vk_format_get_plane_count(vk_format) == 1);
1561 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1562 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1563
1564 if (device->physical_device->rad_info.chip_class >= GFX9)
1565 hw_level = iview->base_mip;
1566 radv_make_texture_descriptor(device, image, is_storage_image,
1567 iview->type,
1568 vk_format,
1569 components,
1570 hw_level, hw_level + iview->level_count - 1,
1571 iview->base_layer,
1572 iview->base_layer + iview->layer_count - 1,
1573 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1574 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1575 iview->extent.depth,
1576 descriptor->plane_descriptors[descriptor_plane_id],
1577 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1578
1579 const struct legacy_surf_level *base_level_info = NULL;
1580 if (device->physical_device->rad_info.chip_class <= GFX9) {
1581 if (is_stencil)
1582 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1583 else
1584 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1585 }
1586 si_set_mutable_tex_desc_fields(device, image,
1587 base_level_info,
1588 plane_id,
1589 iview->base_mip,
1590 iview->base_mip,
1591 blk_w, is_stencil, is_storage_image,
1592 is_storage_image || disable_compression,
1593 descriptor->plane_descriptors[descriptor_plane_id]);
1594 }
1595
1596 static unsigned
1597 radv_plane_from_aspect(VkImageAspectFlags mask)
1598 {
1599 switch(mask) {
1600 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1601 return 1;
1602 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1603 return 2;
1604 default:
1605 return 0;
1606 }
1607 }
1608
1609 VkFormat
1610 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1611 {
1612 switch(mask) {
1613 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1614 return image->planes[0].format;
1615 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1616 return image->planes[1].format;
1617 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1618 return image->planes[2].format;
1619 case VK_IMAGE_ASPECT_STENCIL_BIT:
1620 return vk_format_stencil_only(image->vk_format);
1621 case VK_IMAGE_ASPECT_DEPTH_BIT:
1622 return vk_format_depth_only(image->vk_format);
1623 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1624 return vk_format_depth_only(image->vk_format);
1625 default:
1626 return image->vk_format;
1627 }
1628 }
1629
1630 void
1631 radv_image_view_init(struct radv_image_view *iview,
1632 struct radv_device *device,
1633 const VkImageViewCreateInfo* pCreateInfo,
1634 const struct radv_image_view_extra_create_info* extra_create_info)
1635 {
1636 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1637 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1638
1639 switch (image->type) {
1640 case VK_IMAGE_TYPE_1D:
1641 case VK_IMAGE_TYPE_2D:
1642 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1643 break;
1644 case VK_IMAGE_TYPE_3D:
1645 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1646 <= radv_minify(image->info.depth, range->baseMipLevel));
1647 break;
1648 default:
1649 unreachable("bad VkImageType");
1650 }
1651 iview->image = image;
1652 iview->bo = image->bo;
1653 iview->type = pCreateInfo->viewType;
1654 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
1655 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1656 iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
1657
1658 iview->vk_format = pCreateInfo->format;
1659
1660 /* If the image has an Android external format, pCreateInfo->format will be
1661 * VK_FORMAT_UNDEFINED. */
1662 if (iview->vk_format == VK_FORMAT_UNDEFINED)
1663 iview->vk_format = image->vk_format;
1664
1665 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1666 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1667 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1668 iview->vk_format = vk_format_depth_only(iview->vk_format);
1669 }
1670
1671 if (device->physical_device->rad_info.chip_class >= GFX9) {
1672 iview->extent = (VkExtent3D) {
1673 .width = image->info.width,
1674 .height = image->info.height,
1675 .depth = image->info.depth,
1676 };
1677 } else {
1678 iview->extent = (VkExtent3D) {
1679 .width = radv_minify(image->info.width , range->baseMipLevel),
1680 .height = radv_minify(image->info.height, range->baseMipLevel),
1681 .depth = radv_minify(image->info.depth , range->baseMipLevel),
1682 };
1683 }
1684
1685 if (iview->vk_format != image->planes[iview->plane_id].format) {
1686 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1687 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1688 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1689 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1690
1691 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
1692 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
1693
1694 /* Comment ported from amdvlk -
1695 * If we have the following image:
1696 * Uncompressed pixels Compressed block sizes (4x4)
1697 * mip0: 22 x 22 6 x 6
1698 * mip1: 11 x 11 3 x 3
1699 * mip2: 5 x 5 2 x 2
1700 * mip3: 2 x 2 1 x 1
1701 * mip4: 1 x 1 1 x 1
1702 *
1703 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1704 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1705 * divide-by-two integer math):
1706 * mip0: 6x6
1707 * mip1: 3x3
1708 * mip2: 1x1
1709 * mip3: 1x1
1710 *
1711 * This means that mip2 will be missing texels.
1712 *
1713 * Fix this by calculating the base mip's width and height, then convert that, and round it
1714 * back up to get the level 0 size.
1715 * Clamp the converted size between the original values, and next power of two, which
1716 * means we don't oversize the image.
1717 */
1718 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1719 vk_format_is_compressed(image->vk_format) &&
1720 !vk_format_is_compressed(iview->vk_format)) {
1721 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
1722 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
1723
1724 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
1725 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
1726
1727 lvl_width <<= range->baseMipLevel;
1728 lvl_height <<= range->baseMipLevel;
1729
1730 iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
1731 iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
1732 }
1733 }
1734
1735 iview->base_layer = range->baseArrayLayer;
1736 iview->layer_count = radv_get_layerCount(image, range);
1737 iview->base_mip = range->baseMipLevel;
1738 iview->level_count = radv_get_levelCount(image, range);
1739
1740 bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
1741 for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
1742 VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
1743 radv_image_view_make_descriptor(iview, device, format,
1744 &pCreateInfo->components,
1745 false, disable_compression,
1746 iview->plane_id + i, i);
1747 radv_image_view_make_descriptor(iview, device,
1748 format, &pCreateInfo->components,
1749 true, disable_compression,
1750 iview->plane_id + i, i);
1751 }
1752 }
1753
1754 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1755 VkImageLayout layout,
1756 bool in_render_loop,
1757 unsigned queue_mask)
1758 {
1759 if (radv_image_is_tc_compat_htile(image))
1760 return layout != VK_IMAGE_LAYOUT_GENERAL;
1761
1762 return radv_image_has_htile(image) &&
1763 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1764 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1765 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1766 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1767 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1768 }
1769
1770 bool radv_layout_can_fast_clear(const struct radv_image *image,
1771 VkImageLayout layout,
1772 bool in_render_loop,
1773 unsigned queue_mask)
1774 {
1775 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1776 }
1777
1778 bool radv_layout_dcc_compressed(const struct radv_device *device,
1779 const struct radv_image *image,
1780 VkImageLayout layout,
1781 bool in_render_loop,
1782 unsigned queue_mask)
1783 {
1784 /* Don't compress compute transfer dst, as image stores are not supported. */
1785 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1786 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1787 return false;
1788
1789 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1790 }
1791
1792
1793 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1794 {
1795 if (!image->exclusive)
1796 return image->queue_family_mask;
1797 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1798 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1799 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1800 if (family == VK_QUEUE_FAMILY_IGNORED)
1801 return 1u << queue_family;
1802 return 1u << family;
1803 }
1804
1805 VkResult
1806 radv_CreateImage(VkDevice device,
1807 const VkImageCreateInfo *pCreateInfo,
1808 const VkAllocationCallbacks *pAllocator,
1809 VkImage *pImage)
1810 {
1811 #ifdef ANDROID
1812 const VkNativeBufferANDROID *gralloc_info =
1813 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1814
1815 if (gralloc_info)
1816 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1817 pAllocator, pImage);
1818 #endif
1819
1820 const struct wsi_image_create_info *wsi_info =
1821 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1822 bool scanout = wsi_info && wsi_info->scanout;
1823
1824 return radv_image_create(device,
1825 &(struct radv_image_create_info) {
1826 .vk_info = pCreateInfo,
1827 .scanout = scanout,
1828 },
1829 pAllocator,
1830 pImage);
1831 }
1832
1833 void
1834 radv_DestroyImage(VkDevice _device, VkImage _image,
1835 const VkAllocationCallbacks *pAllocator)
1836 {
1837 RADV_FROM_HANDLE(radv_device, device, _device);
1838 RADV_FROM_HANDLE(radv_image, image, _image);
1839
1840 if (!image)
1841 return;
1842
1843 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1844 device->ws->buffer_destroy(image->bo);
1845
1846 if (image->owned_memory != VK_NULL_HANDLE)
1847 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1848
1849 vk_free2(&device->alloc, pAllocator, image);
1850 }
1851
1852 void radv_GetImageSubresourceLayout(
1853 VkDevice _device,
1854 VkImage _image,
1855 const VkImageSubresource* pSubresource,
1856 VkSubresourceLayout* pLayout)
1857 {
1858 RADV_FROM_HANDLE(radv_image, image, _image);
1859 RADV_FROM_HANDLE(radv_device, device, _device);
1860 int level = pSubresource->mipLevel;
1861 int layer = pSubresource->arrayLayer;
1862
1863 unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
1864
1865 struct radv_image_plane *plane = &image->planes[plane_id];
1866 struct radeon_surf *surface = &plane->surface;
1867
1868 if (device->physical_device->rad_info.chip_class >= GFX9) {
1869 uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
1870
1871 pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
1872 if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1873 image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1874 image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1875 /* Adjust the number of bytes between each row because
1876 * the pitch is actually the number of components per
1877 * row.
1878 */
1879 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
1880 } else {
1881 uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
1882
1883 assert(util_is_power_of_two_nonzero(surface->bpe));
1884 pLayout->rowPitch = pitch * surface->bpe;
1885 }
1886
1887 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
1888 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
1889 pLayout->size = surface->u.gfx9.surf_slice_size;
1890 if (image->type == VK_IMAGE_TYPE_3D)
1891 pLayout->size *= u_minify(image->info.depth, level);
1892 } else {
1893 pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
1894 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
1895 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1896 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1897 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1898 if (image->type == VK_IMAGE_TYPE_3D)
1899 pLayout->size *= u_minify(image->info.depth, level);
1900 }
1901 }
1902
1903
1904 VkResult
1905 radv_CreateImageView(VkDevice _device,
1906 const VkImageViewCreateInfo *pCreateInfo,
1907 const VkAllocationCallbacks *pAllocator,
1908 VkImageView *pView)
1909 {
1910 RADV_FROM_HANDLE(radv_device, device, _device);
1911 struct radv_image_view *view;
1912
1913 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1914 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1915 if (view == NULL)
1916 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1917
1918 radv_image_view_init(view, device, pCreateInfo, NULL);
1919
1920 *pView = radv_image_view_to_handle(view);
1921
1922 return VK_SUCCESS;
1923 }
1924
1925 void
1926 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1927 const VkAllocationCallbacks *pAllocator)
1928 {
1929 RADV_FROM_HANDLE(radv_device, device, _device);
1930 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1931
1932 if (!iview)
1933 return;
1934 vk_free2(&device->alloc, pAllocator, iview);
1935 }
1936
1937 void radv_buffer_view_init(struct radv_buffer_view *view,
1938 struct radv_device *device,
1939 const VkBufferViewCreateInfo* pCreateInfo)
1940 {
1941 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1942
1943 view->bo = buffer->bo;
1944 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1945 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1946 view->vk_format = pCreateInfo->format;
1947
1948 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1949 pCreateInfo->offset, view->range, view->state);
1950 }
1951
1952 VkResult
1953 radv_CreateBufferView(VkDevice _device,
1954 const VkBufferViewCreateInfo *pCreateInfo,
1955 const VkAllocationCallbacks *pAllocator,
1956 VkBufferView *pView)
1957 {
1958 RADV_FROM_HANDLE(radv_device, device, _device);
1959 struct radv_buffer_view *view;
1960
1961 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1962 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1963 if (!view)
1964 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1965
1966 radv_buffer_view_init(view, device, pCreateInfo);
1967
1968 *pView = radv_buffer_view_to_handle(view);
1969
1970 return VK_SUCCESS;
1971 }
1972
1973 void
1974 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1975 const VkAllocationCallbacks *pAllocator)
1976 {
1977 RADV_FROM_HANDLE(radv_device, device, _device);
1978 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1979
1980 if (!view)
1981 return;
1982
1983 vk_free2(&device->alloc, pAllocator, view);
1984 }