radv: Pass no_metadata_planes info in to ac_surface.
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
37
38 #include "gfx10_format_table.h"
39
40 static unsigned
41 radv_choose_tiling(struct radv_device *device,
42 const VkImageCreateInfo *pCreateInfo,
43 VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (!vk_format_is_compressed(format) &&
51 !vk_format_is_depth_or_stencil(format)
52 && device->physical_device->rad_info.chip_class <= GFX8) {
53 /* this causes hangs in some VK CTS tests on GFX9. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
56 /* Only very thin and long 2D textures should benefit from
57 * linear_aligned. */
58 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED;
60 }
61
62 /* MSAA resources must be 2D tiled. */
63 if (pCreateInfo->samples > 1)
64 return RADEON_SURF_MODE_2D;
65
66 return RADEON_SURF_MODE_2D;
67 }
68
/* Returns whether an image described by pCreateInfo/format may enable
 * TC-compatible HTILE, i.e. HTILE metadata laid out so the texture
 * units can consume it directly while the depth surface stays
 * compressed.
 */
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
				   const VkImageCreateInfo *pCreateInfo,
				   VkFormat format)
{
	/* TC-compat HTILE is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	/* Not supported for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	/* Linear images have no HTILE. */
	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	/* NOTE(review): mip-mapped depth images are rejected here —
	 * presumably TC-compat HTILE with mips is not handled yet. */
	if (pCreateInfo->mipLevels > 1)
		return false;

	/* Do not enable TC-compatible HTILE if the image isn't readable by a
	 * shader because no texture fetches will happen.
	 */
	if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
				    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
				    VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
		return false;

	/* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
	 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
	 */
	if (pCreateInfo->samples >= 2 &&
	    (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
	     (format == VK_FORMAT_D32_SFLOAT &&
	      device->physical_device->rad_info.chip_class == GFX10)))
		return false;

	/* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
	 * supports 32-bit. Though, it's possible to enable TC-compat for
	 * 16-bit depth surfaces if no Z planes are compressed.
	 */
	if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
	    format != VK_FORMAT_D32_SFLOAT &&
	    format != VK_FORMAT_D16_UNORM)
		return false;

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		/* Mutable-format images can be viewed with other formats;
		 * TC-compat requires every view format to equal the base
		 * format, which we can only prove with an explicit list.
		 */
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else.
			 */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (format != format_list->pViewFormats[i])
					return false;
			}
		} else {
			/* No usable list: any compatible format could be used. */
			return false;
		}
	}

	return true;
}
138
139 static bool
140 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
141 {
142 if (info->bo_metadata) {
143 if (device->physical_device->rad_info.chip_class >= GFX9)
144 return info->bo_metadata->u.gfx9.scanout;
145 else
146 return info->bo_metadata->u.legacy.scanout;
147 }
148
149 return info->scanout;
150 }
151
/* Decide whether a new image may enable DCC (Delta Color Compression).
 * Conservative: any condition the driver cannot (yet) handle disables
 * DCC for the whole image.
 */
static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image *image,
		       const VkImageCreateInfo *pCreateInfo,
		       VkFormat format)
{
	bool dcc_compatible_formats;
	bool blendable; /* out-param of radv_is_colorbuffer_format_supported(); unused here */

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	/* Honor the RADV_DEBUG nodcc debug flag. */
	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	/* NOTE(review): shareable images skip DCC — presumably because the
	 * importing side may not understand the metadata; confirm. */
	if (image->shareable)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	/* Linear images carry no DCC metadata. */
	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	/* No DCC for subsampled or multi-plane (e.g. YCbCr) formats. */
	if (vk_format_is_subsampled(format) ||
	    vk_format_get_plane_count(format) > 1)
		return false;

	/* TODO: Enable DCC for mipmaps on GFX9+. */
	if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
	    device->physical_device->rad_info.chip_class >= GFX9)
		return false;

	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
	 * 2x can be enabled with an option.
	 */
	if (pCreateInfo->samples > 2 ||
	    (pCreateInfo->samples == 2 &&
	     !device->physical_device->dcc_msaa_allowed))
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(format,
						     &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		/* Every possible view format must also be DCC-compatible
		 * with the base format; without an explicit format list we
		 * must assume the worst.
		 */
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (!radv_dcc_formats_compatible(format,
								 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}
232
233 static inline bool
234 radv_use_fmask_for_image(const struct radv_image *image)
235 {
236 return image->info.samples > 1 &&
237 image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
238 }
239
240 static bool
241 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
242 struct radv_image *image)
243 {
244 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
245 return false;
246
247 /* TC-compat CMASK is only available for GFX8+. */
248 if (device->physical_device->rad_info.chip_class < GFX8)
249 return false;
250
251 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
252 return false;
253
254 if (radv_image_has_dcc(image))
255 return false;
256
257 if (!radv_image_has_cmask(image))
258 return false;
259
260 return true;
261 }
262
263 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
264 {
265 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
266 }
267
268 static bool
269 radv_is_valid_opaque_metadata(const struct radv_device *device,
270 const struct radeon_bo_metadata *md)
271 {
272 if (md->metadata[0] != 1 ||
273 md->metadata[1] != si_get_bo_metadata_word1(device))
274 return false;
275
276 if (md->size_metadata < 40)
277 return false;
278
279 return true;
280 }
281
/* Overwrite a surface's tiling parameters with those carried in
 * imported BO metadata, so our layout matches the exporter's.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	/* Drop the previously chosen mode; it is re-derived from metadata. */
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+: swizzle mode 0 means linear, anything else tiled. */
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		/* Legacy (pre-GFX9) tiling parameters come straight from
		 * the metadata.
		 */
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		/* Macro-tiled wins over micro-tiled wins over linear. */
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

	}
}
313
/* Reconcile the dimensions of an imported image with the dimensions
 * encoded in its opaque metadata. If the external allocation is larger
 * (minigbm over-allocates), adopt the external dimensions where the
 * hardware can cope, and reject imports that cannot work.
 *
 * Returns VK_SUCCESS, or VK_ERROR_INVALID_EXTERNAL_HANDLE when the
 * external image is smaller than requested or inconsistent on GFX10.
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device,
			    struct radv_image *image,
			    const struct radv_image_create_info *create_info,
			    struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;

	/*
	 * minigbm sometimes allocates bigger images which is going to result in
	 * weird strides and other properties. Lets be lenient where possible and
	 * fail it on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		/* Decode the external width/height out of the stored image
		 * descriptor words (layout differs between GFX10+ and older).
		 */
		if (device->physical_device->rad_info.chip_class >= GFX10) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
			        (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = S_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	/* Dimensions agree: nothing to patch. */
	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
			"The imported image has smaller dimensions than the internal\n"
			"dimensions. Using it is going to fail badly, so we reject\n"
			"this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (device->physical_device->rad_info.chip_class >= GFX10) {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency in width and will fail this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		/* Pre-GFX10: warn but accept, adopting the external size. */
		fprintf(stderr,
			"Tried to import an image with inconsistent width on pre-GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency and would fail on GFX10.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
	}
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}
376
/* Apply create-time extra info (imported BO metadata, scanout flag) to
 * every plane's surface before the surface layout is computed.
 *
 * Returns VK_SUCCESS, or an error from radv_patch_image_dimensions().
 */
static VkResult
radv_patch_image_from_extra_info(struct radv_device *device,
				 struct radv_image *image,
				 const struct radv_image_create_info *create_info,
				 struct ac_surf_info *image_info)
{
	VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
	if (result != VK_SUCCESS)
		return result;

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		if (create_info->bo_metadata) {
			/* Adopt the exporter's tiling parameters. */
			radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
							 create_info->bo_metadata);
		}

		if (radv_surface_has_scanout(device, create_info)) {
			/* Display surfaces: mark scanout and disable DCC. */
			image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

			/* NOTE(review): surf_index is cleared for scanout —
			 * presumably to opt out of swizzle assignment; confirm. */
			image->info.surf_index = NULL;
		}
	}
	return VK_SUCCESS;
}
402
/* Initialize the radeon_surf for one plane of a new image: block
 * dimensions, bytes per element, tiling mode, surface type and
 * feature flags (HTILE / DCC / FMASK).
 *
 * Always returns 0.
 */
static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo,
		  VkFormat image_format)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	/* Depth-only size: for combined depth/stencil the stencil plane is
	 * tracked separately by ac_surface.
	 */
	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* align byte per element on dword */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	switch (pCreateInfo->imageType){
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	/* NOTE(review): 128-bit compressed 3D images are marked
	 * non-renderable on GFX9+ — confirm against ac_surface limits. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	if (!radv_use_fmask_for_image(image))
		surface->flags |= RADEON_SURF_NO_FMASK;

	return 0;
}
473
474 static inline unsigned
475 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
476 {
477 if (stencil)
478 return plane->surface.u.legacy.stencil_tiling_index[level];
479 else
480 return plane->surface.u.legacy.tiling_index[level];
481 }
482
483 static unsigned radv_map_swizzle(unsigned swizzle)
484 {
485 switch (swizzle) {
486 case VK_SWIZZLE_Y:
487 return V_008F0C_SQ_SEL_Y;
488 case VK_SWIZZLE_Z:
489 return V_008F0C_SQ_SEL_Z;
490 case VK_SWIZZLE_W:
491 return V_008F0C_SQ_SEL_W;
492 case VK_SWIZZLE_0:
493 return V_008F0C_SQ_SEL_0;
494 case VK_SWIZZLE_1:
495 return V_008F0C_SQ_SEL_1;
496 default: /* VK_SWIZZLE_X */
497 return V_008F0C_SQ_SEL_X;
498 }
499 }
500
/* Build a 4-dword buffer resource descriptor (V#) covering the range
 * [offset, offset + range) of `buffer`, formatted as vk_format texels.
 *
 * Writes state[0..3]; the dword layout differs between GFX10+ and
 * older chips.
 */
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8; /* texel size in bytes */

	va += offset;
	state[0] = va; /* base address, low 32 bits */
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	/* NOTE(review): the range is converted from bytes to elements on
	 * every generation except GFX8 — presumably NUM_RECORDS stays in
	 * bytes there; confirm against the GCN3 descriptor docs. */
	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range; /* NUM_RECORDS */
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		/* Pre-GFX10: separate numeric/data format fields. */
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}
558
/* Patch the location-dependent dwords of an existing image descriptor:
 * base address, tiling/swizzle mode, pitch and the DCC/HTILE metadata
 * address + compression enable. The dword layout differs per chip
 * generation (GFX6-8, GFX9, GFX10+).
 *
 * NOTE(review): base_level_info appears to be dereferenced only on
 * pre-GFX9 paths, so callers presumably may pass NULL on GFX9+ —
 * confirm at the call sites.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0; /* metadata (DCC or HTILE) address; 0 = no compression */
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	/* Fold the tile swizzle into the low address bits when tiled. */
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			/* Pre-GFX9 tracks a per-level DCC offset. */
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			/* GFX10 stores the metadata address elsewhere (below). */
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			/* DCC carries its own alignment flags; HTILE uses the
			 * defaults above.
			 */
			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (image->dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}
677
678 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
679 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
680 {
681 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
682 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
683
684 /* GFX9 allocates 1D textures as 2D. */
685 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
686 image_type = VK_IMAGE_TYPE_2D;
687 switch (image_type) {
688 case VK_IMAGE_TYPE_1D:
689 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
690 case VK_IMAGE_TYPE_2D:
691 if (nr_samples > 1)
692 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
693 else
694 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
695 case VK_IMAGE_TYPE_3D:
696 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
697 return V_008F1C_SQ_RSRC_IMG_3D;
698 else
699 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
700 default:
701 unreachable("illegal image type");
702 }
703 }
704
705 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
706 {
707 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
708
709 if (swizzle[3] == VK_SWIZZLE_X) {
710 /* For the pre-defined border color values (white, opaque
711 * black, transparent black), the only thing that matters is
712 * that the alpha channel winds up in the correct place
713 * (because the RGB channels are all the same) so either of
714 * these enumerations will work.
715 */
716 if (swizzle[2] == VK_SWIZZLE_Y)
717 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
718 else
719 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
720 } else if (swizzle[0] == VK_SWIZZLE_X) {
721 if (swizzle[1] == VK_SWIZZLE_Y)
722 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
723 else
724 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
725 } else if (swizzle[1] == VK_SWIZZLE_X) {
726 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
727 } else if (swizzle[2] == VK_SWIZZLE_X) {
728 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
729 }
730
731 return bc_swizzle;
732 }
733
734 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
735 {
736 const struct vk_format_description *desc = vk_format_description(format);
737
738 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
739 return desc->swizzle[3] == VK_SWIZZLE_X;
740
741 return radv_translate_colorswap(format, false) <= 1;
742 }
743 /**
744 * Build the sampler view descriptor for a texture (GFX10).
745 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		/* Depth/stencil: broadcast the X channel, then apply the
		 * user's component mapping on top.
		 */
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		/* 1D arrays put the layer count in the depth field. */
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		/* Cubes count faces in groups of 6. */
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	/* MSAA images store the fragment count in the LOD fields. */
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(0) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		/* DCC read parameters for the texture units. */
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		/* FMASK only exists for single-plane images. */
		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->fmask_offset;

		/* The FMASK format encodes samples and fragments. */
		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
		fmask_state[7] = 0;
	} else if (fmask_state)
		/* No FMASK: zero the caller's descriptor. */
		memset(fmask_state, 0, 8 * 4);
}
867
868 /**
869 * Build the sampler view descriptor for a texture (SI-GFX9)
870 */
871 static void
872 si_make_texture_descriptor(struct radv_device *device,
873 struct radv_image *image,
874 bool is_storage_image,
875 VkImageViewType view_type,
876 VkFormat vk_format,
877 const VkComponentMapping *mapping,
878 unsigned first_level, unsigned last_level,
879 unsigned first_layer, unsigned last_layer,
880 unsigned width, unsigned height, unsigned depth,
881 uint32_t *state,
882 uint32_t *fmask_state)
883 {
884 const struct vk_format_description *desc;
885 enum vk_swizzle swizzle[4];
886 int first_non_void;
887 unsigned num_format, data_format, type;
888
889 desc = vk_format_description(vk_format);
890
891 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
892 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
893 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
894 } else {
895 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
896 }
897
898 first_non_void = vk_format_get_first_non_void_channel(vk_format);
899
900 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
901 if (num_format == ~0) {
902 num_format = 0;
903 }
904
905 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
906 if (data_format == ~0) {
907 data_format = 0;
908 }
909
910 /* S8 with either Z16 or Z32 HTILE need a special format. */
911 if (device->physical_device->rad_info.chip_class == GFX9 &&
912 vk_format == VK_FORMAT_S8_UINT &&
913 radv_image_is_tc_compat_htile(image)) {
914 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
915 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
916 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
917 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
918 }
919 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
920 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
921 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
922 height = 1;
923 depth = image->info.array_size;
924 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
925 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
926 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
927 depth = image->info.array_size;
928 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
929 depth = image->info.array_size / 6;
930
931 state[0] = 0;
932 state[1] = (S_008F14_DATA_FORMAT(data_format) |
933 S_008F14_NUM_FORMAT(num_format));
934 state[2] = (S_008F18_WIDTH(width - 1) |
935 S_008F18_HEIGHT(height - 1) |
936 S_008F18_PERF_MOD(4));
937 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
938 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
939 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
940 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
941 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
942 0 : first_level) |
943 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
944 util_logbase2(image->info.samples) :
945 last_level) |
946 S_008F1C_TYPE(type));
947 state[4] = 0;
948 state[5] = S_008F24_BASE_ARRAY(first_layer);
949 state[6] = 0;
950 state[7] = 0;
951
952 if (device->physical_device->rad_info.chip_class == GFX9) {
953 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
954
955 /* Depth is the last accessible layer on Gfx9.
956 * The hw doesn't need to know the total number of layers.
957 */
958 if (type == V_008F1C_SQ_RSRC_IMG_3D)
959 state[4] |= S_008F20_DEPTH(depth - 1);
960 else
961 state[4] |= S_008F20_DEPTH(last_layer);
962
963 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
964 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
965 util_logbase2(image->info.samples) :
966 image->info.levels - 1);
967 } else {
968 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
969 state[4] |= S_008F20_DEPTH(depth - 1);
970 state[5] |= S_008F24_LAST_ARRAY(last_layer);
971 }
972 if (image->dcc_offset) {
973 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
974 } else {
975 /* The last dword is unused by hw. The shader uses it to clear
976 * bits in the first dword of sampler state.
977 */
978 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
979 if (first_level == last_level)
980 state[7] = C_008F30_MAX_ANISO_RATIO;
981 else
982 state[7] = 0xffffffff;
983 }
984 }
985
986 /* Initialize the sampler view for FMASK. */
987 if (radv_image_has_fmask(image)) {
988 uint32_t fmask_format, num_format;
989 uint64_t gpu_address = radv_buffer_get_va(image->bo);
990 uint64_t va;
991
992 assert(image->plane_count == 1);
993
994 va = gpu_address + image->offset + image->fmask_offset;
995
996 if (device->physical_device->rad_info.chip_class == GFX9) {
997 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
998 switch (image->info.samples) {
999 case 2:
1000 num_format = V_008F14_IMG_FMASK_8_2_2;
1001 break;
1002 case 4:
1003 num_format = V_008F14_IMG_FMASK_8_4_4;
1004 break;
1005 case 8:
1006 num_format = V_008F14_IMG_FMASK_32_8_8;
1007 break;
1008 default:
1009 unreachable("invalid nr_samples");
1010 }
1011 } else {
1012 switch (image->info.samples) {
1013 case 2:
1014 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1015 break;
1016 case 4:
1017 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1018 break;
1019 case 8:
1020 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1021 break;
1022 default:
1023 assert(0);
1024 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1025 }
1026 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1027 }
1028
1029 fmask_state[0] = va >> 8;
1030 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1031 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1032 S_008F14_DATA_FORMAT(fmask_format) |
1033 S_008F14_NUM_FORMAT(num_format);
1034 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1035 S_008F18_HEIGHT(height - 1);
1036 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1037 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1038 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1039 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1040 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1041 fmask_state[4] = 0;
1042 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1043 fmask_state[6] = 0;
1044 fmask_state[7] = 0;
1045
1046 if (device->physical_device->rad_info.chip_class == GFX9) {
1047 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1048 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1049 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1050 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1051 S_008F24_META_RB_ALIGNED(1);
1052
1053 if (radv_image_is_tc_compat_cmask(image)) {
1054 va = gpu_address + image->offset + image->cmask_offset;
1055
1056 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1057 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1058 fmask_state[7] |= va >> 8;
1059 }
1060 } else {
1061 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1062 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1063 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1064 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1065
1066 if (radv_image_is_tc_compat_cmask(image)) {
1067 va = gpu_address + image->offset + image->cmask_offset;
1068
1069 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1070 fmask_state[7] |= va >> 8;
1071 }
1072 }
1073 } else if (fmask_state)
1074 memset(fmask_state, 0, 8 * 4);
1075 }
1076
1077 static void
1078 radv_make_texture_descriptor(struct radv_device *device,
1079 struct radv_image *image,
1080 bool is_storage_image,
1081 VkImageViewType view_type,
1082 VkFormat vk_format,
1083 const VkComponentMapping *mapping,
1084 unsigned first_level, unsigned last_level,
1085 unsigned first_layer, unsigned last_layer,
1086 unsigned width, unsigned height, unsigned depth,
1087 uint32_t *state,
1088 uint32_t *fmask_state)
1089 {
1090 if (device->physical_device->rad_info.chip_class >= GFX10) {
1091 gfx10_make_texture_descriptor(device, image, is_storage_image,
1092 view_type, vk_format, mapping,
1093 first_level, last_level,
1094 first_layer, last_layer,
1095 width, height, depth,
1096 state, fmask_state);
1097 } else {
1098 si_make_texture_descriptor(device, image, is_storage_image,
1099 view_type, vk_format, mapping,
1100 first_level, last_level,
1101 first_layer, last_layer,
1102 width, height, depth,
1103 state, fmask_state);
1104 }
1105 }
1106
1107 static void
1108 radv_query_opaque_metadata(struct radv_device *device,
1109 struct radv_image *image,
1110 struct radeon_bo_metadata *md)
1111 {
1112 static const VkComponentMapping fixedmapping;
1113 uint32_t desc[8], i;
1114
1115 assert(image->plane_count == 1);
1116
1117 /* Metadata image format format version 1:
1118 * [0] = 1 (metadata format identifier)
1119 * [1] = (VENDOR_ID << 16) | PCI_ID
1120 * [2:9] = image descriptor for the whole resource
1121 * [2] is always 0, because the base address is cleared
1122 * [9] is the DCC offset bits [39:8] from the beginning of
1123 * the buffer
1124 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1125 */
1126 md->metadata[0] = 1; /* metadata image format version 1 */
1127
1128 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1129 md->metadata[1] = si_get_bo_metadata_word1(device);
1130
1131
1132 radv_make_texture_descriptor(device, image, false,
1133 (VkImageViewType)image->type, image->vk_format,
1134 &fixedmapping, 0, image->info.levels - 1, 0,
1135 image->info.array_size - 1,
1136 image->info.width, image->info.height,
1137 image->info.depth,
1138 desc, NULL);
1139
1140 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1141 image->planes[0].surface.blk_w, false, false, false, desc);
1142
1143 /* Clear the base address and set the relative DCC offset. */
1144 desc[0] = 0;
1145 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1146 desc[7] = image->dcc_offset >> 8;
1147
1148 /* Dwords [2:9] contain the image descriptor. */
1149 memcpy(&md->metadata[2], desc, sizeof(desc));
1150
1151 /* Dwords [10:..] contain the mipmap level offsets. */
1152 if (device->physical_device->rad_info.chip_class <= GFX8) {
1153 for (i = 0; i <= image->info.levels - 1; i++)
1154 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1155 md->size_metadata = (11 + image->info.levels - 1) * 4;
1156 } else
1157 md->size_metadata = 10 * 4;
1158 }
1159
1160 void
1161 radv_init_metadata(struct radv_device *device,
1162 struct radv_image *image,
1163 struct radeon_bo_metadata *metadata)
1164 {
1165 struct radeon_surf *surface = &image->planes[0].surface;
1166
1167 memset(metadata, 0, sizeof(*metadata));
1168
1169 if (device->physical_device->rad_info.chip_class >= GFX9) {
1170 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1171 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1172 } else {
1173 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1174 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1175 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1176 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1177 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1178 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1179 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1180 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1181 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1182 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1183 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1184 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1185 }
1186 radv_query_opaque_metadata(device, image, metadata);
1187 }
1188
1189 void
1190 radv_image_override_offset_stride(struct radv_device *device,
1191 struct radv_image *image,
1192 uint64_t offset, uint32_t stride)
1193 {
1194 ac_surface_override_offset_stride(&device->physical_device->rad_info,
1195 &image->planes[0].surface,
1196 image->info.levels, offset, stride);
1197 }
1198
1199 static void
1200 radv_image_alloc_fmask(struct radv_device *device,
1201 struct radv_image *image)
1202 {
1203 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1204
1205 image->fmask_offset = align64(image->size, fmask_alignment);
1206 image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
1207 image->alignment = MAX2(image->alignment, fmask_alignment);
1208 }
1209
1210 static void
1211 radv_image_alloc_cmask(struct radv_device *device,
1212 struct radv_image *image)
1213 {
1214 unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
1215 unsigned cmask_size = image->planes[0].surface.cmask_size;
1216 uint32_t clear_value_size = 0;
1217
1218 if (!cmask_size)
1219 return;
1220
1221 assert(cmask_alignment);
1222
1223 image->cmask_offset = align64(image->size, cmask_alignment);
1224 /* + 8 for storing the clear values */
1225 if (!image->clear_value_offset) {
1226 image->clear_value_offset = image->cmask_offset + cmask_size;
1227 clear_value_size = 8;
1228 }
1229 image->size = image->cmask_offset + cmask_size + clear_value_size;
1230 image->alignment = MAX2(image->alignment, cmask_alignment);
1231 }
1232
1233 static void
1234 radv_image_alloc_dcc(struct radv_image *image)
1235 {
1236 assert(image->plane_count == 1);
1237
1238 image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
1239 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1240 image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
1241 image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
1242 image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
1243 image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
1244 image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
1245 }
1246
1247 static void
1248 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1249 {
1250 image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1251
1252 /* + 8 for storing the clear values */
1253 image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
1254 image->size = image->clear_value_offset + image->info.levels * 8;
1255 if (radv_image_is_tc_compat_htile(image) &&
1256 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1257 /* Metadata for the TC-compatible HTILE hardware bug which
1258 * have to be fixed by updating ZRANGE_PRECISION when doing
1259 * fast depth clears to 0.0f.
1260 */
1261 image->tc_compat_zrange_offset = image->size;
1262 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1263 }
1264 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1265 }
1266
1267 static inline bool
1268 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
1269 {
1270 if (image->info.samples <= 1 &&
1271 image->info.width * image->info.height <= 512 * 512) {
1272 /* Do not enable CMASK or DCC for small surfaces where the cost
1273 * of the eliminate pass can be higher than the benefit of fast
1274 * clear. RadeonSI does this, but the image threshold is
1275 * different.
1276 */
1277 return false;
1278 }
1279
1280 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
1281 (image->exclusive || image->queue_family_mask == 1);
1282 }
1283
1284 static inline bool
1285 radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
1286 {
1287 if (!radv_image_can_enable_dcc_or_cmask(image) ||
1288 !radv_image_has_dcc(image))
1289 return false;
1290
1291 /* On GFX8, DCC layers can be interleaved and it's currently only
1292 * enabled if slice size is equal to the per slice fast clear size
1293 * because the driver assumes that portions of multiple layers are
1294 * contiguous during fast clears.
1295 */
1296 if (image->info.array_size > 1) {
1297 const struct legacy_surf_level *surf_level =
1298 &image->planes[0].surface.u.legacy.level[0];
1299
1300 assert(device->physical_device->rad_info.chip_class == GFX8);
1301
1302 if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
1303 return false;
1304 }
1305
1306 return true;
1307 }
1308
1309 static inline bool
1310 radv_image_can_enable_cmask(struct radv_image *image)
1311 {
1312 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1313 /* Do not enable CMASK for non-MSAA images (fast color clear)
1314 * because 128 bit formats are not supported, but FMASK might
1315 * still be used.
1316 */
1317 return false;
1318 }
1319
1320 return radv_image_can_enable_dcc_or_cmask(image) &&
1321 image->info.levels == 1 &&
1322 image->info.depth == 1 &&
1323 !image->planes[0].surface.is_linear;
1324 }
1325
1326 static inline bool
1327 radv_image_can_enable_htile(struct radv_image *image)
1328 {
1329 return radv_image_has_htile(image) &&
1330 image->info.levels == 1 &&
1331 image->info.width * image->info.height >= 8 * 8;
1332 }
1333
1334 static void radv_image_disable_dcc(struct radv_image *image)
1335 {
1336 for (unsigned i = 0; i < image->plane_count; ++i)
1337 image->planes[i].surface.dcc_size = 0;
1338 }
1339
1340 static void radv_image_disable_htile(struct radv_image *image)
1341 {
1342 for (unsigned i = 0; i < image->plane_count; ++i)
1343 image->planes[i].surface.htile_size = 0;
1344 }
1345
VkResult
radv_image_create_layout(struct radv_device *device,
                         struct radv_image_create_info create_info,
                         struct radv_image *image)
{
	/* Compute the full memory layout of the image: initialize each plane's
	 * surface, then append DCC/CMASK/FMASK/HTILE metadata buffers as
	 * applicable, accumulating image->size and image->alignment.
	 */
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			/* Chroma planes of subsampled formats are smaller than
			 * plane 0 by the format's divisors. */
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		/* Multi-plane images (and callers that request it) get no
		 * compression metadata at all. */
		if (create_info.no_metadata_planes || image->plane_count > 1) {
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
			                                      RADEON_SURF_NO_FMASK |
			                                      RADEON_SURF_NO_HTILE;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		/* Planes are packed back to back, each at its own alignment. */
		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		image->alignment = image->planes[plane].surface.surf_alignment;

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	/* Try to enable DCC first. */
	if (radv_image_can_enable_dcc(device, image)) {
		radv_image_alloc_dcc(image);
		if (image->info.samples > 1) {
			/* CMASK should be enabled because DCC fast
			 * clear with MSAA needs it.
			 */
			assert(radv_image_can_enable_cmask(image));
			radv_image_alloc_cmask(device, image);
		}
	} else {
		/* When DCC cannot be enabled, try CMASK. */
		radv_image_disable_dcc(image);
		if (radv_image_can_enable_cmask(image)) {
			radv_image_alloc_cmask(device, image);
		}
	}

	/* Try to enable FMASK for multisampled images. */
	if (image->planes[0].surface.fmask_size) {
		radv_image_alloc_fmask(device, image);

		if (radv_use_tc_compat_cmask_for_image(device, image))
			image->tc_compatible_cmask = true;
	} else {
		/* Otherwise, try to enable HTILE for depth surfaces. */
		if (radv_image_can_enable_htile(image) &&
		    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
			image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
			radv_image_alloc_htile(device, image);
		} else {
			radv_image_disable_htile(image);
		}
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
1430
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	/* Allocate and initialize a radv_image from the Vulkan create info,
	 * then (unless the layout must be delayed for Android hardware
	 * buffers) compute its memory layout. Returns the image handle in
	 * *pImage.
	 */
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	/* With an Android external format in the pNext chain,
	 * pCreateInfo->format is replaced by the resolved format. */
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
	                                                      pCreateInfo->format);
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	/* The per-plane structs are stored as a trailing array. */
	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		/* Build the mask of queue families allowed to access the
		 * image; external/foreign families open it up to all. */
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
		                     EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;

	image->shareable = external_info;
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		/* Non-shareable color images get a per-device surface index
		 * used to vary tile swizzle between MRTs. */
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	/* Android hardware buffers only get their layout once the buffer is
	 * bound, so stop here in that case. */
	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		/* Sparse images get a virtual BO covering the whole size;
		 * real pages are bound later. */
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
		                                      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->vk.alloc, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}
1529
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	/* Fill one plane descriptor (sampled or storage variant) of the image
	 * view: build the immutable texture descriptor, then patch in the
	 * address/tiling-dependent fields.
	 */
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	/* Block width of the view format expressed in units of the plane's
	 * surface layout. */
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	/* On GFX9+ the base mip goes into the descriptor's level fields;
	 * older chips bake it into the base address instead (hw_level 0). */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width  / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}
1587
1588 static unsigned
1589 radv_plane_from_aspect(VkImageAspectFlags mask)
1590 {
1591 switch(mask) {
1592 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1593 return 1;
1594 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1595 return 2;
1596 default:
1597 return 0;
1598 }
1599 }
1600
1601 VkFormat
1602 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1603 {
1604 switch(mask) {
1605 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1606 return image->planes[0].format;
1607 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1608 return image->planes[1].format;
1609 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1610 return image->planes[2].format;
1611 case VK_IMAGE_ASPECT_STENCIL_BIT:
1612 return vk_format_stencil_only(image->vk_format);
1613 case VK_IMAGE_ASPECT_DEPTH_BIT:
1614 return vk_format_depth_only(image->vk_format);
1615 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1616 return vk_format_depth_only(image->vk_format);
1617 default:
1618 return image->vk_format;
1619 }
1620 }
1621
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	/* Initialize an image view: resolve its format/aspect/extent and
	 * build the sampled and storage descriptors for each viewed plane.
	 */
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	/* A COLOR view of a multi-plane image covers all planes at once. */
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	/* GFX9+ descriptors take the base-level extent (the hw minifies);
	 * older chips take the extent of the view's base mip. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width  = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth  = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		/* The view reinterprets the data with a different block size;
		 * rescale the extent accordingly. */
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *      mip2:        5 x  5                   2 x 2
		 *      mip3:        2 x  2                   1 x 1
		 *      mip4:        1 x  1                   1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 *      mip0:  6x6
		 *      mip1:  3x3
		 *      mip2:  1x1
		 *      mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		 if (device->physical_device->rad_info.chip_class >= GFX9 &&
		     vk_format_is_compressed(image->vk_format) &&
		     !vk_format_is_compressed(iview->vk_format)) {
			 unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
			 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			 lvl_width <<= range->baseMipLevel;
			 lvl_height <<= range->baseMipLevel;

			 iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			 iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		 }
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	/* Build both sampled and storage descriptors for every viewed plane. */
	bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}
1745
1746 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1747 VkImageLayout layout,
1748 bool in_render_loop,
1749 unsigned queue_mask)
1750 {
1751 if (radv_image_is_tc_compat_htile(image)) {
1752 if (layout == VK_IMAGE_LAYOUT_GENERAL &&
1753 !in_render_loop &&
1754 !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1755 /* It should be safe to enable TC-compat HTILE with
1756 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1757 * loop and if the image doesn't have the storage bit
1758 * set. This improves performance for apps that use
1759 * GENERAL for the main depth pass because this allows
1760 * compression and this reduces the number of
1761 * decompressions from/to GENERAL.
1762 */
1763 return true;
1764 }
1765
1766 return layout != VK_IMAGE_LAYOUT_GENERAL;
1767 }
1768
1769 return radv_image_has_htile(image) &&
1770 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1771 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1772 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1773 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1774 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1775 }
1776
1777 bool radv_layout_can_fast_clear(const struct radv_image *image,
1778 VkImageLayout layout,
1779 bool in_render_loop,
1780 unsigned queue_mask)
1781 {
1782 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1783 }
1784
1785 bool radv_layout_dcc_compressed(const struct radv_device *device,
1786 const struct radv_image *image,
1787 VkImageLayout layout,
1788 bool in_render_loop,
1789 unsigned queue_mask)
1790 {
1791 /* Don't compress compute transfer dst, as image stores are not supported. */
1792 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1793 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1794 return false;
1795
1796 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1797 }
1798
1799
1800 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1801 {
1802 if (!image->exclusive)
1803 return image->queue_family_mask;
1804 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1805 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1806 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1807 if (family == VK_QUEUE_FAMILY_IGNORED)
1808 return 1u << queue_family;
1809 return 1u << family;
1810 }
1811
1812 VkResult
1813 radv_CreateImage(VkDevice device,
1814 const VkImageCreateInfo *pCreateInfo,
1815 const VkAllocationCallbacks *pAllocator,
1816 VkImage *pImage)
1817 {
1818 #ifdef ANDROID
1819 const VkNativeBufferANDROID *gralloc_info =
1820 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1821
1822 if (gralloc_info)
1823 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1824 pAllocator, pImage);
1825 #endif
1826
1827 const struct wsi_image_create_info *wsi_info =
1828 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1829 bool scanout = wsi_info && wsi_info->scanout;
1830
1831 return radv_image_create(device,
1832 &(struct radv_image_create_info) {
1833 .vk_info = pCreateInfo,
1834 .scanout = scanout,
1835 },
1836 pAllocator,
1837 pImage);
1838 }
1839
1840 void
1841 radv_DestroyImage(VkDevice _device, VkImage _image,
1842 const VkAllocationCallbacks *pAllocator)
1843 {
1844 RADV_FROM_HANDLE(radv_device, device, _device);
1845 RADV_FROM_HANDLE(radv_image, image, _image);
1846
1847 if (!image)
1848 return;
1849
1850 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1851 device->ws->buffer_destroy(image->bo);
1852
1853 if (image->owned_memory != VK_NULL_HANDLE)
1854 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1855
1856 vk_object_base_finish(&image->base);
1857 vk_free2(&device->vk.alloc, pAllocator, image);
1858 }
1859
void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	/* Multi-planar formats: pick the plane matching the requested aspect. */
	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* On GFX9+, per-level offsets only exist for linear surfaces;
		 * tiled surfaces start at offset 0 within the plane. */
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			/* Linear surfaces carry a per-level pitch; tiled ones
			 * use the surface-wide pitch. */
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		/* 3D images: total size covers every depth slice of the level. */
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		/* Pre-GFX9 (legacy) surface layout: slice sizes are stored in
		 * dwords, hence the "* 4" conversions to bytes. */
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
1910
1911
1912 VkResult
1913 radv_CreateImageView(VkDevice _device,
1914 const VkImageViewCreateInfo *pCreateInfo,
1915 const VkAllocationCallbacks *pAllocator,
1916 VkImageView *pView)
1917 {
1918 RADV_FROM_HANDLE(radv_device, device, _device);
1919 struct radv_image_view *view;
1920
1921 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1922 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1923 if (view == NULL)
1924 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1925
1926 vk_object_base_init(&device->vk, &view->base,
1927 VK_OBJECT_TYPE_IMAGE_VIEW);
1928
1929 radv_image_view_init(view, device, pCreateInfo, NULL);
1930
1931 *pView = radv_image_view_to_handle(view);
1932
1933 return VK_SUCCESS;
1934 }
1935
1936 void
1937 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1938 const VkAllocationCallbacks *pAllocator)
1939 {
1940 RADV_FROM_HANDLE(radv_device, device, _device);
1941 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1942
1943 if (!iview)
1944 return;
1945
1946 vk_object_base_finish(&iview->base);
1947 vk_free2(&device->vk.alloc, pAllocator, iview);
1948 }
1949
1950 void radv_buffer_view_init(struct radv_buffer_view *view,
1951 struct radv_device *device,
1952 const VkBufferViewCreateInfo* pCreateInfo)
1953 {
1954 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1955
1956 view->bo = buffer->bo;
1957 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1958 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1959 view->vk_format = pCreateInfo->format;
1960
1961 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1962 pCreateInfo->offset, view->range, view->state);
1963 }
1964
1965 VkResult
1966 radv_CreateBufferView(VkDevice _device,
1967 const VkBufferViewCreateInfo *pCreateInfo,
1968 const VkAllocationCallbacks *pAllocator,
1969 VkBufferView *pView)
1970 {
1971 RADV_FROM_HANDLE(radv_device, device, _device);
1972 struct radv_buffer_view *view;
1973
1974 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1975 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1976 if (!view)
1977 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1978
1979 vk_object_base_init(&device->vk, &view->base,
1980 VK_OBJECT_TYPE_BUFFER_VIEW);
1981
1982 radv_buffer_view_init(view, device, pCreateInfo);
1983
1984 *pView = radv_buffer_view_to_handle(view);
1985
1986 return VK_SUCCESS;
1987 }
1988
1989 void
1990 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1991 const VkAllocationCallbacks *pAllocator)
1992 {
1993 RADV_FROM_HANDLE(radv_device, device, _device);
1994 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1995
1996 if (!view)
1997 return;
1998
1999 vk_object_base_finish(&view->base);
2000 vk_free2(&device->vk.alloc, pAllocator, view);
2001 }