radv: Include gfx10_format_table.h only from a single source file.
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36
37 struct gfx10_format {
38 unsigned img_format:9;
39
40 /* Various formats are only supported with workarounds for vertex fetch,
41 * and some 32_32_32 formats are supported natively, but only for buffers
42 * (possibly with some image support, actually, but no filtering). */
43 bool buffers_only:1;
44 };
45
46 #include "gfx10_format_table.h"
47
48 static unsigned
49 radv_choose_tiling(struct radv_device *device,
50 const VkImageCreateInfo *pCreateInfo,
51 VkFormat format)
52 {
53 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
54 assert(pCreateInfo->samples <= 1);
55 return RADEON_SURF_MODE_LINEAR_ALIGNED;
56 }
57
58 if (!vk_format_is_compressed(format) &&
59 !vk_format_is_depth_or_stencil(format)
60 && device->physical_device->rad_info.chip_class <= GFX8) {
61 /* this causes hangs in some VK CTS tests on GFX9. */
62 /* Textures with a very small height are recommended to be linear. */
63 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
64 /* Only very thin and long 2D textures should benefit from
65 * linear_aligned. */
66 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
67 return RADEON_SURF_MODE_LINEAR_ALIGNED;
68 }
69
70 /* MSAA resources must be 2D tiled. */
71 if (pCreateInfo->samples > 1)
72 return RADEON_SURF_MODE_2D;
73
74 return RADEON_SURF_MODE_2D;
75 }
76
77 static bool
78 radv_use_tc_compat_htile_for_image(struct radv_device *device,
79 const VkImageCreateInfo *pCreateInfo,
80 VkFormat format)
81 {
82 /* TC-compat HTILE is only available for GFX8+. */
83 if (device->physical_device->rad_info.chip_class < GFX8)
84 return false;
85
86 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
87 return false;
88
89 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
90 return false;
91
92 if (pCreateInfo->mipLevels > 1)
93 return false;
94
95 /* Do not enable TC-compatible HTILE if the image isn't readable by a
96 * shader because no texture fetches will happen.
97 */
98 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
99 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
100 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
101 return false;
102
103 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
104 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
105 */
106 if (pCreateInfo->samples >= 2 &&
107 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
108 (format == VK_FORMAT_D32_SFLOAT &&
109 device->physical_device->rad_info.chip_class == GFX10)))
110 return false;
111
112 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
113 * supports 32-bit. Though, it's possible to enable TC-compat for
114 * 16-bit depth surfaces if no Z planes are compressed.
115 */
116 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
117 format != VK_FORMAT_D32_SFLOAT &&
118 format != VK_FORMAT_D16_UNORM)
119 return false;
120
121 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
122 const struct VkImageFormatListCreateInfo *format_list =
123 (const struct VkImageFormatListCreateInfo *)
124 vk_find_struct_const(pCreateInfo->pNext,
125 IMAGE_FORMAT_LIST_CREATE_INFO);
126
127 /* We have to ignore the existence of the list if viewFormatCount = 0 */
128 if (format_list && format_list->viewFormatCount) {
129 /* compatibility is transitive, so we only need to check
130 * one format with everything else.
131 */
132 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
133 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
134 continue;
135
136 if (format != format_list->pViewFormats[i])
137 return false;
138 }
139 } else {
140 return false;
141 }
142 }
143
144 return true;
145 }
146
147 static bool
148 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
149 {
150 if (info->bo_metadata) {
151 if (device->physical_device->rad_info.chip_class >= GFX9)
152 return info->bo_metadata->u.gfx9.scanout;
153 else
154 return info->bo_metadata->u.legacy.scanout;
155 }
156
157 return info->scanout;
158 }
159
160 static bool
161 radv_use_dcc_for_image(struct radv_device *device,
162 const struct radv_image *image,
163 const VkImageCreateInfo *pCreateInfo,
164 VkFormat format)
165 {
166 bool dcc_compatible_formats;
167 bool blendable;
168
169 /* DCC (Delta Color Compression) is only available for GFX8+. */
170 if (device->physical_device->rad_info.chip_class < GFX8)
171 return false;
172
173 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
174 return false;
175
176 if (image->shareable)
177 return false;
178
179 /* TODO: Enable DCC for storage images. */
180 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
181 return false;
182
183 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
184 return false;
185
186 if (vk_format_is_subsampled(format) ||
187 vk_format_get_plane_count(format) > 1)
188 return false;
189
190 /* TODO: Enable DCC for mipmaps on GFX9+. */
191 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
192 device->physical_device->rad_info.chip_class >= GFX9)
193 return false;
194
195 /* Do not enable DCC for mipmapped arrays because performance is worse. */
196 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
197 return false;
198
199 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
200 * 2x can be enabled with an option.
201 */
202 if (pCreateInfo->samples > 2 ||
203 (pCreateInfo->samples == 2 &&
204 !device->physical_device->dcc_msaa_allowed))
205 return false;
206
207 /* Determine if the formats are DCC compatible. */
208 dcc_compatible_formats =
209 radv_is_colorbuffer_format_supported(format,
210 &blendable);
211
212 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
213 const struct VkImageFormatListCreateInfo *format_list =
214 (const struct VkImageFormatListCreateInfo *)
215 vk_find_struct_const(pCreateInfo->pNext,
216 IMAGE_FORMAT_LIST_CREATE_INFO);
217
218 /* We have to ignore the existence of the list if viewFormatCount = 0 */
219 if (format_list && format_list->viewFormatCount) {
220 /* compatibility is transitive, so we only need to check
221 * one format with everything else. */
222 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
223 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
224 continue;
225
226 if (!radv_dcc_formats_compatible(format,
227 format_list->pViewFormats[i]))
228 dcc_compatible_formats = false;
229 }
230 } else {
231 dcc_compatible_formats = false;
232 }
233 }
234
235 if (!dcc_compatible_formats)
236 return false;
237
238 return true;
239 }
240
241 static bool
242 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
243 struct radv_image *image)
244 {
245 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
246 return false;
247
248 /* TC-compat CMASK is only available for GFX8+. */
249 if (device->physical_device->rad_info.chip_class < GFX8)
250 return false;
251
252 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
253 return false;
254
255 if (radv_image_has_dcc(image))
256 return false;
257
258 if (!radv_image_has_cmask(image))
259 return false;
260
261 return true;
262 }
263
264 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
265 {
266 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
267 }
268
269 static bool
270 radv_is_valid_opaque_metadata(const struct radv_device *device,
271 const struct radeon_bo_metadata *md)
272 {
273 if (md->metadata[0] != 1 ||
274 md->metadata[1] != si_get_bo_metadata_word1(device))
275 return false;
276
277 if (md->size_metadata < 40)
278 return false;
279
280 return true;
281 }
282
283 static void
284 radv_patch_surface_from_metadata(struct radv_device *device,
285 struct radeon_surf *surface,
286 const struct radeon_bo_metadata *md)
287 {
288 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
289
290 if (device->physical_device->rad_info.chip_class >= GFX9) {
291 if (md->u.gfx9.swizzle_mode > 0)
292 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
293 else
294 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
295
296 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
297 } else {
298 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
299 surface->u.legacy.bankw = md->u.legacy.bankw;
300 surface->u.legacy.bankh = md->u.legacy.bankh;
301 surface->u.legacy.tile_split = md->u.legacy.tile_split;
302 surface->u.legacy.mtilea = md->u.legacy.mtilea;
303 surface->u.legacy.num_banks = md->u.legacy.num_banks;
304
305 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
306 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
307 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
308 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
309 else
310 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
311
312 }
313 }
314
315 static VkResult
316 radv_patch_image_dimensions(struct radv_device *device,
317 struct radv_image *image,
318 const struct radv_image_create_info *create_info,
319 struct ac_surf_info *image_info)
320 {
321 unsigned width = image->info.width;
322 unsigned height = image->info.height;
323
324 /*
325 * minigbm sometimes allocates bigger images which is going to result in
326 * weird strides and other properties. Lets be lenient where possible and
327 * fail it on GFX10 (as we cannot cope there).
328 *
329 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
330 */
331 if (create_info->bo_metadata &&
332 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
333 const struct radeon_bo_metadata *md = create_info->bo_metadata;
334
335 if (device->physical_device->rad_info.chip_class >= GFX10) {
336 width = G_00A004_WIDTH_LO(md->metadata[3]) +
337 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
338 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
339 } else {
340 width = G_008F18_WIDTH(md->metadata[4]) + 1;
341 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
342 }
343 }
344
345 if (image->info.width == width && image->info.height == height)
346 return VK_SUCCESS;
347
348 if (width < image->info.width || height < image->info.height) {
349 fprintf(stderr,
350 "The imported image has smaller dimensions than the internal\n"
351 "dimensions. Using it is going to fail badly, so we reject\n"
352 "this import.\n"
353 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
354 image->info.width, image->info.height, width, height);
355 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
356 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
357 fprintf(stderr,
358 "Tried to import an image with inconsistent width on GFX10.\n"
359 "As GFX10 has no separate stride fields we cannot cope with\n"
360 "an inconsistency in width and will fail this import.\n"
361 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
362 image->info.width, image->info.height, width, height);
363 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
364 } else {
365 fprintf(stderr,
366 "Tried to import an image with inconsistent width on pre-GFX10.\n"
367 "As GFX10 has no separate stride fields we cannot cope with\n"
368 "an inconsistency and would fail on GFX10.\n"
369 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
370 image->info.width, image->info.height, width, height);
371 }
372 image_info->width = width;
373 image_info->height = height;
374
375 return VK_SUCCESS;
376 }
377
378 static VkResult
379 radv_patch_image_from_extra_info(struct radv_device *device,
380 struct radv_image *image,
381 const struct radv_image_create_info *create_info,
382 struct ac_surf_info *image_info)
383 {
384 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
385 if (result != VK_SUCCESS)
386 return result;
387
388 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
389 if (create_info->bo_metadata) {
390 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
391 create_info->bo_metadata);
392 }
393
394 if (radv_surface_has_scanout(device, create_info)) {
395 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
396 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
397
398 image->info.surf_index = NULL;
399 }
400 }
401 return VK_SUCCESS;
402 }
403
404 static int
405 radv_init_surface(struct radv_device *device,
406 const struct radv_image *image,
407 struct radeon_surf *surface,
408 unsigned plane_id,
409 const VkImageCreateInfo *pCreateInfo,
410 VkFormat image_format)
411 {
412 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
413 VkFormat format = vk_format_get_plane_format(image_format, plane_id);
414 const struct vk_format_description *desc = vk_format_description(format);
415 bool is_depth, is_stencil;
416
417 is_depth = vk_format_has_depth(desc);
418 is_stencil = vk_format_has_stencil(desc);
419
420 surface->blk_w = vk_format_get_blockwidth(format);
421 surface->blk_h = vk_format_get_blockheight(format);
422
423 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
424 /* align byte per element on dword */
425 if (surface->bpe == 3) {
426 surface->bpe = 4;
427 }
428
429 surface->flags = RADEON_SURF_SET(array_mode, MODE);
430
431 switch (pCreateInfo->imageType){
432 case VK_IMAGE_TYPE_1D:
433 if (pCreateInfo->arrayLayers > 1)
434 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
435 else
436 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
437 break;
438 case VK_IMAGE_TYPE_2D:
439 if (pCreateInfo->arrayLayers > 1)
440 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
441 else
442 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
443 break;
444 case VK_IMAGE_TYPE_3D:
445 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
446 break;
447 default:
448 unreachable("unhandled image type");
449 }
450
451 if (is_depth) {
452 surface->flags |= RADEON_SURF_ZBUFFER;
453 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
454 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
455 }
456
457 if (is_stencil)
458 surface->flags |= RADEON_SURF_SBUFFER;
459
460 if (device->physical_device->rad_info.chip_class >= GFX9 &&
461 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
462 vk_format_get_blocksizebits(image_format) == 128 &&
463 vk_format_is_compressed(image_format))
464 surface->flags |= RADEON_SURF_NO_RENDER_TARGET;
465
466 if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
467 surface->flags |= RADEON_SURF_DISABLE_DCC;
468
469 return 0;
470 }
471
472 static inline unsigned
473 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
474 {
475 if (stencil)
476 return plane->surface.u.legacy.stencil_tiling_index[level];
477 else
478 return plane->surface.u.legacy.tiling_index[level];
479 }
480
481 static unsigned radv_map_swizzle(unsigned swizzle)
482 {
483 switch (swizzle) {
484 case VK_SWIZZLE_Y:
485 return V_008F0C_SQ_SEL_Y;
486 case VK_SWIZZLE_Z:
487 return V_008F0C_SQ_SEL_Z;
488 case VK_SWIZZLE_W:
489 return V_008F0C_SQ_SEL_W;
490 case VK_SWIZZLE_0:
491 return V_008F0C_SQ_SEL_0;
492 case VK_SWIZZLE_1:
493 return V_008F0C_SQ_SEL_1;
494 default: /* VK_SWIZZLE_X */
495 return V_008F0C_SQ_SEL_X;
496 }
497 }
498
499 static void
500 radv_make_buffer_descriptor(struct radv_device *device,
501 struct radv_buffer *buffer,
502 VkFormat vk_format,
503 unsigned offset,
504 unsigned range,
505 uint32_t *state)
506 {
507 const struct vk_format_description *desc;
508 unsigned stride;
509 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
510 uint64_t va = gpu_address + buffer->offset;
511 unsigned num_format, data_format;
512 int first_non_void;
513 desc = vk_format_description(vk_format);
514 first_non_void = vk_format_get_first_non_void_channel(vk_format);
515 stride = desc->block.bits / 8;
516
517 va += offset;
518 state[0] = va;
519 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
520 S_008F04_STRIDE(stride);
521
522 if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
523 range /= stride;
524 }
525
526 state[2] = range;
527 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
528 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
529 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
530 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
531
532 if (device->physical_device->rad_info.chip_class >= GFX10) {
533 const struct gfx10_format *fmt = gfx10_format_description(vk_format);
534
535 /* OOB_SELECT chooses the out-of-bounds check:
536 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
537 * - 1: index >= NUM_RECORDS
538 * - 2: NUM_RECORDS == 0
539 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
540 * else: swizzle_address >= NUM_RECORDS
541 */
542 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
543 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
544 S_008F0C_RESOURCE_LEVEL(1);
545 } else {
546 num_format = radv_translate_buffer_numformat(desc, first_non_void);
547 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
548
549 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
550 assert(num_format != ~0);
551
552 state[3] |= S_008F0C_NUM_FORMAT(num_format) |
553 S_008F0C_DATA_FORMAT(data_format);
554 }
555 }
556
557 static void
558 si_set_mutable_tex_desc_fields(struct radv_device *device,
559 struct radv_image *image,
560 const struct legacy_surf_level *base_level_info,
561 unsigned plane_id,
562 unsigned base_level, unsigned first_level,
563 unsigned block_width, bool is_stencil,
564 bool is_storage_image, bool disable_compression,
565 uint32_t *state)
566 {
567 struct radv_image_plane *plane = &image->planes[plane_id];
568 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
569 uint64_t va = gpu_address + plane->offset;
570 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
571 uint64_t meta_va = 0;
572 if (chip_class >= GFX9) {
573 if (is_stencil)
574 va += plane->surface.u.gfx9.stencil_offset;
575 else
576 va += plane->surface.u.gfx9.surf_offset;
577 } else
578 va += base_level_info->offset;
579
580 state[0] = va >> 8;
581 if (chip_class >= GFX9 ||
582 base_level_info->mode == RADEON_SURF_MODE_2D)
583 state[0] |= plane->surface.tile_swizzle;
584 state[1] &= C_008F14_BASE_ADDRESS_HI;
585 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
586
587 if (chip_class >= GFX8) {
588 state[6] &= C_008F28_COMPRESSION_EN;
589 state[7] = 0;
590 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
591 meta_va = gpu_address + image->dcc_offset;
592 if (chip_class <= GFX8)
593 meta_va += base_level_info->dcc_offset;
594
595 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
596 dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
597 meta_va |= dcc_tile_swizzle;
598 } else if (!disable_compression &&
599 radv_image_is_tc_compat_htile(image)) {
600 meta_va = gpu_address + image->htile_offset;
601 }
602
603 if (meta_va) {
604 state[6] |= S_008F28_COMPRESSION_EN(1);
605 if (chip_class <= GFX9)
606 state[7] = meta_va >> 8;
607 }
608 }
609
610 if (chip_class >= GFX10) {
611 state[3] &= C_00A00C_SW_MODE;
612
613 if (is_stencil) {
614 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
615 } else {
616 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
617 }
618
619 state[6] &= C_00A018_META_DATA_ADDRESS_LO &
620 C_00A018_META_PIPE_ALIGNED;
621
622 if (meta_va) {
623 struct gfx9_surf_meta_flags meta = {
624 .rb_aligned = 1,
625 .pipe_aligned = 1,
626 };
627
628 if (image->dcc_offset)
629 meta = plane->surface.u.gfx9.dcc;
630
631 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
632 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
633 }
634
635 state[7] = meta_va >> 16;
636 } else if (chip_class == GFX9) {
637 state[3] &= C_008F1C_SW_MODE;
638 state[4] &= C_008F20_PITCH;
639
640 if (is_stencil) {
641 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
642 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
643 } else {
644 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
645 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
646 }
647
648 state[5] &= C_008F24_META_DATA_ADDRESS &
649 C_008F24_META_PIPE_ALIGNED &
650 C_008F24_META_RB_ALIGNED;
651 if (meta_va) {
652 struct gfx9_surf_meta_flags meta = {
653 .rb_aligned = 1,
654 .pipe_aligned = 1,
655 };
656
657 if (image->dcc_offset)
658 meta = plane->surface.u.gfx9.dcc;
659
660 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
661 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
662 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
663 }
664 } else {
665 /* GFX6-GFX8 */
666 unsigned pitch = base_level_info->nblk_x * block_width;
667 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
668
669 state[3] &= C_008F1C_TILING_INDEX;
670 state[3] |= S_008F1C_TILING_INDEX(index);
671 state[4] &= C_008F20_PITCH;
672 state[4] |= S_008F20_PITCH(pitch - 1);
673 }
674 }
675
676 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
677 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
678 {
679 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
680 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
681
682 /* GFX9 allocates 1D textures as 2D. */
683 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
684 image_type = VK_IMAGE_TYPE_2D;
685 switch (image_type) {
686 case VK_IMAGE_TYPE_1D:
687 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
688 case VK_IMAGE_TYPE_2D:
689 if (nr_samples > 1)
690 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
691 else
692 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
693 case VK_IMAGE_TYPE_3D:
694 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
695 return V_008F1C_SQ_RSRC_IMG_3D;
696 else
697 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
698 default:
699 unreachable("illegal image type");
700 }
701 }
702
703 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
704 {
705 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
706
707 if (swizzle[3] == VK_SWIZZLE_X) {
708 /* For the pre-defined border color values (white, opaque
709 * black, transparent black), the only thing that matters is
710 * that the alpha channel winds up in the correct place
711 * (because the RGB channels are all the same) so either of
712 * these enumerations will work.
713 */
714 if (swizzle[2] == VK_SWIZZLE_Y)
715 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
716 else
717 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
718 } else if (swizzle[0] == VK_SWIZZLE_X) {
719 if (swizzle[1] == VK_SWIZZLE_Y)
720 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
721 else
722 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
723 } else if (swizzle[1] == VK_SWIZZLE_X) {
724 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
725 } else if (swizzle[2] == VK_SWIZZLE_X) {
726 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
727 }
728
729 return bc_swizzle;
730 }
731
732 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
733 {
734 const struct vk_format_description *desc = vk_format_description(format);
735
736 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
737 return desc->swizzle[3] == VK_SWIZZLE_X;
738
739 return radv_translate_colorswap(format, false) <= 1;
740 }
741 /**
742 * Build the sampler view descriptor for a texture (GFX10).
743 */
744 static void
745 gfx10_make_texture_descriptor(struct radv_device *device,
746 struct radv_image *image,
747 bool is_storage_image,
748 VkImageViewType view_type,
749 VkFormat vk_format,
750 const VkComponentMapping *mapping,
751 unsigned first_level, unsigned last_level,
752 unsigned first_layer, unsigned last_layer,
753 unsigned width, unsigned height, unsigned depth,
754 uint32_t *state,
755 uint32_t *fmask_state)
756 {
757 const struct vk_format_description *desc;
758 enum vk_swizzle swizzle[4];
759 unsigned img_format;
760 unsigned type;
761
762 desc = vk_format_description(vk_format);
763 img_format = gfx10_format_description(vk_format)->img_format;
764
765 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
766 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
767 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
768 } else {
769 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
770 }
771
772 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
773 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
774 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
775 height = 1;
776 depth = image->info.array_size;
777 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
778 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
779 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
780 depth = image->info.array_size;
781 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
782 depth = image->info.array_size / 6;
783
784 state[0] = 0;
785 state[1] = S_00A004_FORMAT(img_format) |
786 S_00A004_WIDTH_LO(width - 1);
787 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
788 S_00A008_HEIGHT(height - 1) |
789 S_00A008_RESOURCE_LEVEL(1);
790 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
791 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
792 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
793 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
794 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
795 0 : first_level) |
796 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
797 util_logbase2(image->info.samples) :
798 last_level) |
799 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
800 S_00A00C_TYPE(type);
801 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
802 * to know the total number of layers.
803 */
804 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
805 S_00A010_BASE_ARRAY(first_layer);
806 state[5] = S_00A014_ARRAY_PITCH(0) |
807 S_00A014_MAX_MIP(image->info.samples > 1 ?
808 util_logbase2(image->info.samples) :
809 image->info.levels - 1) |
810 S_00A014_PERF_MOD(4);
811 state[6] = 0;
812 state[7] = 0;
813
814 if (radv_dcc_enabled(image, first_level)) {
815 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
816 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
817 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
818 }
819
820 /* Initialize the sampler view for FMASK. */
821 if (radv_image_has_fmask(image)) {
822 uint64_t gpu_address = radv_buffer_get_va(image->bo);
823 uint32_t format;
824 uint64_t va;
825
826 assert(image->plane_count == 1);
827
828 va = gpu_address + image->offset + image->fmask_offset;
829
830 switch (image->info.samples) {
831 case 2:
832 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
833 break;
834 case 4:
835 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
836 break;
837 case 8:
838 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
839 break;
840 default:
841 unreachable("invalid nr_samples");
842 }
843
844 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
845 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
846 S_00A004_FORMAT(format) |
847 S_00A004_WIDTH_LO(width - 1);
848 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
849 S_00A008_HEIGHT(height - 1) |
850 S_00A008_RESOURCE_LEVEL(1);
851 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
852 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
853 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
854 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
855 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
856 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
857 fmask_state[4] = S_00A010_DEPTH(last_layer) |
858 S_00A010_BASE_ARRAY(first_layer);
859 fmask_state[5] = 0;
860 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
861 fmask_state[7] = 0;
862 } else if (fmask_state)
863 memset(fmask_state, 0, 8 * 4);
864 }
865
866 /**
867 * Build the sampler view descriptor for a texture (SI-GFX9)
868 */
869 static void
870 si_make_texture_descriptor(struct radv_device *device,
871 struct radv_image *image,
872 bool is_storage_image,
873 VkImageViewType view_type,
874 VkFormat vk_format,
875 const VkComponentMapping *mapping,
876 unsigned first_level, unsigned last_level,
877 unsigned first_layer, unsigned last_layer,
878 unsigned width, unsigned height, unsigned depth,
879 uint32_t *state,
880 uint32_t *fmask_state)
881 {
882 const struct vk_format_description *desc;
883 enum vk_swizzle swizzle[4];
884 int first_non_void;
885 unsigned num_format, data_format, type;
886
887 desc = vk_format_description(vk_format);
888
889 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
890 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
891 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
892 } else {
893 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
894 }
895
896 first_non_void = vk_format_get_first_non_void_channel(vk_format);
897
898 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
899 if (num_format == ~0) {
900 num_format = 0;
901 }
902
903 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
904 if (data_format == ~0) {
905 data_format = 0;
906 }
907
908 /* S8 with either Z16 or Z32 HTILE need a special format. */
909 if (device->physical_device->rad_info.chip_class == GFX9 &&
910 vk_format == VK_FORMAT_S8_UINT &&
911 radv_image_is_tc_compat_htile(image)) {
912 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
913 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
914 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
915 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
916 }
917 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
918 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
919 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
920 height = 1;
921 depth = image->info.array_size;
922 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
923 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
924 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
925 depth = image->info.array_size;
926 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
927 depth = image->info.array_size / 6;
928
929 state[0] = 0;
930 state[1] = (S_008F14_DATA_FORMAT(data_format) |
931 S_008F14_NUM_FORMAT(num_format));
932 state[2] = (S_008F18_WIDTH(width - 1) |
933 S_008F18_HEIGHT(height - 1) |
934 S_008F18_PERF_MOD(4));
935 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
936 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
937 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
938 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
939 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
940 0 : first_level) |
941 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
942 util_logbase2(image->info.samples) :
943 last_level) |
944 S_008F1C_TYPE(type));
945 state[4] = 0;
946 state[5] = S_008F24_BASE_ARRAY(first_layer);
947 state[6] = 0;
948 state[7] = 0;
949
950 if (device->physical_device->rad_info.chip_class == GFX9) {
951 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
952
953 /* Depth is the last accessible layer on Gfx9.
954 * The hw doesn't need to know the total number of layers.
955 */
956 if (type == V_008F1C_SQ_RSRC_IMG_3D)
957 state[4] |= S_008F20_DEPTH(depth - 1);
958 else
959 state[4] |= S_008F20_DEPTH(last_layer);
960
961 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
962 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
963 util_logbase2(image->info.samples) :
964 image->info.levels - 1);
965 } else {
966 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
967 state[4] |= S_008F20_DEPTH(depth - 1);
968 state[5] |= S_008F24_LAST_ARRAY(last_layer);
969 }
970 if (image->dcc_offset) {
971 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
972 } else {
973 /* The last dword is unused by hw. The shader uses it to clear
974 * bits in the first dword of sampler state.
975 */
976 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
977 if (first_level == last_level)
978 state[7] = C_008F30_MAX_ANISO_RATIO;
979 else
980 state[7] = 0xffffffff;
981 }
982 }
983
984 /* Initialize the sampler view for FMASK. */
985 if (radv_image_has_fmask(image)) {
986 uint32_t fmask_format, num_format;
987 uint64_t gpu_address = radv_buffer_get_va(image->bo);
988 uint64_t va;
989
990 assert(image->plane_count == 1);
991
992 va = gpu_address + image->offset + image->fmask_offset;
993
994 if (device->physical_device->rad_info.chip_class == GFX9) {
995 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
996 switch (image->info.samples) {
997 case 2:
998 num_format = V_008F14_IMG_FMASK_8_2_2;
999 break;
1000 case 4:
1001 num_format = V_008F14_IMG_FMASK_8_4_4;
1002 break;
1003 case 8:
1004 num_format = V_008F14_IMG_FMASK_32_8_8;
1005 break;
1006 default:
1007 unreachable("invalid nr_samples");
1008 }
1009 } else {
1010 switch (image->info.samples) {
1011 case 2:
1012 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1013 break;
1014 case 4:
1015 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1016 break;
1017 case 8:
1018 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1019 break;
1020 default:
1021 assert(0);
1022 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1023 }
1024 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1025 }
1026
1027 fmask_state[0] = va >> 8;
1028 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1029 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1030 S_008F14_DATA_FORMAT(fmask_format) |
1031 S_008F14_NUM_FORMAT(num_format);
1032 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1033 S_008F18_HEIGHT(height - 1);
1034 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1035 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1036 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1037 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1038 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1039 fmask_state[4] = 0;
1040 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1041 fmask_state[6] = 0;
1042 fmask_state[7] = 0;
1043
1044 if (device->physical_device->rad_info.chip_class == GFX9) {
1045 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1046 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1047 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1048 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1049 S_008F24_META_RB_ALIGNED(1);
1050
1051 if (radv_image_is_tc_compat_cmask(image)) {
1052 va = gpu_address + image->offset + image->cmask_offset;
1053
1054 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1055 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1056 fmask_state[7] |= va >> 8;
1057 }
1058 } else {
1059 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1060 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1061 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1062 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1063
1064 if (radv_image_is_tc_compat_cmask(image)) {
1065 va = gpu_address + image->offset + image->cmask_offset;
1066
1067 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1068 fmask_state[7] |= va >> 8;
1069 }
1070 }
1071 } else if (fmask_state)
1072 memset(fmask_state, 0, 8 * 4);
1073 }
1074
1075 static void
1076 radv_make_texture_descriptor(struct radv_device *device,
1077 struct radv_image *image,
1078 bool is_storage_image,
1079 VkImageViewType view_type,
1080 VkFormat vk_format,
1081 const VkComponentMapping *mapping,
1082 unsigned first_level, unsigned last_level,
1083 unsigned first_layer, unsigned last_layer,
1084 unsigned width, unsigned height, unsigned depth,
1085 uint32_t *state,
1086 uint32_t *fmask_state)
1087 {
1088 if (device->physical_device->rad_info.chip_class >= GFX10) {
1089 gfx10_make_texture_descriptor(device, image, is_storage_image,
1090 view_type, vk_format, mapping,
1091 first_level, last_level,
1092 first_layer, last_layer,
1093 width, height, depth,
1094 state, fmask_state);
1095 } else {
1096 si_make_texture_descriptor(device, image, is_storage_image,
1097 view_type, vk_format, mapping,
1098 first_level, last_level,
1099 first_layer, last_layer,
1100 width, height, depth,
1101 state, fmask_state);
1102 }
1103 }
1104
1105 static void
1106 radv_query_opaque_metadata(struct radv_device *device,
1107 struct radv_image *image,
1108 struct radeon_bo_metadata *md)
1109 {
1110 static const VkComponentMapping fixedmapping;
1111 uint32_t desc[8], i;
1112
1113 assert(image->plane_count == 1);
1114
1115 /* Metadata image format format version 1:
1116 * [0] = 1 (metadata format identifier)
1117 * [1] = (VENDOR_ID << 16) | PCI_ID
1118 * [2:9] = image descriptor for the whole resource
1119 * [2] is always 0, because the base address is cleared
1120 * [9] is the DCC offset bits [39:8] from the beginning of
1121 * the buffer
1122 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1123 */
1124 md->metadata[0] = 1; /* metadata image format version 1 */
1125
1126 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1127 md->metadata[1] = si_get_bo_metadata_word1(device);
1128
1129
1130 radv_make_texture_descriptor(device, image, false,
1131 (VkImageViewType)image->type, image->vk_format,
1132 &fixedmapping, 0, image->info.levels - 1, 0,
1133 image->info.array_size - 1,
1134 image->info.width, image->info.height,
1135 image->info.depth,
1136 desc, NULL);
1137
1138 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1139 image->planes[0].surface.blk_w, false, false, false, desc);
1140
1141 /* Clear the base address and set the relative DCC offset. */
1142 desc[0] = 0;
1143 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1144 desc[7] = image->dcc_offset >> 8;
1145
1146 /* Dwords [2:9] contain the image descriptor. */
1147 memcpy(&md->metadata[2], desc, sizeof(desc));
1148
1149 /* Dwords [10:..] contain the mipmap level offsets. */
1150 if (device->physical_device->rad_info.chip_class <= GFX8) {
1151 for (i = 0; i <= image->info.levels - 1; i++)
1152 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1153 md->size_metadata = (11 + image->info.levels - 1) * 4;
1154 } else
1155 md->size_metadata = 10 * 4;
1156 }
1157
1158 void
1159 radv_init_metadata(struct radv_device *device,
1160 struct radv_image *image,
1161 struct radeon_bo_metadata *metadata)
1162 {
1163 struct radeon_surf *surface = &image->planes[0].surface;
1164
1165 memset(metadata, 0, sizeof(*metadata));
1166
1167 if (device->physical_device->rad_info.chip_class >= GFX9) {
1168 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1169 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1170 } else {
1171 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1172 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1173 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1174 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1175 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1176 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1177 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1178 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1179 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1180 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1181 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1182 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1183 }
1184 radv_query_opaque_metadata(device, image, metadata);
1185 }
1186
1187 void
1188 radv_image_override_offset_stride(struct radv_device *device,
1189 struct radv_image *image,
1190 uint64_t offset, uint32_t stride)
1191 {
1192 ac_surface_override_offset_stride(&device->physical_device->rad_info,
1193 &image->planes[0].surface,
1194 image->info.levels, offset, stride);
1195 }
1196
1197 static void
1198 radv_image_alloc_fmask(struct radv_device *device,
1199 struct radv_image *image)
1200 {
1201 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1202
1203 image->fmask_offset = align64(image->size, fmask_alignment);
1204 image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
1205 image->alignment = MAX2(image->alignment, fmask_alignment);
1206 }
1207
1208 static void
1209 radv_image_alloc_cmask(struct radv_device *device,
1210 struct radv_image *image)
1211 {
1212 unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
1213 unsigned cmask_size = image->planes[0].surface.cmask_size;
1214 uint32_t clear_value_size = 0;
1215
1216 if (!cmask_size)
1217 return;
1218
1219 assert(cmask_alignment);
1220
1221 image->cmask_offset = align64(image->size, cmask_alignment);
1222 /* + 8 for storing the clear values */
1223 if (!image->clear_value_offset) {
1224 image->clear_value_offset = image->cmask_offset + cmask_size;
1225 clear_value_size = 8;
1226 }
1227 image->size = image->cmask_offset + cmask_size + clear_value_size;
1228 image->alignment = MAX2(image->alignment, cmask_alignment);
1229 }
1230
1231 static void
1232 radv_image_alloc_dcc(struct radv_image *image)
1233 {
1234 assert(image->plane_count == 1);
1235
1236 image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
1237 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1238 image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
1239 image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
1240 image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
1241 image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
1242 image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
1243 }
1244
1245 static void
1246 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1247 {
1248 image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1249
1250 /* + 8 for storing the clear values */
1251 image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
1252 image->size = image->clear_value_offset + image->info.levels * 8;
1253 if (radv_image_is_tc_compat_htile(image) &&
1254 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1255 /* Metadata for the TC-compatible HTILE hardware bug which
1256 * have to be fixed by updating ZRANGE_PRECISION when doing
1257 * fast depth clears to 0.0f.
1258 */
1259 image->tc_compat_zrange_offset = image->size;
1260 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1261 }
1262 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1263 }
1264
1265 static inline bool
1266 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
1267 {
1268 if (image->info.samples <= 1 &&
1269 image->info.width * image->info.height <= 512 * 512) {
1270 /* Do not enable CMASK or DCC for small surfaces where the cost
1271 * of the eliminate pass can be higher than the benefit of fast
1272 * clear. RadeonSI does this, but the image threshold is
1273 * different.
1274 */
1275 return false;
1276 }
1277
1278 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
1279 (image->exclusive || image->queue_family_mask == 1);
1280 }
1281
1282 static inline bool
1283 radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
1284 {
1285 if (!radv_image_can_enable_dcc_or_cmask(image) ||
1286 !radv_image_has_dcc(image))
1287 return false;
1288
1289 /* On GFX8, DCC layers can be interleaved and it's currently only
1290 * enabled if slice size is equal to the per slice fast clear size
1291 * because the driver assumes that portions of multiple layers are
1292 * contiguous during fast clears.
1293 */
1294 if (image->info.array_size > 1) {
1295 const struct legacy_surf_level *surf_level =
1296 &image->planes[0].surface.u.legacy.level[0];
1297
1298 assert(device->physical_device->rad_info.chip_class == GFX8);
1299
1300 if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
1301 return false;
1302 }
1303
1304 return true;
1305 }
1306
1307 static inline bool
1308 radv_image_can_enable_cmask(struct radv_image *image)
1309 {
1310 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1311 /* Do not enable CMASK for non-MSAA images (fast color clear)
1312 * because 128 bit formats are not supported, but FMASK might
1313 * still be used.
1314 */
1315 return false;
1316 }
1317
1318 return radv_image_can_enable_dcc_or_cmask(image) &&
1319 image->info.levels == 1 &&
1320 image->info.depth == 1 &&
1321 !image->planes[0].surface.is_linear;
1322 }
1323
1324 static inline bool
1325 radv_image_can_enable_fmask(struct radv_image *image)
1326 {
1327 return image->info.samples > 1 &&
1328 image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
1329 }
1330
1331 static inline bool
1332 radv_image_can_enable_htile(struct radv_image *image)
1333 {
1334 return radv_image_has_htile(image) &&
1335 image->info.levels == 1 &&
1336 image->info.width * image->info.height >= 8 * 8;
1337 }
1338
1339 static void radv_image_disable_dcc(struct radv_image *image)
1340 {
1341 for (unsigned i = 0; i < image->plane_count; ++i)
1342 image->planes[i].surface.dcc_size = 0;
1343 }
1344
1345 static void radv_image_disable_htile(struct radv_image *image)
1346 {
1347 for (unsigned i = 0; i < image->plane_count; ++i)
1348 image->planes[i].surface.htile_size = 0;
1349 }
1350
1351 VkResult
1352 radv_image_create_layout(struct radv_device *device,
1353 struct radv_image_create_info create_info,
1354 struct radv_image *image)
1355 {
1356 /* Check that we did not initialize things earlier */
1357 assert(!image->planes[0].surface.surf_size);
1358
1359 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1360 * common internal case. */
1361 create_info.vk_info = NULL;
1362
1363 struct ac_surf_info image_info = image->info;
1364 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1365 if (result != VK_SUCCESS)
1366 return result;
1367
1368 image->size = 0;
1369 image->alignment = 1;
1370 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1371 struct ac_surf_info info = image_info;
1372
1373 if (plane) {
1374 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1375 assert(info.width % desc->width_divisor == 0);
1376 assert(info.height % desc->height_divisor == 0);
1377
1378 info.width /= desc->width_divisor;
1379 info.height /= desc->height_divisor;
1380 }
1381
1382 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1383
1384 image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
1385 image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
1386 image->alignment = image->planes[plane].surface.surf_alignment;
1387
1388 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1389 }
1390
1391 if (!create_info.no_metadata_planes) {
1392 /* Try to enable DCC first. */
1393 if (radv_image_can_enable_dcc(device, image)) {
1394 radv_image_alloc_dcc(image);
1395 if (image->info.samples > 1) {
1396 /* CMASK should be enabled because DCC fast
1397 * clear with MSAA needs it.
1398 */
1399 assert(radv_image_can_enable_cmask(image));
1400 radv_image_alloc_cmask(device, image);
1401 }
1402 } else {
1403 /* When DCC cannot be enabled, try CMASK. */
1404 radv_image_disable_dcc(image);
1405 if (radv_image_can_enable_cmask(image)) {
1406 radv_image_alloc_cmask(device, image);
1407 }
1408 }
1409
1410 /* Try to enable FMASK for multisampled images. */
1411 if (radv_image_can_enable_fmask(image)) {
1412 radv_image_alloc_fmask(device, image);
1413
1414 if (radv_use_tc_compat_cmask_for_image(device, image))
1415 image->tc_compatible_cmask = true;
1416 } else {
1417 /* Otherwise, try to enable HTILE for depth surfaces. */
1418 if (radv_image_can_enable_htile(image) &&
1419 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
1420 image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
1421 radv_image_alloc_htile(device, image);
1422 } else {
1423 radv_image_disable_htile(image);
1424 }
1425 }
1426 } else {
1427 radv_image_disable_dcc(image);
1428 radv_image_disable_htile(image);
1429 }
1430
1431 assert(image->planes[0].surface.surf_size);
1432 return VK_SUCCESS;
1433 }
1434
1435 VkResult
1436 radv_image_create(VkDevice _device,
1437 const struct radv_image_create_info *create_info,
1438 const VkAllocationCallbacks* alloc,
1439 VkImage *pImage)
1440 {
1441 RADV_FROM_HANDLE(radv_device, device, _device);
1442 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1443 struct radv_image *image = NULL;
1444 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1445 pCreateInfo->format);
1446 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1447
1448 const unsigned plane_count = vk_format_get_plane_count(format);
1449 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1450
1451 radv_assert(pCreateInfo->mipLevels > 0);
1452 radv_assert(pCreateInfo->arrayLayers > 0);
1453 radv_assert(pCreateInfo->samples > 0);
1454 radv_assert(pCreateInfo->extent.width > 0);
1455 radv_assert(pCreateInfo->extent.height > 0);
1456 radv_assert(pCreateInfo->extent.depth > 0);
1457
1458 image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
1459 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1460 if (!image)
1461 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1462
1463 vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
1464
1465 image->type = pCreateInfo->imageType;
1466 image->info.width = pCreateInfo->extent.width;
1467 image->info.height = pCreateInfo->extent.height;
1468 image->info.depth = pCreateInfo->extent.depth;
1469 image->info.samples = pCreateInfo->samples;
1470 image->info.storage_samples = pCreateInfo->samples;
1471 image->info.array_size = pCreateInfo->arrayLayers;
1472 image->info.levels = pCreateInfo->mipLevels;
1473 image->info.num_channels = vk_format_get_nr_components(format);
1474
1475 image->vk_format = format;
1476 image->tiling = pCreateInfo->tiling;
1477 image->usage = pCreateInfo->usage;
1478 image->flags = pCreateInfo->flags;
1479 image->plane_count = plane_count;
1480
1481 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1482 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1483 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1484 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1485 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1486 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1487 else
1488 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1489 }
1490
1491 const VkExternalMemoryImageCreateInfo *external_info =
1492 vk_find_struct_const(pCreateInfo->pNext,
1493 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
1494
1495 image->shareable = external_info;
1496 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1497 image->info.surf_index = &device->image_mrt_offset_counter;
1498 }
1499
1500 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1501 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
1502 }
1503
1504 bool delay_layout = external_info &&
1505 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1506
1507 if (delay_layout) {
1508 *pImage = radv_image_to_handle(image);
1509 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1510 return VK_SUCCESS;
1511 }
1512
1513 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1514 assert(result == VK_SUCCESS);
1515
1516 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1517 image->alignment = MAX2(image->alignment, 4096);
1518 image->size = align64(image->size, image->alignment);
1519 image->offset = 0;
1520
1521 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1522 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1523 if (!image->bo) {
1524 vk_free2(&device->vk.alloc, alloc, image);
1525 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1526 }
1527 }
1528
1529 *pImage = radv_image_to_handle(image);
1530
1531 return VK_SUCCESS;
1532 }
1533
1534 static void
1535 radv_image_view_make_descriptor(struct radv_image_view *iview,
1536 struct radv_device *device,
1537 VkFormat vk_format,
1538 const VkComponentMapping *components,
1539 bool is_storage_image, bool disable_compression,
1540 unsigned plane_id, unsigned descriptor_plane_id)
1541 {
1542 struct radv_image *image = iview->image;
1543 struct radv_image_plane *plane = &image->planes[plane_id];
1544 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1545 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1546 uint32_t blk_w;
1547 union radv_descriptor *descriptor;
1548 uint32_t hw_level = 0;
1549
1550 if (is_storage_image) {
1551 descriptor = &iview->storage_descriptor;
1552 } else {
1553 descriptor = &iview->descriptor;
1554 }
1555
1556 assert(vk_format_get_plane_count(vk_format) == 1);
1557 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1558 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1559
1560 if (device->physical_device->rad_info.chip_class >= GFX9)
1561 hw_level = iview->base_mip;
1562 radv_make_texture_descriptor(device, image, is_storage_image,
1563 iview->type,
1564 vk_format,
1565 components,
1566 hw_level, hw_level + iview->level_count - 1,
1567 iview->base_layer,
1568 iview->base_layer + iview->layer_count - 1,
1569 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1570 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1571 iview->extent.depth,
1572 descriptor->plane_descriptors[descriptor_plane_id],
1573 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1574
1575 const struct legacy_surf_level *base_level_info = NULL;
1576 if (device->physical_device->rad_info.chip_class <= GFX9) {
1577 if (is_stencil)
1578 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1579 else
1580 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1581 }
1582 si_set_mutable_tex_desc_fields(device, image,
1583 base_level_info,
1584 plane_id,
1585 iview->base_mip,
1586 iview->base_mip,
1587 blk_w, is_stencil, is_storage_image,
1588 is_storage_image || disable_compression,
1589 descriptor->plane_descriptors[descriptor_plane_id]);
1590 }
1591
1592 static unsigned
1593 radv_plane_from_aspect(VkImageAspectFlags mask)
1594 {
1595 switch(mask) {
1596 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1597 return 1;
1598 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1599 return 2;
1600 default:
1601 return 0;
1602 }
1603 }
1604
1605 VkFormat
1606 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1607 {
1608 switch(mask) {
1609 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1610 return image->planes[0].format;
1611 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1612 return image->planes[1].format;
1613 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1614 return image->planes[2].format;
1615 case VK_IMAGE_ASPECT_STENCIL_BIT:
1616 return vk_format_stencil_only(image->vk_format);
1617 case VK_IMAGE_ASPECT_DEPTH_BIT:
1618 return vk_format_depth_only(image->vk_format);
1619 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1620 		return image->vk_format;
1621 default:
1622 return image->vk_format;
1623 }
1624 }
1625
1626 void
1627 radv_image_view_init(struct radv_image_view *iview,
1628 struct radv_device *device,
1629 const VkImageViewCreateInfo* pCreateInfo,
1630 const struct radv_image_view_extra_create_info* extra_create_info)
1631 {
1632 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1633 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1634
1635 switch (image->type) {
1636 case VK_IMAGE_TYPE_1D:
1637 case VK_IMAGE_TYPE_2D:
1638 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1639 break;
1640 case VK_IMAGE_TYPE_3D:
1641 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1642 <= radv_minify(image->info.depth, range->baseMipLevel));
1643 break;
1644 default:
1645 unreachable("bad VkImageType");
1646 }
1647 iview->image = image;
1648 iview->bo = image->bo;
1649 iview->type = pCreateInfo->viewType;
1650 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
1651 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1652 iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
1653
1654 iview->vk_format = pCreateInfo->format;
1655
1656 /* If the image has an Android external format, pCreateInfo->format will be
1657 * VK_FORMAT_UNDEFINED. */
1658 if (iview->vk_format == VK_FORMAT_UNDEFINED)
1659 iview->vk_format = image->vk_format;
1660
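	/* Narrow combined depth/stencil formats to the aspect being viewed,
	 * e.g. VK_FORMAT_D32_SFLOAT_S8_UINT is sampled as VK_FORMAT_S8_UINT
	 * through the stencil aspect and as VK_FORMAT_D32_SFLOAT through the
	 * depth aspect.
	 */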
1661 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1662 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1663 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1664 iview->vk_format = vk_format_depth_only(iview->vk_format);
1665 }
1666
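	/* GFX9+ descriptors are always programmed with the base-level extent
	 * (the hw minifies per mip); older hw wants the extent pre-minified to
	 * the base mip, e.g. radv_minify(22, 2) = 5.
	 */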
1667 if (device->physical_device->rad_info.chip_class >= GFX9) {
1668 iview->extent = (VkExtent3D) {
1669 .width = image->info.width,
1670 .height = image->info.height,
1671 .depth = image->info.depth,
1672 };
1673 } else {
1674 iview->extent = (VkExtent3D) {
1675 .width = radv_minify(image->info.width , range->baseMipLevel),
1676 .height = radv_minify(image->info.height, range->baseMipLevel),
1677 .depth = radv_minify(image->info.depth , range->baseMipLevel),
1678 };
1679 }
1680
1681 if (iview->vk_format != image->planes[iview->plane_id].format) {
1682 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1683 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1684 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1685 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1686
1687 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
1688 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
1689
1690 /* Comment ported from amdvlk -
1691 * If we have the following image:
1692 * Uncompressed pixels Compressed block sizes (4x4)
1693 * mip0: 22 x 22 6 x 6
1694 * mip1: 11 x 11 3 x 3
1695 * mip2: 5 x 5 2 x 2
1696 * mip3: 2 x 2 1 x 1
1697 * mip4: 1 x 1 1 x 1
1698 *
1699 	 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW
1700 	 * calculates the degradation of the block sizes down the mip-chain as follows (straight-up
1701 	 * divide-by-two integer math):
1702 * mip0: 6x6
1703 * mip1: 3x3
1704 * mip2: 1x1
1705 * mip3: 1x1
1706 *
1707 * This means that mip2 will be missing texels.
1708 *
1709 	 * Fix this by calculating the base mip's width and height, converting that to the
1710 	 * view format, and rounding it back up to get the level 0 size.
1711 	 * Clamp the converted size between the original value and the next power of two, so
1712 	 * that we don't oversize the image.
1713 */
1714 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1715 vk_format_is_compressed(image->vk_format) &&
1716 !vk_format_is_compressed(iview->vk_format)) {
1717 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
1718 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
1719
1720 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
1721 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
1722
1723 lvl_width <<= range->baseMipLevel;
1724 lvl_height <<= range->baseMipLevel;
1725
1726 iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
1727 iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
1728 }
1729 }
1730
1731 iview->base_layer = range->baseArrayLayer;
1732 iview->layer_count = radv_get_layerCount(image, range);
1733 iview->base_mip = range->baseMipLevel;
1734 iview->level_count = radv_get_levelCount(image, range);
1735
1736 	bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
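	/* Emit both a sampled-image and a storage-image descriptor for each
	 * plane the view covers, so either usage can bind the same view.
	 */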
1737 for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
1738 VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
1739 radv_image_view_make_descriptor(iview, device, format,
1740 &pCreateInfo->components,
1741 false, disable_compression,
1742 iview->plane_id + i, i);
1743 radv_image_view_make_descriptor(iview, device,
1744 format, &pCreateInfo->components,
1745 true, disable_compression,
1746 iview->plane_id + i, i);
1747 }
1748 }
1749
1750 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1751 VkImageLayout layout,
1752 bool in_render_loop,
1753 unsigned queue_mask)
1754 {
1755 if (radv_image_is_tc_compat_htile(image)) {
1756 if (layout == VK_IMAGE_LAYOUT_GENERAL &&
1757 !in_render_loop &&
1758 !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1759 /* It should be safe to enable TC-compat HTILE with
1760 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1761 * loop and if the image doesn't have the storage bit
1762 			 * set. This improves performance for apps that use
1763 			 * GENERAL for the main depth pass, because it allows
1764 			 * compression and reduces the number of
1765 			 * decompressions from/to GENERAL.
1766 */
1767 return true;
1768 }
1769
1770 return layout != VK_IMAGE_LAYOUT_GENERAL;
1771 }
1772
1773 return radv_image_has_htile(image) &&
1774 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1775 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1776 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1777 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1778 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1779 }
1780
1781 bool radv_layout_can_fast_clear(const struct radv_image *image,
1782 VkImageLayout layout,
1783 bool in_render_loop,
1784 unsigned queue_mask)
1785 {
1786 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
1787 }
1788
1789 bool radv_layout_dcc_compressed(const struct radv_device *device,
1790 const struct radv_image *image,
1791 VkImageLayout layout,
1792 bool in_render_loop,
1793 unsigned queue_mask)
1794 {
1795 /* Don't compress compute transfer dst, as image stores are not supported. */
1796 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1797 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1798 return false;
1799
1800 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1801 }
1802
1803
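/* Returns the mask of queue families that may access the image under the
 * given ownership. E.g. an exclusive image queried with
 * family == VK_QUEUE_FAMILY_IGNORED yields only the calling queue family's
 * bit, while EXTERNAL/FOREIGN ownership yields all families.
 */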
1804 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1805 {
1806 if (!image->exclusive)
1807 return image->queue_family_mask;
1808 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1809 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1810 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1811 if (family == VK_QUEUE_FAMILY_IGNORED)
1812 return 1u << queue_family;
1813 return 1u << family;
1814 }
1815
1816 VkResult
1817 radv_CreateImage(VkDevice device,
1818 const VkImageCreateInfo *pCreateInfo,
1819 const VkAllocationCallbacks *pAllocator,
1820 VkImage *pImage)
1821 {
1822 #ifdef ANDROID
1823 const VkNativeBufferANDROID *gralloc_info =
1824 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1825
1826 if (gralloc_info)
1827 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1828 pAllocator, pImage);
1829 #endif
1830
1831 const struct wsi_image_create_info *wsi_info =
1832 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1833 bool scanout = wsi_info && wsi_info->scanout;
1834
1835 return radv_image_create(device,
1836 &(struct radv_image_create_info) {
1837 .vk_info = pCreateInfo,
1838 .scanout = scanout,
1839 },
1840 pAllocator,
1841 pImage);
1842 }
1843
1844 void
1845 radv_DestroyImage(VkDevice _device, VkImage _image,
1846 const VkAllocationCallbacks *pAllocator)
1847 {
1848 RADV_FROM_HANDLE(radv_device, device, _device);
1849 RADV_FROM_HANDLE(radv_image, image, _image);
1850
1851 if (!image)
1852 return;
1853
1854 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1855 device->ws->buffer_destroy(image->bo);
1856
1857 if (image->owned_memory != VK_NULL_HANDLE)
1858 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1859
1860 vk_object_base_finish(&image->base);
1861 vk_free2(&device->vk.alloc, pAllocator, image);
1862 }
1863
1864 void radv_GetImageSubresourceLayout(
1865 VkDevice _device,
1866 VkImage _image,
1867 const VkImageSubresource* pSubresource,
1868 VkSubresourceLayout* pLayout)
1869 {
1870 RADV_FROM_HANDLE(radv_image, image, _image);
1871 RADV_FROM_HANDLE(radv_device, device, _device);
1872 int level = pSubresource->mipLevel;
1873 int layer = pSubresource->arrayLayer;
1874
1875 unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
1876
1877 struct radv_image_plane *plane = &image->planes[plane_id];
1878 struct radeon_surf *surface = &plane->surface;
1879
1880 if (device->physical_device->rad_info.chip_class >= GFX9) {
1881 uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
1882
1883 pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
1884 if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1885 image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1886 image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1887 /* Adjust the number of bytes between each row because
1888 * the pitch is actually the number of components per
1889 * row.
1890 */
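			/* Illustrative numbers: with bpe = 12 bytes per texel
			 * and surf_pitch = 300 components, the byte pitch is
			 * 300 * 12 / 3 = 1200, i.e. 100 texels per row.
			 */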
1891 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
1892 } else {
1893 uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
1894
1895 assert(util_is_power_of_two_nonzero(surface->bpe));
1896 pLayout->rowPitch = pitch * surface->bpe;
1897 }
1898
1899 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
1900 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
1901 pLayout->size = surface->u.gfx9.surf_slice_size;
1902 if (image->type == VK_IMAGE_TYPE_3D)
1903 pLayout->size *= u_minify(image->info.depth, level);
1904 } else {
1905 pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
1906 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
1907 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1908 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1909 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1910 if (image->type == VK_IMAGE_TYPE_3D)
1911 pLayout->size *= u_minify(image->info.depth, level);
1912 }
1913 }
1914
1915
1916 VkResult
1917 radv_CreateImageView(VkDevice _device,
1918 const VkImageViewCreateInfo *pCreateInfo,
1919 const VkAllocationCallbacks *pAllocator,
1920 VkImageView *pView)
1921 {
1922 RADV_FROM_HANDLE(radv_device, device, _device);
1923 struct radv_image_view *view;
1924
1925 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1926 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1927 if (view == NULL)
1928 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1929
1930 vk_object_base_init(&device->vk, &view->base,
1931 VK_OBJECT_TYPE_IMAGE_VIEW);
1932
1933 radv_image_view_init(view, device, pCreateInfo, NULL);
1934
1935 *pView = radv_image_view_to_handle(view);
1936
1937 return VK_SUCCESS;
1938 }
1939
1940 void
1941 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1942 const VkAllocationCallbacks *pAllocator)
1943 {
1944 RADV_FROM_HANDLE(radv_device, device, _device);
1945 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1946
1947 if (!iview)
1948 return;
1949
1950 vk_object_base_finish(&iview->base);
1951 vk_free2(&device->vk.alloc, pAllocator, iview);
1952 }
1953
1954 void radv_buffer_view_init(struct radv_buffer_view *view,
1955 struct radv_device *device,
1956 const VkBufferViewCreateInfo* pCreateInfo)
1957 {
1958 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1959
1960 view->bo = buffer->bo;
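	/* VK_WHOLE_SIZE covers the remainder of the buffer, e.g. a 1024-byte
	 * buffer viewed from offset 256 gets a range of 768 bytes.
	 */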
1961 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1962 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1963 view->vk_format = pCreateInfo->format;
1964
1965 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1966 pCreateInfo->offset, view->range, view->state);
1967 }
1968
1969 VkResult
1970 radv_CreateBufferView(VkDevice _device,
1971 const VkBufferViewCreateInfo *pCreateInfo,
1972 const VkAllocationCallbacks *pAllocator,
1973 VkBufferView *pView)
1974 {
1975 RADV_FROM_HANDLE(radv_device, device, _device);
1976 struct radv_buffer_view *view;
1977
1978 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1979 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1980 if (!view)
1981 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1982
1983 vk_object_base_init(&device->vk, &view->base,
1984 VK_OBJECT_TYPE_BUFFER_VIEW);
1985
1986 radv_buffer_view_init(view, device, pCreateInfo);
1987
1988 *pView = radv_buffer_view_to_handle(view);
1989
1990 return VK_SUCCESS;
1991 }
1992
1993 void
1994 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1995 const VkAllocationCallbacks *pAllocator)
1996 {
1997 RADV_FROM_HANDLE(radv_device, device, _device);
1998 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1999
2000 if (!view)
2001 return;
2002
2003 vk_object_base_finish(&view->base);
2004 vk_free2(&device->vk.alloc, pAllocator, view);
2005 }