radv: implement VK_AMD_shader_explicit_vertex_parameter
[mesa.git] src/amd/vulkan/radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36
37 static unsigned
38 radv_choose_tiling(struct radv_device *device,
39 const VkImageCreateInfo *pCreateInfo,
40 VkFormat format)
41 {
42 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
43 assert(pCreateInfo->samples <= 1);
44 return RADEON_SURF_MODE_LINEAR_ALIGNED;
45 }
46
47 if (!vk_format_is_compressed(format) &&
48 !vk_format_is_depth_or_stencil(format)
49 && device->physical_device->rad_info.chip_class <= GFX8) {
50 		/* Linear tiling here causes hangs in some VK CTS tests on GFX9. */
51 /* Textures with a very small height are recommended to be linear. */
52 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
53 /* Only very thin and long 2D textures should benefit from
54 * linear_aligned. */
55 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
56 return RADEON_SURF_MODE_LINEAR_ALIGNED;
57 }
58
59 	/* MSAA resources must be 2D tiled; everything that reaches this
60 	 * point defaults to 2D tiling as well, so return it unconditionally. */
61 	return RADEON_SURF_MODE_2D;
64 }
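/* Illustrative outcomes of the choice above (a sketch, assuming optimal
 * tiling was requested for a plain color format):
 *
 *   GFX8, 2D, 4096x2, R8_UNORM     -> LINEAR_ALIGNED (thin/long fast path)
 *   GFX9, 2D, 4096x2, R8_UNORM     -> 2D tiled (the linear path is GFX8 and older)
 *   any chip, 4xMSAA color target  -> 2D tiled (MSAA must be tiled)
 *   VK_IMAGE_TILING_LINEAR request -> LINEAR_ALIGNED, single-sampled only
 */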
65
66 static bool
67 radv_use_tc_compat_htile_for_image(struct radv_device *device,
68 const VkImageCreateInfo *pCreateInfo,
69 VkFormat format)
70 {
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device->physical_device->rad_info.chip_class < GFX8)
73 return false;
74
75 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
76 return false;
77
78 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
79 return false;
80
81 if (pCreateInfo->mipLevels > 1)
82 return false;
83
84 	/* FIXME: TC-compat with 2/4/8 samples breaks some CTS tests for an
85 	 * unknown reason, so disable it for now. On GFX10, D32_SFLOAT is
86 	 * affected as well. */
87 if (pCreateInfo->samples >= 2 &&
88 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
89 (format == VK_FORMAT_D32_SFLOAT &&
90 device->physical_device->rad_info.chip_class == GFX10)))
91 return false;
92
93 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
94 	 * supports 32-bit. However, it's possible to enable TC-compat for
95 * 16-bit depth surfaces if no Z planes are compressed.
96 */
97 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
98 format != VK_FORMAT_D32_SFLOAT &&
99 format != VK_FORMAT_D16_UNORM)
100 return false;
101
102 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
103 const struct VkImageFormatListCreateInfo *format_list =
104 (const struct VkImageFormatListCreateInfo *)
105 vk_find_struct_const(pCreateInfo->pNext,
106 IMAGE_FORMAT_LIST_CREATE_INFO);
107
108 /* We have to ignore the existence of the list if viewFormatCount = 0 */
109 if (format_list && format_list->viewFormatCount) {
110 /* compatibility is transitive, so we only need to check
111 * one format with everything else.
112 */
113 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
114 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
115 continue;
116
117 if (format != format_list->pViewFormats[i])
118 return false;
119 }
120 } else {
121 return false;
122 }
123 }
124
125 return true;
126 }
127
128 static bool
129 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
130 {
131 if (info->bo_metadata) {
132 if (device->physical_device->rad_info.chip_class >= GFX9)
133 return info->bo_metadata->u.gfx9.scanout;
134 else
135 return info->bo_metadata->u.legacy.scanout;
136 }
137
138 return info->scanout;
139 }
140
141 static bool
142 radv_use_dcc_for_image(struct radv_device *device,
143 const struct radv_image *image,
144 const VkImageCreateInfo *pCreateInfo,
145 VkFormat format)
146 {
147 bool dcc_compatible_formats;
148 bool blendable;
149
150 /* DCC (Delta Color Compression) is only available for GFX8+. */
151 if (device->physical_device->rad_info.chip_class < GFX8)
152 return false;
153
154 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
155 return false;
156
157 if (image->shareable)
158 return false;
159
160 /* TODO: Enable DCC for storage images. */
161 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
162 return false;
163
164 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
165 return false;
166
167 if (vk_format_is_subsampled(format) ||
168 vk_format_get_plane_count(format) > 1)
169 return false;
170
171 	/* TODO: Enable DCC for mip levels and layered images on GFX9+. */
172 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
173 device->physical_device->rad_info.chip_class >= GFX9)
174 return false;
175
176 /* Do not enable DCC for mipmapped arrays because performance is worse. */
177 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
178 return false;
179
180 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
181 * 2x can be enabled with an option.
182 */
183 if (pCreateInfo->samples > 2 ||
184 (pCreateInfo->samples == 2 &&
185 !device->physical_device->dcc_msaa_allowed))
186 return false;
187
188 /* Determine if the formats are DCC compatible. */
189 dcc_compatible_formats =
190 radv_is_colorbuffer_format_supported(format,
191 &blendable);
192
193 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
194 const struct VkImageFormatListCreateInfo *format_list =
195 (const struct VkImageFormatListCreateInfo *)
196 vk_find_struct_const(pCreateInfo->pNext,
197 IMAGE_FORMAT_LIST_CREATE_INFO);
198
199 /* We have to ignore the existence of the list if viewFormatCount = 0 */
200 if (format_list && format_list->viewFormatCount) {
201 /* compatibility is transitive, so we only need to check
202 * one format with everything else. */
203 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
204 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
205 continue;
206
207 if (!radv_dcc_formats_compatible(format,
208 format_list->pViewFormats[i]))
209 dcc_compatible_formats = false;
210 }
211 } else {
212 dcc_compatible_formats = false;
213 }
214 }
215
216 if (!dcc_compatible_formats)
217 return false;
218
219 return true;
220 }
221
222 static bool
223 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
224 struct radv_image *image)
225 {
226 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
227 return false;
228
229 /* TC-compat CMASK is only available for GFX8+. */
230 if (device->physical_device->rad_info.chip_class < GFX8)
231 return false;
232
233 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
234 return false;
235
236 if (radv_image_has_dcc(image))
237 return false;
238
239 if (!radv_image_has_cmask(image))
240 return false;
241
242 return true;
243 }
244
245 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
246 {
247 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
248 }
249
250 static bool
251 radv_is_valid_opaque_metadata(const struct radv_device *device,
252 const struct radeon_bo_metadata *md)
253 {
254 if (md->metadata[0] != 1 ||
255 md->metadata[1] != si_get_bo_metadata_word1(device))
256 return false;
257
258 if (md->size_metadata < 40)
259 return false;
260
261 return true;
262 }
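/* Sketch of a valid header (the pci_id value is hypothetical): with
 * ATI_VENDOR_ID = 0x1002 and pci_id = 0x67DF,
 *
 *   md->metadata[0]   == 1           (metadata format version)
 *   md->metadata[1]   == 0x100267DF  ((vendor << 16) | pci_id)
 *   md->size_metadata >= 40          (version + word1 + 8-dword descriptor)
 */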
263
264 static void
265 radv_patch_surface_from_metadata(struct radv_device *device,
266 struct radeon_surf *surface,
267 const struct radeon_bo_metadata *md)
268 {
269 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
270
271 if (device->physical_device->rad_info.chip_class >= GFX9) {
272 if (md->u.gfx9.swizzle_mode > 0)
273 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
274 else
275 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
276
277 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
278 } else {
279 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
280 surface->u.legacy.bankw = md->u.legacy.bankw;
281 surface->u.legacy.bankh = md->u.legacy.bankh;
282 surface->u.legacy.tile_split = md->u.legacy.tile_split;
283 surface->u.legacy.mtilea = md->u.legacy.mtilea;
284 surface->u.legacy.num_banks = md->u.legacy.num_banks;
285
286 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
287 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
288 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
289 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
290 else
291 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
292
293 }
294 }
295
296 static VkResult
297 radv_patch_image_dimensions(struct radv_device *device,
298 struct radv_image *image,
299 const struct radv_image_create_info *create_info,
300 struct ac_surf_info *image_info)
301 {
302 unsigned width = image->info.width;
303 unsigned height = image->info.height;
304
305 /*
306 	 * minigbm sometimes allocates bigger images, which results in weird
307 	 * strides and other properties. Let's be lenient where possible and
308 	 * fail the import on GFX10 (where we cannot cope).
309 *
310 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
311 */
312 if (create_info->bo_metadata &&
313 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
314 const struct radeon_bo_metadata *md = create_info->bo_metadata;
315
316 if (device->physical_device->rad_info.chip_class >= GFX10) {
317 width = G_00A004_WIDTH_LO(md->metadata[3]) +
318 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
319 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
320 } else {
321 width = G_008F18_WIDTH(md->metadata[4]) + 1;
322 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
323 }
324 }
325
326 if (image->info.width == width && image->info.height == height)
327 return VK_SUCCESS;
328
329 if (width < image->info.width || height < image->info.height) {
330 fprintf(stderr,
331 "The imported image has smaller dimensions than the internal\n"
332 "dimensions. Using it is going to fail badly, so we reject\n"
333 "this import.\n"
334 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
335 image->info.width, image->info.height, width, height);
336 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
337 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
338 fprintf(stderr,
339 "Tried to import an image with inconsistent width on GFX10.\n"
340 "As GFX10 has no separate stride fields we cannot cope with\n"
341 "an inconsistency in width and will fail this import.\n"
342 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
343 image->info.width, image->info.height, width, height);
344 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
345 } else {
346 fprintf(stderr,
347 "Tried to import an image with inconsistent width on pre-GFX10.\n"
348 "As GFX10 has no separate stride fields we cannot cope with\n"
349 "an inconsistency and would fail on GFX10.\n"
350 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
351 image->info.width, image->info.height, width, height);
352 }
353 image_info->width = width;
354 image_info->height = height;
355
356 return VK_SUCCESS;
357 }
358
359 static VkResult
360 radv_patch_image_from_extra_info(struct radv_device *device,
361 struct radv_image *image,
362 const struct radv_image_create_info *create_info,
363 struct ac_surf_info *image_info)
364 {
365 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
366 if (result != VK_SUCCESS)
367 return result;
368
369 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
370 if (create_info->bo_metadata) {
371 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
372 create_info->bo_metadata);
373 }
374
375 if (radv_surface_has_scanout(device, create_info)) {
376 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
377 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
378
379 image->info.surf_index = NULL;
380 }
381 }
382 return VK_SUCCESS;
383 }
384
385 static int
386 radv_init_surface(struct radv_device *device,
387 const struct radv_image *image,
388 struct radeon_surf *surface,
389 unsigned plane_id,
390 const VkImageCreateInfo *pCreateInfo,
391 VkFormat image_format)
392 {
393 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
394 VkFormat format = vk_format_get_plane_format(image_format, plane_id);
395 const struct vk_format_description *desc = vk_format_description(format);
396 bool is_depth, is_stencil;
397
398 is_depth = vk_format_has_depth(desc);
399 is_stencil = vk_format_has_stencil(desc);
400
401 surface->blk_w = vk_format_get_blockwidth(format);
402 surface->blk_h = vk_format_get_blockheight(format);
403
404 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
405 	/* Align bytes-per-element to a dword. */
406 if (surface->bpe == 3) {
407 surface->bpe = 4;
408 }
409
410 surface->flags = RADEON_SURF_SET(array_mode, MODE);
411
412 	switch (pCreateInfo->imageType) {
413 case VK_IMAGE_TYPE_1D:
414 if (pCreateInfo->arrayLayers > 1)
415 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
416 else
417 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
418 break;
419 case VK_IMAGE_TYPE_2D:
420 if (pCreateInfo->arrayLayers > 1)
421 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
422 else
423 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
424 break;
425 case VK_IMAGE_TYPE_3D:
426 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
427 break;
428 default:
429 unreachable("unhandled image type");
430 }
431
432 if (is_depth) {
433 surface->flags |= RADEON_SURF_ZBUFFER;
434 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
435 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
436 }
437
438 if (is_stencil)
439 surface->flags |= RADEON_SURF_SBUFFER;
440
441 if (device->physical_device->rad_info.chip_class >= GFX9 &&
442 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
443 vk_format_get_blocksizebits(image_format) == 128 &&
444 vk_format_is_compressed(image_format))
445 surface->flags |= RADEON_SURF_NO_RENDER_TARGET;
446
447 surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
448
449 if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
450 surface->flags |= RADEON_SURF_DISABLE_DCC;
451
452 return 0;
453 }
454
455 static inline unsigned
456 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
457 {
458 if (stencil)
459 return plane->surface.u.legacy.stencil_tiling_index[level];
460 else
461 return plane->surface.u.legacy.tiling_index[level];
462 }
463
464 static unsigned radv_map_swizzle(unsigned swizzle)
465 {
466 switch (swizzle) {
467 case VK_SWIZZLE_Y:
468 return V_008F0C_SQ_SEL_Y;
469 case VK_SWIZZLE_Z:
470 return V_008F0C_SQ_SEL_Z;
471 case VK_SWIZZLE_W:
472 return V_008F0C_SQ_SEL_W;
473 case VK_SWIZZLE_0:
474 return V_008F0C_SQ_SEL_0;
475 case VK_SWIZZLE_1:
476 return V_008F0C_SQ_SEL_1;
477 default: /* VK_SWIZZLE_X */
478 return V_008F0C_SQ_SEL_X;
479 }
480 }
481
482 static void
483 radv_make_buffer_descriptor(struct radv_device *device,
484 struct radv_buffer *buffer,
485 VkFormat vk_format,
486 unsigned offset,
487 unsigned range,
488 uint32_t *state)
489 {
490 const struct vk_format_description *desc;
491 unsigned stride;
492 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
493 uint64_t va = gpu_address + buffer->offset;
494 unsigned num_format, data_format;
495 int first_non_void;
496 desc = vk_format_description(vk_format);
497 first_non_void = vk_format_get_first_non_void_channel(vk_format);
498 stride = desc->block.bits / 8;
499
500 va += offset;
501 state[0] = va;
502 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
503 S_008F04_STRIDE(stride);
504
505 if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
506 range /= stride;
507 }
508
509 state[2] = range;
510 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
511 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
512 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
513 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
514
515 if (device->physical_device->rad_info.chip_class >= GFX10) {
516 const struct gfx10_format *fmt = &gfx10_format_table[vk_format];
517
518 /* OOB_SELECT chooses the out-of-bounds check:
519 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
520 * - 1: index >= NUM_RECORDS
521 * - 2: NUM_RECORDS == 0
522 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
523 * else: swizzle_address >= NUM_RECORDS
524 */
525 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
526 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
527 S_008F0C_RESOURCE_LEVEL(1);
528 } else {
529 num_format = radv_translate_buffer_numformat(desc, first_non_void);
530 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
531
532 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
533 assert(num_format != ~0);
534
535 state[3] |= S_008F0C_NUM_FORMAT(num_format) |
536 S_008F0C_DATA_FORMAT(data_format);
537 }
538 }
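/* Illustrative use (a sketch; the buffer and format are hypothetical):
 * fill a 4-dword texel buffer descriptor covering a whole buffer.
 *
 *   uint32_t desc[4];
 *   radv_make_buffer_descriptor(device, buffer,
 *                               VK_FORMAT_R32G32B32A32_SFLOAT,
 *                               0, buffer->size, desc);
 *
 * Note that on everything except GFX8, NUM_RECORDS (state[2]) ends up
 * holding the element count (range / stride), while GFX8 keeps the raw
 * byte range.
 */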
539
540 static void
541 si_set_mutable_tex_desc_fields(struct radv_device *device,
542 struct radv_image *image,
543 const struct legacy_surf_level *base_level_info,
544 unsigned plane_id,
545 unsigned base_level, unsigned first_level,
546 unsigned block_width, bool is_stencil,
547 bool is_storage_image, bool disable_compression,
548 uint32_t *state)
549 {
550 struct radv_image_plane *plane = &image->planes[plane_id];
551 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
552 uint64_t va = gpu_address + plane->offset;
553 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
554 uint64_t meta_va = 0;
555 if (chip_class >= GFX9) {
556 if (is_stencil)
557 va += plane->surface.u.gfx9.stencil_offset;
558 else
559 va += plane->surface.u.gfx9.surf_offset;
560 } else
561 va += base_level_info->offset;
562
563 state[0] = va >> 8;
564 if (chip_class >= GFX9 ||
565 base_level_info->mode == RADEON_SURF_MODE_2D)
566 state[0] |= plane->surface.tile_swizzle;
567 state[1] &= C_008F14_BASE_ADDRESS_HI;
568 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
569
570 if (chip_class >= GFX8) {
571 state[6] &= C_008F28_COMPRESSION_EN;
572 state[7] = 0;
573 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
574 meta_va = gpu_address + image->dcc_offset;
575 if (chip_class <= GFX8)
576 meta_va += base_level_info->dcc_offset;
577
578 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
579 dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
580 meta_va |= dcc_tile_swizzle;
581 } else if (!disable_compression &&
582 radv_image_is_tc_compat_htile(image)) {
583 meta_va = gpu_address + image->htile_offset;
584 }
585
586 if (meta_va) {
587 state[6] |= S_008F28_COMPRESSION_EN(1);
588 if (chip_class <= GFX9)
589 state[7] = meta_va >> 8;
590 }
591 }
592
593 if (chip_class >= GFX10) {
594 state[3] &= C_00A00C_SW_MODE;
595
596 if (is_stencil) {
597 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
598 } else {
599 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
600 }
601
602 state[6] &= C_00A018_META_DATA_ADDRESS_LO &
603 C_00A018_META_PIPE_ALIGNED;
604
605 if (meta_va) {
606 struct gfx9_surf_meta_flags meta;
607
608 if (image->dcc_offset)
609 meta = plane->surface.u.gfx9.dcc;
610 else
611 meta = plane->surface.u.gfx9.htile;
612
613 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
614 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
615 }
616
617 state[7] = meta_va >> 16;
618 } else if (chip_class == GFX9) {
619 state[3] &= C_008F1C_SW_MODE;
620 state[4] &= C_008F20_PITCH;
621
622 if (is_stencil) {
623 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
624 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
625 } else {
626 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
627 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
628 }
629
630 state[5] &= C_008F24_META_DATA_ADDRESS &
631 C_008F24_META_PIPE_ALIGNED &
632 C_008F24_META_RB_ALIGNED;
633 if (meta_va) {
634 struct gfx9_surf_meta_flags meta;
635
636 if (image->dcc_offset)
637 meta = plane->surface.u.gfx9.dcc;
638 else
639 meta = plane->surface.u.gfx9.htile;
640
641 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
642 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
643 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
644 }
645 } else {
646 /* GFX6-GFX8 */
647 unsigned pitch = base_level_info->nblk_x * block_width;
648 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
649
650 state[3] &= C_008F1C_TILING_INDEX;
651 state[3] |= S_008F1C_TILING_INDEX(index);
652 state[4] &= C_008F20_PITCH;
653 state[4] |= S_008F20_PITCH(pitch - 1);
654 }
655 }
656
657 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
658 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
659 {
660 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
661 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
662
663 /* GFX9 allocates 1D textures as 2D. */
664 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
665 image_type = VK_IMAGE_TYPE_2D;
666 switch (image_type) {
667 case VK_IMAGE_TYPE_1D:
668 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
669 case VK_IMAGE_TYPE_2D:
670 if (nr_samples > 1)
671 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
672 else
673 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
674 case VK_IMAGE_TYPE_3D:
675 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
676 return V_008F1C_SQ_RSRC_IMG_3D;
677 else
678 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
679 default:
680 unreachable("illegal image type");
681 }
682 }
683
684 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
685 {
686 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
687
688 if (swizzle[3] == VK_SWIZZLE_X) {
689 /* For the pre-defined border color values (white, opaque
690 * black, transparent black), the only thing that matters is
691 * that the alpha channel winds up in the correct place
692 * (because the RGB channels are all the same) so either of
693 * these enumerations will work.
694 */
695 if (swizzle[2] == VK_SWIZZLE_Y)
696 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
697 else
698 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
699 } else if (swizzle[0] == VK_SWIZZLE_X) {
700 if (swizzle[1] == VK_SWIZZLE_Y)
701 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
702 else
703 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
704 } else if (swizzle[1] == VK_SWIZZLE_X) {
705 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
706 } else if (swizzle[2] == VK_SWIZZLE_X) {
707 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
708 }
709
710 return bc_swizzle;
711 }
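/* Example: a BGRA view composes to swizzle = {Z, Y, X, W}, so swizzle[2]
 * is VK_SWIZZLE_X and BC_SWIZZLE_ZYXW is selected; an alpha-first layout
 * (swizzle[3] == VK_SWIZZLE_X) takes the WZYX/WXYZ branch instead, so the
 * border alpha still lands in the alpha channel.
 */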
712
713 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
714 {
715 const struct vk_format_description *desc = vk_format_description(format);
716
717 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
718 return desc->swizzle[3] == VK_SWIZZLE_X;
719
720 return radv_translate_colorswap(format, false) <= 1;
721 }
722 /**
723 * Build the sampler view descriptor for a texture (GFX10).
724 */
725 static void
726 gfx10_make_texture_descriptor(struct radv_device *device,
727 struct radv_image *image,
728 bool is_storage_image,
729 VkImageViewType view_type,
730 VkFormat vk_format,
731 const VkComponentMapping *mapping,
732 unsigned first_level, unsigned last_level,
733 unsigned first_layer, unsigned last_layer,
734 unsigned width, unsigned height, unsigned depth,
735 uint32_t *state,
736 uint32_t *fmask_state)
737 {
738 const struct vk_format_description *desc;
739 enum vk_swizzle swizzle[4];
740 unsigned img_format;
741 unsigned type;
742
743 desc = vk_format_description(vk_format);
744 img_format = gfx10_format_table[vk_format].img_format;
745
746 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
747 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
748 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
749 } else {
750 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
751 }
752
753 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
754 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
755 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
756 height = 1;
757 depth = image->info.array_size;
758 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
759 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
760 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
761 depth = image->info.array_size;
762 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
763 depth = image->info.array_size / 6;
764
765 state[0] = 0;
766 state[1] = S_00A004_FORMAT(img_format) |
767 S_00A004_WIDTH_LO(width - 1);
768 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
769 S_00A008_HEIGHT(height - 1) |
770 S_00A008_RESOURCE_LEVEL(1);
771 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
772 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
773 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
774 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
775 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
776 0 : first_level) |
777 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
778 util_logbase2(image->info.samples) :
779 last_level) |
780 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
781 S_00A00C_TYPE(type);
782 	/* Depth is the last accessible layer on GFX9+. The hw doesn't need
783 * to know the total number of layers.
784 */
785 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
786 S_00A010_BASE_ARRAY(first_layer);
787 state[5] = S_00A014_ARRAY_PITCH(0) |
788 S_00A014_MAX_MIP(image->info.samples > 1 ?
789 util_logbase2(image->info.samples) :
790 image->info.levels - 1) |
791 S_00A014_PERF_MOD(4);
792 state[6] = 0;
793 state[7] = 0;
794
795 if (radv_dcc_enabled(image, first_level)) {
796 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
797 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
798 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
799 }
800
801 /* Initialize the sampler view for FMASK. */
802 if (radv_image_has_fmask(image)) {
803 uint64_t gpu_address = radv_buffer_get_va(image->bo);
804 uint32_t format;
805 uint64_t va;
806
807 assert(image->plane_count == 1);
808
809 va = gpu_address + image->offset + image->fmask_offset;
810
811 switch (image->info.samples) {
812 case 2:
813 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
814 break;
815 case 4:
816 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
817 break;
818 case 8:
819 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
820 break;
821 default:
822 unreachable("invalid nr_samples");
823 }
824
825 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
826 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
827 S_00A004_FORMAT(format) |
828 S_00A004_WIDTH_LO(width - 1);
829 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
830 S_00A008_HEIGHT(height - 1) |
831 S_00A008_RESOURCE_LEVEL(1);
832 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
833 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
834 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
835 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
836 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
837 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
838 fmask_state[4] = S_00A010_DEPTH(last_layer) |
839 S_00A010_BASE_ARRAY(first_layer);
840 fmask_state[5] = 0;
841 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
842 fmask_state[7] = 0;
843 } else if (fmask_state)
844 memset(fmask_state, 0, 8 * 4);
845 }
846
847 /**
848 * Build the sampler view descriptor for a texture (SI-GFX9)
849 */
850 static void
851 si_make_texture_descriptor(struct radv_device *device,
852 struct radv_image *image,
853 bool is_storage_image,
854 VkImageViewType view_type,
855 VkFormat vk_format,
856 const VkComponentMapping *mapping,
857 unsigned first_level, unsigned last_level,
858 unsigned first_layer, unsigned last_layer,
859 unsigned width, unsigned height, unsigned depth,
860 uint32_t *state,
861 uint32_t *fmask_state)
862 {
863 const struct vk_format_description *desc;
864 enum vk_swizzle swizzle[4];
865 int first_non_void;
866 unsigned num_format, data_format, type;
867
868 desc = vk_format_description(vk_format);
869
870 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
871 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
872 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
873 } else {
874 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
875 }
876
877 first_non_void = vk_format_get_first_non_void_channel(vk_format);
878
879 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
880 if (num_format == ~0) {
881 num_format = 0;
882 }
883
884 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
885 if (data_format == ~0) {
886 data_format = 0;
887 }
888
889 	/* S8 with either Z16 or Z32 HTILE needs a special format. */
890 if (device->physical_device->rad_info.chip_class == GFX9 &&
891 vk_format == VK_FORMAT_S8_UINT &&
892 radv_image_is_tc_compat_htile(image)) {
893 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
894 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
895 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
896 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
897 }
898 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
899 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
900 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
901 height = 1;
902 depth = image->info.array_size;
903 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
904 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
905 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
906 depth = image->info.array_size;
907 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
908 depth = image->info.array_size / 6;
909
910 state[0] = 0;
911 state[1] = (S_008F14_DATA_FORMAT(data_format) |
912 S_008F14_NUM_FORMAT(num_format));
913 state[2] = (S_008F18_WIDTH(width - 1) |
914 S_008F18_HEIGHT(height - 1) |
915 S_008F18_PERF_MOD(4));
916 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
917 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
918 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
919 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
920 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
921 0 : first_level) |
922 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
923 util_logbase2(image->info.samples) :
924 last_level) |
925 S_008F1C_TYPE(type));
926 state[4] = 0;
927 state[5] = S_008F24_BASE_ARRAY(first_layer);
928 state[6] = 0;
929 state[7] = 0;
930
931 if (device->physical_device->rad_info.chip_class == GFX9) {
932 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
933
934 		/* Depth is the last accessible layer on GFX9.
935 * The hw doesn't need to know the total number of layers.
936 */
937 if (type == V_008F1C_SQ_RSRC_IMG_3D)
938 state[4] |= S_008F20_DEPTH(depth - 1);
939 else
940 state[4] |= S_008F20_DEPTH(last_layer);
941
942 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
943 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
944 util_logbase2(image->info.samples) :
945 image->info.levels - 1);
946 } else {
947 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
948 state[4] |= S_008F20_DEPTH(depth - 1);
949 state[5] |= S_008F24_LAST_ARRAY(last_layer);
950 }
951 if (image->dcc_offset) {
952 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
953 } else {
954 /* The last dword is unused by hw. The shader uses it to clear
955 * bits in the first dword of sampler state.
956 */
957 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
958 if (first_level == last_level)
959 state[7] = C_008F30_MAX_ANISO_RATIO;
960 else
961 state[7] = 0xffffffff;
962 }
963 }
964
965 /* Initialize the sampler view for FMASK. */
966 if (radv_image_has_fmask(image)) {
967 uint32_t fmask_format, num_format;
968 uint64_t gpu_address = radv_buffer_get_va(image->bo);
969 uint64_t va;
970
971 assert(image->plane_count == 1);
972
973 va = gpu_address + image->offset + image->fmask_offset;
974
975 if (device->physical_device->rad_info.chip_class == GFX9) {
976 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
977 switch (image->info.samples) {
978 case 2:
979 num_format = V_008F14_IMG_FMASK_8_2_2;
980 break;
981 case 4:
982 num_format = V_008F14_IMG_FMASK_8_4_4;
983 break;
984 case 8:
985 num_format = V_008F14_IMG_FMASK_32_8_8;
986 break;
987 default:
988 unreachable("invalid nr_samples");
989 }
990 } else {
991 switch (image->info.samples) {
992 case 2:
993 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
994 break;
995 case 4:
996 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
997 break;
998 case 8:
999 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1000 break;
1001 default:
1002 assert(0);
1003 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1004 }
1005 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1006 }
1007
1008 fmask_state[0] = va >> 8;
1009 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1010 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1011 S_008F14_DATA_FORMAT(fmask_format) |
1012 S_008F14_NUM_FORMAT(num_format);
1013 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1014 S_008F18_HEIGHT(height - 1);
1015 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1016 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1017 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1018 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1019 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1020 fmask_state[4] = 0;
1021 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1022 fmask_state[6] = 0;
1023 fmask_state[7] = 0;
1024
1025 if (device->physical_device->rad_info.chip_class == GFX9) {
1026 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1027 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1028 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1029 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
1030 S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
1031
1032 if (radv_image_is_tc_compat_cmask(image)) {
1033 va = gpu_address + image->offset + image->cmask_offset;
1034
1035 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1036 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1037 fmask_state[7] |= va >> 8;
1038 }
1039 } else {
1040 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1041 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1042 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1043 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1044
1045 if (radv_image_is_tc_compat_cmask(image)) {
1046 va = gpu_address + image->offset + image->cmask_offset;
1047
1048 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1049 fmask_state[7] |= va >> 8;
1050 }
1051 }
1052 } else if (fmask_state)
1053 memset(fmask_state, 0, 8 * 4);
1054 }
1055
1056 static void
1057 radv_make_texture_descriptor(struct radv_device *device,
1058 struct radv_image *image,
1059 bool is_storage_image,
1060 VkImageViewType view_type,
1061 VkFormat vk_format,
1062 const VkComponentMapping *mapping,
1063 unsigned first_level, unsigned last_level,
1064 unsigned first_layer, unsigned last_layer,
1065 unsigned width, unsigned height, unsigned depth,
1066 uint32_t *state,
1067 uint32_t *fmask_state)
1068 {
1069 if (device->physical_device->rad_info.chip_class >= GFX10) {
1070 gfx10_make_texture_descriptor(device, image, is_storage_image,
1071 view_type, vk_format, mapping,
1072 first_level, last_level,
1073 first_layer, last_layer,
1074 width, height, depth,
1075 state, fmask_state);
1076 } else {
1077 si_make_texture_descriptor(device, image, is_storage_image,
1078 view_type, vk_format, mapping,
1079 first_level, last_level,
1080 first_layer, last_layer,
1081 width, height, depth,
1082 state, fmask_state);
1083 }
1084 }
1085
1086 static void
1087 radv_query_opaque_metadata(struct radv_device *device,
1088 struct radv_image *image,
1089 struct radeon_bo_metadata *md)
1090 {
1091 static const VkComponentMapping fixedmapping;
1092 uint32_t desc[8], i;
1093
1094 assert(image->plane_count == 1);
1095
1096 	/* Metadata image format version 1:
1097 * [0] = 1 (metadata format identifier)
1098 * [1] = (VENDOR_ID << 16) | PCI_ID
1099 * [2:9] = image descriptor for the whole resource
1100 * [2] is always 0, because the base address is cleared
1101 * [9] is the DCC offset bits [39:8] from the beginning of
1102 * the buffer
1103 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1104 */
1105 md->metadata[0] = 1; /* metadata image format version 1 */
1106
1107 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1108 md->metadata[1] = si_get_bo_metadata_word1(device);
1109
1110
1111 radv_make_texture_descriptor(device, image, false,
1112 (VkImageViewType)image->type, image->vk_format,
1113 &fixedmapping, 0, image->info.levels - 1, 0,
1114 image->info.array_size - 1,
1115 image->info.width, image->info.height,
1116 image->info.depth,
1117 desc, NULL);
1118
1119 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1120 image->planes[0].surface.blk_w, false, false, false, desc);
1121
1122 /* Clear the base address and set the relative DCC offset. */
1123 desc[0] = 0;
1124 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1125 desc[7] = image->dcc_offset >> 8;
1126
1127 /* Dwords [2:9] contain the image descriptor. */
1128 memcpy(&md->metadata[2], desc, sizeof(desc));
1129
1130 /* Dwords [10:..] contain the mipmap level offsets. */
1131 if (device->physical_device->rad_info.chip_class <= GFX8) {
1132 		for (i = 0; i < image->info.levels; i++)
1133 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1134 md->size_metadata = (11 + image->info.levels - 1) * 4;
1135 } else
1136 md->size_metadata = 10 * 4;
1137 }
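/* Sketch of how an importer would decode the layout written above
 * (hypothetical consumer code; the mip offsets exist on GFX8 and older):
 *
 *   uint64_t dcc_offset    = (uint64_t)md->metadata[9]  << 8;
 *   uint64_t level1_offset = (uint64_t)md->metadata[11] << 8;  // mip 1
 */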
1138
1139 void
1140 radv_init_metadata(struct radv_device *device,
1141 struct radv_image *image,
1142 struct radeon_bo_metadata *metadata)
1143 {
1144 struct radeon_surf *surface = &image->planes[0].surface;
1145
1146 memset(metadata, 0, sizeof(*metadata));
1147
1148 if (device->physical_device->rad_info.chip_class >= GFX9) {
1149 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1150 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1151 } else {
1152 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1153 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1154 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1155 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1156 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1157 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1158 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1159 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1160 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1161 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1162 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1163 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1164 }
1165 radv_query_opaque_metadata(device, image, metadata);
1166 }
1167
1168 void
1169 radv_image_override_offset_stride(struct radv_device *device,
1170 struct radv_image *image,
1171 uint64_t offset, uint32_t stride)
1172 {
1173 struct radeon_surf *surface = &image->planes[0].surface;
1174 unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;
1175
1176 if (device->physical_device->rad_info.chip_class >= GFX9) {
1177 if (stride) {
1178 surface->u.gfx9.surf_pitch = stride;
1179 surface->u.gfx9.surf_slice_size =
1180 (uint64_t)stride * surface->u.gfx9.surf_height * bpe;
1181 }
1182 surface->u.gfx9.surf_offset = offset;
1183 } else {
1184 surface->u.legacy.level[0].nblk_x = stride;
1185 surface->u.legacy.level[0].slice_size_dw =
1186 ((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;
1187
1188 if (offset) {
1189 for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
1190 surface->u.legacy.level[i].offset += offset;
1191 }
1192
1193 }
1194 }
1195
1196 static void
1197 radv_image_alloc_fmask(struct radv_device *device,
1198 struct radv_image *image)
1199 {
1200 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1201
1202 image->fmask_offset = align64(image->size, fmask_alignment);
1203 image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
1204 image->alignment = MAX2(image->alignment, fmask_alignment);
1205 }
1206
1207 static void
1208 radv_image_alloc_cmask(struct radv_device *device,
1209 struct radv_image *image)
1210 {
1211 unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
1212 unsigned cmask_size = image->planes[0].surface.cmask_size;
1213 uint32_t clear_value_size = 0;
1214
1215 if (!cmask_size)
1216 return;
1217
1218 assert(cmask_alignment);
1219
1220 image->cmask_offset = align64(image->size, cmask_alignment);
1221 /* + 8 for storing the clear values */
1222 if (!image->clear_value_offset) {
1223 image->clear_value_offset = image->cmask_offset + cmask_size;
1224 clear_value_size = 8;
1225 }
1226 image->size = image->cmask_offset + cmask_size + clear_value_size;
1227 image->alignment = MAX2(image->alignment, cmask_alignment);
1228 }
1229
1230 static void
1231 radv_image_alloc_dcc(struct radv_image *image)
1232 {
1233 assert(image->plane_count == 1);
1234
1235 image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
1236 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1237 image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
1238 image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
1239 image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
1240 image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
1241 image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
1242 }
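/* Resulting sub-allocation within the image, as computed above:
 *
 *   dcc_offset          -> DCC metadata (dcc_size bytes)
 *   clear_value_offset  -> 8 bytes of clear value per mip level
 *   fce_pred_offset     -> 8 bytes of FCE predicate per mip level
 *   dcc_pred_offset     -> 8 bytes of DCC predicate per mip level
 */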
1243
1244 static void
1245 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1246 {
1247 image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1248
1249 /* + 8 for storing the clear values */
1250 image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
1251 image->size = image->clear_value_offset + image->info.levels * 8;
1252 if (radv_image_is_tc_compat_htile(image) &&
1253 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1254 		/* Metadata for the TC-compatible HTILE hardware bug, which
1255 		 * has to be worked around by updating ZRANGE_PRECISION when doing
1256 * fast depth clears to 0.0f.
1257 */
1258 image->tc_compat_zrange_offset = image->size;
1259 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1260 }
1261 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1262 }
1263
1264 static inline bool
1265 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
1266 {
1267 if (image->info.samples <= 1 &&
1268 image->info.width * image->info.height <= 512 * 512) {
1269 /* Do not enable CMASK or DCC for small surfaces where the cost
1270 * of the eliminate pass can be higher than the benefit of fast
1271 * clear. RadeonSI does this, but the image threshold is
1272 * different.
1273 */
1274 return false;
1275 }
1276
1277 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
1278 (image->exclusive || image->queue_family_mask == 1);
1279 }
1280
1281 static inline bool
1282 radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
1283 {
1284 if (!radv_image_can_enable_dcc_or_cmask(image) ||
1285 !radv_image_has_dcc(image))
1286 return false;
1287
1288 /* On GFX8, DCC layers can be interleaved and it's currently only
1289 * enabled if slice size is equal to the per slice fast clear size
1290 * because the driver assumes that portions of multiple layers are
1291 * contiguous during fast clears.
1292 */
1293 if (image->info.array_size > 1) {
1294 const struct legacy_surf_level *surf_level =
1295 &image->planes[0].surface.u.legacy.level[0];
1296
1297 assert(device->physical_device->rad_info.chip_class == GFX8);
1298
1299 if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
1300 return false;
1301 }
1302
1303 return true;
1304 }
1305
1306 static inline bool
1307 radv_image_can_enable_cmask(struct radv_image *image)
1308 {
1309 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1310 /* Do not enable CMASK for non-MSAA images (fast color clear)
1311 		 * because 128-bit formats are not supported, but FMASK might
1312 * still be used.
1313 */
1314 return false;
1315 }
1316
1317 return radv_image_can_enable_dcc_or_cmask(image) &&
1318 image->info.levels == 1 &&
1319 image->info.depth == 1 &&
1320 !image->planes[0].surface.is_linear;
1321 }
1322
1323 static inline bool
1324 radv_image_can_enable_fmask(struct radv_image *image)
1325 {
1326 return image->info.samples > 1 && vk_format_is_color(image->vk_format);
1327 }
1328
1329 static inline bool
1330 radv_image_can_enable_htile(struct radv_image *image)
1331 {
1332 return radv_image_has_htile(image) &&
1333 image->info.levels == 1 &&
1334 image->info.width * image->info.height >= 8 * 8;
1335 }
1336
1337 static void radv_image_disable_dcc(struct radv_image *image)
1338 {
1339 for (unsigned i = 0; i < image->plane_count; ++i)
1340 image->planes[i].surface.dcc_size = 0;
1341 }
1342
1343 static void radv_image_disable_htile(struct radv_image *image)
1344 {
1345 for (unsigned i = 0; i < image->plane_count; ++i)
1346 image->planes[i].surface.htile_size = 0;
1347 }
1348
1349 VkResult
1350 radv_image_create_layout(struct radv_device *device,
1351 struct radv_image_create_info create_info,
1352 struct radv_image *image)
1353 {
1354 /* Check that we did not initialize things earlier */
1355 assert(!image->planes[0].surface.surf_size);
1356
1357 	/* Clear the pCreateInfo pointer so the common internal path exercises
1358 	 * the delayed case (which has no pCreateInfo) and issues are caught there. */
1359 create_info.vk_info = NULL;
1360
1361 struct ac_surf_info image_info = image->info;
1362 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1363 if (result != VK_SUCCESS)
1364 return result;
1365
1366 image->size = 0;
1367 image->alignment = 1;
1368 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1369 struct ac_surf_info info = image_info;
1370
1371 if (plane) {
1372 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1373 assert(info.width % desc->width_divisor == 0);
1374 assert(info.height % desc->height_divisor == 0);
1375
1376 info.width /= desc->width_divisor;
1377 info.height /= desc->height_divisor;
1378 }
1379
1380 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1381
1382 image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
1383 image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
1384 image->alignment = image->planes[plane].surface.surf_alignment;
1385
1386 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1387 }
1388
1389 if (!create_info.no_metadata_planes) {
1390 /* Try to enable DCC first. */
1391 if (radv_image_can_enable_dcc(device, image)) {
1392 radv_image_alloc_dcc(image);
1393 if (image->info.samples > 1) {
1394 /* CMASK should be enabled because DCC fast
1395 * clear with MSAA needs it.
1396 */
1397 assert(radv_image_can_enable_cmask(image));
1398 radv_image_alloc_cmask(device, image);
1399 }
1400 } else {
1401 /* When DCC cannot be enabled, try CMASK. */
1402 radv_image_disable_dcc(image);
1403 if (radv_image_can_enable_cmask(image)) {
1404 radv_image_alloc_cmask(device, image);
1405 }
1406 }
1407
1408 /* Try to enable FMASK for multisampled images. */
1409 if (radv_image_can_enable_fmask(image)) {
1410 radv_image_alloc_fmask(device, image);
1411
1412 if (radv_use_tc_compat_cmask_for_image(device, image))
1413 image->tc_compatible_cmask = true;
1414 } else {
1415 /* Otherwise, try to enable HTILE for depth surfaces. */
1416 if (radv_image_can_enable_htile(image) &&
1417 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
1418 image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
1419 radv_image_alloc_htile(device, image);
1420 } else {
1421 radv_image_disable_htile(image);
1422 }
1423 }
1424 } else {
1425 radv_image_disable_dcc(image);
1426 radv_image_disable_htile(image);
1427 }
1428
1429 assert(image->planes[0].surface.surf_size);
1430 return VK_SUCCESS;
1431 }
1432
1433 VkResult
1434 radv_image_create(VkDevice _device,
1435 const struct radv_image_create_info *create_info,
1436 const VkAllocationCallbacks* alloc,
1437 VkImage *pImage)
1438 {
1439 RADV_FROM_HANDLE(radv_device, device, _device);
1440 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1441 struct radv_image *image = NULL;
1442 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1443 pCreateInfo->format);
1444 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1445
1446 const unsigned plane_count = vk_format_get_plane_count(format);
1447 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1448
1449 radv_assert(pCreateInfo->mipLevels > 0);
1450 radv_assert(pCreateInfo->arrayLayers > 0);
1451 radv_assert(pCreateInfo->samples > 0);
1452 radv_assert(pCreateInfo->extent.width > 0);
1453 radv_assert(pCreateInfo->extent.height > 0);
1454 radv_assert(pCreateInfo->extent.depth > 0);
1455
1456 image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
1457 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1458 if (!image)
1459 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1460
1461 image->type = pCreateInfo->imageType;
1462 image->info.width = pCreateInfo->extent.width;
1463 image->info.height = pCreateInfo->extent.height;
1464 image->info.depth = pCreateInfo->extent.depth;
1465 image->info.samples = pCreateInfo->samples;
1466 image->info.storage_samples = pCreateInfo->samples;
1467 image->info.array_size = pCreateInfo->arrayLayers;
1468 image->info.levels = pCreateInfo->mipLevels;
1469 image->info.num_channels = vk_format_get_nr_components(format);
1470
1471 image->vk_format = format;
1472 image->tiling = pCreateInfo->tiling;
1473 image->usage = pCreateInfo->usage;
1474 image->flags = pCreateInfo->flags;
1475 image->plane_count = plane_count;
1476
1477 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1478 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1479 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1480 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1481 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1482 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1483 else
1484 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1485 }
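	/* Example: CONCURRENT sharing across queue families {0, 1} yields
	 * queue_family_mask == 0b11; any EXTERNAL/FOREIGN index sets the
	 * bits for all RADV_MAX_QUEUE_FAMILIES at once. */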
1486
1487 const VkExternalMemoryImageCreateInfo *external_info =
1488 vk_find_struct_const(pCreateInfo->pNext,
1489 				     EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
1490
1491 	image->shareable = external_info != NULL;
1492 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1493 image->info.surf_index = &device->image_mrt_offset_counter;
1494 }
1495
1496 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1497 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
1498 }
1499
1500 bool delay_layout = external_info &&
1501 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1502
1503 if (delay_layout) {
1504 *pImage = radv_image_to_handle(image);
1505 		assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1506 return VK_SUCCESS;
1507 }
1508
1509 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1510 assert(result == VK_SUCCESS);
1511
1512 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1513 image->alignment = MAX2(image->alignment, 4096);
1514 image->size = align64(image->size, image->alignment);
1515 image->offset = 0;
1516
1517 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1518 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1519 if (!image->bo) {
1520 vk_free2(&device->alloc, alloc, image);
1521 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1522 }
1523 }
1524
1525 *pImage = radv_image_to_handle(image);
1526
1527 return VK_SUCCESS;
1528 }
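/* Sketch of the call path (the exact call site is an assumption; names
 * follow the surrounding driver): vkCreateImage reaches this roughly as
 *
 *   struct radv_image_create_info info = { .vk_info = pCreateInfo };
 *   radv_image_create(radv_device_to_handle(device), &info,
 *                     pAllocator, pImage);
 *
 * Android-external images instead take the delay_layout path above and
 * only run radv_image_create_layout() once the external format is known.
 */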
1529
1530 static void
1531 radv_image_view_make_descriptor(struct radv_image_view *iview,
1532 struct radv_device *device,
1533 VkFormat vk_format,
1534 const VkComponentMapping *components,
1535 bool is_storage_image, bool disable_compression,
1536 unsigned plane_id, unsigned descriptor_plane_id)
1537 {
1538 struct radv_image *image = iview->image;
1539 struct radv_image_plane *plane = &image->planes[plane_id];
1540 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1541 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1542 uint32_t blk_w;
1543 union radv_descriptor *descriptor;
1544 uint32_t hw_level = 0;
1545
1546 if (is_storage_image) {
1547 descriptor = &iview->storage_descriptor;
1548 } else {
1549 descriptor = &iview->descriptor;
1550 }
1551
1552 assert(vk_format_get_plane_count(vk_format) == 1);
1553 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1554 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1555
1556 if (device->physical_device->rad_info.chip_class >= GFX9)
1557 hw_level = iview->base_mip;
1558 radv_make_texture_descriptor(device, image, is_storage_image,
1559 iview->type,
1560 vk_format,
1561 components,
1562 hw_level, hw_level + iview->level_count - 1,
1563 iview->base_layer,
1564 iview->base_layer + iview->layer_count - 1,
1565 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1566 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1567 iview->extent.depth,
1568 descriptor->plane_descriptors[descriptor_plane_id],
1569 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1570
1571 const struct legacy_surf_level *base_level_info = NULL;
1572 if (device->physical_device->rad_info.chip_class <= GFX9) {
1573 if (is_stencil)
1574 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1575 else
1576 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1577 }
1578 si_set_mutable_tex_desc_fields(device, image,
1579 base_level_info,
1580 plane_id,
1581 iview->base_mip,
1582 iview->base_mip,
1583 blk_w, is_stencil, is_storage_image,
1584 is_storage_image || disable_compression,
1585 descriptor->plane_descriptors[descriptor_plane_id]);
1586 }
1587
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
    switch (mask) {
    case VK_IMAGE_ASPECT_PLANE_1_BIT:
        return 1;
    case VK_IMAGE_ASPECT_PLANE_2_BIT:
        return 2;
    default:
        return 0;
    }
}

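/* Returns the format a given aspect of the image should be interpreted with:
 * the per-plane format for multi-planar aspects, the depth- or stencil-only
 * variant for depth/stencil aspects, and the image format otherwise.
 */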
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
    switch (mask) {
    case VK_IMAGE_ASPECT_PLANE_0_BIT:
        return image->planes[0].format;
    case VK_IMAGE_ASPECT_PLANE_1_BIT:
        return image->planes[1].format;
    case VK_IMAGE_ASPECT_PLANE_2_BIT:
        return image->planes[2].format;
    case VK_IMAGE_ASPECT_STENCIL_BIT:
        return vk_format_stencil_only(image->vk_format);
    case VK_IMAGE_ASPECT_DEPTH_BIT:
        return vk_format_depth_only(image->vk_format);
    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
        return vk_format_depth_only(image->vk_format);
    default:
        return image->vk_format;
    }
}

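/* Fills in an image view: validates the subresource range, resolves the view
 * format (Android external formats arrive as VK_FORMAT_UNDEFINED and fall
 * back to the image format), computes the view extent, and builds one
 * sampled-image and one storage-image descriptor per covered plane.
 */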
void
radv_image_view_init(struct radv_image_view *iview,
                     struct radv_device *device,
                     const VkImageViewCreateInfo* pCreateInfo,
                     const struct radv_image_view_extra_create_info* extra_create_info)
{
    RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

    switch (image->type) {
    case VK_IMAGE_TYPE_1D:
    case VK_IMAGE_TYPE_2D:
        /* The last layer index must stay strictly below the layer count. */
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 < image->info.array_size);
        break;
    case VK_IMAGE_TYPE_3D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
               < radv_minify(image->info.depth, range->baseMipLevel));
        break;
    default:
        unreachable("bad VkImageType");
    }
    iview->image = image;
    iview->bo = image->bo;
    iview->type = pCreateInfo->viewType;
    iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
    iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
    iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

    iview->vk_format = pCreateInfo->format;

    /* If the image has an Android external format, pCreateInfo->format will be
     * VK_FORMAT_UNDEFINED. */
    if (iview->vk_format == VK_FORMAT_UNDEFINED)
        iview->vk_format = image->vk_format;

    if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
        iview->vk_format = vk_format_stencil_only(iview->vk_format);
    } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
        iview->vk_format = vk_format_depth_only(iview->vk_format);
    }

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        iview->extent = (VkExtent3D) {
            .width = image->info.width,
            .height = image->info.height,
            .depth = image->info.depth,
        };
    } else {
        iview->extent = (VkExtent3D) {
            .width = radv_minify(image->info.width, range->baseMipLevel),
            .height = radv_minify(image->info.height, range->baseMipLevel),
            .depth = radv_minify(image->info.depth, range->baseMipLevel),
        };
    }

    if (iview->vk_format != image->planes[iview->plane_id].format) {
        unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
        unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
        unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
        unsigned img_bh = vk_format_get_blockheight(image->vk_format);

        iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
        iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

        /* Comment ported from amdvlk -
         * If we have the following image:
         *          Uncompressed pixels   Compressed block sizes (4x4)
         *   mip0:       22 x 22                  6 x 6
         *   mip1:       11 x 11                  3 x 3
         *   mip2:        5 x  5                  2 x 2
         *   mip3:        2 x  2                  1 x 1
         *   mip4:        1 x  1                  1 x 1
         *
         * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
         * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
         * divide-by-two integer math):
         *   mip0:  6x6
         *   mip1:  3x3
         *   mip2:  1x1
         *   mip3:  1x1
         *
         * This means that mip2 will be missing texels.
         *
         * Fix this by calculating the base mip's width and height, then convert that, and round it
         * back up to get the level 0 size.
         * Clamp the converted size between the original values, and next power of two, which
         * means we don't oversize the image.
         */
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            vk_format_is_compressed(image->vk_format) &&
            !vk_format_is_compressed(iview->vk_format)) {
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width,
                                        iview->image->planes[0].surface.u.gfx9.surf_pitch);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height,
                                         iview->image->planes[0].surface.u.gfx9.surf_height);
        }
    }

    iview->base_layer = range->baseArrayLayer;
    iview->layer_count = radv_get_layerCount(image, range);
    iview->base_mip = range->baseMipLevel;
    iview->level_count = radv_get_levelCount(image, range);

    bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
    for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
        VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
        radv_image_view_make_descriptor(iview, device, format,
                                        &pCreateInfo->components,
                                        false, disable_compression,
                                        iview->plane_id + i, i);
        radv_image_view_make_descriptor(iview, device, format,
                                        &pCreateInfo->components,
                                        true, disable_compression,
                                        iview->plane_id + i, i);
    }
}

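/* Returns whether HTILE metadata is kept valid in the given layout.
 * TC-compatible HTILE stays valid in everything but GENERAL; regular HTILE
 * only in depth/stencil attachment layouts, or in TRANSFER_DST when the
 * image is exclusively owned by the general queue.
 */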
bool radv_layout_has_htile(const struct radv_image *image,
                           VkImageLayout layout,
                           bool in_render_loop,
                           unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image))
        return layout != VK_IMAGE_LAYOUT_GENERAL;

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
            layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

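/* Note: at the moment this is the exact same condition as
 * radv_layout_has_htile() above; the two presumably exist separately so that
 * "metadata is present" and "metadata is compressed" can diverge later.
 */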
bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     bool in_render_loop,
                                     unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image))
        return layout != VK_IMAGE_LAYOUT_GENERAL;

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
            layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

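/* Fast clears are only eligible in COLOR_ATTACHMENT_OPTIMAL; in_render_loop
 * and queue_mask are accepted for symmetry with the other layout helpers but
 * are currently unused.
 */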
bool radv_layout_can_fast_clear(const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

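/* DCC stays compressed in every layout except GENERAL, with the carve-out
 * below for compute transfer destinations, which would require DCC-aware
 * image stores.
 */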
bool radv_layout_dcc_compressed(const struct radv_device *device,
                                const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    /* Don't compress compute transfer dst, as image stores are not supported. */
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
        (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
        return false;

    return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

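/* Translates a Vulkan queue family index into a mask of RADV queue families
 * that may access the image. A hedged usage sketch for an ownership-transfer
 * barrier (variable names here are illustrative, not from this file):
 *
 *   const VkImageMemoryBarrier *b = ...;
 *   unsigned src_mask = radv_image_queue_family_mask(
 *           image, b->srcQueueFamilyIndex, cmd_queue_family);
 *   unsigned dst_mask = radv_image_queue_family_mask(
 *           image, b->dstQueueFamilyIndex, cmd_queue_family);
 *
 * EXTERNAL/FOREIGN map to all families, and IGNORED means the family the
 * command buffer executes on.
 */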
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
    if (!image->exclusive)
        return image->queue_family_mask;
    if (family == VK_QUEUE_FAMILY_EXTERNAL ||
        family == VK_QUEUE_FAMILY_FOREIGN_EXT)
        return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
    if (family == VK_QUEUE_FAMILY_IGNORED)
        return 1u << queue_family;
    return 1u << family;
}

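/* vkCreateImage entry point. Android gralloc allocations take a dedicated
 * path; otherwise the WSI scanout flag is extracted from the pNext chain and
 * the request is forwarded to radv_image_create().
 */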
VkResult
radv_CreateImage(VkDevice device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#ifdef ANDROID
    const VkNativeBufferANDROID *gralloc_info =
        vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

    if (gralloc_info)
        return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
                                       pAllocator, pImage);
#endif

    const struct wsi_image_create_info *wsi_info =
        vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
    bool scanout = wsi_info && wsi_info->scanout;

    return radv_image_create(device,
                             &(struct radv_image_create_info) {
                                 .vk_info = pCreateInfo,
                                 .scanout = scanout,
                             },
                             pAllocator,
                             pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image, image, _image);

    if (!image)
        return;

    if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
        device->ws->buffer_destroy(image->bo);

    if (image->owned_memory != VK_NULL_HANDLE)
        radv_FreeMemory(_device, image->owned_memory, pAllocator);

    vk_free2(&device->alloc, pAllocator, image);
}

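/* vkGetImageSubresourceLayout. On GFX9+ the hardware pitch is stored in
 * texels, so rowPitch is pitch * bpe, except for the 96-bit R32G32B32
 * formats whose surface pitch counts 4-byte components: there a row is
 * surf_pitch * bpe / 3 bytes. Pre-GFX9 layouts come straight from the
 * legacy per-level surface data.
 */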
void radv_GetImageSubresourceLayout(
    VkDevice                                    _device,
    VkImage                                     _image,
    const VkImageSubresource*                   pSubresource,
    VkSubresourceLayout*                        pLayout)
{
    RADV_FROM_HANDLE(radv_image, image, _image);
    RADV_FROM_HANDLE(radv_device, device, _device);
    int level = pSubresource->mipLevel;
    int layer = pSubresource->arrayLayer;

    unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

    struct radv_image_plane *plane = &image->planes[plane_id];
    struct radeon_surf *surface = &plane->surface;

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

        pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
        if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
            /* Adjust the number of bytes between each row because
             * the pitch is actually the number of components per
             * row.
             */
            pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
        } else {
            uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

            assert(util_is_power_of_two_nonzero(surface->bpe));
            pLayout->rowPitch = pitch * surface->bpe;
        }

        pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
        pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
        pLayout->size = surface->u.gfx9.surf_slice_size;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    } else {
        pLayout->offset = plane->offset + surface->u.legacy.level[level].offset +
                          (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
        pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
        pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    }
}

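/* Thin vkCreateImageView wrapper: allocates the view object and defers to
 * radv_image_view_init() with no extra create info.
 */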
VkResult
radv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_image_view *view;

    view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (view == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    radv_image_view_init(view, device, pCreateInfo, NULL);

    *pView = radv_image_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
                      const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image_view, iview, _iview);

    if (!iview)
        return;
    vk_free2(&device->alloc, pAllocator, iview);
}

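/* Buffer views only need a typed buffer descriptor; VK_WHOLE_SIZE is
 * resolved against the buffer size (minus the offset) before the descriptor
 * is built.
 */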
void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
                           const VkBufferViewCreateInfo* pCreateInfo)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

    view->bo = buffer->bo;
    view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
        buffer->size - pCreateInfo->offset : pCreateInfo->range;
    view->vk_format = pCreateInfo->format;

    radv_make_buffer_descriptor(device, buffer, view->vk_format,
                                pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_buffer_view *view;

    view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!view)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    radv_buffer_view_init(view, device, pCreateInfo);

    *pView = radv_buffer_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

    if (!view)
        return;

    vk_free2(&device->alloc, pAllocator, view);
}