radv: Remove RANGE_SIZE usage
[mesa.git] src/amd/vulkan/radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "util/debug.h"
#include "util/u_atomic.h"

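/* Pick the surface tiling mode (linear, 1D or 2D tiled) for a new image
 * based on the create info and the GPU generation.
 */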
static unsigned
radv_choose_tiling(struct radv_device *device,
                   const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
        if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
                assert(pCreateInfo->samples <= 1);
                return RADEON_SURF_MODE_LINEAR_ALIGNED;
        }

        if (!vk_format_is_compressed(format) &&
            !vk_format_is_depth_or_stencil(format) &&
            device->physical_device->rad_info.chip_class <= GFX8) {
                /* this causes hangs in some VK CTS tests on GFX9. */
                /* Textures with a very small height are recommended to be linear. */
                if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
                    /* Only very thin and long 2D textures should benefit from
                     * linear_aligned. */
                    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;
        }

        /* MSAA resources must be 2D tiled. */
        if (pCreateInfo->samples > 1)
                return RADEON_SURF_MODE_2D;

        return RADEON_SURF_MODE_2D;
}

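/* TC-compatible HTILE keeps a compressed depth surface readable by the
 * texture unit, so sampled depth does not require a decompression pass.
 * Return false whenever the hardware or image configuration cannot
 * guarantee that.
 */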
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
                                   const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
        /* TC-compat HTILE is only available for GFX8+. */
        if (device->physical_device->rad_info.chip_class < GFX8)
                return false;

        if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
                return false;

        if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
                return false;

        if (pCreateInfo->mipLevels > 1)
                return false;

        /* Do not enable TC-compatible HTILE if the image isn't readable by a
         * shader because no texture fetches will happen.
         */
        if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                                    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                                    VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
                return false;

        /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
         * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
         */
        if (pCreateInfo->samples >= 2 &&
            (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
             (format == VK_FORMAT_D32_SFLOAT &&
              device->physical_device->rad_info.chip_class == GFX10)))
                return false;

        /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8
         * only supports 32-bit. That said, TC-compat can still be enabled
         * for 16-bit depth surfaces as long as no Z planes are compressed.
         */
        if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
            format != VK_FORMAT_D32_SFLOAT &&
            format != VK_FORMAT_D16_UNORM)
                return false;

        if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
                const struct VkImageFormatListCreateInfo *format_list =
                        (const struct VkImageFormatListCreateInfo *)
                                vk_find_struct_const(pCreateInfo->pNext,
                                                     IMAGE_FORMAT_LIST_CREATE_INFO);

                /* We have to ignore the existence of the list if viewFormatCount = 0 */
                if (format_list && format_list->viewFormatCount) {
                        /* compatibility is transitive, so we only need to check
                         * one format with everything else.
                         */
                        for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
                                if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
                                        continue;

                                if (format != format_list->pViewFormats[i])
                                        return false;
                        }
                } else {
                        return false;
                }
        }

        return true;
}

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
        if (info->bo_metadata) {
                if (device->physical_device->rad_info.chip_class >= GFX9)
                        return info->bo_metadata->u.gfx9.scanout;
                else
                        return info->bo_metadata->u.legacy.scanout;
        }

        return info->scanout;
}

static bool
radv_use_dcc_for_image(struct radv_device *device,
                       const struct radv_image *image,
                       const VkImageCreateInfo *pCreateInfo,
                       VkFormat format)
{
        bool dcc_compatible_formats;
        bool blendable;

        /* DCC (Delta Color Compression) is only available for GFX8+. */
        if (device->physical_device->rad_info.chip_class < GFX8)
                return false;

        if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
                return false;

        if (image->shareable)
                return false;

        /* TODO: Enable DCC for storage images. */
        if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
                return false;

        if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
                return false;

        if (vk_format_is_subsampled(format) ||
            vk_format_get_plane_count(format) > 1)
                return false;

        /* TODO: Enable DCC for mipmaps on GFX9+. */
        if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
            device->physical_device->rad_info.chip_class >= GFX9)
                return false;

        /* Do not enable DCC for mipmapped arrays because performance is worse. */
        if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
                return false;

        /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
         * 2x can be enabled with an option.
         */
        if (pCreateInfo->samples > 2 ||
            (pCreateInfo->samples == 2 &&
             !device->physical_device->dcc_msaa_allowed))
                return false;

        /* Determine if the formats are DCC compatible. */
        dcc_compatible_formats =
                radv_is_colorbuffer_format_supported(format,
                                                     &blendable);

        if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
                const struct VkImageFormatListCreateInfo *format_list =
                        (const struct VkImageFormatListCreateInfo *)
                                vk_find_struct_const(pCreateInfo->pNext,
                                                     IMAGE_FORMAT_LIST_CREATE_INFO);

                /* We have to ignore the existence of the list if viewFormatCount = 0 */
                if (format_list && format_list->viewFormatCount) {
                        /* compatibility is transitive, so we only need to check
                         * one format with everything else. */
                        for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
                                if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
                                        continue;

                                if (!radv_dcc_formats_compatible(format,
                                                                 format_list->pViewFormats[i]))
                                        dcc_compatible_formats = false;
                        }
                } else {
                        dcc_compatible_formats = false;
                }
        }

        if (!dcc_compatible_formats)
                return false;

        return true;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
                                   struct radv_image *image)
{
        if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
                return false;

        /* TC-compat CMASK is only available for GFX8+. */
        if (device->physical_device->rad_info.chip_class < GFX8)
                return false;

        if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
                return false;

        if (radv_image_has_dcc(image))
                return false;

        if (!radv_image_has_cmask(image))
                return false;

        return true;
}

static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
        return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device,
                              const struct radeon_bo_metadata *md)
{
        if (md->metadata[0] != 1 ||
            md->metadata[1] != si_get_bo_metadata_word1(device))
                return false;

        if (md->size_metadata < 40)
                return false;

        return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device,
                                 struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
        surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                if (md->u.gfx9.swizzle_mode > 0)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
                else
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

                surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
        } else {
                surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
                surface->u.legacy.bankw = md->u.legacy.bankw;
                surface->u.legacy.bankh = md->u.legacy.bankh;
                surface->u.legacy.tile_split = md->u.legacy.tile_split;
                surface->u.legacy.mtilea = md->u.legacy.mtilea;
                surface->u.legacy.num_banks = md->u.legacy.num_banks;

                if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
                else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
                else
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
        }
}

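/* Imported images may have been allocated with larger dimensions than the
 * create info describes (see the minigbm note below); reconcile both sets
 * of dimensions and reject imports we cannot represent.
 */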
static VkResult
radv_patch_image_dimensions(struct radv_device *device,
                            struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
        unsigned width = image->info.width;
        unsigned height = image->info.height;

        /*
         * minigbm sometimes allocates bigger images, which results in weird
         * strides and other properties. Let's be lenient where possible and
         * fail it on GFX10 (as we cannot cope there).
         *
         * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
         */
        if (create_info->bo_metadata &&
            radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
                const struct radeon_bo_metadata *md = create_info->bo_metadata;

                if (device->physical_device->rad_info.chip_class >= GFX10) {
                        width = G_00A004_WIDTH_LO(md->metadata[3]) +
                                (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
                        height = S_00A008_HEIGHT(md->metadata[4]) + 1;
                } else {
                        width = G_008F18_WIDTH(md->metadata[4]) + 1;
                        height = G_008F18_HEIGHT(md->metadata[4]) + 1;
                }
        }

        if (image->info.width == width && image->info.height == height)
                return VK_SUCCESS;

        if (width < image->info.width || height < image->info.height) {
                fprintf(stderr,
                        "The imported image has smaller dimensions than the internal\n"
                        "dimensions. Using it is going to fail badly, so we reject\n"
                        "this import.\n"
                        "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
                        image->info.width, image->info.height, width, height);
                return VK_ERROR_INVALID_EXTERNAL_HANDLE;
        } else if (device->physical_device->rad_info.chip_class >= GFX10) {
                fprintf(stderr,
                        "Tried to import an image with inconsistent width on GFX10.\n"
                        "As GFX10 has no separate stride fields we cannot cope with\n"
                        "an inconsistency in width and will fail this import.\n"
                        "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
                        image->info.width, image->info.height, width, height);
                return VK_ERROR_INVALID_EXTERNAL_HANDLE;
        } else {
                fprintf(stderr,
                        "Tried to import an image with inconsistent width on pre-GFX10.\n"
                        "As GFX10 has no separate stride fields we cannot cope with\n"
                        "an inconsistency and would fail on GFX10.\n"
                        "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
                        image->info.width, image->info.height, width, height);
        }
        image_info->width = width;
        image_info->height = height;

        return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device,
                                 struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
        VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
        if (result != VK_SUCCESS)
                return result;

        for (unsigned plane = 0; plane < image->plane_count; ++plane) {
                if (create_info->bo_metadata) {
                        radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                                         create_info->bo_metadata);
                }

                if (radv_surface_has_scanout(device, create_info)) {
                        image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
                        image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

                        image->info.surf_index = NULL;
                }
        }
        return VK_SUCCESS;
}

static int
radv_init_surface(struct radv_device *device,
                  const struct radv_image *image,
                  struct radeon_surf *surface,
                  unsigned plane_id,
                  const VkImageCreateInfo *pCreateInfo,
                  VkFormat image_format)
{
        unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
        VkFormat format = vk_format_get_plane_format(image_format, plane_id);
        const struct vk_format_description *desc = vk_format_description(format);
        bool is_depth, is_stencil;

        is_depth = vk_format_has_depth(desc);
        is_stencil = vk_format_has_stencil(desc);

        surface->blk_w = vk_format_get_blockwidth(format);
        surface->blk_h = vk_format_get_blockheight(format);

        surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
        /* align byte per element on dword */
        if (surface->bpe == 3) {
                surface->bpe = 4;
        }

        surface->flags = RADEON_SURF_SET(array_mode, MODE);

        switch (pCreateInfo->imageType) {
        case VK_IMAGE_TYPE_1D:
                if (pCreateInfo->arrayLayers > 1)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
                else
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
                break;
        case VK_IMAGE_TYPE_2D:
                if (pCreateInfo->arrayLayers > 1)
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
                else
                        surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
                break;
        case VK_IMAGE_TYPE_3D:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
                break;
        default:
                unreachable("unhandled image type");
        }

        if (is_depth) {
                surface->flags |= RADEON_SURF_ZBUFFER;
                if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
                        surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
        }

        if (is_stencil)
                surface->flags |= RADEON_SURF_SBUFFER;

        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
            vk_format_get_blocksizebits(image_format) == 128 &&
            vk_format_is_compressed(image_format))
                surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

        if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
                surface->flags |= RADEON_SURF_DISABLE_DCC;

        return 0;
}

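/* GFX6-GFX8 only: look up the tiling index of a mip level, using the
 * separate stencil tiling table when requested.
 */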
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
        if (stencil)
                return plane->surface.u.legacy.stencil_tiling_index[level];
        else
                return plane->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
        switch (swizzle) {
        case VK_SWIZZLE_Y:
                return V_008F0C_SQ_SEL_Y;
        case VK_SWIZZLE_Z:
                return V_008F0C_SQ_SEL_Z;
        case VK_SWIZZLE_W:
                return V_008F0C_SQ_SEL_W;
        case VK_SWIZZLE_0:
                return V_008F0C_SQ_SEL_0;
        case VK_SWIZZLE_1:
                return V_008F0C_SQ_SEL_1;
        default: /* VK_SWIZZLE_X */
                return V_008F0C_SQ_SEL_X;
        }
}

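/* Build a 4-dword buffer resource descriptor (V#): dword 0 holds the base
 * address, dword 1 the address high bits and the stride, dword 2 the number
 * of records and dword 3 the destination swizzles plus format information.
 */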
static void
radv_make_buffer_descriptor(struct radv_device *device,
                            struct radv_buffer *buffer,
                            VkFormat vk_format,
                            unsigned offset,
                            unsigned range,
                            uint32_t *state)
{
        const struct vk_format_description *desc;
        unsigned stride;
        uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
        uint64_t va = gpu_address + buffer->offset;
        unsigned num_format, data_format;
        int first_non_void;

        desc = vk_format_description(vk_format);
        first_non_void = vk_format_get_first_non_void_channel(vk_format);
        stride = desc->block.bits / 8;

        va += offset;
        state[0] = va;
        state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                   S_008F04_STRIDE(stride);

        if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
                range /= stride;
        }

        state[2] = range;
        state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
                   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
                   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
                   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

        if (device->physical_device->rad_info.chip_class >= GFX10) {
                const struct gfx10_format *fmt = gfx10_format_description(vk_format);

                /* OOB_SELECT chooses the out-of-bounds check:
                 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
                 * - 1: index >= NUM_RECORDS
                 * - 2: NUM_RECORDS == 0
                 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
                 *      else: swizzle_address >= NUM_RECORDS
                 */
                state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                            S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                            S_008F0C_RESOURCE_LEVEL(1);
        } else {
                num_format = radv_translate_buffer_numformat(desc, first_non_void);
                data_format = radv_translate_buffer_dataformat(desc, first_non_void);

                assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
                assert(num_format != ~0);

                state[3] |= S_008F0C_NUM_FORMAT(num_format) |
                            S_008F0C_DATA_FORMAT(data_format);
        }
}

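/* Patch the descriptor fields that depend on where and how the image is
 * bound: base address and tile swizzle, swizzle/tiling mode, pitch, and
 * the metadata (DCC/HTILE) address used for compressed access.
 */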
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
                               struct radv_image *image,
                               const struct legacy_surf_level *base_level_info,
                               unsigned plane_id,
                               unsigned base_level, unsigned first_level,
                               unsigned block_width, bool is_stencil,
                               bool is_storage_image, bool disable_compression,
                               uint32_t *state)
{
        struct radv_image_plane *plane = &image->planes[plane_id];
        uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
        uint64_t va = gpu_address + plane->offset;
        enum chip_class chip_class = device->physical_device->rad_info.chip_class;
        uint64_t meta_va = 0;

        if (chip_class >= GFX9) {
                if (is_stencil)
                        va += plane->surface.u.gfx9.stencil_offset;
                else
                        va += plane->surface.u.gfx9.surf_offset;
        } else
                va += base_level_info->offset;

        state[0] = va >> 8;
        if (chip_class >= GFX9 ||
            base_level_info->mode == RADEON_SURF_MODE_2D)
                state[0] |= plane->surface.tile_swizzle;
        state[1] &= C_008F14_BASE_ADDRESS_HI;
        state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

        if (chip_class >= GFX8) {
                state[6] &= C_008F28_COMPRESSION_EN;
                state[7] = 0;
                if (!disable_compression && radv_dcc_enabled(image, first_level)) {
                        meta_va = gpu_address + image->dcc_offset;
                        if (chip_class <= GFX8)
                                meta_va += base_level_info->dcc_offset;

                        unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
                        dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
                        meta_va |= dcc_tile_swizzle;
                } else if (!disable_compression &&
                           radv_image_is_tc_compat_htile(image)) {
                        meta_va = gpu_address + image->htile_offset;
                }

                if (meta_va) {
                        state[6] |= S_008F28_COMPRESSION_EN(1);
                        if (chip_class <= GFX9)
                                state[7] = meta_va >> 8;
                }
        }

        if (chip_class >= GFX10) {
                state[3] &= C_00A00C_SW_MODE;

                if (is_stencil) {
                        state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
                } else {
                        state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
                }

                state[6] &= C_00A018_META_DATA_ADDRESS_LO &
                            C_00A018_META_PIPE_ALIGNED;

                if (meta_va) {
                        struct gfx9_surf_meta_flags meta;

                        if (image->dcc_offset)
                                meta = plane->surface.u.gfx9.dcc;
                        else
                                meta = plane->surface.u.gfx9.htile;

                        state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                                    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
                }

                state[7] = meta_va >> 16;
        } else if (chip_class == GFX9) {
                state[3] &= C_008F1C_SW_MODE;
                state[4] &= C_008F20_PITCH;

                if (is_stencil) {
                        state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
                        state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
                } else {
                        state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
                        state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
                }

                state[5] &= C_008F24_META_DATA_ADDRESS &
                            C_008F24_META_PIPE_ALIGNED &
                            C_008F24_META_RB_ALIGNED;
                if (meta_va) {
                        struct gfx9_surf_meta_flags meta;

                        if (image->dcc_offset)
                                meta = plane->surface.u.gfx9.dcc;
                        else
                                meta = plane->surface.u.gfx9.htile;

                        state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                                    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                                    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
                }
        } else {
                /* GFX6-GFX8 */
                unsigned pitch = base_level_info->nblk_x * block_width;
                unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

                state[3] &= C_008F1C_TILING_INDEX;
                state[3] |= S_008F1C_TILING_INDEX(index);
                state[4] &= C_008F20_PITCH;
                state[4] |= S_008F20_PITCH(pitch - 1);
        }
}

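/* Map a Vulkan image/view type to the hardware SQ_RSRC_IMG_* resource type.
 * Cube views used as storage images are emitted as 2D arrays because image
 * stores address the cube faces as layers.
 */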
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
                             unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
        if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
                return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

        /* GFX9 allocates 1D textures as 2D. */
        if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
                image_type = VK_IMAGE_TYPE_2D;

        switch (image_type) {
        case VK_IMAGE_TYPE_1D:
                return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
        case VK_IMAGE_TYPE_2D:
                if (nr_samples > 1)
                        return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
                else
                        return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
        case VK_IMAGE_TYPE_3D:
                if (view_type == VK_IMAGE_VIEW_TYPE_3D)
                        return V_008F1C_SQ_RSRC_IMG_3D;
                else
                        return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
        default:
                unreachable("illegal image type");
        }
}

static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
        unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

        if (swizzle[3] == VK_SWIZZLE_X) {
                /* For the pre-defined border color values (white, opaque
                 * black, transparent black), the only thing that matters is
                 * that the alpha channel winds up in the correct place
                 * (because the RGB channels are all the same) so either of
                 * these enumerations will work.
                 */
                if (swizzle[2] == VK_SWIZZLE_Y)
                        bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
                else
                        bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
        } else if (swizzle[0] == VK_SWIZZLE_X) {
                if (swizzle[1] == VK_SWIZZLE_Y)
                        bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
                else
                        bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
        } else if (swizzle[1] == VK_SWIZZLE_X) {
                bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
        } else if (swizzle[2] == VK_SWIZZLE_X) {
                bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
        }

        return bc_swizzle;
}

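/* Report whether the alpha channel ends up in the most significant bits of
 * the format, which the hardware needs to know (ALPHA_IS_ON_MSB) to encode
 * DCC clear values correctly.
 */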
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
        const struct vk_format_description *desc = vk_format_description(format);

        if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
                return desc->swizzle[3] == VK_SWIZZLE_X;

        return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
                              struct radv_image *image,
                              bool is_storage_image,
                              VkImageViewType view_type,
                              VkFormat vk_format,
                              const VkComponentMapping *mapping,
                              unsigned first_level, unsigned last_level,
                              unsigned first_layer, unsigned last_layer,
                              unsigned width, unsigned height, unsigned depth,
                              uint32_t *state,
                              uint32_t *fmask_state)
{
        const struct vk_format_description *desc;
        enum vk_swizzle swizzle[4];
        unsigned img_format;
        unsigned type;

        desc = vk_format_description(vk_format);
        img_format = gfx10_format_description(vk_format)->img_format;

        if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
                const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
                vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
        } else {
                vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
        }

        type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                            is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
        if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
                height = 1;
                depth = image->info.array_size;
        } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
                   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
                if (view_type != VK_IMAGE_VIEW_TYPE_3D)
                        depth = image->info.array_size;
        } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
                depth = image->info.array_size / 6;

        state[0] = 0;
        state[1] = S_00A004_FORMAT(img_format) |
                   S_00A004_WIDTH_LO(width - 1);
        state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
                   S_00A008_HEIGHT(height - 1) |
                   S_00A008_RESOURCE_LEVEL(1);
        state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
                   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
                   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
                   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
                   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
                                       0 : first_level) |
                   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
                                       util_logbase2(image->info.samples) :
                                       last_level) |
                   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
                   S_00A00C_TYPE(type);
        /* Depth is the last accessible layer on gfx9+. The hw doesn't need
         * to know the total number of layers.
         */
        state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
                   S_00A010_BASE_ARRAY(first_layer);
        state[5] = S_00A014_ARRAY_PITCH(0) |
                   S_00A014_MAX_MIP(image->info.samples > 1 ?
                                    util_logbase2(image->info.samples) :
                                    image->info.levels - 1) |
                   S_00A014_PERF_MOD(4);
        state[6] = 0;
        state[7] = 0;

        if (radv_dcc_enabled(image, first_level)) {
                state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                            S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
                            S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
        }

        /* Initialize the sampler view for FMASK. */
        if (radv_image_has_fmask(image)) {
                uint64_t gpu_address = radv_buffer_get_va(image->bo);
                uint32_t format;
                uint64_t va;

                assert(image->plane_count == 1);

                va = gpu_address + image->offset + image->fmask_offset;

                switch (image->info.samples) {
                case 2:
                        format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
                        break;
                case 4:
                        format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
                        break;
                case 8:
                        format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
                        break;
                default:
                        unreachable("invalid nr_samples");
                }

                fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
                fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
                                 S_00A004_FORMAT(format) |
                                 S_00A004_WIDTH_LO(width - 1);
                fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
                                 S_00A008_HEIGHT(height - 1) |
                                 S_00A008_RESOURCE_LEVEL(1);
                fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
                                 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
                                 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
                                 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
                                 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
                                 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
                fmask_state[4] = S_00A010_DEPTH(last_layer) |
                                 S_00A010_BASE_ARRAY(first_layer);
                fmask_state[5] = 0;
                fmask_state[6] = S_00A018_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned);
                fmask_state[7] = 0;
        } else if (fmask_state)
                memset(fmask_state, 0, 8 * 4);
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9)
 */
static void
si_make_texture_descriptor(struct radv_device *device,
                           struct radv_image *image,
                           bool is_storage_image,
                           VkImageViewType view_type,
                           VkFormat vk_format,
                           const VkComponentMapping *mapping,
                           unsigned first_level, unsigned last_level,
                           unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth,
                           uint32_t *state,
                           uint32_t *fmask_state)
{
        const struct vk_format_description *desc;
        enum vk_swizzle swizzle[4];
        int first_non_void;
        unsigned num_format, data_format, type;

        desc = vk_format_description(vk_format);

        if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
                const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
                vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
        } else {
                vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
        }

        first_non_void = vk_format_get_first_non_void_channel(vk_format);

        num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
        if (num_format == ~0) {
                num_format = 0;
        }

        data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
        if (data_format == ~0) {
                data_format = 0;
        }

        /* S8 with either Z16 or Z32 HTILE need a special format. */
        if (device->physical_device->rad_info.chip_class == GFX9 &&
            vk_format == VK_FORMAT_S8_UINT &&
            radv_image_is_tc_compat_htile(image)) {
                if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
                        data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
                else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
                        data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
        }

        type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                            is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
        if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
                height = 1;
                depth = image->info.array_size;
        } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
                   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
                if (view_type != VK_IMAGE_VIEW_TYPE_3D)
                        depth = image->info.array_size;
        } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
                depth = image->info.array_size / 6;

        state[0] = 0;
        state[1] = (S_008F14_DATA_FORMAT(data_format) |
                    S_008F14_NUM_FORMAT(num_format));
        state[2] = (S_008F18_WIDTH(width - 1) |
                    S_008F18_HEIGHT(height - 1) |
                    S_008F18_PERF_MOD(4));
        state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
                    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
                    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
                    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
                    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
                                        0 : first_level) |
                    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
                                        util_logbase2(image->info.samples) :
                                        last_level) |
                    S_008F1C_TYPE(type));
        state[4] = 0;
        state[5] = S_008F24_BASE_ARRAY(first_layer);
        state[6] = 0;
        state[7] = 0;

        if (device->physical_device->rad_info.chip_class == GFX9) {
                unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

                /* Depth is the last accessible layer on Gfx9.
                 * The hw doesn't need to know the total number of layers.
                 */
                if (type == V_008F1C_SQ_RSRC_IMG_3D)
                        state[4] |= S_008F20_DEPTH(depth - 1);
                else
                        state[4] |= S_008F20_DEPTH(last_layer);

                state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
                state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
                                             util_logbase2(image->info.samples) :
                                             image->info.levels - 1);
        } else {
                state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
                state[4] |= S_008F20_DEPTH(depth - 1);
                state[5] |= S_008F24_LAST_ARRAY(last_layer);
        }

        if (image->dcc_offset) {
                state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
        } else {
                /* The last dword is unused by hw. The shader uses it to clear
                 * bits in the first dword of sampler state.
                 */
                if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
                        if (first_level == last_level)
                                state[7] = C_008F30_MAX_ANISO_RATIO;
                        else
                                state[7] = 0xffffffff;
                }
        }

        /* Initialize the sampler view for FMASK. */
        if (radv_image_has_fmask(image)) {
                uint32_t fmask_format, num_format;
                uint64_t gpu_address = radv_buffer_get_va(image->bo);
                uint64_t va;

                assert(image->plane_count == 1);

                va = gpu_address + image->offset + image->fmask_offset;

                if (device->physical_device->rad_info.chip_class == GFX9) {
                        fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
                        switch (image->info.samples) {
                        case 2:
                                num_format = V_008F14_IMG_FMASK_8_2_2;
                                break;
                        case 4:
                                num_format = V_008F14_IMG_FMASK_8_4_4;
                                break;
                        case 8:
                                num_format = V_008F14_IMG_FMASK_32_8_8;
                                break;
                        default:
                                unreachable("invalid nr_samples");
                        }
                } else {
                        switch (image->info.samples) {
                        case 2:
                                fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
                                break;
                        case 4:
                                fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
                                break;
                        case 8:
                                fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
                                break;
                        default:
                                assert(0);
                                fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
                        }
                        num_format = V_008F14_IMG_NUM_FORMAT_UINT;
                }

                fmask_state[0] = va >> 8;
                fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
                fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
                                 S_008F14_DATA_FORMAT(fmask_format) |
                                 S_008F14_NUM_FORMAT(num_format);
                fmask_state[2] = S_008F18_WIDTH(width - 1) |
                                 S_008F18_HEIGHT(height - 1);
                fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
                                 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
                                 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
                                 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
                                 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
                fmask_state[4] = 0;
                fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
                fmask_state[6] = 0;
                fmask_state[7] = 0;

                if (device->physical_device->rad_info.chip_class == GFX9) {
                        fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
                        fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                                          S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
                        fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
                                          S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);

                        if (radv_image_is_tc_compat_cmask(image)) {
                                va = gpu_address + image->offset + image->cmask_offset;

                                fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
                                fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
                                fmask_state[7] |= va >> 8;
                        }
                } else {
                        fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
                        fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
                                          S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
                        fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

                        if (radv_image_is_tc_compat_cmask(image)) {
                                va = gpu_address + image->offset + image->cmask_offset;

                                fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
                                fmask_state[7] |= va >> 8;
                        }
                }
        } else if (fmask_state)
                memset(fmask_state, 0, 8 * 4);
}

static void
radv_make_texture_descriptor(struct radv_device *device,
                             struct radv_image *image,
                             bool is_storage_image,
                             VkImageViewType view_type,
                             VkFormat vk_format,
                             const VkComponentMapping *mapping,
                             unsigned first_level, unsigned last_level,
                             unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth,
                             uint32_t *state,
                             uint32_t *fmask_state)
{
        if (device->physical_device->rad_info.chip_class >= GFX10) {
                gfx10_make_texture_descriptor(device, image, is_storage_image,
                                              view_type, vk_format, mapping,
                                              first_level, last_level,
                                              first_layer, last_layer,
                                              width, height, depth,
                                              state, fmask_state);
        } else {
                si_make_texture_descriptor(device, image, is_storage_image,
                                           view_type, vk_format, mapping,
                                           first_level, last_level,
                                           first_layer, last_layer,
                                           width, height, depth,
                                           state, fmask_state);
        }
}

static void
radv_query_opaque_metadata(struct radv_device *device,
                           struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
        static const VkComponentMapping fixedmapping;
        uint32_t desc[8], i;

        assert(image->plane_count == 1);

        /* Metadata image format version 1:
         * [0] = 1 (metadata format identifier)
         * [1] = (VENDOR_ID << 16) | PCI_ID
         * [2:9] = image descriptor for the whole resource
         *         [2] is always 0, because the base address is cleared
         *         [9] is the DCC offset bits [39:8] from the beginning of
         *             the buffer
         * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
         */
        md->metadata[0] = 1; /* metadata image format version 1 */

        /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
        md->metadata[1] = si_get_bo_metadata_word1(device);

        radv_make_texture_descriptor(device, image, false,
                                     (VkImageViewType)image->type, image->vk_format,
                                     &fixedmapping, 0, image->info.levels - 1, 0,
                                     image->info.array_size - 1,
                                     image->info.width, image->info.height,
                                     image->info.depth,
                                     desc, NULL);

        si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
                                       image->planes[0].surface.blk_w, false, false, false, desc);

        /* Clear the base address and set the relative DCC offset. */
        desc[0] = 0;
        desc[1] &= C_008F14_BASE_ADDRESS_HI;
        desc[7] = image->dcc_offset >> 8;

        /* Dwords [2:9] contain the image descriptor. */
        memcpy(&md->metadata[2], desc, sizeof(desc));

        /* Dwords [10:..] contain the mipmap level offsets. */
        if (device->physical_device->rad_info.chip_class <= GFX8) {
                for (i = 0; i <= image->info.levels - 1; i++)
                        md->metadata[10 + i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
                md->size_metadata = (11 + image->info.levels - 1) * 4;
        } else
                md->size_metadata = 10 * 4;
}

void
radv_init_metadata(struct radv_device *device,
                   struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
        struct radeon_surf *surface = &image->planes[0].surface;

        memset(metadata, 0, sizeof(*metadata));

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
                metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
        } else {
                metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
                        RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
                metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
                        RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
                metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
                metadata->u.legacy.bankw = surface->u.legacy.bankw;
                metadata->u.legacy.bankh = surface->u.legacy.bankh;
                metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
                metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
                metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
                metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
                metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
        }

        radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device,
                                  struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
        struct radeon_surf *surface = &image->planes[0].surface;
        unsigned bpe = vk_format_get_blocksizebits(image->vk_format) / 8;

        if (device->physical_device->rad_info.chip_class >= GFX9) {
                if (stride) {
                        surface->u.gfx9.surf_pitch = stride;
                        surface->u.gfx9.surf_slice_size =
                                (uint64_t)stride * surface->u.gfx9.surf_height * bpe;
                }
                surface->u.gfx9.surf_offset = offset;
        } else {
                surface->u.legacy.level[0].nblk_x = stride;
                surface->u.legacy.level[0].slice_size_dw =
                        ((uint64_t)stride * surface->u.legacy.level[0].nblk_y * bpe) / 4;

                if (offset) {
                        for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
                                surface->u.legacy.level[i].offset += offset;
                }
        }
}

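/* The metadata surfaces (FMASK, CMASK, DCC, HTILE) live in the same
 * allocation as the image itself: each helper below appends its surface,
 * plus any clear value/predicate words, to image->size with the required
 * alignment.
 */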
static void
radv_image_alloc_fmask(struct radv_device *device,
                       struct radv_image *image)
{
        unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;

        image->fmask_offset = align64(image->size, fmask_alignment);
        image->size = image->fmask_offset + image->planes[0].surface.fmask_size;
        image->alignment = MAX2(image->alignment, fmask_alignment);
}

static void
radv_image_alloc_cmask(struct radv_device *device,
                       struct radv_image *image)
{
        unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
        unsigned cmask_size = image->planes[0].surface.cmask_size;
        uint32_t clear_value_size = 0;

        if (!cmask_size)
                return;

        assert(cmask_alignment);

        image->cmask_offset = align64(image->size, cmask_alignment);
        /* + 8 for storing the clear values */
        if (!image->clear_value_offset) {
                image->clear_value_offset = image->cmask_offset + cmask_size;
                clear_value_size = 8;
        }
        image->size = image->cmask_offset + cmask_size + clear_value_size;
        image->alignment = MAX2(image->alignment, cmask_alignment);
}

static void
radv_image_alloc_dcc(struct radv_image *image)
{
        assert(image->plane_count == 1);

        image->dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
        /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
        image->clear_value_offset = image->dcc_offset + image->planes[0].surface.dcc_size;
        image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
        image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
        image->size = image->dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
        image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}

static void
radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
{
        image->htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);

        /* + 8 for storing the clear values */
        image->clear_value_offset = image->htile_offset + image->planes[0].surface.htile_size;
        image->size = image->clear_value_offset + image->info.levels * 8;
        if (radv_image_is_tc_compat_htile(image) &&
            device->physical_device->rad_info.has_tc_compat_zrange_bug) {
                /* Metadata for the TC-compatible HTILE hardware bug, which
                 * has to be worked around by updating ZRANGE_PRECISION when
                 * doing fast depth clears to 0.0f.
                 */
                image->tc_compat_zrange_offset = image->size;
                image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
        }
        image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
}

static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
        if (image->info.samples <= 1 &&
            image->info.width * image->info.height <= 512 * 512) {
                /* Do not enable CMASK or DCC for small surfaces where the cost
                 * of the eliminate pass can be higher than the benefit of fast
                 * clear. RadeonSI does this, but the image threshold is
                 * different.
                 */
                return false;
        }

        return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
               (image->exclusive || image->queue_family_mask == 1);
}

static inline bool
radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
{
        if (!radv_image_can_enable_dcc_or_cmask(image) ||
            !radv_image_has_dcc(image))
                return false;

        /* On GFX8, DCC layers can be interleaved and it's currently only
         * enabled if slice size is equal to the per slice fast clear size
         * because the driver assumes that portions of multiple layers are
         * contiguous during fast clears.
         */
        if (image->info.array_size > 1) {
                const struct legacy_surf_level *surf_level =
                        &image->planes[0].surface.u.legacy.level[0];

                assert(device->physical_device->rad_info.chip_class == GFX8);

                if (image->planes[0].surface.dcc_slice_size != surf_level->dcc_fast_clear_size)
                        return false;
        }

        return true;
}

static inline bool
radv_image_can_enable_cmask(struct radv_image *image)
{
        if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
                /* Do not enable CMASK for non-MSAA images (fast color clear)
                 * because 128 bit formats are not supported, but FMASK might
                 * still be used.
                 */
                return false;
        }

        return radv_image_can_enable_dcc_or_cmask(image) &&
               image->info.levels == 1 &&
               image->info.depth == 1 &&
               !image->planes[0].surface.is_linear;
}

static inline bool
radv_image_can_enable_fmask(struct radv_image *image)
{
        return image->info.samples > 1 &&
               image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}

static inline bool
radv_image_can_enable_htile(struct radv_image *image)
{
        return radv_image_has_htile(image) &&
               image->info.levels == 1 &&
               image->info.width * image->info.height >= 8 * 8;
}

static void radv_image_disable_dcc(struct radv_image *image)
{
        for (unsigned i = 0; i < image->plane_count; ++i)
                image->planes[i].surface.dcc_size = 0;
}

static void radv_image_disable_htile(struct radv_image *image)
{
        for (unsigned i = 0; i < image->plane_count; ++i)
                image->planes[i].surface.htile_size = 0;
}

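/* Compute the final layout of an image: initialize the surface of every
 * plane, then allocate metadata in a fixed order (DCC first, CMASK as a
 * fallback or as the DCC/MSAA companion, FMASK for multisampled color,
 * HTILE for depth/stencil).
 */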
VkResult
radv_image_create_layout(struct radv_device *device,
                         struct radv_image_create_info create_info,
                         struct radv_image *image)
{
        /* Check that we did not initialize things earlier */
        assert(!image->planes[0].surface.surf_size);

        /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
         * common internal case. */
        create_info.vk_info = NULL;

        struct ac_surf_info image_info = image->info;
        VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
        if (result != VK_SUCCESS)
                return result;

        image->size = 0;
        image->alignment = 1;
        for (unsigned plane = 0; plane < image->plane_count; ++plane) {
                struct ac_surf_info info = image_info;

                if (plane) {
                        const struct vk_format_description *desc = vk_format_description(image->vk_format);
                        assert(info.width % desc->width_divisor == 0);
                        assert(info.height % desc->height_divisor == 0);

                        info.width /= desc->width_divisor;
                        info.height /= desc->height_divisor;
                }

                device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

                image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
                image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
                image->alignment = image->planes[plane].surface.surf_alignment;

                image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
        }

        if (!create_info.no_metadata_planes) {
                /* Try to enable DCC first. */
                if (radv_image_can_enable_dcc(device, image)) {
                        radv_image_alloc_dcc(image);
                        if (image->info.samples > 1) {
                                /* CMASK should be enabled because DCC fast
                                 * clear with MSAA needs it.
                                 */
                                assert(radv_image_can_enable_cmask(image));
                                radv_image_alloc_cmask(device, image);
                        }
                } else {
                        /* When DCC cannot be enabled, try CMASK. */
                        radv_image_disable_dcc(image);
                        if (radv_image_can_enable_cmask(image)) {
                                radv_image_alloc_cmask(device, image);
                        }
                }

                /* Try to enable FMASK for multisampled images. */
                if (radv_image_can_enable_fmask(image)) {
                        radv_image_alloc_fmask(device, image);

                        if (radv_use_tc_compat_cmask_for_image(device, image))
                                image->tc_compatible_cmask = true;
                } else {
                        /* Otherwise, try to enable HTILE for depth surfaces. */
                        if (radv_image_can_enable_htile(image) &&
                            !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
                                image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
                                radv_image_alloc_htile(device, image);
                        } else {
                                radv_image_disable_htile(image);
                        }
                }
        } else {
                radv_image_disable_dcc(image);
                radv_image_disable_htile(image);
        }

        assert(image->planes[0].surface.surf_size);
        return VK_SUCCESS;
}

VkResult
radv_image_create(VkDevice _device,
                  const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks* alloc,
                  VkImage *pImage)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
        struct radv_image *image = NULL;
        VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
                                                              pCreateInfo->format);

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

        const unsigned plane_count = vk_format_get_plane_count(format);
        const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

        radv_assert(pCreateInfo->mipLevels > 0);
        radv_assert(pCreateInfo->arrayLayers > 0);
        radv_assert(pCreateInfo->samples > 0);
        radv_assert(pCreateInfo->extent.width > 0);
        radv_assert(pCreateInfo->extent.height > 0);
        radv_assert(pCreateInfo->extent.depth > 0);

        image = vk_zalloc2(&device->alloc, alloc, image_struct_size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (!image)
                return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

        image->type = pCreateInfo->imageType;
        image->info.width = pCreateInfo->extent.width;
        image->info.height = pCreateInfo->extent.height;
        image->info.depth = pCreateInfo->extent.depth;
        image->info.samples = pCreateInfo->samples;
        image->info.storage_samples = pCreateInfo->samples;
        image->info.array_size = pCreateInfo->arrayLayers;
        image->info.levels = pCreateInfo->mipLevels;
        image->info.num_channels = vk_format_get_nr_components(format);

        image->vk_format = format;
        image->tiling = pCreateInfo->tiling;
        image->usage = pCreateInfo->usage;
        image->flags = pCreateInfo->flags;
        image->plane_count = plane_count;

        image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
        if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
                for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
                        if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
                            pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
                                image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
                        else
                                image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
        }

        const VkExternalMemoryImageCreateInfo *external_info =
                vk_find_struct_const(pCreateInfo->pNext,
                                     EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

        image->shareable = external_info;
        if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
                image->info.surf_index = &device->image_mrt_offset_counter;
        }

        for (unsigned plane = 0; plane < image->plane_count; ++plane) {
                radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
        }

        bool delay_layout = external_info &&
                (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

        if (delay_layout) {
                *pImage = radv_image_to_handle(image);
                assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
                return VK_SUCCESS;
        }

        ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
        assert(result == VK_SUCCESS);

        if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
                image->alignment = MAX2(image->alignment, 4096);
                image->size = align64(image->size, image->alignment);
                image->offset = 0;

                image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
                                                      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
                if (!image->bo) {
                        vk_free2(&device->alloc, alloc, image);
                        return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
                }
        }

        *pImage = radv_image_to_handle(image);

        return VK_SUCCESS;
}

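/* Fill one plane of an image view descriptor: build the immutable texture
 * descriptor first, then patch the address/tiling/compression fields with
 * si_set_mutable_tex_desc_fields().
 */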
1537 static void
1538 radv_image_view_make_descriptor(struct radv_image_view *iview,
1539 struct radv_device *device,
1540 VkFormat vk_format,
1541 const VkComponentMapping *components,
1542 bool is_storage_image, bool disable_compression,
1543 unsigned plane_id, unsigned descriptor_plane_id)
1544 {
1545 struct radv_image *image = iview->image;
1546 struct radv_image_plane *plane = &image->planes[plane_id];
1547 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1548 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1549 uint32_t blk_w;
1550 union radv_descriptor *descriptor;
1551 uint32_t hw_level = 0;
1552
1553 if (is_storage_image) {
1554 descriptor = &iview->storage_descriptor;
1555 } else {
1556 descriptor = &iview->descriptor;
1557 }
1558
1559 assert(vk_format_get_plane_count(vk_format) == 1);
1560 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1561 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1562
1563 if (device->physical_device->rad_info.chip_class >= GFX9)
1564 hw_level = iview->base_mip;
1565 radv_make_texture_descriptor(device, image, is_storage_image,
1566 iview->type,
1567 vk_format,
1568 components,
1569 hw_level, hw_level + iview->level_count - 1,
1570 iview->base_layer,
1571 iview->base_layer + iview->layer_count - 1,
1572 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1573 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1574 iview->extent.depth,
1575 descriptor->plane_descriptors[descriptor_plane_id],
1576 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1577
1578 const struct legacy_surf_level *base_level_info = NULL;
1579 if (device->physical_device->rad_info.chip_class <= GFX9) {
1580 if (is_stencil)
1581 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1582 else
1583 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1584 }
1585 si_set_mutable_tex_desc_fields(device, image,
1586 base_level_info,
1587 plane_id,
1588 iview->base_mip,
1589 iview->base_mip,
1590 blk_w, is_stencil, is_storage_image,
1591 is_storage_image || disable_compression,
1592 descriptor->plane_descriptors[descriptor_plane_id]);
1593 }
1594
1595 static unsigned
1596 radv_plane_from_aspect(VkImageAspectFlags mask)
1597 {
1598 switch(mask) {
1599 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1600 return 1;
1601 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1602 return 2;
1603 default:
1604 return 0;
1605 }
1606 }
1607
1608 VkFormat
1609 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1610 {
1611 switch(mask) {
1612 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1613 return image->planes[0].format;
1614 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1615 return image->planes[1].format;
1616 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1617 return image->planes[2].format;
1618 case VK_IMAGE_ASPECT_STENCIL_BIT:
1619 return vk_format_stencil_only(image->vk_format);
1620 case VK_IMAGE_ASPECT_DEPTH_BIT:
1621 return vk_format_depth_only(image->vk_format);
1622 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1623 return vk_format_depth_only(image->vk_format);
1624 default:
1625 return image->vk_format;
1626 }
1627 }
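
/* Illustrative sketch (assuming a 3-plane VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM
 * image and a D32_SFLOAT_S8_UINT depth/stencil image):
 *
 *    radv_plane_from_aspect(VK_IMAGE_ASPECT_PLANE_1_BIT);            -> 1
 *    radv_get_aspect_format(image, VK_IMAGE_ASPECT_PLANE_1_BIT);     -> planes[1].format
 *    radv_get_aspect_format(zs_image, VK_IMAGE_ASPECT_STENCIL_BIT);  -> VK_FORMAT_S8_UINT
 *
 * Anything that is neither a plane nor a depth/stencil aspect falls through
 * to the image's own format.
 */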

void
radv_image_view_init(struct radv_image_view *iview,
                     struct radv_device *device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const struct radv_image_view_extra_create_info *extra_create_info)
{
    RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

    switch (image->type) {
    case VK_IMAGE_TYPE_1D:
    case VK_IMAGE_TYPE_2D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
        break;
    case VK_IMAGE_TYPE_3D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
               <= radv_minify(image->info.depth, range->baseMipLevel));
        break;
    default:
        unreachable("bad VkImageType");
    }
    iview->image = image;
    iview->bo = image->bo;
    iview->type = pCreateInfo->viewType;
    iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
    iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
    iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

    iview->vk_format = pCreateInfo->format;

    /* If the image has an Android external format, pCreateInfo->format will be
     * VK_FORMAT_UNDEFINED. */
    if (iview->vk_format == VK_FORMAT_UNDEFINED)
        iview->vk_format = image->vk_format;

    if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
        iview->vk_format = vk_format_stencil_only(iview->vk_format);
    } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
        iview->vk_format = vk_format_depth_only(iview->vk_format);
    }

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        iview->extent = (VkExtent3D) {
            .width = image->info.width,
            .height = image->info.height,
            .depth = image->info.depth,
        };
    } else {
        iview->extent = (VkExtent3D) {
            .width = radv_minify(image->info.width, range->baseMipLevel),
            .height = radv_minify(image->info.height, range->baseMipLevel),
            .depth = radv_minify(image->info.depth, range->baseMipLevel),
        };
    }

    if (iview->vk_format != image->planes[iview->plane_id].format) {
        unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
        unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
        unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
        unsigned img_bh = vk_format_get_blockheight(image->vk_format);

        iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
        iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

        /* Comment ported from amdvlk -
         * If we have the following image:
         *            Uncompressed pixels   Compressed block sizes (4x4)
         *    mip0:       22 x 22                 6 x 6
         *    mip1:       11 x 11                 3 x 3
         *    mip2:        5 x  5                 2 x 2
         *    mip3:        2 x  2                 1 x 1
         *    mip4:        1 x  1                 1 x 1
         *
         * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
         * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
         * divide-by-two integer math):
         *    mip0:  6x6
         *    mip1:  3x3
         *    mip2:  1x1
         *    mip3:  1x1
         *
         * This means that mip2 will be missing texels.
         *
         * Fix this by calculating the base mip's width and height, then convert that, and round it
         * back up to get the level 0 size.
         * Clamp the converted size between the original values, and next power of two, which
         * means we don't oversize the image.
         */
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            vk_format_is_compressed(image->vk_format) &&
            !vk_format_is_compressed(iview->vk_format)) {
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
        }
    }
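
    /* Worked example of the clamp above, tracing the 22x22 image from the
     * comment viewed with an uncompressed format at baseMipLevel = 2
     * (view_bw = 1, img_bw = 4):
     *
     *    lvl_width  = radv_minify(22, 2)     = 6   texels at mip2
     *    lvl_width  = round_up_u32(6 * 1, 4) = 2   blocks needed at mip2
     *    lvl_width <<= 2                     = 8   back-projected base size
     *
     * CLAMP(8, 6, surf_pitch) then widens the descriptor so the hardware's
     * divide-by-two chain yields 8 >> 2 = 2 blocks at mip2 instead of 1.
     */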

    iview->base_layer = range->baseArrayLayer;
    iview->layer_count = radv_get_layerCount(image, range);
    iview->base_mip = range->baseMipLevel;
    iview->level_count = radv_get_levelCount(image, range);

    bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
    for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
        VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
        radv_image_view_make_descriptor(iview, device, format,
                                        &pCreateInfo->components,
                                        false, disable_compression,
                                        iview->plane_id + i, i);
        radv_image_view_make_descriptor(iview, device,
                                        format, &pCreateInfo->components,
                                        true, disable_compression,
                                        iview->plane_id + i, i);
    }
}

bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     bool in_render_loop,
                                     unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image)) {
        if (layout == VK_IMAGE_LAYOUT_GENERAL &&
            !in_render_loop &&
            !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
            /* It should be safe to enable TC-compat HTILE with
             * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
             * loop and if the image doesn't have the storage bit
             * set. This improves performance for apps that use
             * GENERAL for the main depth pass because this allows
             * compression and this reduces the number of
             * decompressions from/to GENERAL.
             */
            return true;
        }

        return layout != VK_IMAGE_LAYOUT_GENERAL;
    }

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
            layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}
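
/* Illustrative sketch (assumed image state): a TC-compat HTILE depth
 * attachment without the storage bit stays compressed in GENERAL when not
 * in a render loop:
 *
 *    radv_layout_is_htile_compressed(image, VK_IMAGE_LAYOUT_GENERAL,
 *                                    false, queue_mask);  -> true
 *
 * The same call with in_render_loop = true returns false, forcing a
 * decompression before the image is read again.
 */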

bool radv_layout_can_fast_clear(const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

bool radv_layout_dcc_compressed(const struct radv_device *device,
                                const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    /* Don't compress compute transfer dst, as image stores are not supported. */
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
        (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
        return false;

    return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
    if (!image->exclusive)
        return image->queue_family_mask;
    if (family == VK_QUEUE_FAMILY_EXTERNAL ||
        family == VK_QUEUE_FAMILY_FOREIGN_EXT)
        return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
    if (family == VK_QUEUE_FAMILY_IGNORED)
        return 1u << queue_family;
    return 1u << family;
}
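
/* Illustrative sketch: for an exclusive image, a barrier that keeps
 * ownership on the current queue (VK_QUEUE_FAMILY_IGNORED) masks only that
 * family:
 *
 *    radv_image_queue_family_mask(image, VK_QUEUE_FAMILY_IGNORED,
 *                                 RADV_QUEUE_GENERAL);
 *        == 1u << RADV_QUEUE_GENERAL
 *
 * EXTERNAL/FOREIGN ownership conservatively enables every family instead.
 */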

VkResult
radv_CreateImage(VkDevice device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#ifdef ANDROID
    const VkNativeBufferANDROID *gralloc_info =
        vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

    if (gralloc_info)
        return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
                                       pAllocator, pImage);
#endif

    const struct wsi_image_create_info *wsi_info =
        vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
    bool scanout = wsi_info && wsi_info->scanout;

    return radv_image_create(device,
                             &(struct radv_image_create_info) {
                                 .vk_info = pCreateInfo,
                                 .scanout = scanout,
                             },
                             pAllocator,
                             pImage);
}
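
/* Minimal client-side sketch (parameters chosen for illustration) of a
 * create call that reaches radv_image_create() with scanout = false, i.e.
 * with neither a NATIVE_BUFFER_ANDROID nor a WSI struct chained:
 *
 *    VkImageCreateInfo info = {
 *        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
 *        .imageType = VK_IMAGE_TYPE_2D,
 *        .format = VK_FORMAT_R8G8B8A8_UNORM,
 *        .extent = { 256, 256, 1 },
 *        .mipLevels = 1,
 *        .arrayLayers = 1,
 *        .samples = VK_SAMPLE_COUNT_1_BIT,
 *        .tiling = VK_IMAGE_TILING_OPTIMAL,
 *        .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
 *        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
 *        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
 *    };
 *    VkImage image;
 *    vkCreateImage(device, &info, NULL, &image);
 */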

void
radv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image, image, _image);

    if (!image)
        return;

    if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
        device->ws->buffer_destroy(image->bo);

    if (image->owned_memory != VK_NULL_HANDLE)
        radv_FreeMemory(_device, image->owned_memory, pAllocator);

    vk_free2(&device->alloc, pAllocator, image);
}

void radv_GetImageSubresourceLayout(
    VkDevice                                    _device,
    VkImage                                     _image,
    const VkImageSubresource*                   pSubresource,
    VkSubresourceLayout*                        pLayout)
{
    RADV_FROM_HANDLE(radv_image, image, _image);
    RADV_FROM_HANDLE(radv_device, device, _device);
    int level = pSubresource->mipLevel;
    int layer = pSubresource->arrayLayer;

    unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

    struct radv_image_plane *plane = &image->planes[plane_id];
    struct radeon_surf *surface = &plane->surface;

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

        pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
        if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
            /* Adjust the number of bytes between each row because
             * the pitch is actually the number of components per
             * row.
             */
            pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
        } else {
            uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

            assert(util_is_power_of_two_nonzero(surface->bpe));
            pLayout->rowPitch = pitch * surface->bpe;
        }

        pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
        pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
        pLayout->size = surface->u.gfx9.surf_slice_size;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    } else {
        pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
        pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
        pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    }
}
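
/* Illustrative sketch (assumes a linear image whose memory is mapped at
 * `map`): addressing texel (x, y) of layer `l` with the layout filled in
 * above:
 *
 *    VkSubresourceLayout lay;
 *    vkGetImageSubresourceLayout(dev, img, &subres, &lay);
 *    uint8_t *texel = (uint8_t *)map + lay.offset + l * lay.arrayPitch +
 *                     y * lay.rowPitch + x * bytes_per_texel;
 *
 * where bytes_per_texel matches the format (e.g. 4 for R8G8B8A8).
 */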

VkResult
radv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_image_view *view;

    view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (view == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    radv_image_view_init(view, device, pCreateInfo, NULL);

    *pView = radv_image_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
                      const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image_view, iview, _iview);

    if (!iview)
        return;
    vk_free2(&device->alloc, pAllocator, iview);
}

void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
                           const VkBufferViewCreateInfo *pCreateInfo)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

    view->bo = buffer->bo;
    view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
        buffer->size - pCreateInfo->offset : pCreateInfo->range;
    view->vk_format = pCreateInfo->format;

    radv_make_buffer_descriptor(device, buffer, view->vk_format,
                                pCreateInfo->offset, view->range, view->state);
}
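
/* Illustrative sketch (sizes assumed): with buffer->size = 4096 and
 * pCreateInfo->offset = 256, a view created with range = VK_WHOLE_SIZE
 * covers the remaining 4096 - 256 = 3840 bytes, matching the computation
 * above.
 */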

VkResult
radv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_buffer_view *view;

    view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!view)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    radv_buffer_view_init(view, device, pCreateInfo);

    *pView = radv_buffer_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

    if (!view)
        return;

    vk_free2(&device->alloc, pAllocator, view);
}