7782e77d1fe70e9f3f24e2f4f27eb555412f21eb
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
37
38 #include "gfx10_format_table.h"
39
40 static unsigned
41 radv_choose_tiling(struct radv_device *device,
42 const VkImageCreateInfo *pCreateInfo,
43 VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (!vk_format_is_compressed(format) &&
51 !vk_format_is_depth_or_stencil(format)
52 && device->physical_device->rad_info.chip_class <= GFX8) {
53 /* this causes hangs in some VK CTS tests on GFX9. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
56 /* Only very thin and long 2D textures should benefit from
57 * linear_aligned. */
58 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED;
60 }
61
62 /* MSAA resources must be 2D tiled. */
63 if (pCreateInfo->samples > 1)
64 return RADEON_SURF_MODE_2D;
65
66 return RADEON_SURF_MODE_2D;
67 }
68
69 static bool
70 radv_use_tc_compat_htile_for_image(struct radv_device *device,
71 const VkImageCreateInfo *pCreateInfo,
72 VkFormat format)
73 {
74 /* TC-compat HTILE is only available for GFX8+. */
75 if (device->physical_device->rad_info.chip_class < GFX8)
76 return false;
77
78 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
79 return false;
80
81 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
82 return false;
83
84 if (pCreateInfo->mipLevels > 1)
85 return false;
86
87 /* Do not enable TC-compatible HTILE if the image isn't readable by a
88 * shader because no texture fetches will happen.
89 */
90 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
91 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
92 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
93 return false;
94
95 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
96 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
97 */
98 if (pCreateInfo->samples >= 2 &&
99 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
100 (format == VK_FORMAT_D32_SFLOAT &&
101 device->physical_device->rad_info.chip_class >= GFX10)))
102 return false;
103
104 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
105 * supports 32-bit. Though, it's possible to enable TC-compat for
106 * 16-bit depth surfaces if no Z planes are compressed.
107 */
108 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
109 format != VK_FORMAT_D32_SFLOAT &&
110 format != VK_FORMAT_D16_UNORM)
111 return false;
112
113 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
114 const struct VkImageFormatListCreateInfo *format_list =
115 (const struct VkImageFormatListCreateInfo *)
116 vk_find_struct_const(pCreateInfo->pNext,
117 IMAGE_FORMAT_LIST_CREATE_INFO);
118
119 /* We have to ignore the existence of the list if viewFormatCount = 0 */
120 if (format_list && format_list->viewFormatCount) {
121 /* compatibility is transitive, so we only need to check
122 * one format with everything else.
123 */
124 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
125 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
126 continue;
127
128 if (format != format_list->pViewFormats[i])
129 return false;
130 }
131 } else {
132 return false;
133 }
134 }
135
136 return true;
137 }
138
139 static bool
140 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
141 {
142 if (info->bo_metadata) {
143 if (device->physical_device->rad_info.chip_class >= GFX9)
144 return info->bo_metadata->u.gfx9.scanout;
145 else
146 return info->bo_metadata->u.legacy.scanout;
147 }
148
149 return info->scanout;
150 }
151
152 static bool
153 radv_image_use_fast_clear_for_image(const struct radv_device *device,
154 const struct radv_image *image)
155 {
156 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
157 return true;
158
159 if (image->info.samples <= 1 &&
160 image->info.width * image->info.height <= 512 * 512) {
161 /* Do not enable CMASK or DCC for small surfaces where the cost
162 * of the eliminate pass can be higher than the benefit of fast
163 * clear. RadeonSI does this, but the image threshold is
164 * different.
165 */
166 return false;
167 }
168
169 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
170 (image->exclusive || image->queue_family_mask == 1);
171 }
172
173 static bool
174 radv_use_dcc_for_image(struct radv_device *device,
175 const struct radv_image *image,
176 const VkImageCreateInfo *pCreateInfo,
177 VkFormat format)
178 {
179 bool dcc_compatible_formats;
180 bool blendable;
181
182 /* DCC (Delta Color Compression) is only available for GFX8+. */
183 if (device->physical_device->rad_info.chip_class < GFX8)
184 return false;
185
186 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
187 return false;
188
189 if (image->shareable)
190 return false;
191
192 /* TODO: Enable DCC for storage images. */
193 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
194 return false;
195
196 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
197 return false;
198
199 if (vk_format_is_subsampled(format) ||
200 vk_format_get_plane_count(format) > 1)
201 return false;
202
203 if (!radv_image_use_fast_clear_for_image(device, image))
204 return false;
205
206 /* TODO: Enable DCC for mipmaps on GFX9+. */
207 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
208 device->physical_device->rad_info.chip_class >= GFX9)
209 return false;
210
211 /* Do not enable DCC for mipmapped arrays because performance is worse. */
212 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
213 return false;
214
215 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
216 * 2x can be enabled with an option.
217 */
218 if (pCreateInfo->samples > 2 ||
219 (pCreateInfo->samples == 2 &&
220 !device->physical_device->dcc_msaa_allowed))
221 return false;
222
223 /* Determine if the formats are DCC compatible. */
224 dcc_compatible_formats =
225 radv_is_colorbuffer_format_supported(format,
226 &blendable);
227
228 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
229 const struct VkImageFormatListCreateInfo *format_list =
230 (const struct VkImageFormatListCreateInfo *)
231 vk_find_struct_const(pCreateInfo->pNext,
232 IMAGE_FORMAT_LIST_CREATE_INFO);
233
234 /* We have to ignore the existence of the list if viewFormatCount = 0 */
235 if (format_list && format_list->viewFormatCount) {
236 /* compatibility is transitive, so we only need to check
237 * one format with everything else. */
238 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
239 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
240 continue;
241
242 if (!radv_dcc_formats_compatible(format,
243 format_list->pViewFormats[i]))
244 dcc_compatible_formats = false;
245 }
246 } else {
247 dcc_compatible_formats = false;
248 }
249 }
250
251 if (!dcc_compatible_formats)
252 return false;
253
254 return true;
255 }
256
257 static inline bool
258 radv_use_fmask_for_image(const struct radv_device *device,
259 const struct radv_image *image)
260 {
261 return image->info.samples > 1 &&
262 ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
263 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
264 }
265
266 static inline bool
267 radv_use_htile_for_image(const struct radv_device *device,
268 const struct radv_image *image)
269 {
270 return image->info.levels == 1 &&
271 ((image->info.width * image->info.height >= 8 * 8) ||
272 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
273 }
274
275 static bool
276 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
277 struct radv_image *image)
278 {
279 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
280 return false;
281
282 /* TC-compat CMASK is only available for GFX8+. */
283 if (device->physical_device->rad_info.chip_class < GFX8)
284 return false;
285
286 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
287 return false;
288
289 if (radv_image_has_dcc(image))
290 return false;
291
292 if (!radv_image_has_cmask(image))
293 return false;
294
295 return true;
296 }
297
298 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
299 {
300 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
301 }
302
303 static bool
304 radv_is_valid_opaque_metadata(const struct radv_device *device,
305 const struct radeon_bo_metadata *md)
306 {
307 if (md->metadata[0] != 1 ||
308 md->metadata[1] != si_get_bo_metadata_word1(device))
309 return false;
310
311 if (md->size_metadata < 40)
312 return false;
313
314 return true;
315 }
316
/* Overwrite the tiling-related fields of 'surface' with the layout recorded
 * in the imported BO metadata 'md', so the surface matches the exporter's
 * layout exactly.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	/* Clear the previously chosen mode; it is re-derived from md below. */
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* GFX9+: swizzle mode 0 means linear, anything else is tiled. */
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		/* Pre-GFX9: copy the legacy tiling parameters verbatim. */
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		/* Macro tiling takes priority over micro tiling; if neither
		 * is set, the surface is linear. */
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

	}
}
348
349 static VkResult
350 radv_patch_image_dimensions(struct radv_device *device,
351 struct radv_image *image,
352 const struct radv_image_create_info *create_info,
353 struct ac_surf_info *image_info)
354 {
355 unsigned width = image->info.width;
356 unsigned height = image->info.height;
357
358 /*
359 * minigbm sometimes allocates bigger images which is going to result in
360 * weird strides and other properties. Lets be lenient where possible and
361 * fail it on GFX10 (as we cannot cope there).
362 *
363 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
364 */
365 if (create_info->bo_metadata &&
366 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
367 const struct radeon_bo_metadata *md = create_info->bo_metadata;
368
369 if (device->physical_device->rad_info.chip_class >= GFX10) {
370 width = G_00A004_WIDTH_LO(md->metadata[3]) +
371 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
372 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
373 } else {
374 width = G_008F18_WIDTH(md->metadata[4]) + 1;
375 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
376 }
377 }
378
379 if (image->info.width == width && image->info.height == height)
380 return VK_SUCCESS;
381
382 if (width < image->info.width || height < image->info.height) {
383 fprintf(stderr,
384 "The imported image has smaller dimensions than the internal\n"
385 "dimensions. Using it is going to fail badly, so we reject\n"
386 "this import.\n"
387 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
388 image->info.width, image->info.height, width, height);
389 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
390 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
391 fprintf(stderr,
392 "Tried to import an image with inconsistent width on GFX10.\n"
393 "As GFX10 has no separate stride fields we cannot cope with\n"
394 "an inconsistency in width and will fail this import.\n"
395 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
396 image->info.width, image->info.height, width, height);
397 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
398 } else {
399 fprintf(stderr,
400 "Tried to import an image with inconsistent width on pre-GFX10.\n"
401 "As GFX10 has no separate stride fields we cannot cope with\n"
402 "an inconsistency and would fail on GFX10.\n"
403 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
404 image->info.width, image->info.height, width, height);
405 }
406 image_info->width = width;
407 image_info->height = height;
408
409 return VK_SUCCESS;
410 }
411
412 static VkResult
413 radv_patch_image_from_extra_info(struct radv_device *device,
414 struct radv_image *image,
415 const struct radv_image_create_info *create_info,
416 struct ac_surf_info *image_info)
417 {
418 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
419 if (result != VK_SUCCESS)
420 return result;
421
422 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
423 if (create_info->bo_metadata) {
424 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
425 create_info->bo_metadata);
426 }
427
428 if (radv_surface_has_scanout(device, create_info)) {
429 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
430 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
431
432 image->info.surf_index = NULL;
433 }
434 }
435 return VK_SUCCESS;
436 }
437
/* Initialize the radeon_surf for one plane of a new image: block dimensions,
 * bytes-per-element, tiling mode and the RADEON_SURF_* feature flags.
 * Always returns 0.
 */
static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo,
		  VkFormat image_format)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* align byte per element on dword */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	/* Translate the Vulkan image type (plus arraying) into the surface
	 * TYPE flag. */
	switch (pCreateInfo->imageType){
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	/* Required for clearing/initializing a specific layer on GFX8. */
	surface->flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		/* Skip HTILE when the image doesn't qualify or HiZ is
		 * disabled via debug flag. */
		if (!radv_use_htile_for_image(device, image) ||
		    (device->instance->debug_flags & RADV_DEBUG_NO_HIZ))
			surface->flags |= RADEON_SURF_NO_HTILE;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	/* 128-bit compressed 3D textures cannot be render targets on GFX9+. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	if (!radv_use_fmask_for_image(device, image))
		surface->flags |= RADEON_SURF_NO_FMASK;

	return 0;
}
514
515 static inline unsigned
516 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
517 {
518 if (stencil)
519 return plane->surface.u.legacy.stencil_tiling_index[level];
520 else
521 return plane->surface.u.legacy.tiling_index[level];
522 }
523
524 static unsigned radv_map_swizzle(unsigned swizzle)
525 {
526 switch (swizzle) {
527 case VK_SWIZZLE_Y:
528 return V_008F0C_SQ_SEL_Y;
529 case VK_SWIZZLE_Z:
530 return V_008F0C_SQ_SEL_Z;
531 case VK_SWIZZLE_W:
532 return V_008F0C_SQ_SEL_W;
533 case VK_SWIZZLE_0:
534 return V_008F0C_SQ_SEL_0;
535 case VK_SWIZZLE_1:
536 return V_008F0C_SQ_SEL_1;
537 default: /* VK_SWIZZLE_X */
538 return V_008F0C_SQ_SEL_X;
539 }
540 }
541
/* Fill the 4-dword typed buffer descriptor for 'buffer' at [offset,
 * offset+range) with format 'vk_format', written into state[0..3].
 */
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	va += offset;
	/* Dwords 0-1: 48-bit base address plus element stride. */
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	/* NUM_RECORDS is in elements everywhere except GFX8, where it stays
	 * in bytes. */
	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		/* Pre-GFX10: separate numeric and data format fields. */
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}
599
/* Patch the mutable (address/tiling/compression) fields of an existing image
 * descriptor in 'state', leaving the format/swizzle fields alone. Handles
 * the differing dword layouts of GFX6-8, GFX9 and GFX10+.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	/* On GFX9+ the per-aspect offset lives in the surface itself; older
	 * chips carry it in the per-level info. */
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		/* Compute the metadata (DCC or TC-compat HTILE) address if
		 * compression is in use and not explicitly disabled. */
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + plane->surface.dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + plane->surface.htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			/* HTILE metadata defaults to fully aligned; DCC
			 * carries its own alignment flags. */
			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			/* Same default-vs-DCC alignment handling as GFX10. */
			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}
718
719 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
720 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
721 {
722 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
723 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
724
725 /* GFX9 allocates 1D textures as 2D. */
726 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
727 image_type = VK_IMAGE_TYPE_2D;
728 switch (image_type) {
729 case VK_IMAGE_TYPE_1D:
730 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
731 case VK_IMAGE_TYPE_2D:
732 if (nr_samples > 1)
733 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
734 else
735 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
736 case VK_IMAGE_TYPE_3D:
737 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
738 return V_008F1C_SQ_RSRC_IMG_3D;
739 else
740 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
741 default:
742 unreachable("illegal image type");
743 }
744 }
745
746 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
747 {
748 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
749
750 if (swizzle[3] == VK_SWIZZLE_X) {
751 /* For the pre-defined border color values (white, opaque
752 * black, transparent black), the only thing that matters is
753 * that the alpha channel winds up in the correct place
754 * (because the RGB channels are all the same) so either of
755 * these enumerations will work.
756 */
757 if (swizzle[2] == VK_SWIZZLE_Y)
758 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
759 else
760 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
761 } else if (swizzle[0] == VK_SWIZZLE_X) {
762 if (swizzle[1] == VK_SWIZZLE_Y)
763 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
764 else
765 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
766 } else if (swizzle[1] == VK_SWIZZLE_X) {
767 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
768 } else if (swizzle[2] == VK_SWIZZLE_X) {
769 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
770 }
771
772 return bc_swizzle;
773 }
774
775 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
776 {
777 const struct vk_format_description *desc = vk_format_description(format);
778
779 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
780 return desc->swizzle[3] == VK_SWIZZLE_X;
781
782 return radv_translate_colorswap(format, false) <= 1;
783 }
/**
 * Build the sampler view descriptor for a texture (GFX10).
 *
 * Fills state[0..7] with the image descriptor and, when the image has
 * FMASK, fmask_state[0..7] with the FMASK descriptor (zeroed otherwise).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

	/* Depth/stencil views replicate the single channel into X. */
	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	/* Fix up height/depth for the array-like resource types. */
	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
	        height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	/* For MSAA images the level fields encode the sample count instead. */
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(0) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	/* DCC block-size limits and alpha position for compressed reads. */
	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

		/* FMASK format depends solely on the sample count. */
		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
		fmask_state[7] = 0;
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}
908
909 /**
910 * Build the sampler view descriptor for a texture (SI-GFX9)
911 */
912 static void
913 si_make_texture_descriptor(struct radv_device *device,
914 struct radv_image *image,
915 bool is_storage_image,
916 VkImageViewType view_type,
917 VkFormat vk_format,
918 const VkComponentMapping *mapping,
919 unsigned first_level, unsigned last_level,
920 unsigned first_layer, unsigned last_layer,
921 unsigned width, unsigned height, unsigned depth,
922 uint32_t *state,
923 uint32_t *fmask_state)
924 {
925 const struct vk_format_description *desc;
926 enum vk_swizzle swizzle[4];
927 int first_non_void;
928 unsigned num_format, data_format, type;
929
930 desc = vk_format_description(vk_format);
931
932 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
933 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
934 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
935 } else {
936 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
937 }
938
939 first_non_void = vk_format_get_first_non_void_channel(vk_format);
940
941 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
942 if (num_format == ~0) {
943 num_format = 0;
944 }
945
946 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
947 if (data_format == ~0) {
948 data_format = 0;
949 }
950
951 /* S8 with either Z16 or Z32 HTILE need a special format. */
952 if (device->physical_device->rad_info.chip_class == GFX9 &&
953 vk_format == VK_FORMAT_S8_UINT &&
954 radv_image_is_tc_compat_htile(image)) {
955 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
956 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
957 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
958 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
959 }
960 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
961 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
962 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
963 height = 1;
964 depth = image->info.array_size;
965 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
966 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
967 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
968 depth = image->info.array_size;
969 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
970 depth = image->info.array_size / 6;
971
972 state[0] = 0;
973 state[1] = (S_008F14_DATA_FORMAT(data_format) |
974 S_008F14_NUM_FORMAT(num_format));
975 state[2] = (S_008F18_WIDTH(width - 1) |
976 S_008F18_HEIGHT(height - 1) |
977 S_008F18_PERF_MOD(4));
978 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
979 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
980 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
981 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
982 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
983 0 : first_level) |
984 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
985 util_logbase2(image->info.samples) :
986 last_level) |
987 S_008F1C_TYPE(type));
988 state[4] = 0;
989 state[5] = S_008F24_BASE_ARRAY(first_layer);
990 state[6] = 0;
991 state[7] = 0;
992
993 if (device->physical_device->rad_info.chip_class == GFX9) {
994 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
995
996 /* Depth is the last accessible layer on Gfx9.
997 * The hw doesn't need to know the total number of layers.
998 */
999 if (type == V_008F1C_SQ_RSRC_IMG_3D)
1000 state[4] |= S_008F20_DEPTH(depth - 1);
1001 else
1002 state[4] |= S_008F20_DEPTH(last_layer);
1003
1004 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
1005 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
1006 util_logbase2(image->info.samples) :
1007 image->info.levels - 1);
1008 } else {
1009 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1010 state[4] |= S_008F20_DEPTH(depth - 1);
1011 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1012 }
1013 if (image->planes[0].surface.dcc_offset) {
1014 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1015 } else {
1016 /* The last dword is unused by hw. The shader uses it to clear
1017 * bits in the first dword of sampler state.
1018 */
1019 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1020 if (first_level == last_level)
1021 state[7] = C_008F30_MAX_ANISO_RATIO;
1022 else
1023 state[7] = 0xffffffff;
1024 }
1025 }
1026
1027 /* Initialize the sampler view for FMASK. */
1028 if (radv_image_has_fmask(image)) {
1029 uint32_t fmask_format, num_format;
1030 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1031 uint64_t va;
1032
1033 assert(image->plane_count == 1);
1034
1035 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1036
1037 if (device->physical_device->rad_info.chip_class == GFX9) {
1038 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1039 switch (image->info.samples) {
1040 case 2:
1041 num_format = V_008F14_IMG_FMASK_8_2_2;
1042 break;
1043 case 4:
1044 num_format = V_008F14_IMG_FMASK_8_4_4;
1045 break;
1046 case 8:
1047 num_format = V_008F14_IMG_FMASK_32_8_8;
1048 break;
1049 default:
1050 unreachable("invalid nr_samples");
1051 }
1052 } else {
1053 switch (image->info.samples) {
1054 case 2:
1055 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1056 break;
1057 case 4:
1058 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1059 break;
1060 case 8:
1061 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1062 break;
1063 default:
1064 assert(0);
1065 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1066 }
1067 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1068 }
1069
1070 fmask_state[0] = va >> 8;
1071 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1072 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1073 S_008F14_DATA_FORMAT(fmask_format) |
1074 S_008F14_NUM_FORMAT(num_format);
1075 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1076 S_008F18_HEIGHT(height - 1);
1077 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1078 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1079 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1080 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1081 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1082 fmask_state[4] = 0;
1083 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1084 fmask_state[6] = 0;
1085 fmask_state[7] = 0;
1086
1087 if (device->physical_device->rad_info.chip_class == GFX9) {
1088 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1089 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1090 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1091 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1092 S_008F24_META_RB_ALIGNED(1);
1093
1094 if (radv_image_is_tc_compat_cmask(image)) {
1095 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1096
1097 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1098 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1099 fmask_state[7] |= va >> 8;
1100 }
1101 } else {
1102 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1103 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1104 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1105 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1106
1107 if (radv_image_is_tc_compat_cmask(image)) {
1108 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1109
1110 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1111 fmask_state[7] |= va >> 8;
1112 }
1113 }
1114 } else if (fmask_state)
1115 memset(fmask_state, 0, 8 * 4);
1116 }
1117
1118 static void
1119 radv_make_texture_descriptor(struct radv_device *device,
1120 struct radv_image *image,
1121 bool is_storage_image,
1122 VkImageViewType view_type,
1123 VkFormat vk_format,
1124 const VkComponentMapping *mapping,
1125 unsigned first_level, unsigned last_level,
1126 unsigned first_layer, unsigned last_layer,
1127 unsigned width, unsigned height, unsigned depth,
1128 uint32_t *state,
1129 uint32_t *fmask_state)
1130 {
1131 if (device->physical_device->rad_info.chip_class >= GFX10) {
1132 gfx10_make_texture_descriptor(device, image, is_storage_image,
1133 view_type, vk_format, mapping,
1134 first_level, last_level,
1135 first_layer, last_layer,
1136 width, height, depth,
1137 state, fmask_state);
1138 } else {
1139 si_make_texture_descriptor(device, image, is_storage_image,
1140 view_type, vk_format, mapping,
1141 first_level, last_level,
1142 first_layer, last_layer,
1143 width, height, depth,
1144 state, fmask_state);
1145 }
1146 }
1147
/* Fill the opaque metadata words describing this image. The descriptor's
 * base address is cleared so the blob is position-independent; a consumer
 * can combine it with the BO address to rebuild a usable descriptor.
 * Only single-plane images are supported.
 */
static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	assert(image->plane_count == 1);

	/* Metadata image format format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 * [2] is always 0, because the base address is cleared
	 * [9] is the DCC offset bits [39:8] from the beginning of
	 * the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);


	/* Build a descriptor covering the whole resource (all levels/layers,
	 * identity swizzle). */
	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->planes[0].surface.dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
1200
1201 void
1202 radv_init_metadata(struct radv_device *device,
1203 struct radv_image *image,
1204 struct radeon_bo_metadata *metadata)
1205 {
1206 struct radeon_surf *surface = &image->planes[0].surface;
1207
1208 memset(metadata, 0, sizeof(*metadata));
1209
1210 if (device->physical_device->rad_info.chip_class >= GFX9) {
1211 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1212 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1213 } else {
1214 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1215 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1216 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1217 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1218 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1219 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1220 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1221 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1222 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1223 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1224 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1225 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1226 }
1227 radv_query_opaque_metadata(device, image, metadata);
1228 }
1229
/* Apply an externally-specified base offset and row stride to plane 0's
 * surface. Thin wrapper over the common ac_surface helper, which rewrites
 * the per-level offsets for all mip levels of the image.
 */
void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	ac_surface_override_offset_stride(&device->physical_device->rad_info,
					  &image->planes[0].surface,
					  image->info.levels, offset, stride);
}
1239
1240 static void
1241 radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1242 const struct radv_image *image,
1243 struct radeon_surf *surf)
1244 {
1245 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
1246 image->info.levels > 1 || image->info.depth > 1 ||
1247 radv_image_has_dcc(image) ||
1248 !radv_image_use_fast_clear_for_image(device, image))
1249 return;
1250
1251 assert(image->info.storage_samples == 1);
1252
1253 surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
1254 surf->total_size = surf->cmask_offset + surf->cmask_size;
1255 surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
1256 }
1257
1258 static void
1259 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1260 {
1261 if (radv_image_has_dcc(image)) {
1262 image->fce_pred_offset = image->size;
1263 image->size += 8 * image->info.levels;
1264
1265 image->dcc_pred_offset = image->size;
1266 image->size += 8 * image->info.levels;
1267 }
1268
1269 if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
1270 radv_image_has_htile(image)) {
1271 image->clear_value_offset = image->size;
1272 image->size += 8 * image->info.levels;
1273 }
1274
1275 if (radv_image_is_tc_compat_htile(image) &&
1276 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1277 /* Metadata for the TC-compatible HTILE hardware bug which
1278 * have to be fixed by updating ZRANGE_PRECISION when doing
1279 * fast depth clears to 0.0f.
1280 */
1281 image->tc_compat_zrange_offset = image->size;
1282 image->size += image->info.levels * 4;
1283 }
1284 }
1285
/* Compute the final layout of an image: per-plane surfaces and offsets,
 * total size/alignment, single-sample CMASK placement and the in-memory
 * metadata values. Called from radv_image_create(), or later for images
 * whose layout is deferred (see the delay_layout path there).
 */
VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		/* Subsampled planes of multi-planar formats are smaller than
		 * the luma plane by the format's divisors. */
		if (plane) {
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		/* Metadata (DCC/FMASK/HTILE) is disabled for multi-planar
		 * images or when the caller explicitly requested it. */
		if (create_info.no_metadata_planes || image->plane_count > 1) {
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
			                                      RADEON_SURF_NO_FMASK |
			                                      RADEON_SURF_NO_HTILE;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		if (!create_info.no_metadata_planes && image->plane_count == 1)
			radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

		/* Pack planes back-to-back, respecting each plane's alignment. */
		image->planes[plane].offset = align(image->size, image->planes[plane].surface.alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.total_size;
		image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	image->tc_compatible_cmask = radv_image_has_cmask(image) &&
	                             radv_use_tc_compat_cmask_for_image(device, image);

	image->tc_compatible_htile = radv_image_has_htile(image) &&
	                             image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;

	radv_image_alloc_values(device, image);

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
1346
1347 static void
1348 radv_destroy_image(struct radv_device *device,
1349 const VkAllocationCallbacks *pAllocator,
1350 struct radv_image *image)
1351 {
1352 if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1353 device->ws->buffer_destroy(image->bo);
1354
1355 if (image->owned_memory != VK_NULL_HANDLE) {
1356 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1357 radv_free_memory(device, pAllocator, mem);
1358 }
1359
1360 vk_object_base_finish(&image->base);
1361 vk_free2(&device->vk.alloc, pAllocator, image);
1362 }
1363
/* Create a radv_image from a VkImageCreateInfo plus radv-specific extras.
 *
 * For images with an Android external format, layout computation is
 * deferred until the real format is known; such images are returned before
 * radv_image_create_layout() runs. Sparse images additionally get a
 * virtual buffer covering the whole image.
 */
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
	                                                      pCreateInfo->format);
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	/* The plane array is allocated as a trailing part of the image. */
	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	/* Concurrent images record the set of queue families that may access
	 * them; external/foreign families allow every family. */
	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
		                     EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;

	image->shareable = external_info;
	/* NOTE(review): surf_index presumably diversifies tile swizzle across
	 * color images — confirm against radv_init_surface. */
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	/* Android hardware buffers: the layout cannot be computed until the
	 * buffer is bound, so return the image without a layout. */
	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
		                                      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			radv_destroy_image(device, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}
1462
/* Build the sampled-image or storage-image descriptor for one plane of an
 * image view, including the mutable (address/tiling) fields.
 */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	/* Convert the surface block width from the plane's format to the
	 * view's format (both must describe the same plane). */
	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	/* On GFX9+ the descriptor itself selects the base mip; older chips
	 * get hw_level 0 and the level offset applied through
	 * base_level_info below. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}
1520
1521 static unsigned
1522 radv_plane_from_aspect(VkImageAspectFlags mask)
1523 {
1524 switch(mask) {
1525 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1526 return 1;
1527 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1528 return 2;
1529 default:
1530 return 0;
1531 }
1532 }
1533
1534 VkFormat
1535 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1536 {
1537 switch(mask) {
1538 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1539 return image->planes[0].format;
1540 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1541 return image->planes[1].format;
1542 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1543 return image->planes[2].format;
1544 case VK_IMAGE_ASPECT_STENCIL_BIT:
1545 return vk_format_stencil_only(image->vk_format);
1546 case VK_IMAGE_ASPECT_DEPTH_BIT:
1547 return vk_format_depth_only(image->vk_format);
1548 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1549 return vk_format_depth_only(image->vk_format);
1550 default:
1551 return image->vk_format;
1552 }
1553 }
1554
/* Initialize an image view: validate the subresource range, compute the
 * view extent (including block-size compensation when the view format
 * differs from the image format) and build the sampled + storage
 * descriptors for every selected plane.
 */
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	/* Single-aspect views of combined depth/stencil use the format of
	 * the selected component. */
	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	/* On GFX9+ the descriptor always holds the base-level extent; older
	 * chips use the extent of the view's base mip level. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	/* When the view reinterprets the image with a different block size
	 * (e.g. uncompressed view of a compressed image), rescale the extent
	 * into view-format blocks. */
	if (iview->vk_format != image->planes[iview->plane_id].format) {
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *              Uncompressed pixels   Compressed block sizes (4x4)
		 *      mip0:       22 x 22                   6 x 6
		 *      mip1:       11 x 11                   3 x 3
		 *      mip2:        5 x  5                   2 x 2
		 *      mip3:        2 x  2                   1 x 1
		 *      mip4:        1 x  1                   1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 *      mip0:  6x6
		 *      mip1:  3x3
		 *      mip2:  1x1
		 *      mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		 if (device->physical_device->rad_info.chip_class >= GFX9 &&
		     vk_format_is_compressed(image->vk_format) &&
		     !vk_format_is_compressed(iview->vk_format)) {
			 unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
			 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			 lvl_width <<= range->baseMipLevel;
			 lvl_height <<= range->baseMipLevel;

			 iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			 iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		 }
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	/* Build both the sampled and the storage descriptor for each plane
	 * the view covers. */
	bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}
1678
1679 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1680 VkImageLayout layout,
1681 bool in_render_loop,
1682 unsigned queue_mask)
1683 {
1684 if (radv_image_is_tc_compat_htile(image)) {
1685 if (layout == VK_IMAGE_LAYOUT_GENERAL &&
1686 !in_render_loop &&
1687 !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1688 /* It should be safe to enable TC-compat HTILE with
1689 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1690 * loop and if the image doesn't have the storage bit
1691 * set. This improves performance for apps that use
1692 * GENERAL for the main depth pass because this allows
1693 * compression and this reduces the number of
1694 * decompressions from/to GENERAL.
1695 */
1696 return true;
1697 }
1698
1699 return layout != VK_IMAGE_LAYOUT_GENERAL;
1700 }
1701
1702 return radv_image_has_htile(image) &&
1703 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1704 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1705 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1706 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1707 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1708 }
1709
1710 bool radv_layout_can_fast_clear(const struct radv_image *image,
1711 VkImageLayout layout,
1712 bool in_render_loop,
1713 unsigned queue_mask)
1714 {
1715 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
1716 queue_mask == (1u << RADV_QUEUE_GENERAL);
1717 }
1718
1719 bool radv_layout_dcc_compressed(const struct radv_device *device,
1720 const struct radv_image *image,
1721 VkImageLayout layout,
1722 bool in_render_loop,
1723 unsigned queue_mask)
1724 {
1725 /* Don't compress compute transfer dst, as image stores are not supported. */
1726 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1727 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1728 return false;
1729
1730 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1731 }
1732
1733
1734 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1735 {
1736 if (!image->exclusive)
1737 return image->queue_family_mask;
1738 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1739 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1740 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1741 if (family == VK_QUEUE_FAMILY_IGNORED)
1742 return 1u << queue_family;
1743 return 1u << family;
1744 }
1745
/* Vulkan entry point: create an image. Android gralloc-backed images take
 * a dedicated path; otherwise the WSI scanout flag (if present) is
 * forwarded into the radv create info.
 */
VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
		                              pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}
1773
1774 void
1775 radv_DestroyImage(VkDevice _device, VkImage _image,
1776 const VkAllocationCallbacks *pAllocator)
1777 {
1778 RADV_FROM_HANDLE(radv_device, device, _device);
1779 RADV_FROM_HANDLE(radv_image, image, _image);
1780
1781 if (!image)
1782 return;
1783
1784 radv_destroy_image(device, pAllocator, image);
1785 }
1786
/* Report the memory layout (offset/pitches/size) of one subresource of a
 * linear-accessible image, per vkGetImageSubresourceLayout. The layout comes
 * straight from the addrlib-computed radeon_surf of the requested plane;
 * GFX9+ and pre-GFX9 store their surface data in different union members.
 */
void radv_GetImageSubresourceLayout(
    VkDevice                                    _device,
    VkImage                                     _image,
    const VkImageSubresource*                   pSubresource,
    VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	/* Multi-planar formats (e.g. YCbCr) select the plane via aspectMask. */
	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* On GFX9+ only linear surfaces have per-level offsets;
		 * tiled surfaces are addressed as a single block. */
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			/* Linear surfaces keep a per-level pitch; otherwise
			 * use the whole-surface pitch. */
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		/* For 3D images, the reported size covers all depth slices
		 * of this mip level. */
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		/* Pre-GFX9: legacy surface description stores per-level
		 * offsets and slice sizes in dwords. */
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
1837
1838
1839 VkResult
1840 radv_CreateImageView(VkDevice _device,
1841 const VkImageViewCreateInfo *pCreateInfo,
1842 const VkAllocationCallbacks *pAllocator,
1843 VkImageView *pView)
1844 {
1845 RADV_FROM_HANDLE(radv_device, device, _device);
1846 struct radv_image_view *view;
1847
1848 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1849 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1850 if (view == NULL)
1851 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1852
1853 vk_object_base_init(&device->vk, &view->base,
1854 VK_OBJECT_TYPE_IMAGE_VIEW);
1855
1856 radv_image_view_init(view, device, pCreateInfo, NULL);
1857
1858 *pView = radv_image_view_to_handle(view);
1859
1860 return VK_SUCCESS;
1861 }
1862
1863 void
1864 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1865 const VkAllocationCallbacks *pAllocator)
1866 {
1867 RADV_FROM_HANDLE(radv_device, device, _device);
1868 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1869
1870 if (!iview)
1871 return;
1872
1873 vk_object_base_finish(&iview->base);
1874 vk_free2(&device->vk.alloc, pAllocator, iview);
1875 }
1876
1877 void radv_buffer_view_init(struct radv_buffer_view *view,
1878 struct radv_device *device,
1879 const VkBufferViewCreateInfo* pCreateInfo)
1880 {
1881 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1882
1883 view->bo = buffer->bo;
1884 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1885 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1886 view->vk_format = pCreateInfo->format;
1887
1888 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1889 pCreateInfo->offset, view->range, view->state);
1890 }
1891
1892 VkResult
1893 radv_CreateBufferView(VkDevice _device,
1894 const VkBufferViewCreateInfo *pCreateInfo,
1895 const VkAllocationCallbacks *pAllocator,
1896 VkBufferView *pView)
1897 {
1898 RADV_FROM_HANDLE(radv_device, device, _device);
1899 struct radv_buffer_view *view;
1900
1901 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1902 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1903 if (!view)
1904 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1905
1906 vk_object_base_init(&device->vk, &view->base,
1907 VK_OBJECT_TYPE_BUFFER_VIEW);
1908
1909 radv_buffer_view_init(view, device, pCreateInfo);
1910
1911 *pView = radv_buffer_view_to_handle(view);
1912
1913 return VK_SUCCESS;
1914 }
1915
1916 void
1917 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1918 const VkAllocationCallbacks *pAllocator)
1919 {
1920 RADV_FROM_HANDLE(radv_device, device, _device);
1921 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1922
1923 if (!view)
1924 return;
1925
1926 vk_object_base_finish(&view->base);
1927 vk_free2(&device->vk.alloc, pAllocator, view);
1928 }