8f5b3ef37a8a6bfd4f922d612bc01970d2d7661e
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
37
38 #include "gfx10_format_table.h"
39
40 static unsigned
41 radv_choose_tiling(struct radv_device *device,
42 const VkImageCreateInfo *pCreateInfo,
43 VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (!vk_format_is_compressed(format) &&
51 !vk_format_is_depth_or_stencil(format)
52 && device->physical_device->rad_info.chip_class <= GFX8) {
53 /* this causes hangs in some VK CTS tests on GFX9. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
56 /* Only very thin and long 2D textures should benefit from
57 * linear_aligned. */
58 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED;
60 }
61
62 /* MSAA resources must be 2D tiled. */
63 if (pCreateInfo->samples > 1)
64 return RADEON_SURF_MODE_2D;
65
66 return RADEON_SURF_MODE_2D;
67 }
68
/* Decide whether the image can use TC-compatible HTILE, i.e. depth
 * metadata that the texture units can consume directly, avoiding a
 * depth decompression pass before sampling.
 */
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
				   const VkImageCreateInfo *pCreateInfo,
				   VkFormat format)
{
	/* TC-compat HTILE is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	/* Storage usage rules it out. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	/* Only single-mip-level images are supported. */
	if (pCreateInfo->mipLevels > 1)
		return false;

	/* Do not enable TC-compatible HTILE if the image isn't readable by a
	 * shader because no texture fetches will happen.
	 */
	if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
				    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
				    VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
		return false;

	/* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
	 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
	 */
	if (pCreateInfo->samples >= 2 &&
	    (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
	     (format == VK_FORMAT_D32_SFLOAT &&
	      device->physical_device->rad_info.chip_class >= GFX10)))
		return false;

	/* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
	 * supports 32-bit. Though, it's possible to enable TC-compat for
	 * 16-bit depth surfaces if no Z planes are compressed.
	 */
	if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
	    format != VK_FORMAT_D32_SFLOAT &&
	    format != VK_FORMAT_D16_UNORM)
		return false;

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else.
			 */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				/* Every view format must equal the image format. */
				if (format != format_list->pViewFormats[i])
					return false;
			}
		} else {
			return false;
		}
	}

	return true;
}
138
139 static bool
140 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
141 {
142 if (info->bo_metadata) {
143 if (device->physical_device->rad_info.chip_class >= GFX9)
144 return info->bo_metadata->u.gfx9.scanout;
145 else
146 return info->bo_metadata->u.legacy.scanout;
147 }
148
149 return info->scanout;
150 }
151
152 static bool
153 radv_image_use_fast_clear_for_image(const struct radv_device *device,
154 const struct radv_image *image)
155 {
156 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
157 return true;
158
159 if (image->info.samples <= 1 &&
160 image->info.width * image->info.height <= 512 * 512) {
161 /* Do not enable CMASK or DCC for small surfaces where the cost
162 * of the eliminate pass can be higher than the benefit of fast
163 * clear. RadeonSI does this, but the image threshold is
164 * different.
165 */
166 return false;
167 }
168
169 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
170 (image->exclusive || image->queue_family_mask == 1);
171 }
172
/* Decide whether DCC (Delta Color Compression) can be enabled for the
 * image described by pCreateInfo.
 */
static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image *image,
		       const VkImageCreateInfo *pCreateInfo,
		       VkFormat format)
{
	bool dcc_compatible_formats;
	bool blendable;

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	/* Shared images are ruled out; the importer may not understand DCC
	 * metadata. */
	if (image->shareable)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	/* Subsampled and multi-plane (e.g. YCbCr) formats are not supported. */
	if (vk_format_is_subsampled(format) ||
	    vk_format_get_plane_count(format) > 1)
		return false;

	/* DCC only pays off when fast clears are used. */
	if (!radv_image_use_fast_clear_for_image(device, image))
		return false;

	/* TODO: Enable DCC for mipmaps on GFX9+. */
	if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
	    device->physical_device->rad_info.chip_class >= GFX9)
		return false;

	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
	 * 2x can be enabled with an option.
	 */
	if (pCreateInfo->samples > 2 ||
	    (pCreateInfo->samples == 2 &&
	     !device->physical_device->dcc_msaa_allowed))
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(format,
						     &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (!radv_dcc_formats_compatible(format,
								 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}
256
257 static inline bool
258 radv_use_fmask_for_image(const struct radv_device *device,
259 const struct radv_image *image)
260 {
261 return image->info.samples > 1 &&
262 ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
263 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
264 }
265
266 static inline bool
267 radv_use_htile_for_image(const struct radv_device *device,
268 const struct radv_image *image)
269 {
270 return image->info.levels == 1 &&
271 ((image->info.width * image->info.height >= 8 * 8) ||
272 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
273 }
274
275 static bool
276 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
277 struct radv_image *image)
278 {
279 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
280 return false;
281
282 /* TC-compat CMASK is only available for GFX8+. */
283 if (device->physical_device->rad_info.chip_class < GFX8)
284 return false;
285
286 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
287 return false;
288
289 if (radv_image_has_dcc(image))
290 return false;
291
292 if (!radv_image_has_cmask(image))
293 return false;
294
295 return true;
296 }
297
/* Word 1 of the opaque BO metadata header: ATI vendor id in the high
 * 16 bits, the PCI device id of this GPU in the low 16 bits.
 */
static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}
302
303 static bool
304 radv_is_valid_opaque_metadata(const struct radv_device *device,
305 const struct radeon_bo_metadata *md)
306 {
307 if (md->metadata[0] != 1 ||
308 md->metadata[1] != si_get_bo_metadata_word1(device))
309 return false;
310
311 if (md->size_metadata < 40)
312 return false;
313
314 return true;
315 }
316
/* Overwrite the surface tiling fields from imported BO metadata so that
 * our layout matches the exporter's.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	/* Clear the current mode bits before re-deriving them below. */
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* Swizzle mode 0 means linear on GFX9+. */
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		/* Pre-GFX9: copy all legacy tiling parameters. */
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		/* Macro tiling maps to 2D mode, micro tiling to 1D, else linear. */
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

	}
}
348
/* Reconcile the image dimensions with those encoded in imported BO
 * metadata, updating image_info on success.
 *
 * Returns VK_ERROR_INVALID_EXTERNAL_HANDLE when the external image is
 * smaller than requested, or when dimensions are inconsistent on GFX10.
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device,
			    struct radv_image *image,
			    const struct radv_image_create_info *create_info,
			    struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;

	/*
	 * minigbm sometimes allocates bigger images which is going to result in
	 * weird strides and other properties. Lets be lenient where possible and
	 * fail it on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		/* Decode width/height from the image descriptor words stored
		 * in the metadata; the bit layout differs per generation. */
		if (device->physical_device->rad_info.chip_class >= GFX10) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
				(G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = S_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
			"The imported image has smaller dimensions than the internal\n"
			"dimensions. Using it is going to fail badly, so we reject\n"
			"this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (device->physical_device->rad_info.chip_class >= GFX10) {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency in width and will fail this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		/* Pre-GFX10: warn but accept, adopting the external size. */
		fprintf(stderr,
			"Tried to import an image with inconsistent width on pre-GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency and would fail on GFX10.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
	}
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}
411
412 static VkResult
413 radv_patch_image_from_extra_info(struct radv_device *device,
414 struct radv_image *image,
415 const struct radv_image_create_info *create_info,
416 struct ac_surf_info *image_info)
417 {
418 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
419 if (result != VK_SUCCESS)
420 return result;
421
422 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
423 if (create_info->bo_metadata) {
424 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
425 create_info->bo_metadata);
426 }
427
428 if (radv_surface_has_scanout(device, create_info)) {
429 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
430 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
431
432 image->info.surf_index = NULL;
433 }
434 }
435 return VK_SUCCESS;
436 }
437
/* Initialize the radeon_surf flags and block parameters for one plane of
 * a new image, from the Vulkan create info. Returns 0 on success.
 */
static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo,
		  VkFormat image_format)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* align byte per element on dword */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	/* Translate the Vulkan image type (plus array-ness) to a surface type. */
	switch (pCreateInfo->imageType){
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	/* Required for clearing/initializing a specific layer on GFX8. */
	surface->flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		/* Disable HTILE when not beneficial or explicitly disabled. */
		if (!radv_use_htile_for_image(device, image) ||
		    (device->instance->debug_flags & RADV_DEBUG_NO_HIZ))
			surface->flags |= RADEON_SURF_NO_HTILE;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	/* 128-bit compressed 3D textures can't be render targets on GFX9+. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	if (!radv_use_fmask_for_image(device, image))
		surface->flags |= RADEON_SURF_NO_FMASK;

	return 0;
}
514
515 static inline unsigned
516 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
517 {
518 if (stencil)
519 return plane->surface.u.legacy.stencil_tiling_index[level];
520 else
521 return plane->surface.u.legacy.tiling_index[level];
522 }
523
524 static unsigned radv_map_swizzle(unsigned swizzle)
525 {
526 switch (swizzle) {
527 case VK_SWIZZLE_Y:
528 return V_008F0C_SQ_SEL_Y;
529 case VK_SWIZZLE_Z:
530 return V_008F0C_SQ_SEL_Z;
531 case VK_SWIZZLE_W:
532 return V_008F0C_SQ_SEL_W;
533 case VK_SWIZZLE_0:
534 return V_008F0C_SQ_SEL_0;
535 case VK_SWIZZLE_1:
536 return V_008F0C_SQ_SEL_1;
537 default: /* VK_SWIZZLE_X */
538 return V_008F0C_SQ_SEL_X;
539 }
540 }
541
/* Build a 4-dword typed buffer descriptor for the given buffer range.
 *
 * The descriptor is written into state[0..3]; the encoding of dword 3
 * differs between GFX10+ (img_format table) and earlier chips
 * (num_format/data_format pair).
 */
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	/* NUM_RECORDS is expressed in elements everywhere except on GFX8,
	 * where it stays in bytes. */
	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}
599
/* Fill in the fields of an image descriptor that depend on the BO address
 * and per-level layout: base address, tiling/swizzle mode, pitch, and
 * compression metadata address. The other descriptor fields in `state`
 * must already be initialized; this function patches them in place.
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	/* GFX9+ stores per-aspect offsets on the surface; older chips use the
	 * per-level offset passed in via base_level_info. */
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		/* Point the metadata address at DCC, or at HTILE for
		 * TC-compatible depth, unless compression is disabled. */
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + plane->surface.dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + plane->surface.htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}
718
/* Map a Vulkan image/view type (plus layer and sample counts) to the
 * hardware SQ_RSRC_IMG_* resource dimension. Cube views accessed as
 * storage images are treated as 2D arrays.
 */
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;
	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (nr_samples > 1)
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		else
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		/* 3D images viewed as 2D arrays use the array dimension. */
		if (view_type == VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_3D;
		else
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}
745
/* Derive the BC_SWIZZLE (border color swizzle) field from the composed
 * component swizzle, based on which channel the X component lands in.
 */
static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == VK_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == VK_SWIZZLE_X) {
		if (swizzle[1] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}
774
775 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
776 {
777 const struct vk_format_description *desc = vk_format_description(format);
778
779 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
780 return desc->swizzle[3] == VK_SWIZZLE_X;
781
782 return radv_translate_colorswap(format, false) <= 1;
783 }
/**
 * Build the sampler view descriptor for a texture (GFX10).
 *
 * Writes 8 dwords into `state` and, when the image has FMASK, 8 dwords
 * into `fmask_state` (zeroed otherwise, if non-NULL).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

	/* Depth/stencil formats sample a single channel; replicate X. */
	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	/* Normalize height/depth for array and cube resource types. */
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   /* For MSAA the level fields encode the sample count instead. */
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(0) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

		/* Pick the FMASK format matching the sample count. */
		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
		fmask_state[7] = 0;
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}
908
909 /**
910 * Build the sampler view descriptor for a texture (SI-GFX9)
911 */
912 static void
913 si_make_texture_descriptor(struct radv_device *device,
914 struct radv_image *image,
915 bool is_storage_image,
916 VkImageViewType view_type,
917 VkFormat vk_format,
918 const VkComponentMapping *mapping,
919 unsigned first_level, unsigned last_level,
920 unsigned first_layer, unsigned last_layer,
921 unsigned width, unsigned height, unsigned depth,
922 uint32_t *state,
923 uint32_t *fmask_state)
924 {
925 const struct vk_format_description *desc;
926 enum vk_swizzle swizzle[4];
927 int first_non_void;
928 unsigned num_format, data_format, type;
929
930 desc = vk_format_description(vk_format);
931
932 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
933 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
934 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
935 } else {
936 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
937 }
938
939 first_non_void = vk_format_get_first_non_void_channel(vk_format);
940
941 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
942 if (num_format == ~0) {
943 num_format = 0;
944 }
945
946 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
947 if (data_format == ~0) {
948 data_format = 0;
949 }
950
951 /* S8 with either Z16 or Z32 HTILE need a special format. */
952 if (device->physical_device->rad_info.chip_class == GFX9 &&
953 vk_format == VK_FORMAT_S8_UINT &&
954 radv_image_is_tc_compat_htile(image)) {
955 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
956 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
957 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
958 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
959 }
960 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
961 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
962 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
963 height = 1;
964 depth = image->info.array_size;
965 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
966 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
967 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
968 depth = image->info.array_size;
969 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
970 depth = image->info.array_size / 6;
971
972 state[0] = 0;
973 state[1] = (S_008F14_DATA_FORMAT(data_format) |
974 S_008F14_NUM_FORMAT(num_format));
975 state[2] = (S_008F18_WIDTH(width - 1) |
976 S_008F18_HEIGHT(height - 1) |
977 S_008F18_PERF_MOD(4));
978 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
979 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
980 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
981 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
982 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
983 0 : first_level) |
984 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
985 util_logbase2(image->info.samples) :
986 last_level) |
987 S_008F1C_TYPE(type));
988 state[4] = 0;
989 state[5] = S_008F24_BASE_ARRAY(first_layer);
990 state[6] = 0;
991 state[7] = 0;
992
993 if (device->physical_device->rad_info.chip_class == GFX9) {
994 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
995
996 /* Depth is the last accessible layer on Gfx9.
997 * The hw doesn't need to know the total number of layers.
998 */
999 if (type == V_008F1C_SQ_RSRC_IMG_3D)
1000 state[4] |= S_008F20_DEPTH(depth - 1);
1001 else
1002 state[4] |= S_008F20_DEPTH(last_layer);
1003
1004 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
1005 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
1006 util_logbase2(image->info.samples) :
1007 image->info.levels - 1);
1008 } else {
1009 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1010 state[4] |= S_008F20_DEPTH(depth - 1);
1011 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1012 }
1013 if (image->planes[0].surface.dcc_offset) {
1014 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1015 } else {
1016 /* The last dword is unused by hw. The shader uses it to clear
1017 * bits in the first dword of sampler state.
1018 */
1019 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1020 if (first_level == last_level)
1021 state[7] = C_008F30_MAX_ANISO_RATIO;
1022 else
1023 state[7] = 0xffffffff;
1024 }
1025 }
1026
1027 /* Initialize the sampler view for FMASK. */
1028 if (radv_image_has_fmask(image)) {
1029 uint32_t fmask_format, num_format;
1030 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1031 uint64_t va;
1032
1033 assert(image->plane_count == 1);
1034
1035 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1036
1037 if (device->physical_device->rad_info.chip_class == GFX9) {
1038 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1039 switch (image->info.samples) {
1040 case 2:
1041 num_format = V_008F14_IMG_FMASK_8_2_2;
1042 break;
1043 case 4:
1044 num_format = V_008F14_IMG_FMASK_8_4_4;
1045 break;
1046 case 8:
1047 num_format = V_008F14_IMG_FMASK_32_8_8;
1048 break;
1049 default:
1050 unreachable("invalid nr_samples");
1051 }
1052 } else {
1053 switch (image->info.samples) {
1054 case 2:
1055 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1056 break;
1057 case 4:
1058 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1059 break;
1060 case 8:
1061 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1062 break;
1063 default:
1064 assert(0);
1065 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1066 }
1067 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1068 }
1069
1070 fmask_state[0] = va >> 8;
1071 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1072 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1073 S_008F14_DATA_FORMAT(fmask_format) |
1074 S_008F14_NUM_FORMAT(num_format);
1075 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1076 S_008F18_HEIGHT(height - 1);
1077 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1078 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1079 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1080 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1081 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1082 fmask_state[4] = 0;
1083 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1084 fmask_state[6] = 0;
1085 fmask_state[7] = 0;
1086
1087 if (device->physical_device->rad_info.chip_class == GFX9) {
1088 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1089 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1090 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1091 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1092 S_008F24_META_RB_ALIGNED(1);
1093
1094 if (radv_image_is_tc_compat_cmask(image)) {
1095 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1096
1097 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1098 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1099 fmask_state[7] |= va >> 8;
1100 }
1101 } else {
1102 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1103 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1104 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1105 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1106
1107 if (radv_image_is_tc_compat_cmask(image)) {
1108 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1109
1110 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1111 fmask_state[7] |= va >> 8;
1112 }
1113 }
1114 } else if (fmask_state)
1115 memset(fmask_state, 0, 8 * 4);
1116 }
1117
1118 static void
1119 radv_make_texture_descriptor(struct radv_device *device,
1120 struct radv_image *image,
1121 bool is_storage_image,
1122 VkImageViewType view_type,
1123 VkFormat vk_format,
1124 const VkComponentMapping *mapping,
1125 unsigned first_level, unsigned last_level,
1126 unsigned first_layer, unsigned last_layer,
1127 unsigned width, unsigned height, unsigned depth,
1128 uint32_t *state,
1129 uint32_t *fmask_state)
1130 {
1131 if (device->physical_device->rad_info.chip_class >= GFX10) {
1132 gfx10_make_texture_descriptor(device, image, is_storage_image,
1133 view_type, vk_format, mapping,
1134 first_level, last_level,
1135 first_layer, last_layer,
1136 width, height, depth,
1137 state, fmask_state);
1138 } else {
1139 si_make_texture_descriptor(device, image, is_storage_image,
1140 view_type, vk_format, mapping,
1141 first_level, last_level,
1142 first_layer, last_layer,
1143 width, height, depth,
1144 state, fmask_state);
1145 }
1146 }
1147
/* Fill the opaque (driver-private) part of the BO metadata that is exported
 * with shareable images: a versioned blob containing a base-address-free
 * image descriptor, the relative DCC offset, and (pre-GFX9) the per-level
 * offsets. A compatible importer can reconstruct the resource from it. */
static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	/* Zero-initialized, i.e. VK_COMPONENT_SWIZZLE_IDENTITY (0) for all
	 * four components. */
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	/* Only single-plane images can be exported with metadata. */
	assert(image->plane_count == 1);

	/* Metadata image format format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 * [2] is always 0, because the base address is cleared
	 * [9] is the DCC offset bits [39:8] from the beginning of
	 * the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);


	/* Descriptor covering the whole resource (all levels/layers). */
	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->planes[0].surface.dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
1200
1201 void
1202 radv_init_metadata(struct radv_device *device,
1203 struct radv_image *image,
1204 struct radeon_bo_metadata *metadata)
1205 {
1206 struct radeon_surf *surface = &image->planes[0].surface;
1207
1208 memset(metadata, 0, sizeof(*metadata));
1209
1210 if (device->physical_device->rad_info.chip_class >= GFX9) {
1211 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1212 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1213 } else {
1214 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1215 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1216 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1217 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1218 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1219 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1220 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1221 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1222 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1223 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1224 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1225 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1226 }
1227 radv_query_opaque_metadata(device, image, metadata);
1228 }
1229
1230 void
1231 radv_image_override_offset_stride(struct radv_device *device,
1232 struct radv_image *image,
1233 uint64_t offset, uint32_t stride)
1234 {
1235 ac_surface_override_offset_stride(&device->physical_device->rad_info,
1236 &image->planes[0].surface,
1237 image->info.levels, offset, stride);
1238 }
1239
1240 static void
1241 radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1242 const struct radv_image *image,
1243 struct radeon_surf *surf)
1244 {
1245 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
1246 image->info.levels > 1 || image->info.depth > 1 ||
1247 radv_image_has_dcc(image) ||
1248 !radv_image_use_fast_clear_for_image(device, image))
1249 return;
1250
1251 assert(image->info.storage_samples == 1);
1252
1253 surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
1254 surf->total_size = surf->cmask_offset + surf->cmask_size;
1255 surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
1256 }
1257
1258 static void
1259 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1260 {
1261 if (radv_image_has_dcc(image)) {
1262 image->fce_pred_offset = image->size;
1263 image->size += 8 * image->info.levels;
1264
1265 image->dcc_pred_offset = image->size;
1266 image->size += 8 * image->info.levels;
1267 }
1268
1269 if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
1270 radv_image_has_htile(image)) {
1271 image->clear_value_offset = image->size;
1272 image->size += 8 * image->info.levels;
1273 }
1274
1275 if (radv_image_is_tc_compat_htile(image) &&
1276 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1277 /* Metadata for the TC-compatible HTILE hardware bug which
1278 * have to be fixed by updating ZRANGE_PRECISION when doing
1279 * fast depth clears to 0.0f.
1280 */
1281 image->tc_compat_zrange_offset = image->size;
1282 image->size += image->info.levels * 4;
1283 }
1284 }
1285
1286
1287 static void
1288 radv_image_reset_layout(struct radv_image *image)
1289 {
1290 image->size = 0;
1291 image->alignment = 1;
1292
1293 image->tc_compatible_cmask = image->tc_compatible_htile = 0;
1294 image->fce_pred_offset = image->dcc_pred_offset = 0;
1295 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1296
1297 for (unsigned i = 0; i < image->plane_count; ++i) {
1298 VkFormat format = vk_format_get_plane_format(image->vk_format, i);
1299
1300 uint32_t flags = image->planes[i].surface.flags;
1301 memset(image->planes + i, 0, sizeof(image->planes[i]));
1302
1303 image->planes[i].surface.flags = flags;
1304 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1305 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1306 image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
1307
1308 /* align byte per element on dword */
1309 if (image->planes[i].surface.bpe == 3) {
1310 image->planes[i].surface.bpe = 4;
1311 }
1312 }
1313 }
1314
/* Compute the memory layout of the image: per-plane surfaces, their offsets
 * inside the image, total size/alignment, and the trailing driver-value
 * buffers. Called at image creation, or later for delayed-layout (Android
 * external format) images.
 *
 * Returns VK_SUCCESS, or the error from patching with extra create info.
 */
VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
	 * common internal case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	radv_image_reset_layout(image);

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			/* Chroma planes of subsampled formats are smaller by
			 * the format's divisors. */
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		/* Multi-plane (and explicitly flagged) images get no
		 * compression metadata surfaces. */
		if (create_info.no_metadata_planes || image->plane_count > 1) {
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
			                                      RADEON_SURF_NO_FMASK |
			                                      RADEON_SURF_NO_HTILE;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		if (!create_info.no_metadata_planes && image->plane_count == 1)
			radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

		/* Pack planes back-to-back, honoring each one's alignment. */
		image->planes[plane].offset = align(image->size, image->planes[plane].surface.alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.total_size;
		image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	image->tc_compatible_cmask = radv_image_has_cmask(image) &&
	                             radv_use_tc_compat_cmask_for_image(device, image);

	image->tc_compatible_htile = radv_image_has_htile(image) &&
	                             image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;

	/* Driver-value buffers live after all plane data. */
	radv_image_alloc_values(device, image);

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
1372
1373 static void
1374 radv_destroy_image(struct radv_device *device,
1375 const VkAllocationCallbacks *pAllocator,
1376 struct radv_image *image)
1377 {
1378 if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1379 device->ws->buffer_destroy(image->bo);
1380
1381 if (image->owned_memory != VK_NULL_HANDLE) {
1382 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1383 radv_free_memory(device, pAllocator, mem);
1384 }
1385
1386 vk_object_base_finish(&image->base);
1387 vk_free2(&device->vk.alloc, pAllocator, image);
1388 }
1389
1390 VkResult
1391 radv_image_create(VkDevice _device,
1392 const struct radv_image_create_info *create_info,
1393 const VkAllocationCallbacks* alloc,
1394 VkImage *pImage)
1395 {
1396 RADV_FROM_HANDLE(radv_device, device, _device);
1397 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1398 struct radv_image *image = NULL;
1399 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1400 pCreateInfo->format);
1401 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1402
1403 const unsigned plane_count = vk_format_get_plane_count(format);
1404 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1405
1406 radv_assert(pCreateInfo->mipLevels > 0);
1407 radv_assert(pCreateInfo->arrayLayers > 0);
1408 radv_assert(pCreateInfo->samples > 0);
1409 radv_assert(pCreateInfo->extent.width > 0);
1410 radv_assert(pCreateInfo->extent.height > 0);
1411 radv_assert(pCreateInfo->extent.depth > 0);
1412
1413 image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
1414 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1415 if (!image)
1416 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1417
1418 vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
1419
1420 image->type = pCreateInfo->imageType;
1421 image->info.width = pCreateInfo->extent.width;
1422 image->info.height = pCreateInfo->extent.height;
1423 image->info.depth = pCreateInfo->extent.depth;
1424 image->info.samples = pCreateInfo->samples;
1425 image->info.storage_samples = pCreateInfo->samples;
1426 image->info.array_size = pCreateInfo->arrayLayers;
1427 image->info.levels = pCreateInfo->mipLevels;
1428 image->info.num_channels = vk_format_get_nr_components(format);
1429
1430 image->vk_format = format;
1431 image->tiling = pCreateInfo->tiling;
1432 image->usage = pCreateInfo->usage;
1433 image->flags = pCreateInfo->flags;
1434 image->plane_count = plane_count;
1435
1436 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1437 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1438 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1439 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1440 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1441 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1442 else
1443 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1444 }
1445
1446 const VkExternalMemoryImageCreateInfo *external_info =
1447 vk_find_struct_const(pCreateInfo->pNext,
1448 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
1449
1450 image->shareable = external_info;
1451 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1452 image->info.surf_index = &device->image_mrt_offset_counter;
1453 }
1454
1455 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1456 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
1457 }
1458
1459 bool delay_layout = external_info &&
1460 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1461
1462 if (delay_layout) {
1463 *pImage = radv_image_to_handle(image);
1464 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1465 return VK_SUCCESS;
1466 }
1467
1468 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1469 assert(result == VK_SUCCESS);
1470
1471 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1472 image->alignment = MAX2(image->alignment, 4096);
1473 image->size = align64(image->size, image->alignment);
1474 image->offset = 0;
1475
1476 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1477 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1478 if (!image->bo) {
1479 radv_destroy_image(device, alloc, image);
1480 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1481 }
1482 }
1483
1484 *pImage = radv_image_to_handle(image);
1485
1486 return VK_SUCCESS;
1487 }
1488
/* Fill one plane's sampled-image or storage-image descriptor of an image
 * view, then patch the mutable (base-address/tiling) fields. */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	/* Per-plane descriptors always use a single-plane format. */
	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	/* Scale the surface block width from the plane's format to the view
	 * format (they may differ for compatible-format views). */
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	/* GFX9+ descriptors address mips by absolute level; older chips get a
	 * per-mip base address instead, so the descriptor level stays 0. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	/* Compression is disabled for storage views (image stores can't write
	 * metadata) or when explicitly requested. */
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}
1546
1547 static unsigned
1548 radv_plane_from_aspect(VkImageAspectFlags mask)
1549 {
1550 switch(mask) {
1551 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1552 return 1;
1553 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1554 return 2;
1555 default:
1556 return 0;
1557 }
1558 }
1559
1560 VkFormat
1561 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1562 {
1563 switch(mask) {
1564 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1565 return image->planes[0].format;
1566 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1567 return image->planes[1].format;
1568 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1569 return image->planes[2].format;
1570 case VK_IMAGE_ASPECT_STENCIL_BIT:
1571 return vk_format_stencil_only(image->vk_format);
1572 case VK_IMAGE_ASPECT_DEPTH_BIT:
1573 return vk_format_depth_only(image->vk_format);
1574 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1575 return vk_format_depth_only(image->vk_format);
1576 default:
1577 return image->vk_format;
1578 }
1579 }
1580
/* Initialize an image view: resolve the effective format and extent for the
 * selected aspect/plane, then build the sampled and storage descriptors for
 * each plane the view covers. */
void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo,
		     const struct radv_image_view_extra_create_info* extra_create_info)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	/* Validate the layer range against the image (3D images count view
	 * layers against the mip level's depth). */
	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}
	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
	/* A COLOR view of a multi-plane image covers all planes at once. */
	iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

	iview->vk_format = pCreateInfo->format;

	/* If the image has an Android external format, pCreateInfo->format will be
	 * VK_FORMAT_UNDEFINED. */
	if (iview->vk_format == VK_FORMAT_UNDEFINED)
		iview->vk_format = image->vk_format;

	/* Single-aspect depth/stencil views use the aspect's sub-format. */
	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	/* GFX9+ descriptors take the base-level size (the hw minifies);
	 * older chips want the size of the view's base mip. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width = radv_minify(image->info.width , range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth = radv_minify(image->info.depth , range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->planes[iview->plane_id].format) {
		/* Compatible-format view: rescale the extent from image
		 * blocks to view blocks, rounding up. */
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 * Uncompressed pixels Compressed block sizes (4x4)
		 * mip0: 22 x 22 6 x 6
		 * mip1: 11 x 11 3 x 3
		 * mip2: 5 x 5 2 x 2
		 * mip3: 2 x 2 1 x 1
		 * mip4: 1 x 1 1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
		 * divide-by-two integer math):
		 * mip0: 6x6
		 * mip1: 3x3
		 * mip2: 1x1
		 * mip3: 1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height, then convert that, and round it
		 * back up to get the level 0 size.
		 * Clamp the converted size between the original values, and next power of two, which
		 * means we don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	/* Build a sampled and a storage descriptor for every covered plane. */
	bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
	for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
		VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
		radv_image_view_make_descriptor(iview, device, format,
						&pCreateInfo->components,
						false, disable_compression,
						iview->plane_id + i, i);
		radv_image_view_make_descriptor(iview, device,
						format, &pCreateInfo->components,
						true, disable_compression,
						iview->plane_id + i, i);
	}
}
1704
1705 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1706 VkImageLayout layout,
1707 bool in_render_loop,
1708 unsigned queue_mask)
1709 {
1710 if (radv_image_is_tc_compat_htile(image)) {
1711 if (layout == VK_IMAGE_LAYOUT_GENERAL &&
1712 !in_render_loop &&
1713 !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1714 /* It should be safe to enable TC-compat HTILE with
1715 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1716 * loop and if the image doesn't have the storage bit
1717 * set. This improves performance for apps that use
1718 * GENERAL for the main depth pass because this allows
1719 * compression and this reduces the number of
1720 * decompressions from/to GENERAL.
1721 */
1722 return true;
1723 }
1724
1725 return layout != VK_IMAGE_LAYOUT_GENERAL;
1726 }
1727
1728 return radv_image_has_htile(image) &&
1729 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1730 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1731 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1732 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1733 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1734 }
1735
1736 bool radv_layout_can_fast_clear(const struct radv_image *image,
1737 VkImageLayout layout,
1738 bool in_render_loop,
1739 unsigned queue_mask)
1740 {
1741 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
1742 queue_mask == (1u << RADV_QUEUE_GENERAL);
1743 }
1744
1745 bool radv_layout_dcc_compressed(const struct radv_device *device,
1746 const struct radv_image *image,
1747 VkImageLayout layout,
1748 bool in_render_loop,
1749 unsigned queue_mask)
1750 {
1751 /* Don't compress compute transfer dst, as image stores are not supported. */
1752 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1753 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1754 return false;
1755
1756 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1757 }
1758
1759
1760 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1761 {
1762 if (!image->exclusive)
1763 return image->queue_family_mask;
1764 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1765 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1766 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1767 if (family == VK_QUEUE_FAMILY_IGNORED)
1768 return 1u << queue_family;
1769 return 1u << family;
1770 }
1771
/* vkCreateImage entry point: route Android gralloc-backed images to the
 * gralloc path, otherwise create a regular image (propagating the WSI
 * scanout hint, if any). */
VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
		                              pAllocator, pImage);
#endif

	/* WSI (swapchain) images may request a scanout-capable layout. */
	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					.vk_info = pCreateInfo,
					.scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}
1799
1800 void
1801 radv_DestroyImage(VkDevice _device, VkImage _image,
1802 const VkAllocationCallbacks *pAllocator)
1803 {
1804 RADV_FROM_HANDLE(radv_device, device, _device);
1805 RADV_FROM_HANDLE(radv_image, image, _image);
1806
1807 if (!image)
1808 return;
1809
1810 radv_destroy_image(device, pAllocator, image);
1811 }
1812
/* vkGetImageSubresourceLayout entry point: report the memory layout
 * (offset, row/array/depth pitches, size) of one mip level and array
 * layer of an image subresource.
 */
void radv_GetImageSubresourceLayout(
	VkDevice _device,
	VkImage _image,
	const VkImageSubresource* pSubresource,
	VkSubresourceLayout* pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;

	/* Multi-planar formats keep one radeon_surf per plane; the aspect
	 * mask selects which plane is being queried. */
	unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

	struct radv_image_plane *plane = &image->planes[plane_id];
	struct radeon_surf *surface = &plane->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* On GFX9+ only linear surfaces track a per-level offset;
		 * for tiled surfaces the level offset is folded into the
		 * surface layout, so use 0 here. */
		uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

		pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
		if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SINT ||
		    image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
			/* Adjust the number of bytes between each row because
			 * the pitch is actually the number of components per
			 * row.
			 */
			pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
		} else {
			/* Linear surfaces have a per-level pitch; tiled ones
			 * use the whole-surface pitch. */
			uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

			assert(util_is_power_of_two_nonzero(surface->bpe));
			pLayout->rowPitch = pitch * surface->bpe;
		}

		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		/* For 3D images a "slice" is one depth layer, so the total
		 * size scales with the minified depth of this level. */
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		/* Pre-GFX9 (legacy) layout: slice_size_dw is in dwords,
		 * hence the "* 4" conversions to bytes below. */
		pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}
1863
1864
1865 VkResult
1866 radv_CreateImageView(VkDevice _device,
1867 const VkImageViewCreateInfo *pCreateInfo,
1868 const VkAllocationCallbacks *pAllocator,
1869 VkImageView *pView)
1870 {
1871 RADV_FROM_HANDLE(radv_device, device, _device);
1872 struct radv_image_view *view;
1873
1874 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1875 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1876 if (view == NULL)
1877 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1878
1879 vk_object_base_init(&device->vk, &view->base,
1880 VK_OBJECT_TYPE_IMAGE_VIEW);
1881
1882 radv_image_view_init(view, device, pCreateInfo, NULL);
1883
1884 *pView = radv_image_view_to_handle(view);
1885
1886 return VK_SUCCESS;
1887 }
1888
1889 void
1890 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1891 const VkAllocationCallbacks *pAllocator)
1892 {
1893 RADV_FROM_HANDLE(radv_device, device, _device);
1894 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1895
1896 if (!iview)
1897 return;
1898
1899 vk_object_base_finish(&iview->base);
1900 vk_free2(&device->vk.alloc, pAllocator, iview);
1901 }
1902
1903 void radv_buffer_view_init(struct radv_buffer_view *view,
1904 struct radv_device *device,
1905 const VkBufferViewCreateInfo* pCreateInfo)
1906 {
1907 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1908
1909 view->bo = buffer->bo;
1910 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1911 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1912 view->vk_format = pCreateInfo->format;
1913
1914 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1915 pCreateInfo->offset, view->range, view->state);
1916 }
1917
1918 VkResult
1919 radv_CreateBufferView(VkDevice _device,
1920 const VkBufferViewCreateInfo *pCreateInfo,
1921 const VkAllocationCallbacks *pAllocator,
1922 VkBufferView *pView)
1923 {
1924 RADV_FROM_HANDLE(radv_device, device, _device);
1925 struct radv_buffer_view *view;
1926
1927 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1928 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1929 if (!view)
1930 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1931
1932 vk_object_base_init(&device->vk, &view->base,
1933 VK_OBJECT_TYPE_BUFFER_VIEW);
1934
1935 radv_buffer_view_init(view, device, pCreateInfo);
1936
1937 *pView = radv_buffer_view_to_handle(view);
1938
1939 return VK_SUCCESS;
1940 }
1941
1942 void
1943 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1944 const VkAllocationCallbacks *pAllocator)
1945 {
1946 RADV_FROM_HANDLE(radv_device, device, _device);
1947 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1948
1949 if (!view)
1950 return;
1951
1952 vk_object_base_finish(&view->base);
1953 vk_free2(&device->vk.alloc, pAllocator, view);
1954 }