radv: Disable HTILE in ac_surface.
[mesa.git] / src/amd/vulkan/radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
37
38 #include "gfx10_format_table.h"
39
40 static unsigned
41 radv_choose_tiling(struct radv_device *device,
42 const VkImageCreateInfo *pCreateInfo,
43 VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (!vk_format_is_compressed(format) &&
51 !vk_format_is_depth_or_stencil(format)
52 && device->physical_device->rad_info.chip_class <= GFX8) {
53 /* Linear tiling here causes hangs in some VK CTS tests on GFX9, hence the GFX8 check above. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
56 /* Only very thin and long 2D textures should benefit from
57 * linear_aligned. */
58 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED;
60 }
61
62 /* MSAA resources must be 2D tiled. (Single-sampled resources also fall through to 2D tiling below.) */
63 if (pCreateInfo->samples > 1)
64 return RADEON_SURF_MODE_2D;
65
66 return RADEON_SURF_MODE_2D;
67 }
68
69 static bool
70 radv_use_tc_compat_htile_for_image(struct radv_device *device,
71 const VkImageCreateInfo *pCreateInfo,
72 VkFormat format)
73 {
74 /* TC-compat HTILE is only available for GFX8+. */
75 if (device->physical_device->rad_info.chip_class < GFX8)
76 return false;
77
78 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
79 return false;
80
81 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
82 return false;
83
84 if (pCreateInfo->mipLevels > 1)
85 return false;
86
87 /* Do not enable TC-compatible HTILE if the image isn't readable by a
88 * shader because no texture fetches will happen.
89 */
90 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
91 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
92 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
93 return false;
94
95 /* FIXME: TC-compat with 2/4/8 samples breaks some CTS tests for an
96 * unknown reason - disable it for now. On GFX10 D32_SFLOAT is affected as well.
97 */
98 if (pCreateInfo->samples >= 2 &&
99 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
100 (format == VK_FORMAT_D32_SFLOAT &&
101 device->physical_device->rad_info.chip_class == GFX10)))
102 return false;
103
104 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
105 * supports 32-bit. However, TC-compat can be enabled for 16-bit depth
106 * surfaces if no Z planes are compressed.
107 */
108 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
109 format != VK_FORMAT_D32_SFLOAT &&
110 format != VK_FORMAT_D16_UNORM)
111 return false;
112
113 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
114 const struct VkImageFormatListCreateInfo *format_list =
115 (const struct VkImageFormatListCreateInfo *)
116 vk_find_struct_const(pCreateInfo->pNext,
117 IMAGE_FORMAT_LIST_CREATE_INFO);
118
119 /* We have to ignore the existence of the list if viewFormatCount = 0 */
120 if (format_list && format_list->viewFormatCount) {
121 /* compatibility is transitive, so we only need to check
122 * one format with everything else.
123 */
124 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
125 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
126 continue;
127
128 if (format != format_list->pViewFormats[i])
129 return false;
130 }
131 } else {
132 return false;
133 }
134 }
135
136 return true;
137 }
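
/* A hedged illustration of the format-list rule above (values are
 * assumptions, not taken from this file). Because compatibility is
 * transitive, a mutable-format image keeps TC-compat HTILE only when every
 * view format in the list matches the image format:
 *
 *   VkFormat formats[] = { VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT };
 *   VkImageFormatListCreateInfo list = {
 *       .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO,
 *       .viewFormatCount = 2,
 *       .pViewFormats = formats,
 *   };
 *
 * With an image format of VK_FORMAT_D32_SFLOAT this list keeps TC-compat
 * HTILE enabled; listing any other format (or omitting the list entirely
 * while MUTABLE_FORMAT_BIT is set) disables it.
 */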
138
139 static bool
140 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
141 {
142 if (info->bo_metadata) {
143 if (device->physical_device->rad_info.chip_class >= GFX9)
144 return info->bo_metadata->u.gfx9.scanout;
145 else
146 return info->bo_metadata->u.legacy.scanout;
147 }
148
149 return info->scanout;
150 }
151
152 static bool
153 radv_image_use_fast_clear_for_image(const struct radv_image *image)
154 {
155 if (image->info.samples <= 1 &&
156 image->info.width * image->info.height <= 512 * 512) {
157 /* Do not enable CMASK or DCC for small surfaces where the cost
158 * of the eliminate pass can be higher than the benefit of fast
159 * clear. RadeonSI applies the same heuristic, but with a
160 * different size threshold.
161 */
162 return false;
163 }
164
165 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
166 (image->exclusive || image->queue_family_mask == 1);
167 }
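
/* A small worked example of the threshold above, assuming the
 * 512 * 512 = 262144 texel cut-off: a single-sampled 300x300 color
 * attachment (90000 texels) skips CMASK/DCC fast clears, while a
 * 1024x1024 one (1048576 texels) can keep them, subject to the
 * usage and queue-family checks.
 */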
168
169 static bool
170 radv_use_dcc_for_image(struct radv_device *device,
171 const struct radv_image *image,
172 const VkImageCreateInfo *pCreateInfo,
173 VkFormat format)
174 {
175 bool dcc_compatible_formats;
176 bool blendable;
177
178 /* DCC (Delta Color Compression) is only available for GFX8+. */
179 if (device->physical_device->rad_info.chip_class < GFX8)
180 return false;
181
182 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
183 return false;
184
185 if (image->shareable)
186 return false;
187
188 /* TODO: Enable DCC for storage images. */
189 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
190 return false;
191
192 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
193 return false;
194
195 if (vk_format_is_subsampled(format) ||
196 vk_format_get_plane_count(format) > 1)
197 return false;
198
199 if (!radv_image_use_fast_clear_for_image(image))
200 return false;
201
202 /* TODO: Enable DCC for mipmaps on GFX9+. */
203 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
204 device->physical_device->rad_info.chip_class >= GFX9)
205 return false;
206
207 /* Do not enable DCC for mipmapped arrays because performance is worse. */
208 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
209 return false;
210
211 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
212 * 2x can be enabled with an option.
213 */
214 if (pCreateInfo->samples > 2 ||
215 (pCreateInfo->samples == 2 &&
216 !device->physical_device->dcc_msaa_allowed))
217 return false;
218
219 /* Determine if the formats are DCC compatible. */
220 dcc_compatible_formats =
221 radv_is_colorbuffer_format_supported(format,
222 &blendable);
223
224 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
225 const struct VkImageFormatListCreateInfo *format_list =
226 (const struct VkImageFormatListCreateInfo *)
227 vk_find_struct_const(pCreateInfo->pNext,
228 IMAGE_FORMAT_LIST_CREATE_INFO);
229
230 /* We have to ignore the existence of the list if viewFormatCount = 0 */
231 if (format_list && format_list->viewFormatCount) {
232 /* compatibility is transitive, so we only need to check
233 * one format with everything else. */
234 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
235 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
236 continue;
237
238 if (!radv_dcc_formats_compatible(format,
239 format_list->pViewFormats[i]))
240 dcc_compatible_formats = false;
241 }
242 } else {
243 dcc_compatible_formats = false;
244 }
245 }
246
247 if (!dcc_compatible_formats)
248 return false;
249
250 return true;
251 }
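
/* A hedged sketch of how the mutable-format path above can disable DCC.
 * For an image created as VK_FORMAT_R8G8B8A8_UNORM with
 * VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT set and a view-format list of:
 *
 *   VkFormat views[] = { VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_SRGB };
 *
 * DCC stays enabled only if radv_dcc_formats_compatible() accepts every
 * listed format against the image format; a format it rejects (or passing
 * no format list at all) forces dcc_compatible_formats = false and thus
 * RADEON_SURF_DISABLE_DCC.
 */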
252
253 static inline bool
254 radv_use_fmask_for_image(const struct radv_image *image)
255 {
256 return image->info.samples > 1 &&
257 image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
258 }
259
260 static inline bool
261 radv_use_htile_for_image(const struct radv_image *image)
262 {
263 return image->info.levels == 1 &&
264 image->info.width * image->info.height >= 8 * 8;
265 }
266
267 static bool
268 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
269 struct radv_image *image)
270 {
271 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
272 return false;
273
274 /* TC-compat CMASK is only available for GFX8+. */
275 if (device->physical_device->rad_info.chip_class < GFX8)
276 return false;
277
278 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
279 return false;
280
281 if (radv_image_has_dcc(image))
282 return false;
283
284 if (!radv_image_has_cmask(image))
285 return false;
286
287 return true;
288 }
289
290 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
291 {
292 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
293 }
294
295 static bool
296 radv_is_valid_opaque_metadata(const struct radv_device *device,
297 const struct radeon_bo_metadata *md)
298 {
299 if (md->metadata[0] != 1 ||
300 md->metadata[1] != si_get_bo_metadata_word1(device))
301 return false;
302
303 if (md->size_metadata < 40)
304 return false;
305
306 return true;
307 }
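
/* Word1 above packs the vendor and device id. For an illustrative
 * pci_id of 0x687F (an assumption, not taken from this file):
 *   (ATI_VENDOR_ID << 16) | 0x687F == (0x1002 << 16) | 0x687F == 0x1002687F
 */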
308
309 static void
310 radv_patch_surface_from_metadata(struct radv_device *device,
311 struct radeon_surf *surface,
312 const struct radeon_bo_metadata *md)
313 {
314 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
315
316 if (device->physical_device->rad_info.chip_class >= GFX9) {
317 if (md->u.gfx9.swizzle_mode > 0)
318 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
319 else
320 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
321
322 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
323 } else {
324 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
325 surface->u.legacy.bankw = md->u.legacy.bankw;
326 surface->u.legacy.bankh = md->u.legacy.bankh;
327 surface->u.legacy.tile_split = md->u.legacy.tile_split;
328 surface->u.legacy.mtilea = md->u.legacy.mtilea;
329 surface->u.legacy.num_banks = md->u.legacy.num_banks;
330
331 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
332 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
333 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
334 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
335 else
336 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
337
338 }
339 }
340
341 static VkResult
342 radv_patch_image_dimensions(struct radv_device *device,
343 struct radv_image *image,
344 const struct radv_image_create_info *create_info,
345 struct ac_surf_info *image_info)
346 {
347 unsigned width = image->info.width;
348 unsigned height = image->info.height;
349
350 /*
351 * minigbm sometimes allocates bigger images, which results in weird
352 * strides and other properties. Let's be lenient where possible and
353 * fail the import on GFX10 (as we cannot cope there).
354 *
355 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
356 */
357 if (create_info->bo_metadata &&
358 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
359 const struct radeon_bo_metadata *md = create_info->bo_metadata;
360
361 if (device->physical_device->rad_info.chip_class >= GFX10) {
362 width = G_00A004_WIDTH_LO(md->metadata[3]) +
363 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
364 height = S_00A008_HEIGHT(md->metadata[4]) + 1;
365 } else {
366 width = G_008F18_WIDTH(md->metadata[4]) + 1;
367 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
368 }
369 }
370
371 if (image->info.width == width && image->info.height == height)
372 return VK_SUCCESS;
373
374 if (width < image->info.width || height < image->info.height) {
375 fprintf(stderr,
376 "The imported image has smaller dimensions than the internal\n"
377 "dimensions. Using it is going to fail badly, so we reject\n"
378 "this import.\n"
379 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
380 image->info.width, image->info.height, width, height);
381 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
382 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
383 fprintf(stderr,
384 "Tried to import an image with inconsistent width on GFX10.\n"
385 "As GFX10 has no separate stride fields we cannot cope with\n"
386 "an inconsistency in width and will fail this import.\n"
387 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
388 image->info.width, image->info.height, width, height);
389 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
390 } else {
391 fprintf(stderr,
392 "Tried to import an image with inconsistent width on pre-GFX10.\n"
393 "As GFX10 has no separate stride fields we cannot cope with\n"
394 "an inconsistency and would fail on GFX10.\n"
395 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
396 image->info.width, image->info.height, width, height);
397 }
398 image_info->width = width;
399 image_info->height = height;
400
401 return VK_SUCCESS;
402 }
403
404 static VkResult
405 radv_patch_image_from_extra_info(struct radv_device *device,
406 struct radv_image *image,
407 const struct radv_image_create_info *create_info,
408 struct ac_surf_info *image_info)
409 {
410 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
411 if (result != VK_SUCCESS)
412 return result;
413
414 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
415 if (create_info->bo_metadata) {
416 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
417 create_info->bo_metadata);
418 }
419
420 if (radv_surface_has_scanout(device, create_info)) {
421 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
422 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
423
424 image->info.surf_index = NULL;
425 }
426 }
427 return VK_SUCCESS;
428 }
429
430 static int
431 radv_init_surface(struct radv_device *device,
432 const struct radv_image *image,
433 struct radeon_surf *surface,
434 unsigned plane_id,
435 const VkImageCreateInfo *pCreateInfo,
436 VkFormat image_format)
437 {
438 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
439 VkFormat format = vk_format_get_plane_format(image_format, plane_id);
440 const struct vk_format_description *desc = vk_format_description(format);
441 bool is_depth, is_stencil;
442
443 is_depth = vk_format_has_depth(desc);
444 is_stencil = vk_format_has_stencil(desc);
445
446 surface->blk_w = vk_format_get_blockwidth(format);
447 surface->blk_h = vk_format_get_blockheight(format);
448
449 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
450 /* Align bytes-per-element to a dword (3-byte elements are padded to 4). */
451 if (surface->bpe == 3) {
452 surface->bpe = 4;
453 }
454
455 surface->flags = RADEON_SURF_SET(array_mode, MODE);
456
457 switch (pCreateInfo->imageType){
458 case VK_IMAGE_TYPE_1D:
459 if (pCreateInfo->arrayLayers > 1)
460 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
461 else
462 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
463 break;
464 case VK_IMAGE_TYPE_2D:
465 if (pCreateInfo->arrayLayers > 1)
466 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
467 else
468 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
469 break;
470 case VK_IMAGE_TYPE_3D:
471 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
472 break;
473 default:
474 unreachable("unhandled image type");
475 }
476
477 /* Required for clearing/initializing a specific layer on GFX8. */
478 surface->flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
479
480 if (is_depth) {
481 surface->flags |= RADEON_SURF_ZBUFFER;
482 if (!radv_use_htile_for_image(image) ||
483 (device->instance->debug_flags & RADV_DEBUG_NO_HIZ))
484 surface->flags |= RADEON_SURF_NO_HTILE;
485 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
486 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
487 }
488
489 if (is_stencil)
490 surface->flags |= RADEON_SURF_SBUFFER;
491
492 if (device->physical_device->rad_info.chip_class >= GFX9 &&
493 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
494 vk_format_get_blocksizebits(image_format) == 128 &&
495 vk_format_is_compressed(image_format))
496 surface->flags |= RADEON_SURF_NO_RENDER_TARGET;
497
498 if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
499 surface->flags |= RADEON_SURF_DISABLE_DCC;
500
501 if (!radv_use_fmask_for_image(image))
502 surface->flags |= RADEON_SURF_NO_FMASK;
503
504 return 0;
505 }
506
507 static inline unsigned
508 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
509 {
510 if (stencil)
511 return plane->surface.u.legacy.stencil_tiling_index[level];
512 else
513 return plane->surface.u.legacy.tiling_index[level];
514 }
515
516 static unsigned radv_map_swizzle(unsigned swizzle)
517 {
518 switch (swizzle) {
519 case VK_SWIZZLE_Y:
520 return V_008F0C_SQ_SEL_Y;
521 case VK_SWIZZLE_Z:
522 return V_008F0C_SQ_SEL_Z;
523 case VK_SWIZZLE_W:
524 return V_008F0C_SQ_SEL_W;
525 case VK_SWIZZLE_0:
526 return V_008F0C_SQ_SEL_0;
527 case VK_SWIZZLE_1:
528 return V_008F0C_SQ_SEL_1;
529 default: /* VK_SWIZZLE_X */
530 return V_008F0C_SQ_SEL_X;
531 }
532 }
533
534 static void
535 radv_make_buffer_descriptor(struct radv_device *device,
536 struct radv_buffer *buffer,
537 VkFormat vk_format,
538 unsigned offset,
539 unsigned range,
540 uint32_t *state)
541 {
542 const struct vk_format_description *desc;
543 unsigned stride;
544 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
545 uint64_t va = gpu_address + buffer->offset;
546 unsigned num_format, data_format;
547 int first_non_void;
548 desc = vk_format_description(vk_format);
549 first_non_void = vk_format_get_first_non_void_channel(vk_format);
550 stride = desc->block.bits / 8;
551
552 va += offset;
553 state[0] = va;
554 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
555 S_008F04_STRIDE(stride);
556
557 if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
558 range /= stride;
559 }
560
561 state[2] = range;
562 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
563 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
564 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
565 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
566
567 if (device->physical_device->rad_info.chip_class >= GFX10) {
568 const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
569
570 /* OOB_SELECT chooses the out-of-bounds check:
571 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
572 * - 1: index >= NUM_RECORDS
573 * - 2: NUM_RECORDS == 0
574 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
575 * else: swizzle_address >= NUM_RECORDS
576 */
577 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
578 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
579 S_008F0C_RESOURCE_LEVEL(1);
580 } else {
581 num_format = radv_translate_buffer_numformat(desc, first_non_void);
582 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
583
584 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
585 assert(num_format != ~0);
586
587 state[3] |= S_008F0C_NUM_FORMAT(num_format) |
588 S_008F0C_DATA_FORMAT(data_format);
589 }
590 }
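
/* A worked example of the descriptor packing above, under the assumption
 * of a VK_FORMAT_R32G32B32A32_SFLOAT buffer view on GFX9:
 *   stride   = desc->block.bits / 8 = 128 / 8 = 16 bytes
 *   state[0] = low 32 bits of (va + offset)
 *   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(16)
 *   state[2] = range / 16 (the chip_class != GFX8 path divides by stride)
 *   state[3] = XYZW destination swizzles plus the NUM/DATA_FORMAT fields
 */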
591
592 static void
593 si_set_mutable_tex_desc_fields(struct radv_device *device,
594 struct radv_image *image,
595 const struct legacy_surf_level *base_level_info,
596 unsigned plane_id,
597 unsigned base_level, unsigned first_level,
598 unsigned block_width, bool is_stencil,
599 bool is_storage_image, bool disable_compression,
600 uint32_t *state)
601 {
602 struct radv_image_plane *plane = &image->planes[plane_id];
603 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
604 uint64_t va = gpu_address + plane->offset;
605 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
606 uint64_t meta_va = 0;
607 if (chip_class >= GFX9) {
608 if (is_stencil)
609 va += plane->surface.u.gfx9.stencil_offset;
610 else
611 va += plane->surface.u.gfx9.surf_offset;
612 } else
613 va += base_level_info->offset;
614
615 state[0] = va >> 8;
616 if (chip_class >= GFX9 ||
617 base_level_info->mode == RADEON_SURF_MODE_2D)
618 state[0] |= plane->surface.tile_swizzle;
619 state[1] &= C_008F14_BASE_ADDRESS_HI;
620 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
621
622 if (chip_class >= GFX8) {
623 state[6] &= C_008F28_COMPRESSION_EN;
624 state[7] = 0;
625 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
626 meta_va = gpu_address + plane->surface.dcc_offset;
627 if (chip_class <= GFX8)
628 meta_va += base_level_info->dcc_offset;
629
630 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
631 dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
632 meta_va |= dcc_tile_swizzle;
633 } else if (!disable_compression &&
634 radv_image_is_tc_compat_htile(image)) {
635 meta_va = gpu_address + plane->surface.htile_offset;
636 }
637
638 if (meta_va) {
639 state[6] |= S_008F28_COMPRESSION_EN(1);
640 if (chip_class <= GFX9)
641 state[7] = meta_va >> 8;
642 }
643 }
644
645 if (chip_class >= GFX10) {
646 state[3] &= C_00A00C_SW_MODE;
647
648 if (is_stencil) {
649 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
650 } else {
651 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
652 }
653
654 state[6] &= C_00A018_META_DATA_ADDRESS_LO &
655 C_00A018_META_PIPE_ALIGNED;
656
657 if (meta_va) {
658 struct gfx9_surf_meta_flags meta = {
659 .rb_aligned = 1,
660 .pipe_aligned = 1,
661 };
662
663 if (plane->surface.dcc_offset)
664 meta = plane->surface.u.gfx9.dcc;
665
666 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
667 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
668 }
669
670 state[7] = meta_va >> 16;
671 } else if (chip_class == GFX9) {
672 state[3] &= C_008F1C_SW_MODE;
673 state[4] &= C_008F20_PITCH;
674
675 if (is_stencil) {
676 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
677 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
678 } else {
679 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
680 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
681 }
682
683 state[5] &= C_008F24_META_DATA_ADDRESS &
684 C_008F24_META_PIPE_ALIGNED &
685 C_008F24_META_RB_ALIGNED;
686 if (meta_va) {
687 struct gfx9_surf_meta_flags meta = {
688 .rb_aligned = 1,
689 .pipe_aligned = 1,
690 };
691
692 if (plane->surface.dcc_offset)
693 meta = plane->surface.u.gfx9.dcc;
694
695 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
696 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
697 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
698 }
699 } else {
700 /* GFX6-GFX8 */
701 unsigned pitch = base_level_info->nblk_x * block_width;
702 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
703
704 state[3] &= C_008F1C_TILING_INDEX;
705 state[3] |= S_008F1C_TILING_INDEX(index);
706 state[4] &= C_008F20_PITCH;
707 state[4] |= S_008F20_PITCH(pitch - 1);
708 }
709 }
710
711 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
712 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
713 {
714 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
715 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
716
717 /* GFX9 allocates 1D textures as 2D. */
718 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
719 image_type = VK_IMAGE_TYPE_2D;
720 switch (image_type) {
721 case VK_IMAGE_TYPE_1D:
722 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
723 case VK_IMAGE_TYPE_2D:
724 if (nr_samples > 1)
725 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
726 else
727 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
728 case VK_IMAGE_TYPE_3D:
729 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
730 return V_008F1C_SQ_RSRC_IMG_3D;
731 else
732 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
733 default:
734 unreachable("illegal image type");
735 }
736 }
737
738 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
739 {
740 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
741
742 if (swizzle[3] == VK_SWIZZLE_X) {
743 /* For the pre-defined border color values (white, opaque
744 * black, transparent black), the only thing that matters is
745 * that the alpha channel winds up in the correct place
746 * (because the RGB channels are all the same) so either of
747 * these enumerations will work.
748 */
749 if (swizzle[2] == VK_SWIZZLE_Y)
750 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
751 else
752 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
753 } else if (swizzle[0] == VK_SWIZZLE_X) {
754 if (swizzle[1] == VK_SWIZZLE_Y)
755 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
756 else
757 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
758 } else if (swizzle[1] == VK_SWIZZLE_X) {
759 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
760 } else if (swizzle[2] == VK_SWIZZLE_X) {
761 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
762 }
763
764 return bc_swizzle;
765 }
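
/* An illustrative mapping for the helper above, assuming the usual
 * vk_format channel swizzles: an identity view of R8G8B8A8 ({X,Y,Z,W})
 * hits the swizzle[0] == VK_SWIZZLE_X branch and yields BC_SWIZZLE_XYZW,
 * while B8G8R8A8 ({Z,Y,X,W}) falls through to swizzle[2] == VK_SWIZZLE_X
 * and yields BC_SWIZZLE_ZYXW.
 */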
766
767 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
768 {
769 const struct vk_format_description *desc = vk_format_description(format);
770
771 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
772 return desc->swizzle[3] == VK_SWIZZLE_X;
773
774 return radv_translate_colorswap(format, false) <= 1;
775 }
776 /**
777 * Build the sampler view descriptor for a texture (GFX10).
778 */
779 static void
780 gfx10_make_texture_descriptor(struct radv_device *device,
781 struct radv_image *image,
782 bool is_storage_image,
783 VkImageViewType view_type,
784 VkFormat vk_format,
785 const VkComponentMapping *mapping,
786 unsigned first_level, unsigned last_level,
787 unsigned first_layer, unsigned last_layer,
788 unsigned width, unsigned height, unsigned depth,
789 uint32_t *state,
790 uint32_t *fmask_state)
791 {
792 const struct vk_format_description *desc;
793 enum vk_swizzle swizzle[4];
794 unsigned img_format;
795 unsigned type;
796
797 desc = vk_format_description(vk_format);
798 img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
799
800 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
801 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
802 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
803 } else {
804 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
805 }
806
807 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
808 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
809 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
810 height = 1;
811 depth = image->info.array_size;
812 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
813 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
814 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
815 depth = image->info.array_size;
816 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
817 depth = image->info.array_size / 6;
818
819 state[0] = 0;
820 state[1] = S_00A004_FORMAT(img_format) |
821 S_00A004_WIDTH_LO(width - 1);
822 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
823 S_00A008_HEIGHT(height - 1) |
824 S_00A008_RESOURCE_LEVEL(1);
825 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
826 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
827 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
828 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
829 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
830 0 : first_level) |
831 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
832 util_logbase2(image->info.samples) :
833 last_level) |
834 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
835 S_00A00C_TYPE(type);
836 /* Depth is the last accessible layer on gfx9+. The hw doesn't need
837 * to know the total number of layers.
838 */
839 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
840 S_00A010_BASE_ARRAY(first_layer);
841 state[5] = S_00A014_ARRAY_PITCH(0) |
842 S_00A014_MAX_MIP(image->info.samples > 1 ?
843 util_logbase2(image->info.samples) :
844 image->info.levels - 1) |
845 S_00A014_PERF_MOD(4);
846 state[6] = 0;
847 state[7] = 0;
848
849 if (radv_dcc_enabled(image, first_level)) {
850 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
851 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
852 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
853 }
854
855 /* Initialize the sampler view for FMASK. */
856 if (radv_image_has_fmask(image)) {
857 uint64_t gpu_address = radv_buffer_get_va(image->bo);
858 uint32_t format;
859 uint64_t va;
860
861 assert(image->plane_count == 1);
862
863 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
864
865 switch (image->info.samples) {
866 case 2:
867 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
868 break;
869 case 4:
870 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
871 break;
872 case 8:
873 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
874 break;
875 default:
876 unreachable("invalid nr_samples");
877 }
878
879 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
880 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
881 S_00A004_FORMAT(format) |
882 S_00A004_WIDTH_LO(width - 1);
883 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
884 S_00A008_HEIGHT(height - 1) |
885 S_00A008_RESOURCE_LEVEL(1);
886 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
887 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
888 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
889 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
890 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
891 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
892 fmask_state[4] = S_00A010_DEPTH(last_layer) |
893 S_00A010_BASE_ARRAY(first_layer);
894 fmask_state[5] = 0;
895 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
896 fmask_state[7] = 0;
897 } else if (fmask_state)
898 memset(fmask_state, 0, 8 * 4);
899 }
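
/* A worked example of the DEPTH/BASE_ARRAY packing above: for a non-3D
 * array view of layers 2..5 (first_layer = 2, last_layer = 5), state[4]
 * gets S_00A010_DEPTH(5) | S_00A010_BASE_ARRAY(2), since on gfx9+ the
 * hardware wants the last accessible layer, not a layer count.
 */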
900
901 /**
902 * Build the sampler view descriptor for a texture (SI-GFX9)
903 */
904 static void
905 si_make_texture_descriptor(struct radv_device *device,
906 struct radv_image *image,
907 bool is_storage_image,
908 VkImageViewType view_type,
909 VkFormat vk_format,
910 const VkComponentMapping *mapping,
911 unsigned first_level, unsigned last_level,
912 unsigned first_layer, unsigned last_layer,
913 unsigned width, unsigned height, unsigned depth,
914 uint32_t *state,
915 uint32_t *fmask_state)
916 {
917 const struct vk_format_description *desc;
918 enum vk_swizzle swizzle[4];
919 int first_non_void;
920 unsigned num_format, data_format, type;
921
922 desc = vk_format_description(vk_format);
923
924 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
925 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
926 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
927 } else {
928 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
929 }
930
931 first_non_void = vk_format_get_first_non_void_channel(vk_format);
932
933 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
934 if (num_format == ~0) {
935 num_format = 0;
936 }
937
938 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
939 if (data_format == ~0) {
940 data_format = 0;
941 }
942
943 /* S8 with either Z16 or Z32 HTILE need a special format. */
944 if (device->physical_device->rad_info.chip_class == GFX9 &&
945 vk_format == VK_FORMAT_S8_UINT &&
946 radv_image_is_tc_compat_htile(image)) {
947 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
948 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
949 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
950 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
951 }
952 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
953 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
954 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
955 height = 1;
956 depth = image->info.array_size;
957 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
958 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
959 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
960 depth = image->info.array_size;
961 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
962 depth = image->info.array_size / 6;
963
964 state[0] = 0;
965 state[1] = (S_008F14_DATA_FORMAT(data_format) |
966 S_008F14_NUM_FORMAT(num_format));
967 state[2] = (S_008F18_WIDTH(width - 1) |
968 S_008F18_HEIGHT(height - 1) |
969 S_008F18_PERF_MOD(4));
970 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
971 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
972 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
973 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
974 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
975 0 : first_level) |
976 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
977 util_logbase2(image->info.samples) :
978 last_level) |
979 S_008F1C_TYPE(type));
980 state[4] = 0;
981 state[5] = S_008F24_BASE_ARRAY(first_layer);
982 state[6] = 0;
983 state[7] = 0;
984
985 if (device->physical_device->rad_info.chip_class == GFX9) {
986 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
987
988 /* Depth is the last accessible layer on Gfx9.
989 * The hw doesn't need to know the total number of layers.
990 */
991 if (type == V_008F1C_SQ_RSRC_IMG_3D)
992 state[4] |= S_008F20_DEPTH(depth - 1);
993 else
994 state[4] |= S_008F20_DEPTH(last_layer);
995
996 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
997 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
998 util_logbase2(image->info.samples) :
999 image->info.levels - 1);
1000 } else {
1001 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1002 state[4] |= S_008F20_DEPTH(depth - 1);
1003 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1004 }
1005 if (image->planes[0].surface.dcc_offset) {
1006 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1007 } else {
1008 /* The last dword is unused by hw. The shader uses it to clear
1009 * bits in the first dword of sampler state.
1010 */
1011 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1012 if (first_level == last_level)
1013 state[7] = C_008F30_MAX_ANISO_RATIO;
1014 else
1015 state[7] = 0xffffffff;
1016 }
1017 }
1018
1019 /* Initialize the sampler view for FMASK. */
1020 if (radv_image_has_fmask(image)) {
1021 uint32_t fmask_format, num_format;
1022 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1023 uint64_t va;
1024
1025 assert(image->plane_count == 1);
1026
1027 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1028
1029 if (device->physical_device->rad_info.chip_class == GFX9) {
1030 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1031 switch (image->info.samples) {
1032 case 2:
1033 num_format = V_008F14_IMG_FMASK_8_2_2;
1034 break;
1035 case 4:
1036 num_format = V_008F14_IMG_FMASK_8_4_4;
1037 break;
1038 case 8:
1039 num_format = V_008F14_IMG_FMASK_32_8_8;
1040 break;
1041 default:
1042 unreachable("invalid nr_samples");
1043 }
1044 } else {
1045 switch (image->info.samples) {
1046 case 2:
1047 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1048 break;
1049 case 4:
1050 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1051 break;
1052 case 8:
1053 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1054 break;
1055 default:
1056 assert(0);
1057 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1058 }
1059 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1060 }
1061
1062 fmask_state[0] = va >> 8;
1063 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1064 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1065 S_008F14_DATA_FORMAT(fmask_format) |
1066 S_008F14_NUM_FORMAT(num_format);
1067 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1068 S_008F18_HEIGHT(height - 1);
1069 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1070 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1071 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1072 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1073 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1074 fmask_state[4] = 0;
1075 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1076 fmask_state[6] = 0;
1077 fmask_state[7] = 0;
1078
1079 if (device->physical_device->rad_info.chip_class == GFX9) {
1080 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1081 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1082 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1083 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1084 S_008F24_META_RB_ALIGNED(1);
1085
1086 if (radv_image_is_tc_compat_cmask(image)) {
1087 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1088
1089 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1090 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1091 fmask_state[7] |= va >> 8;
1092 }
1093 } else {
1094 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1095 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1096 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1097 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1098
1099 if (radv_image_is_tc_compat_cmask(image)) {
1100 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1101
1102 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1103 fmask_state[7] |= va >> 8;
1104 }
1105 }
1106 } else if (fmask_state)
1107 memset(fmask_state, 0, 8 * 4);
1108 }
1109
1110 static void
1111 radv_make_texture_descriptor(struct radv_device *device,
1112 struct radv_image *image,
1113 bool is_storage_image,
1114 VkImageViewType view_type,
1115 VkFormat vk_format,
1116 const VkComponentMapping *mapping,
1117 unsigned first_level, unsigned last_level,
1118 unsigned first_layer, unsigned last_layer,
1119 unsigned width, unsigned height, unsigned depth,
1120 uint32_t *state,
1121 uint32_t *fmask_state)
1122 {
1123 if (device->physical_device->rad_info.chip_class >= GFX10) {
1124 gfx10_make_texture_descriptor(device, image, is_storage_image,
1125 view_type, vk_format, mapping,
1126 first_level, last_level,
1127 first_layer, last_layer,
1128 width, height, depth,
1129 state, fmask_state);
1130 } else {
1131 si_make_texture_descriptor(device, image, is_storage_image,
1132 view_type, vk_format, mapping,
1133 first_level, last_level,
1134 first_layer, last_layer,
1135 width, height, depth,
1136 state, fmask_state);
1137 }
1138 }
1139
1140 static void
1141 radv_query_opaque_metadata(struct radv_device *device,
1142 struct radv_image *image,
1143 struct radeon_bo_metadata *md)
1144 {
1145 static const VkComponentMapping fixedmapping;
1146 uint32_t desc[8], i;
1147
1148 assert(image->plane_count == 1);
1149
1150 /* Metadata image format version 1:
1151 * [0] = 1 (metadata format identifier)
1152 * [1] = (VENDOR_ID << 16) | PCI_ID
1153 * [2:9] = image descriptor for the whole resource
1154 * [2] is always 0, because the base address is cleared
1155 * [9] is the DCC offset bits [39:8] from the beginning of
1156 * the buffer
1157 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1158 */
1159 md->metadata[0] = 1; /* metadata image format version 1 */
1160
1161 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1162 md->metadata[1] = si_get_bo_metadata_word1(device);
1163
1164
1165 radv_make_texture_descriptor(device, image, false,
1166 (VkImageViewType)image->type, image->vk_format,
1167 &fixedmapping, 0, image->info.levels - 1, 0,
1168 image->info.array_size - 1,
1169 image->info.width, image->info.height,
1170 image->info.depth,
1171 desc, NULL);
1172
1173 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1174 image->planes[0].surface.blk_w, false, false, false, desc);
1175
1176 /* Clear the base address and set the relative DCC offset. */
1177 desc[0] = 0;
1178 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1179 desc[7] = image->planes[0].surface.dcc_offset >> 8;
1180
1181 /* Dwords [2:9] contain the image descriptor. */
1182 memcpy(&md->metadata[2], desc, sizeof(desc));
1183
1184 /* Dwords [10:..] contain the mipmap level offsets. */
1185 if (device->physical_device->rad_info.chip_class <= GFX8) {
1186 for (i = 0; i <= image->info.levels - 1; i++)
1187 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1188 md->size_metadata = (11 + image->info.levels - 1) * 4;
1189 } else
1190 md->size_metadata = 10 * 4;
1191 }
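
/* A size check for the layout above, assuming a GFX8 image with 2 mip
 * levels: dwords [0:1] hold the header, [2:9] the descriptor, and [10:11]
 * the two level offsets, so size_metadata = (11 + 2 - 1) * 4 = 48 bytes,
 * which also satisfies the >= 40 check in radv_is_valid_opaque_metadata().
 */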
1192
1193 void
1194 radv_init_metadata(struct radv_device *device,
1195 struct radv_image *image,
1196 struct radeon_bo_metadata *metadata)
1197 {
1198 struct radeon_surf *surface = &image->planes[0].surface;
1199
1200 memset(metadata, 0, sizeof(*metadata));
1201
1202 if (device->physical_device->rad_info.chip_class >= GFX9) {
1203 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1204 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1205 } else {
1206 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1207 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1208 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1209 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1210 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1211 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1212 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1213 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1214 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1215 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1216 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1217 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1218 }
1219 radv_query_opaque_metadata(device, image, metadata);
1220 }
1221
1222 void
1223 radv_image_override_offset_stride(struct radv_device *device,
1224 struct radv_image *image,
1225 uint64_t offset, uint32_t stride)
1226 {
1227 ac_surface_override_offset_stride(&device->physical_device->rad_info,
1228 &image->planes[0].surface,
1229 image->info.levels, offset, stride);
1230 }
1231
1232 static void
1233 radv_image_alloc_fmask(struct radv_device *device,
1234 struct radv_image *image)
1235 {
1236 unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;
1237
1238 image->planes[0].surface.fmask_offset = align64(image->size, fmask_alignment);
1239 image->size = image->planes[0].surface.fmask_offset + image->planes[0].surface.fmask_size;
1240 image->alignment = MAX2(image->alignment, fmask_alignment);
1241 }
1242
1243 static void
1244 radv_image_alloc_cmask(struct radv_device *device,
1245 struct radv_image *image)
1246 {
1247 unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
1248 unsigned cmask_size = image->planes[0].surface.cmask_size;
1249 uint32_t clear_value_size = 0;
1250
1251 if (!cmask_size)
1252 return;
1253
1254 assert(cmask_alignment);
1255
1256 image->planes[0].surface.cmask_offset = align64(image->size, cmask_alignment);
1257 /* + 8 for storing the clear values */
1258 if (!image->clear_value_offset) {
1259 image->clear_value_offset = image->planes[0].surface.cmask_offset + cmask_size;
1260 clear_value_size = 8;
1261 }
1262 image->size = image->planes[0].surface.cmask_offset + cmask_size + clear_value_size;
1263 image->alignment = MAX2(image->alignment, cmask_alignment);
1264 }
1265
1266 static void
1267 radv_image_alloc_dcc(struct radv_image *image)
1268 {
1269 assert(image->plane_count == 1);
1270
1271 image->planes[0].surface.dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
1272 /* + 24 for storing the clear values + fce pred + dcc pred for each mip */
1273 image->clear_value_offset = image->planes[0].surface.dcc_offset + image->planes[0].surface.dcc_size;
1274 image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
1275 image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
1276 image->size = image->planes[0].surface.dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
1277 image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
1278 }
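
/* The "+ 24 per mip" above breaks down as 8 bytes of clear values,
 * 8 bytes of FCE predicate and 8 bytes of DCC predicate per level.
 * For a single-level image (levels = 1):
 *   clear_value_offset = dcc_offset + dcc_size
 *   fce_pred_offset    = clear_value_offset + 8
 *   dcc_pred_offset    = clear_value_offset + 16
 *   size               = dcc_offset + dcc_size + 24
 */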
1279
1280 static void
1281 radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
1282 {
1283 image->planes[0].surface.htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);
1284
1285 /* + 8 for storing the clear values */
1286 image->clear_value_offset = image->planes[0].surface.htile_offset + image->planes[0].surface.htile_size;
1287 image->size = image->clear_value_offset + image->info.levels * 8;
1288 if (radv_image_is_tc_compat_htile(image) &&
1289 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1290 /* Metadata for the TC-compatible HTILE hardware bug, which
1291 * has to be worked around by updating ZRANGE_PRECISION when
1292 * doing fast depth clears to 0.0f.
1293 */
1294 image->tc_compat_zrange_offset = image->size;
1295 image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
1296 }
1297 image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
1298 }
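
/* For the layout above with levels = L, the HTILE block is followed by
 * L * 8 bytes of clear values and, only on chips with the TC-compat
 * zrange bug, another L * 4 bytes of ZRANGE_PRECISION workaround
 * metadata at tc_compat_zrange_offset.
 */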
1299
1300 static inline bool
1301 radv_image_can_enable_cmask(struct radv_image *image)
1302 {
1303 if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
1304 /* Do not enable CMASK for non-MSAA images (fast color clear)
1305 * because 128 bit formats are not supported, but FMASK might
1306 * still be used.
1307 */
1308 return false;
1309 }
1310
1311 return radv_image_use_fast_clear_for_image(image) &&
1312 image->info.levels == 1 &&
1313 image->info.depth == 1;
1314 }
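
/* bpe is bytes per element, so the bpe > 8 test above singles out
 * 128-bit formats (bpe == 16), for which single-sampled CMASK fast
 * clears are not supported.
 */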
1315
1316 static void radv_image_disable_dcc(struct radv_image *image)
1317 {
1318 for (unsigned i = 0; i < image->plane_count; ++i)
1319 image->planes[i].surface.dcc_size = 0;
1320 }
1321
1322 static void radv_image_disable_htile(struct radv_image *image)
1323 {
1324 for (unsigned i = 0; i < image->plane_count; ++i)
1325 image->planes[i].surface.htile_size = 0;
1326 }
1327
1328 VkResult
1329 radv_image_create_layout(struct radv_device *device,
1330 struct radv_image_create_info create_info,
1331 struct radv_image *image)
1332 {
1333 /* Check that we did not initialize things earlier */
1334 assert(!image->planes[0].surface.surf_size);
1335
1336 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1337 * common internal case. */
1338 create_info.vk_info = NULL;
1339
1340 struct ac_surf_info image_info = image->info;
1341 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1342 if (result != VK_SUCCESS)
1343 return result;
1344
1345 image->size = 0;
1346 image->alignment = 1;
1347 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1348 struct ac_surf_info info = image_info;
1349
1350 if (plane) {
1351 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1352 assert(info.width % desc->width_divisor == 0);
1353 assert(info.height % desc->height_divisor == 0);
1354
1355 info.width /= desc->width_divisor;
1356 info.height /= desc->height_divisor;
1357 }
1358
1359 if (create_info.no_metadata_planes || image->plane_count > 1) {
1360 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
1361 RADEON_SURF_NO_FMASK |
1362 RADEON_SURF_NO_HTILE;
1363 }
1364
1365 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1366
1367 image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
1368 image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
1369 image->alignment = image->planes[plane].surface.surf_alignment;
1370
1371 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1372 }
1373
1374 /* Try to enable DCC first. */
1375 if (radv_image_has_dcc(image)) {
1376 radv_image_alloc_dcc(image);
1377 if (image->info.samples > 1) {
1378 /* CMASK should be enabled because DCC fast
1379 * clear with MSAA needs it.
1380 */
1381 assert(radv_image_can_enable_cmask(image));
1382 radv_image_alloc_cmask(device, image);
1383 }
1384 } else {
1385 /* When DCC cannot be enabled, try CMASK. */
1386 radv_image_disable_dcc(image);
1387 if (radv_image_can_enable_cmask(image)) {
1388 radv_image_alloc_cmask(device, image);
1389 }
1390 }
1391
1392 /* Try to enable FMASK for multisampled images. */
1393 if (image->planes[0].surface.fmask_size) {
1394 radv_image_alloc_fmask(device, image);
1395
1396 if (radv_use_tc_compat_cmask_for_image(device, image))
1397 image->tc_compatible_cmask = true;
1398 } else {
1399 /* Otherwise, try to enable HTILE for depth surfaces. */
1400 if (radv_image_has_htile(image)) {
1401 image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
1402 radv_image_alloc_htile(device, image);
1403 } else {
1404 radv_image_disable_htile(image);
1405 }
1406 }
1407
1408 assert(image->planes[0].surface.surf_size);
1409 return VK_SUCCESS;
1410 }
1411
1412 VkResult
1413 radv_image_create(VkDevice _device,
1414 const struct radv_image_create_info *create_info,
1415 const VkAllocationCallbacks* alloc,
1416 VkImage *pImage)
1417 {
1418 RADV_FROM_HANDLE(radv_device, device, _device);
1419 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1420 struct radv_image *image = NULL;
1421 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1422 pCreateInfo->format);
1423 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1424
1425 const unsigned plane_count = vk_format_get_plane_count(format);
1426 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1427
1428 radv_assert(pCreateInfo->mipLevels > 0);
1429 radv_assert(pCreateInfo->arrayLayers > 0);
1430 radv_assert(pCreateInfo->samples > 0);
1431 radv_assert(pCreateInfo->extent.width > 0);
1432 radv_assert(pCreateInfo->extent.height > 0);
1433 radv_assert(pCreateInfo->extent.depth > 0);
1434
1435 image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
1436 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1437 if (!image)
1438 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1439
1440 vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
1441
1442 image->type = pCreateInfo->imageType;
1443 image->info.width = pCreateInfo->extent.width;
1444 image->info.height = pCreateInfo->extent.height;
1445 image->info.depth = pCreateInfo->extent.depth;
1446 image->info.samples = pCreateInfo->samples;
1447 image->info.storage_samples = pCreateInfo->samples;
1448 image->info.array_size = pCreateInfo->arrayLayers;
1449 image->info.levels = pCreateInfo->mipLevels;
1450 image->info.num_channels = vk_format_get_nr_components(format);
1451
1452 image->vk_format = format;
1453 image->tiling = pCreateInfo->tiling;
1454 image->usage = pCreateInfo->usage;
1455 image->flags = pCreateInfo->flags;
1456 image->plane_count = plane_count;
1457
1458 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1459 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1460 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1461 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1462 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1463 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1464 else
1465 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1466 }
1467
1468 const VkExternalMemoryImageCreateInfo *external_info =
1469 vk_find_struct_const(pCreateInfo->pNext,
1470 EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
1471
1472 image->shareable = external_info != NULL;
1473 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1474 image->info.surf_index = &device->image_mrt_offset_counter;
1475 }
1476
1477 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1478 radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
1479 }
1480
1481 bool delay_layout = external_info &&
1482 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1483
1484 if (delay_layout) {
1485 *pImage = radv_image_to_handle(image);
1486 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1487 return VK_SUCCESS;
1488 }
1489
1490 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1491 assert(result == VK_SUCCESS);
1492
1493 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1494 image->alignment = MAX2(image->alignment, 4096);
1495 image->size = align64(image->size, image->alignment);
1496 image->offset = 0;
1497
1498 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1499 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1500 if (!image->bo) {
1501 vk_free2(&device->vk.alloc, alloc, image);
1502 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1503 }
1504 }
1505
1506 *pImage = radv_image_to_handle(image);
1507
1508 return VK_SUCCESS;
1509 }
1510
1511 static void
1512 radv_image_view_make_descriptor(struct radv_image_view *iview,
1513 struct radv_device *device,
1514 VkFormat vk_format,
1515 const VkComponentMapping *components,
1516 bool is_storage_image, bool disable_compression,
1517 unsigned plane_id, unsigned descriptor_plane_id)
1518 {
1519 struct radv_image *image = iview->image;
1520 struct radv_image_plane *plane = &image->planes[plane_id];
1521 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1522 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1523 uint32_t blk_w;
1524 union radv_descriptor *descriptor;
1525 uint32_t hw_level = 0;
1526
1527 if (is_storage_image) {
1528 descriptor = &iview->storage_descriptor;
1529 } else {
1530 descriptor = &iview->descriptor;
1531 }
1532
1533 assert(vk_format_get_plane_count(vk_format) == 1);
1534 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1535 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1536
1537 if (device->physical_device->rad_info.chip_class >= GFX9)
1538 hw_level = iview->base_mip;
1539 radv_make_texture_descriptor(device, image, is_storage_image,
1540 iview->type,
1541 vk_format,
1542 components,
1543 hw_level, hw_level + iview->level_count - 1,
1544 iview->base_layer,
1545 iview->base_layer + iview->layer_count - 1,
1546 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1547 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1548 iview->extent.depth,
1549 descriptor->plane_descriptors[descriptor_plane_id],
1550 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1551
1552 const struct legacy_surf_level *base_level_info = NULL;
1553 if (device->physical_device->rad_info.chip_class <= GFX9) {
1554 if (is_stencil)
1555 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1556 else
1557 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1558 }
1559 si_set_mutable_tex_desc_fields(device, image,
1560 base_level_info,
1561 plane_id,
1562 iview->base_mip,
1563 iview->base_mip,
1564 blk_w, is_stencil, is_storage_image,
1565 is_storage_image || disable_compression,
1566 descriptor->plane_descriptors[descriptor_plane_id]);
1567 }
1568
1569 static unsigned
1570 radv_plane_from_aspect(VkImageAspectFlags mask)
1571 {
1572 switch(mask) {
1573 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1574 return 1;
1575 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1576 return 2;
1577 default:
1578 return 0;
1579 }
1580 }
1581
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return image->planes[0].format;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return image->planes[1].format;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return image->planes[2].format;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_stencil_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return vk_format_depth_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_depth_only(image->vk_format);
   default:
      return image->vk_format;
   }
}

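/* Initialize an image view: validate the sub-resource range, work out the
 * per-view format and extent, and build the hardware descriptors for every
 * plane the view covers.
 */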
void
radv_image_view_init(struct radv_image_view *iview,
                     struct radv_device *device,
                     const VkImageViewCreateInfo* pCreateInfo,
                     const struct radv_image_view_extra_create_info* extra_create_info)
{
   RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
             <= radv_minify(image->info.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
   iview->image = image;
   iview->bo = image->bo;
   iview->type = pCreateInfo->viewType;
   iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

   iview->vk_format = pCreateInfo->format;

   /* If the image has an Android external format, pCreateInfo->format will be
    * VK_FORMAT_UNDEFINED. */
   if (iview->vk_format == VK_FORMAT_UNDEFINED)
      iview->vk_format = image->vk_format;

   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      iview->vk_format = vk_format_stencil_only(iview->vk_format);
   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
      iview->vk_format = vk_format_depth_only(iview->vk_format);
   }

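   /* On GFX9+ the descriptor is programmed with the level 0 size and the
    * hardware minifies it per mip level, so store the full extent here;
    * older chips take the size of the base level directly.
    */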
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      iview->extent = (VkExtent3D) {
         .width = image->info.width,
         .height = image->info.height,
         .depth = image->info.depth,
      };
   } else {
      iview->extent = (VkExtent3D) {
         .width = radv_minify(image->info.width, range->baseMipLevel),
         .height = radv_minify(image->info.height, range->baseMipLevel),
         .depth = radv_minify(image->info.depth, range->baseMipLevel),
      };
   }

   if (iview->vk_format != image->planes[iview->plane_id].format) {
      unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
      unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
      unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
      unsigned img_bh = vk_format_get_blockheight(image->vk_format);

      iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
      iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

      /* Comment ported from amdvlk -
       * If we have the following image:
       *          Uncompressed pixels    Compressed block sizes (4x4)
       *   mip0:      22 x 22                   6 x 6
       *   mip1:      11 x 11                   3 x 3
       *   mip2:       5 x  5                   2 x 2
       *   mip3:       2 x  2                   1 x 1
       *   mip4:       1 x  1                   1 x 1
       *
       * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT
       * of the base level, and the HW computes the degradation of the block
       * sizes down the mip chain with straight-up divide-by-two integer math:
       *   mip0:    6x6
       *   mip1:    3x3
       *   mip2:    1x1
       *   mip3:    1x1
       *
       * This means that mip2 will be missing texels.
       *
       * Fix this by taking the base mip's width and height, converting them to
       * the view's block size, and rounding back up to get the level 0 size.
       * Clamp the converted size between the original value and the next power
       * of two, so that we don't oversize the image.
       */
      if (device->physical_device->rad_info.chip_class >= GFX9 &&
          vk_format_is_compressed(image->vk_format) &&
          !vk_format_is_compressed(iview->vk_format)) {
         unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
         unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

         lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
         lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

         lvl_width <<= range->baseMipLevel;
         lvl_height <<= range->baseMipLevel;

         iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
         iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
      }
   }

   iview->base_layer = range->baseArrayLayer;
   iview->layer_count = radv_get_layerCount(image, range);
   iview->base_mip = range->baseMipLevel;
   iview->level_count = radv_get_levelCount(image, range);

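   /* Build both the sampled and the storage variant of the descriptor for
    * each plane the view covers.
    */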
   bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
   for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
      VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
      radv_image_view_make_descriptor(iview, device, format,
                                      &pCreateInfo->components,
                                      false, disable_compression,
                                      iview->plane_id + i, i);
      radv_image_view_make_descriptor(iview, device,
                                      format, &pCreateInfo->components,
                                      true, disable_compression,
                                      iview->plane_id + i, i);
   }
}

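/* Return whether HTILE can stay compressed for the given layout and queue
 * mask. TC-compat HTILE can remain compressed in any layout the texture
 * units can read, while non-TC-compat HTILE is only kept compressed for
 * layouts where every access goes through the DB.
 */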
bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     bool in_render_loop,
                                     unsigned queue_mask)
{
   if (radv_image_is_tc_compat_htile(image)) {
      if (layout == VK_IMAGE_LAYOUT_GENERAL &&
          !in_render_loop &&
          !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
         /* It should be safe to enable TC-compat HTILE with
          * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
          * loop and the image doesn't have the storage bit
          * set. This improves performance for apps that use
          * GENERAL for the main depth pass, because it keeps
          * the surface compressed and reduces the number of
          * decompressions from/to GENERAL.
          */
         return true;
      }

      return layout != VK_IMAGE_LAYOUT_GENERAL;
   }

   return radv_image_has_htile(image) &&
          (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
           layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
           layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
           (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
            queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

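/* Fast clears are currently only allowed while the image is in
 * COLOR_ATTACHMENT_OPTIMAL; the other parameters are unused for now.
 */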
bool radv_layout_can_fast_clear(const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
   return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

bool radv_layout_dcc_compressed(const struct radv_device *device,
                                const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
   /* Don't compress compute transfer dst, as image stores are not supported. */
   if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
      return false;

   return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

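/* Compute the mask of queue families that may access the image without a
 * queue family ownership transfer. Concurrent-sharing images use the mask
 * computed at creation time; external/foreign queues are treated as
 * "all families".
 */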
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
   if (!image->exclusive)
      return image->queue_family_mask;
   if (family == VK_QUEUE_FAMILY_EXTERNAL ||
       family == VK_QUEUE_FAMILY_FOREIGN_EXT)
      return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
   if (family == VK_QUEUE_FAMILY_IGNORED)
      return 1u << queue_family;
   return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#ifdef ANDROID
   const VkNativeBufferANDROID *gralloc_info =
      vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
                                     pAllocator, pImage);
#endif

   const struct wsi_image_create_info *wsi_info =
      vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;

   return radv_image_create(device,
                            &(struct radv_image_create_info) {
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                            },
                            pAllocator,
                            pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, _image);

   if (!image)
      return;

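   /* Sparse images own the virtual buffer created with them. */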
   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
      device->ws->buffer_destroy(image->bo);

   if (image->owned_memory != VK_NULL_HANDLE)
      radv_FreeMemory(_device, image->owned_memory, pAllocator);

   vk_object_base_finish(&image->base);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

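/* Report the linear layout of a sub-resource. GFX9+ keeps a single surface
 * description with per-level offsets/pitches (meaningful for linear
 * surfaces), while older chips store explicit per-level info in the legacy
 * surface description.
 */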
void radv_GetImageSubresourceLayout(
   VkDevice _device,
   VkImage _image,
   const VkImageSubresource* pSubresource,
   VkSubresourceLayout* pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->mipLevel;
   int layer = pSubresource->arrayLayer;

   unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
      if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row, not the number of texels.
          */
         pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->rowPitch = pitch * surface->bpe;
      }

      pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->size = surface->u.gfx9.surf_slice_size;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   } else {
      pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   }
}

VkResult
radv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_image_view *view;

   view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (view == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &view->base,
                       VK_OBJECT_TYPE_IMAGE_VIEW);

   radv_image_view_init(view, device, pCreateInfo, NULL);

   *pView = radv_image_view_to_handle(view);

   return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
                      const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image_view, iview, _iview);

   if (!iview)
      return;

   vk_object_base_finish(&iview->base);
   vk_free2(&device->vk.alloc, pAllocator, iview);
}

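/* Initialize a buffer view; VK_WHOLE_SIZE resolves to the rest of the
 * buffer from the given offset.
 */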
void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
                           const VkBufferViewCreateInfo* pCreateInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   view->bo = buffer->bo;
   view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
      buffer->size - pCreateInfo->offset : pCreateInfo->range;
   view->vk_format = pCreateInfo->format;

   radv_make_buffer_descriptor(device, buffer, view->vk_format,
                               pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer_view *view;

   view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!view)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &view->base,
                       VK_OBJECT_TYPE_BUFFER_VIEW);

   radv_buffer_view_init(view, device, pCreateInfo);

   *pView = radv_buffer_view_to_handle(view);

   return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

   if (!view)
      return;

   vk_object_base_finish(&view->base);
   vk_free2(&device->vk.alloc, pAllocator, view);
}