radv: Use offsets in surface struct.
[mesa.git] src/amd/vulkan/radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"

#include "gfx10_format_table.h"

static unsigned
radv_choose_tiling(struct radv_device *device,
		   const VkImageCreateInfo *pCreateInfo,
		   VkFormat format)
{
	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
		assert(pCreateInfo->samples <= 1);
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	if (!vk_format_is_compressed(format) &&
	    !vk_format_is_depth_or_stencil(format) &&
	    device->physical_device->rad_info.chip_class <= GFX8) {
		/* this causes hangs in some VK CTS tests on GFX9. */
		/* Textures with a very small height are recommended to be linear. */
		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
		    /* Only very thin and long 2D textures should benefit from
		     * linear_aligned. */
		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	/* MSAA resources must be 2D tiled. */
	if (pCreateInfo->samples > 1)
		return RADEON_SURF_MODE_2D;

	return RADEON_SURF_MODE_2D;
}

static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
				   const VkImageCreateInfo *pCreateInfo,
				   VkFormat format)
{
	/* TC-compat HTILE is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (pCreateInfo->mipLevels > 1)
		return false;

	/* Do not enable TC-compatible HTILE if the image isn't readable by a
	 * shader because no texture fetches will happen.
	 */
	if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
				    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
				    VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
		return false;

	/* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
	 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
	 */
	if (pCreateInfo->samples >= 2 &&
	    (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
	     (format == VK_FORMAT_D32_SFLOAT &&
	      device->physical_device->rad_info.chip_class == GFX10)))
		return false;

	/* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
	 * supports 32-bit. However, it's possible to enable TC-compat for
	 * 16-bit depth surfaces if no Z planes are compressed.
	 */
	if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
	    format != VK_FORMAT_D32_SFLOAT &&
	    format != VK_FORMAT_D16_UNORM)
		return false;

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else.
			 */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (format != format_list->pViewFormats[i])
					return false;
			}
		} else {
			return false;
		}
	}

	return true;
}

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
	if (info->bo_metadata) {
		if (device->physical_device->rad_info.chip_class >= GFX9)
			return info->bo_metadata->u.gfx9.scanout;
		else
			return info->bo_metadata->u.legacy.scanout;
	}

	return info->scanout;
}

static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image *image,
		       const VkImageCreateInfo *pCreateInfo,
		       VkFormat format)
{
	bool dcc_compatible_formats;
	bool blendable;

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	if (image->shareable)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (vk_format_is_subsampled(format) ||
	    vk_format_get_plane_count(format) > 1)
		return false;

	/* TODO: Enable DCC for mipmaps on GFX9+. */
	if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
	    device->physical_device->rad_info.chip_class >= GFX9)
		return false;

	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
	 * 2x can be enabled with an option.
	 */
	if (pCreateInfo->samples > 2 ||
	    (pCreateInfo->samples == 2 &&
	     !device->physical_device->dcc_msaa_allowed))
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(format,
						     &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfo *format_list =
			(const struct VkImageFormatListCreateInfo *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO);

		/* We have to ignore the existence of the list if viewFormatCount = 0 */
		if (format_list && format_list->viewFormatCount) {
			/* compatibility is transitive, so we only need to check
			 * one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
					continue;

				if (!radv_dcc_formats_compatible(format,
								 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}

static inline bool
radv_use_fmask_for_image(const struct radv_image *image)
{
	return image->info.samples > 1 &&
	       image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
				   struct radv_image *image)
{
	if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
		return false;

	/* TC-compat CMASK is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

	if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
		return false;

	if (radv_image_has_dcc(image))
		return false;

	if (!radv_image_has_cmask(image))
		return false;

	return true;
}

static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}
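
/* Example (illustrative only): assuming ATI_VENDOR_ID is 0x1002 and a
 * hypothetical pci_id of 0x731f, this yields
 * (0x1002 << 16) | 0x731f = 0x1002731f. */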

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device,
			      const struct radeon_bo_metadata *md)
{
	if (md->metadata[0] != 1 ||
	    md->metadata[1] != si_get_bo_metadata_word1(device))
		return false;

	if (md->size_metadata < 40)
		return false;

	return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device,
				 struct radeon_surf *surface,
				 const struct radeon_bo_metadata *md)
{
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
	}
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device,
			    struct radv_image *image,
			    const struct radv_image_create_info *create_info,
			    struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;

	/*
	 * minigbm sometimes allocates bigger images, which results in weird
	 * strides and other properties. Let's be lenient where possible and
	 * fail the import on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		if (device->physical_device->rad_info.chip_class >= GFX10) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
				(G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = S_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
			"The imported image has smaller dimensions than the internal\n"
			"dimensions. Using it is going to fail badly, so we reject\n"
			"this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (device->physical_device->rad_info.chip_class >= GFX10) {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency in width and will fail this import.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		fprintf(stderr,
			"Tried to import an image with inconsistent width on pre-GFX10.\n"
			"As GFX10 has no separate stride fields we cannot cope with\n"
			"an inconsistency and would fail on GFX10.\n"
			"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
			image->info.width, image->info.height, width, height);
	}
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device,
				 struct radv_image *image,
				 const struct radv_image_create_info *create_info,
				 struct ac_surf_info *image_info)
{
	VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
	if (result != VK_SUCCESS)
		return result;

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		if (create_info->bo_metadata) {
			radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
							 create_info->bo_metadata);
		}

		if (radv_surface_has_scanout(device, create_info)) {
			image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

			image->info.surf_index = NULL;
		}
	}
	return VK_SUCCESS;
}

static int
radv_init_surface(struct radv_device *device,
		  const struct radv_image *image,
		  struct radeon_surf *surface,
		  unsigned plane_id,
		  const VkImageCreateInfo *pCreateInfo,
		  VkFormat image_format)
{
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
	const struct vk_format_description *desc = vk_format_description(format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(format);
	surface->blk_h = vk_format_get_blockheight(format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
	/* Align bytes per element to a dword. */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	switch (pCreateInfo->imageType) {
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	/* Required for clearing/initializing a specific layer on GFX8. */
	surface->flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
		surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	if (!radv_use_fmask_for_image(image))
		surface->flags |= RADEON_SURF_NO_FMASK;

	return 0;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
	if (stencil)
		return plane->surface.u.legacy.stencil_tiling_index[level];
	else
		return plane->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case VK_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case VK_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case VK_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case VK_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case VK_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* VK_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);

	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

		/* OOB_SELECT chooses the out-of-bounds check:
		 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 * - 1: index >= NUM_RECORDS
		 * - 2: NUM_RECORDS == 0
		 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *      else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		num_format = radv_translate_buffer_numformat(desc, first_non_void);
		data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}
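
/* Illustrative usage sketch (not driver code): viewing a hypothetical
 * 64 KiB buffer as VK_FORMAT_R32G32B32A32_SFLOAT (16-byte texels):
 *
 *	uint32_t desc[4];
 *	radv_make_buffer_descriptor(device, buffer,
 *	                            VK_FORMAT_R32G32B32A32_SFLOAT,
 *	                            0, 65536, desc);
 *
 * The stride is 16 bytes; on everything but GFX8 the NUM_RECORDS field
 * (state[2]) then holds the element count 65536 / 16 = 4096, while GFX8
 * keeps the size in bytes.
 */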

static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned plane_id,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, bool disable_compression,
			       uint32_t *state)
{
	struct radv_image_plane *plane = &image->planes[plane_id];
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address + plane->offset;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += plane->surface.u.gfx9.stencil_offset;
		else
			va += plane->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= plane->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= GFX8) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + plane->surface.dcc_offset;
			if (chip_class <= GFX8)
				meta_va += base_level_info->dcc_offset;

			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
		} else if (!disable_compression &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + plane->surface.htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			if (chip_class <= GFX9)
				state[7] = meta_va >> 8;
		}
	}

	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
	} else if (chip_class == GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			if (plane->surface.dcc_offset)
				meta = plane->surface.u.gfx9.dcc;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* GFX6-GFX8 */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
	}
}

static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;
	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (nr_samples > 1)
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		else
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		if (view_type == VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_3D;
		else
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}

static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == VK_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == VK_SWIZZLE_X) {
		if (swizzle[1] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}
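
/* Worked examples (illustrative): with the identity RGBA mapping,
 * swizzle = {X, Y, Z, W}, so swizzle[0] == VK_SWIZZLE_X and
 * swizzle[1] == VK_SWIZZLE_Y select V_008F20_BC_SWIZZLE_XYZW. For a
 * BGRA-style view, swizzle = {Z, Y, X, W}: the first component that is
 * VK_SWIZZLE_X is swizzle[2], so V_008F20_BC_SWIZZLE_ZYXW is returned.
 */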

bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
	const struct vk_format_description *desc = vk_format_description(format);

	if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
		return desc->swizzle[3] == VK_SWIZZLE_X;

	return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			      struct radv_image *image,
			      bool is_storage_image,
			      VkImageViewType view_type,
			      VkFormat vk_format,
			      const VkComponentMapping *mapping,
			      unsigned first_level, unsigned last_level,
			      unsigned first_layer, unsigned last_layer,
			      unsigned width, unsigned height, unsigned depth,
			      uint32_t *state,
			      uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
				       0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
				       util_logbase2(image->info.samples) :
				       last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(0) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint32_t format;
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

		switch (image->info.samples) {
		case 2:
			format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
			break;
		case 4:
			format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
			break;
		case 8:
			format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
			break;
		default:
			unreachable("invalid nr_samples");
		}

		fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
				 S_00A004_FORMAT(format) |
				 S_00A004_WIDTH_LO(width - 1);
		fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
				 S_00A008_HEIGHT(height - 1) |
				 S_00A008_RESOURCE_LEVEL(1);
		fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
				 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = S_00A010_DEPTH(last_layer) |
				 S_00A010_BASE_ARRAY(first_layer);
		fmask_state[5] = 0;
		fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
		fmask_state[7] = 0;
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9)
 */
static void
si_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
			   bool is_storage_image,
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;

	desc = vk_format_description(vk_format);

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	first_non_void = vk_format_get_first_non_void_channel(vk_format);

	num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
	if (num_format == ~0) {
		num_format = 0;
	}

	data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	/* S8 with either Z16 or Z32 HTILE needs a special format. */
	if (device->physical_device->rad_info.chip_class == GFX9 &&
	    vk_format == VK_FORMAT_S8_UINT &&
	    radv_image_is_tc_compat_htile(image)) {
		if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
		else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
	}
	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT(data_format) |
		    S_008F14_NUM_FORMAT(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		    S_008F1C_TYPE(type));
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
	state[6] = 0;
	state[7] = 0;

	if (device->physical_device->rad_info.chip_class == GFX9) {
		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

		/* Depth is the last accessible layer on Gfx9.
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
					     util_logbase2(image->info.samples) :
					     image->info.levels - 1);
	} else {
		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}
	if (image->planes[0].surface.dcc_offset) {
		state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint32_t fmask_format, num_format;
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint64_t va;

		assert(image->plane_count == 1);

		va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

		if (device->physical_device->rad_info.chip_class == GFX9) {
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
			switch (image->info.samples) {
			case 2:
				num_format = V_008F14_IMG_FMASK_8_2_2;
				break;
			case 4:
				num_format = V_008F14_IMG_FMASK_8_4_4;
				break;
			case 8:
				num_format = V_008F14_IMG_FMASK_32_8_8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
		} else {
			switch (image->info.samples) {
			case 2:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
				break;
			case 4:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
				break;
			case 8:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
				break;
			default:
				assert(0);
				fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
			}
			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
		}

		fmask_state[0] = va >> 8;
		fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				 S_008F14_DATA_FORMAT(fmask_format) |
				 S_008F14_NUM_FORMAT(num_format);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
				 S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
		fmask_state[4] = 0;
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;

		if (device->physical_device->rad_info.chip_class == GFX9) {
			fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
					  S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
					  S_008F24_META_RB_ALIGNED(1);

			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

				fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
				fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
				fmask_state[7] |= va >> 8;
			}
		} else {
			fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
					  S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

				fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
				fmask_state[7] |= va >> 8;
			}
		}
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

static void
radv_make_texture_descriptor(struct radv_device *device,
			     struct radv_image *image,
			     bool is_storage_image,
			     VkImageViewType view_type,
			     VkFormat vk_format,
			     const VkComponentMapping *mapping,
			     unsigned first_level, unsigned last_level,
			     unsigned first_layer, unsigned last_layer,
			     unsigned width, unsigned height, unsigned depth,
			     uint32_t *state,
			     uint32_t *fmask_state)
{
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		gfx10_make_texture_descriptor(device, image, is_storage_image,
					      view_type, vk_format, mapping,
					      first_level, last_level,
					      first_layer, last_layer,
					      width, height, depth,
					      state, fmask_state);
	} else {
		si_make_texture_descriptor(device, image, is_storage_image,
					   view_type, vk_format, mapping,
					   first_level, last_level,
					   first_layer, last_layer,
					   width, height, depth,
					   state, fmask_state);
	}
}

static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	assert(image->plane_count == 1);

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	radv_make_texture_descriptor(device, image, false,
				     (VkImageViewType)image->type, image->vk_format,
				     &fixedmapping, 0, image->info.levels - 1, 0,
				     image->info.array_size - 1,
				     image->info.width, image->info.height,
				     image->info.depth,
				     desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
				       image->planes[0].surface.blk_w, false, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->planes[0].surface.dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= GFX8) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	} else
		md->size_metadata = 10 * 4;
}
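
/* Illustrative example (not driver code): for a hypothetical 2-level image
 * on GFX8, the opaque metadata ends up as
 *
 *	md->metadata[0]  = 1                          (format version)
 *	md->metadata[1]  = (0x1002 << 16) | pci_id    (vendor/device id)
 *	md->metadata[2..9]                            (image descriptor)
 *	md->metadata[10] = level[0].offset >> 8
 *	md->metadata[11] = level[1].offset >> 8
 *	md->size_metadata = (11 + 2 - 1) * 4 = 48 bytes
 *
 * which also satisfies the size_metadata >= 40 check in
 * radv_is_valid_opaque_metadata().
 */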

void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->planes[0].surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
		metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	} else {
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}
	radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device,
				  struct radv_image *image,
				  uint64_t offset, uint32_t stride)
{
	ac_surface_override_offset_stride(&device->physical_device->rad_info,
					  &image->planes[0].surface,
					  image->info.levels, offset, stride);
}

static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned fmask_alignment = image->planes[0].surface.fmask_alignment;

	image->planes[0].surface.fmask_offset = align64(image->size, fmask_alignment);
	image->size = image->planes[0].surface.fmask_offset + image->planes[0].surface.fmask_size;
	image->alignment = MAX2(image->alignment, fmask_alignment);
}

static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	unsigned cmask_alignment = image->planes[0].surface.cmask_alignment;
	unsigned cmask_size = image->planes[0].surface.cmask_size;
	uint32_t clear_value_size = 0;

	if (!cmask_size)
		return;

	assert(cmask_alignment);

	image->planes[0].surface.cmask_offset = align64(image->size, cmask_alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->planes[0].surface.cmask_offset + cmask_size;
		clear_value_size = 8;
	}
	image->size = image->planes[0].surface.cmask_offset + cmask_size + clear_value_size;
	image->alignment = MAX2(image->alignment, cmask_alignment);
}

static void
radv_image_alloc_dcc(struct radv_image *image)
{
	assert(image->plane_count == 1);

	image->planes[0].surface.dcc_offset = align64(image->size, image->planes[0].surface.dcc_alignment);
	/* + 24 for storing the clear values + fce pred + dcc pred for each mip */
	image->clear_value_offset = image->planes[0].surface.dcc_offset + image->planes[0].surface.dcc_size;
	image->fce_pred_offset = image->clear_value_offset + 8 * image->info.levels;
	image->dcc_pred_offset = image->clear_value_offset + 16 * image->info.levels;
	image->size = image->planes[0].surface.dcc_offset + image->planes[0].surface.dcc_size + 24 * image->info.levels;
	image->alignment = MAX2(image->alignment, image->planes[0].surface.dcc_alignment);
}
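
/* Sketch of the resulting layout for a hypothetical single-level image
 * (image->info.levels == 1):
 *
 *	dcc_offset         = align64(size, dcc_alignment)
 *	clear_value_offset = dcc_offset + dcc_size           (8 bytes)
 *	fce_pred_offset    = clear_value_offset + 8          (8 bytes)
 *	dcc_pred_offset    = clear_value_offset + 16         (8 bytes)
 *	size               = dcc_offset + dcc_size + 24
 *
 * i.e. 24 bytes of metadata (clear value, FCE predicate, DCC predicate)
 * are reserved per mip level.
 */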

static void
radv_image_alloc_htile(struct radv_device *device, struct radv_image *image)
{
	image->planes[0].surface.htile_offset = align64(image->size, image->planes[0].surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->planes[0].surface.htile_offset + image->planes[0].surface.htile_size;
	image->size = image->clear_value_offset + image->info.levels * 8;
	if (radv_image_is_tc_compat_htile(image) &&
	    device->physical_device->rad_info.has_tc_compat_zrange_bug) {
		/* Metadata for the TC-compatible HTILE hardware bug, which
		 * has to be fixed by updating ZRANGE_PRECISION when doing
		 * fast depth clears to 0.0f.
		 */
		image->tc_compat_zrange_offset = image->size;
		image->size = image->tc_compat_zrange_offset + image->info.levels * 4;
	}
	image->alignment = align64(image->alignment, image->planes[0].surface.htile_alignment);
}
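
/* Similarly, the HTILE tail is 8 bytes of clear values per level, plus,
 * on chips with the TC-compat zrange bug, another 4 bytes per level of
 * ZRANGE_PRECISION fixup metadata (tc_compat_zrange_offset above). */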

static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}

static inline bool
radv_image_can_enable_dcc(struct radv_device *device, struct radv_image *image)
{
	if (!radv_image_can_enable_dcc_or_cmask(image) ||
	    !radv_image_has_dcc(image))
		return false;

	return true;
}

static inline bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->planes[0].surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128 bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1;
}

static inline bool
radv_image_can_enable_htile(struct radv_image *image)
{
	return radv_image_has_htile(image) &&
	       image->info.levels == 1 &&
	       image->info.width * image->info.height >= 8 * 8;
}

static void radv_image_disable_dcc(struct radv_image *image)
{
	for (unsigned i = 0; i < image->plane_count; ++i)
		image->planes[i].surface.dcc_size = 0;
}

static void radv_image_disable_htile(struct radv_image *image)
{
	for (unsigned i = 0; i < image->plane_count; ++i)
		image->planes[i].surface.htile_size = 0;
}

VkResult
radv_image_create_layout(struct radv_device *device,
			 struct radv_image_create_info create_info,
			 struct radv_image *image)
{
	/* Check that we did not initialize things earlier */
	assert(!image->planes[0].surface.surf_size);

	/* Clear the pCreateInfo pointer so that code which (incorrectly) still
	 * relies on it fails in the common internal case as well, not only in
	 * the delayed case. */
	create_info.vk_info = NULL;

	struct ac_surf_info image_info = image->info;
	VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
	if (result != VK_SUCCESS)
		return result;

	image->size = 0;
	image->alignment = 1;
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		struct ac_surf_info info = image_info;

		if (plane) {
			const struct vk_format_description *desc = vk_format_description(image->vk_format);
			assert(info.width % desc->width_divisor == 0);
			assert(info.height % desc->height_divisor == 0);

			info.width /= desc->width_divisor;
			info.height /= desc->height_divisor;
		}

		if (create_info.no_metadata_planes || image->plane_count > 1) {
			image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
							      RADEON_SURF_NO_FMASK |
							      RADEON_SURF_NO_HTILE;
		}

		device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

		image->planes[plane].offset = align(image->size, image->planes[plane].surface.surf_alignment);
		image->size = image->planes[plane].offset + image->planes[plane].surface.surf_size;
		image->alignment = image->planes[plane].surface.surf_alignment;

		image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
	}

	/* Try to enable DCC first. */
	if (radv_image_can_enable_dcc(device, image)) {
		radv_image_alloc_dcc(image);
		if (image->info.samples > 1) {
			/* CMASK should be enabled because DCC fast
			 * clear with MSAA needs it.
			 */
			assert(radv_image_can_enable_cmask(image));
			radv_image_alloc_cmask(device, image);
		}
	} else {
		/* When DCC cannot be enabled, try CMASK. */
		radv_image_disable_dcc(image);
		if (radv_image_can_enable_cmask(image)) {
			radv_image_alloc_cmask(device, image);
		}
	}

	/* Try to enable FMASK for multisampled images. */
	if (image->planes[0].surface.fmask_size) {
		radv_image_alloc_fmask(device, image);

		if (radv_use_tc_compat_cmask_for_image(device, image))
			image->tc_compatible_cmask = true;
	} else {
		/* Otherwise, try to enable HTILE for depth surfaces. */
		if (radv_image_can_enable_htile(image) &&
		    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
			image->tc_compatible_htile = image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
			radv_image_alloc_htile(device, image);
		} else {
			radv_image_disable_htile(image);
		}
	}

	assert(image->planes[0].surface.surf_size);
	return VK_SUCCESS;
}
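
/* A rough sketch of the BO layout this produces (illustrative, not a
 * guarantee; the exact set of pieces depends on the checks above):
 *
 *	[plane 0 surface]...[plane N surface]
 *	[DCC + clear values/predicates, or CMASK + clear values]
 *	[FMASK (MSAA color) or HTILE + clear values (depth)]
 *
 * image->size spans all of the above and image->alignment is the largest
 * alignment any piece requires.
 */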

VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;
	VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
							       pCreateInfo->format);
	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	const unsigned plane_count = vk_format_get_plane_count(format);
	const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.storage_samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;
	image->info.num_channels = vk_format_get_nr_components(format);

	image->vk_format = format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;
	image->plane_count = plane_count;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
			    pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	const VkExternalMemoryImageCreateInfo *external_info =
		vk_find_struct_const(pCreateInfo->pNext,
				     EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

	image->shareable = external_info;
	if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
	}

	bool delay_layout = external_info &&
		(external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

	if (delay_layout) {
		*pImage = radv_image_to_handle(image);
		assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
		return VK_SUCCESS;
	}

	ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
	assert(result == VK_SUCCESS);

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
						      0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->vk.alloc, alloc, image);
			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}

static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				VkFormat vk_format,
				const VkComponentMapping *components,
				bool is_storage_image, bool disable_compression,
				unsigned plane_id, unsigned descriptor_plane_id)
{
	struct radv_image *image = iview->image;
	struct radv_image_plane *plane = &image->planes[plane_id];
	const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	union radv_descriptor *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = &iview->storage_descriptor;
	} else {
		descriptor = &iview->descriptor;
	}

	assert(vk_format_get_plane_count(vk_format) == 1);
	assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
	blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;
	radv_make_texture_descriptor(device, image, is_storage_image,
				     iview->type,
				     vk_format,
				     components,
				     hw_level, hw_level + iview->level_count - 1,
				     iview->base_layer,
				     iview->base_layer + iview->layer_count - 1,
				     iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
				     iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
				     iview->extent.depth,
				     descriptor->plane_descriptors[descriptor_plane_id],
				     descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
	}
	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       plane_id,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image,
				       is_storage_image || disable_compression,
				       descriptor->plane_descriptors[descriptor_plane_id]);
}

static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return 1;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return 2;
	default:
		return 0;
	}
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
	switch(mask) {
	case VK_IMAGE_ASPECT_PLANE_0_BIT:
		return image->planes[0].format;
	case VK_IMAGE_ASPECT_PLANE_1_BIT:
		return image->planes[1].format;
	case VK_IMAGE_ASPECT_PLANE_2_BIT:
		return image->planes[2].format;
	case VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_stencil_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT:
		return vk_format_depth_only(image->vk_format);
	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
		return vk_format_depth_only(image->vk_format);
	default:
		return image->vk_format;
	}
}
1608
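/* Initialize an image view and build its sampled-image and storage-image
 * descriptors for every plane the view covers.
 */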
void
radv_image_view_init(struct radv_image_view *iview,
                     struct radv_device *device,
                     const VkImageViewCreateInfo* pCreateInfo,
                     const struct radv_image_view_extra_create_info* extra_create_info)
{
    RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

    switch (image->type) {
    case VK_IMAGE_TYPE_1D:
    case VK_IMAGE_TYPE_2D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
        break;
    case VK_IMAGE_TYPE_3D:
        assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
               <= radv_minify(image->info.depth, range->baseMipLevel));
        break;
    default:
        unreachable("bad VkImageType");
    }
    iview->image = image;
    iview->bo = image->bo;
    iview->type = pCreateInfo->viewType;
    iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
    iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
    iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

    iview->vk_format = pCreateInfo->format;

    /* If the image has an Android external format, pCreateInfo->format will be
     * VK_FORMAT_UNDEFINED. */
    if (iview->vk_format == VK_FORMAT_UNDEFINED)
        iview->vk_format = image->vk_format;

    if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
        iview->vk_format = vk_format_stencil_only(iview->vk_format);
    } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
        iview->vk_format = vk_format_depth_only(iview->vk_format);
    }

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        iview->extent = (VkExtent3D) {
            .width  = image->info.width,
            .height = image->info.height,
            .depth  = image->info.depth,
        };
    } else {
        iview->extent = (VkExtent3D) {
            .width  = radv_minify(image->info.width , range->baseMipLevel),
            .height = radv_minify(image->info.height, range->baseMipLevel),
            .depth  = radv_minify(image->info.depth , range->baseMipLevel),
        };
    }

    if (iview->vk_format != image->planes[iview->plane_id].format) {
        unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
        unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
        unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
        unsigned img_bh = vk_format_get_blockheight(image->vk_format);

        iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
        iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

        /* Comment ported from amdvlk -
         * If we have the following image:
         *          Uncompressed pixels   Compressed block sizes (4x4)
         *   mip0:       22 x 22                   6 x 6
         *   mip1:       11 x 11                   3 x 3
         *   mip2:        5 x  5                   2 x 2
         *   mip3:        2 x  2                   1 x 1
         *   mip4:        1 x  1                   1 x 1
         *
         * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
         * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
         * divide-by-two integer math):
         *   mip0:  6x6
         *   mip1:  3x3
         *   mip2:  1x1
         *   mip3:  1x1
         *
         * This means that mip2 will be missing texels.
         *
         * Fix this by calculating the base mip's width and height, then convert that, and round it
         * back up to get the level 0 size. Clamp the converted size between the original values and
         * the next power of two, which means we don't oversize the image.
         */
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            vk_format_is_compressed(image->vk_format) &&
            !vk_format_is_compressed(iview->vk_format)) {
            unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

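            /* Scale the rounded base-mip size back up to a level 0 size; the
             * hardware then minifies it back down the mip chain as described
             * above.
             */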
            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
        }
    }

    iview->base_layer = range->baseArrayLayer;
    iview->layer_count = radv_get_layerCount(image, range);
    iview->base_mip = range->baseMipLevel;
    iview->level_count = radv_get_levelCount(image, range);

    bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
    for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
        VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
        radv_image_view_make_descriptor(iview, device, format,
                                        &pCreateInfo->components,
                                        false, disable_compression,
                                        iview->plane_id + i, i);
        radv_image_view_make_descriptor(iview, device,
                                        format, &pCreateInfo->components,
                                        true, disable_compression,
                                        iview->plane_id + i, i);
    }
}

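/* Return true if HTILE metadata can stay compressed (i.e. no depth/stencil
 * decompression pass is required) for the given layout and queues.
 */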
bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     bool in_render_loop,
                                     unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image)) {
        if (layout == VK_IMAGE_LAYOUT_GENERAL &&
            !in_render_loop &&
            !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
            /* It should be safe to enable TC-compat HTILE with
             * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
             * loop and if the image doesn't have the storage bit
             * set. This improves performance for apps that use
             * GENERAL for the main depth pass because this allows
             * compression and this reduces the number of
             * decompressions from/to GENERAL.
             */
            return true;
        }

        return layout != VK_IMAGE_LAYOUT_GENERAL;
    }

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
            layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

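/* Fast clears are only kept while the image stays in
 * COLOR_ATTACHMENT_OPTIMAL; the in_render_loop and queue_mask arguments are
 * currently unused.
 */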
bool radv_layout_can_fast_clear(const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

bool radv_layout_dcc_compressed(const struct radv_device *device,
                                const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    /* Don't compress compute transfer dst, as image stores are not supported. */
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
        (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
        return false;

    return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

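/* Compute the mask of queue families that may access the image, resolving
 * VK_QUEUE_FAMILY_IGNORED to the caller's queue family and granting
 * external/foreign queues access from every family.
 */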
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
    if (!image->exclusive)
        return image->queue_family_mask;
    if (family == VK_QUEUE_FAMILY_EXTERNAL ||
        family == VK_QUEUE_FAMILY_FOREIGN_EXT)
        return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
    if (family == VK_QUEUE_FAMILY_IGNORED)
        return 1u << queue_family;
    return 1u << family;
}

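/* vkCreateImage entry point: Android gralloc buffers take a dedicated
 * import path, while WSI swapchain images only add the scanout flag before
 * going through the common radv_image_create().
 */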
VkResult
radv_CreateImage(VkDevice device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#ifdef ANDROID
    const VkNativeBufferANDROID *gralloc_info =
        vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

    if (gralloc_info)
        return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
                                       pAllocator, pImage);
#endif

    const struct wsi_image_create_info *wsi_info =
        vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
    bool scanout = wsi_info && wsi_info->scanout;

    return radv_image_create(device,
                             &(struct radv_image_create_info) {
                                 .vk_info = pCreateInfo,
                                 .scanout = scanout,
                             },
                             pAllocator,
                             pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image, image, _image);

    if (!image)
        return;

    if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
        device->ws->buffer_destroy(image->bo);

    if (image->owned_memory != VK_NULL_HANDLE)
        radv_FreeMemory(_device, image->owned_memory, pAllocator);

    vk_object_base_finish(&image->base);
    vk_free2(&device->vk.alloc, pAllocator, image);
}

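/* Translate the layout computed by the surface code into a
 * VkSubresourceLayout for one mip level and array layer.
 */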
void radv_GetImageSubresourceLayout(
    VkDevice                    _device,
    VkImage                     _image,
    const VkImageSubresource*   pSubresource,
    VkSubresourceLayout*        pLayout)
{
    RADV_FROM_HANDLE(radv_image, image, _image);
    RADV_FROM_HANDLE(radv_device, device, _device);
    int level = pSubresource->mipLevel;
    int layer = pSubresource->arrayLayer;

    unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

    struct radv_image_plane *plane = &image->planes[plane_id];
    struct radeon_surf *surface = &plane->surface;

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

        pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
        if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
            /* Adjust the number of bytes between each row because
             * the pitch is actually the number of components per
             * row.
             */
            pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
        } else {
            uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

            assert(util_is_power_of_two_nonzero(surface->bpe));
            pLayout->rowPitch = pitch * surface->bpe;
        }

        pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
        pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
        pLayout->size = surface->u.gfx9.surf_slice_size;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    } else {
        pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
        pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
        pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    }
}


VkResult
radv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_image_view *view;

    view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (view == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &view->base,
                        VK_OBJECT_TYPE_IMAGE_VIEW);

    radv_image_view_init(view, device, pCreateInfo, NULL);

    *pView = radv_image_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
                      const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image_view, iview, _iview);

    if (!iview)
        return;

    vk_object_base_finish(&iview->base);
    vk_free2(&device->vk.alloc, pAllocator, iview);
}

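/* Fill a texel buffer view's descriptor from the buffer's bound memory,
 * offset and range.
 */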
void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
                           const VkBufferViewCreateInfo* pCreateInfo)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

    view->bo = buffer->bo;
    view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
        buffer->size - pCreateInfo->offset : pCreateInfo->range;
    view->vk_format = pCreateInfo->format;

    radv_make_buffer_descriptor(device, buffer, view->vk_format,
                                pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_buffer_view *view;

    view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!view)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &view->base,
                        VK_OBJECT_TYPE_BUFFER_VIEW);

    radv_buffer_view_init(view, device, pCreateInfo);

    *pView = radv_buffer_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

    if (!view)
        return;

    vk_object_base_finish(&view->base);
    vk_free2(&device->vk.alloc, pAllocator, view);
}