[mesa.git] src/amd/vulkan/radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"

#include "gfx10_format_table.h"

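/* Select the surface tiling mode (linear vs. 1D/2D tiled) for a new image
 * from the create info and the chip generation.
 */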
static unsigned
radv_choose_tiling(struct radv_device *device,
                   const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   if (!vk_format_is_compressed(format) &&
       !vk_format_is_depth_or_stencil(format) &&
       device->physical_device->rad_info.chip_class <= GFX8) {
      /* This causes hangs in some VK CTS tests on GFX9. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   return RADEON_SURF_MODE_2D;
}

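/* HTILE is the depth/stencil metadata; "TC-compatible" HTILE is a layout the
 * texture units can decode directly, which lets depth surfaces be sampled
 * without a decompression pass. The checks below reject the cases where the
 * hardware cannot guarantee that.
 */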
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
                                   const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (pCreateInfo->mipLevels > 1)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                               VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* FIXME: TC-compat with 2/4/8 samples breaks some CTS tests for an
    * unknown reason; disable it for now. On GFX10, D32_SFLOAT is affected
    * as well.
    */
   if (pCreateInfo->samples >= 2 &&
       (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
        (format == VK_FORMAT_D32_SFLOAT &&
         device->physical_device->rad_info.chip_class >= GFX10)))
      return false;

   /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
    * supports 32-bit. Though, it's possible to enable TC-compat for
    * 16-bit depth surfaces if no Z planes are compressed.
    */
   if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
       format != VK_FORMAT_D32_SFLOAT &&
       format != VK_FORMAT_D16_UNORM)
      return false;

   if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)
            vk_find_struct_const(pCreateInfo->pNext,
                                 IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0. */
      if (format_list && format_list->viewFormatCount) {
         /* Compatibility is transitive, so we only need to check
          * one format with everything else.
          */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (format != format_list->pViewFormats[i])
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   if (info->bo_metadata) {
      if (device->physical_device->rad_info.chip_class >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

static bool
radv_image_use_fast_clear_for_image(const struct radv_image *image)
{
   if (image->info.samples <= 1 &&
       image->info.width * image->info.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
          (image->exclusive || image->queue_family_mask == 1);
}

static bool
radv_use_dcc_for_image(struct radv_device *device,
                       const struct radv_image *image,
                       const VkImageCreateInfo *pCreateInfo,
                       VkFormat format)
{
   bool dcc_compatible_formats;
   bool blendable;

   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   if (image->shareable)
      return false;

   /* TODO: Enable DCC for storage images. */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (vk_format_is_subsampled(format) ||
       vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image(image))
      return false;

   /* TODO: Enable DCC for mipmaps on GFX9+. */
   if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
       device->physical_device->rad_info.chip_class >= GFX9)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
    * 2x can be enabled with an option.
    */
   if (pCreateInfo->samples > 2 ||
       (pCreateInfo->samples == 2 &&
        !device->physical_device->dcc_msaa_allowed))
      return false;

   /* Determine if the formats are DCC compatible. */
   dcc_compatible_formats =
      radv_is_colorbuffer_format_supported(format, &blendable);

   if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)
            vk_find_struct_const(pCreateInfo->pNext,
                                 IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0. */
      if (format_list && format_list->viewFormatCount) {
         /* Compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(format,
                                             format_list->pViewFormats[i]))
               dcc_compatible_formats = false;
         }
      } else {
         dcc_compatible_formats = false;
      }
   }

   if (!dcc_compatible_formats)
      return false;

   return true;
}

static inline bool
radv_use_fmask_for_image(const struct radv_image *image)
{
   return image->info.samples > 1 &&
          image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}

static inline bool
radv_use_htile_for_image(const struct radv_image *image)
{
   return image->info.levels == 1 &&
          image->info.width * image->info.height >= 8 * 8;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
                                   struct radv_image *image)
{
   if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
      return false;

   /* TC-compat CMASK is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
      return false;

   if (radv_image_has_dcc(image))
      return false;

   if (!radv_image_has_cmask(image))
      return false;

   return true;
}

static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device,
                              const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 ||
       md->metadata[1] != si_get_bo_metadata_word1(device))
      return false;

   if (md->size_metadata < 40)
      return false;

   return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device,
                                 struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device,
                            struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images, which is going to result in
    * weird strides and other properties. Let's be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      if (device->physical_device->rad_info.chip_class >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) +
                 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }

   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device,
                                 struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                          create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image->info.surf_index = NULL;
      }
   }

   return VK_SUCCESS;
}

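/* Translate the create info for one plane of an image into the radeon_surf
 * flags (mode, type, z/s buffer, metadata enables) consumed by the common
 * surface layout code.
 */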
static int
radv_init_surface(struct radv_device *device,
                  const struct radv_image *image,
                  struct radeon_surf *surface,
                  unsigned plane_id,
                  const VkImageCreateInfo *pCreateInfo,
                  VkFormat image_format)
{
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = vk_format_get_plane_format(image_format, plane_id);
   const struct vk_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = vk_format_has_depth(desc);
   is_stencil = vk_format_has_stencil(desc);

   surface->blk_w = vk_format_get_blockwidth(format);
   surface->blk_h = vk_format_get_blockheight(format);

   surface->bpe = vk_format_get_blocksize(vk_format_depth_only(format));
   /* Align the bytes per element to a dword. */
   if (surface->bpe == 3) {
      surface->bpe = 4;
   }

   surface->flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   surface->flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      surface->flags |= RADEON_SURF_ZBUFFER;
      if (!radv_use_htile_for_image(image) ||
          (device->instance->debug_flags & RADV_DEBUG_NO_HIZ))
         surface->flags |= RADEON_SURF_NO_HTILE;
      if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
         surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
   }

   if (is_stencil)
      surface->flags |= RADEON_SURF_SBUFFER;

   if (device->physical_device->rad_info.chip_class >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 &&
       vk_format_is_compressed(image_format))
      surface->flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
      surface->flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(image))
      surface->flags |= RADEON_SURF_NO_FMASK;

   return 0;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
   switch (swizzle) {
   case VK_SWIZZLE_Y:
      return V_008F0C_SQ_SEL_Y;
   case VK_SWIZZLE_Z:
      return V_008F0C_SQ_SEL_Z;
   case VK_SWIZZLE_W:
      return V_008F0C_SQ_SEL_W;
   case VK_SWIZZLE_0:
      return V_008F0C_SQ_SEL_0;
   case VK_SWIZZLE_1:
      return V_008F0C_SQ_SEL_1;
   default: /* VK_SWIZZLE_X */
      return V_008F0C_SQ_SEL_X;
   }
}

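/* Build a 4-dword buffer descriptor: dwords 0-1 hold the 48-bit base address
 * and the stride, dword 2 the number of records, and dword 3 the destination
 * swizzle plus the format bits, which are encoded differently on GFX10+.
 */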
static void
radv_make_buffer_descriptor(struct radv_device *device,
                            struct radv_buffer *buffer,
                            VkFormat vk_format,
                            unsigned offset,
                            unsigned range,
                            uint32_t *state)
{
   const struct vk_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;

   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   stride = desc->block.bits / 8;

   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
              S_008F04_STRIDE(stride);

   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) |
                  S_008F0C_DATA_FORMAT(data_format);
   }
}

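/* Patch the descriptor fields that depend on where the image is bound right
 * now: base address, tiling information and the metadata (DCC or HTILE)
 * address used for compressed shader reads.
 */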
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
                               struct radv_image *image,
                               const struct legacy_surf_level *base_level_info,
                               unsigned plane_id,
                               unsigned base_level, unsigned first_level,
                               unsigned block_width, bool is_stencil,
                               bool is_storage_image, bool disable_compression,
                               uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address + plane->offset;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;

   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else {
      va += base_level_info->offset;
   }

   state[0] = va >> 8;
   if (chip_class >= GFX9 ||
       base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.dcc_offset;
         if (chip_class <= GFX8)
            meta_va += base_level_info->dcc_offset;

         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression &&
                 radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.htile_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO &
                  C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (plane->surface.dcc_offset)
            meta = plane->surface.u.gfx9.dcc;

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
      }

      state[5] &= C_008F24_META_DATA_ADDRESS &
                  C_008F24_META_PIPE_ALIGNED &
                  C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (plane->surface.dcc_offset)
            meta = plane->surface.u.gfx9.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}

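/* Map a Vulkan image/view type pair to the hardware resource type. Cube views
 * are treated as 2D arrays when used as storage images, since image
 * load/store does not go through the cube addressing path.
 */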
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
                             unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

   /* GFX9 allocates 1D textures as 2D. */
   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
      image_type = VK_IMAGE_TYPE_2D;

   switch (image_type) {
   case VK_IMAGE_TYPE_1D:
      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
   case VK_IMAGE_TYPE_2D:
      if (nr_samples > 1)
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
      else
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
   case VK_IMAGE_TYPE_3D:
      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
         return V_008F1C_SQ_RSRC_IMG_3D;
      else
         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   default:
      unreachable("illegal image type");
   }
}

static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

   if (swizzle[3] == VK_SWIZZLE_X) {
      /* For the pre-defined border color values (white, opaque
       * black, transparent black), the only thing that matters is
       * that the alpha channel winds up in the correct place
       * (because the RGB channels are all the same) so either of
       * these enumerations will work.
       */
      if (swizzle[2] == VK_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   } else if (swizzle[0] == VK_SWIZZLE_X) {
      if (swizzle[1] == VK_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   } else if (swizzle[1] == VK_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   } else if (swizzle[2] == VK_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   }

   return bc_swizzle;
}

bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
   const struct vk_format_description *desc = vk_format_description(format);

   if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == VK_SWIZZLE_X;

   return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
                              struct radv_image *image,
                              bool is_storage_image,
                              VkImageViewType view_type,
                              VkFormat vk_format,
                              const VkComponentMapping *mapping,
                              unsigned first_level, unsigned last_level,
                              unsigned first_layer, unsigned last_layer,
                              unsigned width, unsigned height, unsigned depth,
                              uint32_t *state,
                              uint32_t *fmask_state)
{
   const struct vk_format_description *desc;
   enum vk_swizzle swizzle[4];
   unsigned img_format;
   unsigned type;

   desc = vk_format_description(vk_format);
   img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

   if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }

   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
              type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) {
      depth = image->info.array_size / 6;
   }

   state[0] = 0;
   state[1] = S_00A004_FORMAT(img_format) |
              S_00A004_WIDTH_LO(width - 1);
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
              S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(1);
   state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
              S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
                                  0 : first_level) |
              S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
                                  util_logbase2(image->info.samples) :
                                  last_level) |
              S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
              S_00A00C_TYPE(type);

   if (type == V_008F1C_SQ_RSRC_IMG_1D ||
       type == V_008F1C_SQ_RSRC_IMG_2D ||
       type == V_008F1C_SQ_RSRC_IMG_2D_MSAA) {
      /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader
       * resources starting with gfx10.3 (ignored if pitch <=
       * width). Other texture targets can't. CB and DB can't set a
       * custom pitch for any target.
       */
      if (device->physical_device->rad_info.chip_class >= GFX10_3)
         state[4] = S_00A010_DEPTH(image->planes[0].surface.u.gfx9.surf_pitch - 1);
      else
         state[4] = 0;
   } else {
      /* Depth is the last accessible layer on gfx9+. The hw doesn't need
       * to know the total number of layers.
       */
      state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
                 S_00A010_BASE_ARRAY(first_layer);
   }

   state[5] = S_00A014_ARRAY_PITCH(0) |
              S_00A014_MAX_MIP(image->info.samples > 1 ?
                               util_logbase2(image->info.samples) :
                               image->info.levels - 1) |
              S_00A014_PERF_MOD(4);
   state[6] = 0;
   state[7] = 0;

   if (radv_dcc_enabled(image, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   }

   /* Initialize the sampler view for FMASK. */
   if (radv_image_has_fmask(image)) {
      uint64_t gpu_address = radv_buffer_get_va(image->bo);
      uint32_t format;
      uint64_t va;

      assert(image->plane_count == 1);

      va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

      switch (image->info.samples) {
      case 2:
         format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
         break;
      case 4:
         format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
         break;
      case 8:
         format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
         break;
      default:
         unreachable("invalid nr_samples");
      }

      fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
      fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
                       S_00A004_FORMAT(format) |
                       S_00A004_WIDTH_LO(width - 1);
      fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
                       S_00A008_HEIGHT(height - 1) |
                       S_00A008_RESOURCE_LEVEL(1);
      fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
                       S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
                       S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
                       S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
                       S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
                       S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
      fmask_state[4] = S_00A010_DEPTH(last_layer) |
                       S_00A010_BASE_ARRAY(first_layer);
      fmask_state[5] = 0;
      fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
      fmask_state[7] = 0;
   } else if (fmask_state) {
      memset(fmask_state, 0, 8 * 4);
   }
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9).
 */
static void
si_make_texture_descriptor(struct radv_device *device,
                           struct radv_image *image,
                           bool is_storage_image,
                           VkImageViewType view_type,
                           VkFormat vk_format,
                           const VkComponentMapping *mapping,
                           unsigned first_level, unsigned last_level,
                           unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth,
                           uint32_t *state,
                           uint32_t *fmask_state)
{
   const struct vk_format_description *desc;
   enum vk_swizzle swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type;

   desc = vk_format_description(vk_format);

   if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }

   first_non_void = vk_format_get_first_non_void_channel(vk_format);

   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
   if (num_format == ~0) {
      num_format = 0;
   }

   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with either Z16 or Z32 HTILE needs a special format. */
   if (device->physical_device->rad_info.chip_class == GFX9 &&
       vk_format == VK_FORMAT_S8_UINT &&
       radv_image_is_tc_compat_htile(image)) {
      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
   }

   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
              type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) {
      depth = image->info.array_size / 6;
   }

   state[0] = 0;
   state[1] = (S_008F14_DATA_FORMAT(data_format) |
               S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) |
               S_008F18_HEIGHT(height - 1) |
               S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
                                   0 : first_level) |
               S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
                                   util_logbase2(image->info.samples) :
                                   last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (device->physical_device->rad_info.chip_class == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

      /* Depth is the last accessible layer on Gfx9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
                                   util_logbase2(image->info.samples) :
                                   image->info.levels - 1);
   } else {
      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }

   if (image->planes[0].surface.dcc_offset) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   } else {
      /* The last dword is unused by hw. The shader uses it to clear
       * bits in the first dword of sampler state.
       */
      if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
         if (first_level == last_level)
            state[7] = C_008F30_MAX_ANISO_RATIO;
         else
            state[7] = 0xffffffff;
      }
   }

   /* Initialize the sampler view for FMASK. */
   if (radv_image_has_fmask(image)) {
      uint32_t fmask_format, num_format;
      uint64_t gpu_address = radv_buffer_get_va(image->bo);
      uint64_t va;

      assert(image->plane_count == 1);

      va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

      if (device->physical_device->rad_info.chip_class == GFX9) {
         fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
         switch (image->info.samples) {
         case 2:
            num_format = V_008F14_IMG_FMASK_8_2_2;
            break;
         case 4:
            num_format = V_008F14_IMG_FMASK_8_4_4;
            break;
         case 8:
            num_format = V_008F14_IMG_FMASK_32_8_8;
            break;
         default:
            unreachable("invalid nr_samples");
         }
      } else {
         switch (image->info.samples) {
         case 2:
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
            break;
         case 4:
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
            break;
         case 8:
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
            break;
         default:
            assert(0);
            fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
         }
         num_format = V_008F14_IMG_NUM_FORMAT_UINT;
      }

      fmask_state[0] = va >> 8;
      fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
      fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
                       S_008F14_DATA_FORMAT(fmask_format) |
                       S_008F14_NUM_FORMAT(num_format);
      fmask_state[2] = S_008F18_WIDTH(width - 1) |
                       S_008F18_HEIGHT(height - 1);
      fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
                       S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
                       S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
                       S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
                       S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
      fmask_state[4] = 0;
      fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
      fmask_state[6] = 0;
      fmask_state[7] = 0;

      if (device->physical_device->rad_info.chip_class == GFX9) {
         fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
         fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                           S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
         fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
                           S_008F24_META_RB_ALIGNED(1);

         if (radv_image_is_tc_compat_cmask(image)) {
            va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

            fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
            fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
            fmask_state[7] |= va >> 8;
         }
      } else {
         fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
         fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
                           S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
         fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

         if (radv_image_is_tc_compat_cmask(image)) {
            va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

            fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
            fmask_state[7] |= va >> 8;
         }
      }
   } else if (fmask_state) {
      memset(fmask_state, 0, 8 * 4);
   }
}

static void
radv_make_texture_descriptor(struct radv_device *device,
                             struct radv_image *image,
                             bool is_storage_image,
                             VkImageViewType view_type,
                             VkFormat vk_format,
                             const VkComponentMapping *mapping,
                             unsigned first_level, unsigned last_level,
                             unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth,
                             uint32_t *state,
                             uint32_t *fmask_state)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image,
                                    view_type, vk_format, mapping,
                                    first_level, last_level,
                                    first_layer, last_layer,
                                    width, height, depth,
                                    state, fmask_state);
   } else {
      si_make_texture_descriptor(device, image, is_storage_image,
                                 view_type, vk_format, mapping,
                                 first_level, last_level,
                                 first_layer, last_layer,
                                 width, height, depth,
                                 state, fmask_state);
   }
}

static void
radv_query_opaque_metadata(struct radv_device *device,
                           struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8], i;

   assert(image->plane_count == 1);

   /* Metadata image format version 1:
    * [0] = 1 (metadata format identifier)
    * [1] = (VENDOR_ID << 16) | PCI_ID
    * [2:9] = image descriptor for the whole resource
    *         [2] is always 0, because the base address is cleared
    *         [9] is the DCC offset bits [39:8] from the beginning of
    *             the buffer
    * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
    */
   md->metadata[0] = 1; /* metadata image format version 1 */

   /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
   md->metadata[1] = si_get_bo_metadata_word1(device);

   radv_make_texture_descriptor(device, image, false,
                                (VkImageViewType)image->type, image->vk_format,
                                &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1,
                                image->info.width, image->info.height,
                                image->info.depth,
                                desc, NULL);

   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
                                  image->planes[0].surface.blk_w, false, false, false, desc);

   /* Clear the base address and set the relative DCC offset. */
   desc[0] = 0;
   desc[1] &= C_008F14_BASE_ADDRESS_HI;
   desc[7] = image->planes[0].surface.dcc_offset >> 8;

   /* Dwords [2:9] contain the image descriptor. */
   memcpy(&md->metadata[2], desc, sizeof(desc));

   /* Dwords [10:..] contain the mipmap level offsets. */
   if (device->physical_device->rad_info.chip_class <= GFX8) {
      for (i = 0; i <= image->info.levels - 1; i++)
         md->metadata[10 + i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
      md->size_metadata = (11 + image->info.levels - 1) * 4;
   } else {
      md->size_metadata = 10 * 4;
   }
}

void
radv_init_metadata(struct radv_device *device,
                   struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
                                     RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
                                     RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }

   radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device,
                                  struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                     &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}

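/* Append a CMASK buffer to single-sample color surfaces that qualify for fast
 * clears, placing it right after the main surface data.
 */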
static void
radv_image_alloc_single_sample_cmask(const struct radv_image *image,
                                     struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
       image->info.levels > 1 || image->info.depth > 1 ||
       !radv_image_use_fast_clear_for_image(image))
      return;

   surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
}

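/* Reserve space at the end of the image for the small driver-owned values
 * that accompany the metadata: FCE and DCC decompression predicates, the
 * per-level fast clear values, and the words used by the TC-compat zrange
 * workaround.
 */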
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   if (radv_image_has_dcc(image)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;

      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
       radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * has to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}

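/* Compute the layout (surface sizes, plane offsets, metadata) of an image
 * whose basic description has already been filled in. This runs at image
 * creation, or is delayed until bind time for Android hardware buffers.
 */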
VkResult
radv_image_create_layout(struct radv_device *device,
                         struct radv_image_create_info create_info,
                         struct radv_image *image)
{
   /* Check that we did not initialize things earlier. */
   assert(!image->planes[0].surface.surf_size);

   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   image->size = 0;
   image->alignment = 1;
   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      struct ac_surf_info info = image_info;

      if (plane) {
         const struct vk_format_description *desc = vk_format_description(image->vk_format);
         assert(info.width % desc->width_divisor == 0);
         assert(info.height % desc->height_divisor == 0);

         info.width /= desc->width_divisor;
         info.height /= desc->height_divisor;
      }

      if (create_info.no_metadata_planes || image->plane_count > 1) {
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
                                               RADEON_SURF_NO_FMASK |
                                               RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (!create_info.no_metadata_planes && image->plane_count == 1)
         radv_image_alloc_single_sample_cmask(image, &image->planes[plane].surface);

      image->planes[plane].offset = align(image->size, image->planes[plane].surface.alignment);
      image->size = image->planes[plane].offset + image->planes[plane].surface.total_size;
      image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);

      image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
   }

   image->tc_compatible_cmask = radv_image_has_cmask(image) &&
                                radv_use_tc_compat_cmask_for_image(device, image);

   image->tc_compatible_htile = radv_image_has_htile(image) &&
                                image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   return VK_SUCCESS;
}

static void
radv_destroy_image(struct radv_device *device,
                   const VkAllocationCallbacks *pAllocator,
                   struct radv_image *image)
{
   if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
      device->ws->buffer_destroy(image->bo);

   if (image->owned_memory != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   vk_object_base_finish(&image->base);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

VkResult
radv_image_create(VkDevice _device,
                  const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc,
                  VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
                                                         pCreateInfo->format);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   const unsigned plane_count = vk_format_get_plane_count(format);
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   image->plane_count = plane_count;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) {
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
      }
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext,
                           EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      radv_init_surface(device, image, &image->planes[plane].surface, plane, pCreateInfo, format);
   }

   bool delay_layout = external_info &&
      (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
   assert(result == VK_SUCCESS);

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
                                            0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
      if (!image->bo) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      }
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}

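/* Fill one plane's sampled-image or storage-image descriptor for a view,
 * first the immutable texture words and then the mutable address/tiling
 * fields.
 */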
static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
                                struct radv_device *device,
                                VkFormat vk_format,
                                const VkComponentMapping *components,
                                bool is_storage_image, bool disable_compression,
                                unsigned plane_id, unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);

   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(device, image, is_storage_image,
                                iview->type,
                                vk_format,
                                components,
                                hw_level, hw_level + iview->level_count - 1,
                                iview->base_layer,
                                iview->base_layer + iview->layer_count - 1,
                                iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
                                iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
                                iview->extent.depth,
                                descriptor->plane_descriptors[descriptor_plane_id],
                                descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

   const struct legacy_surf_level *base_level_info = NULL;
   if (device->physical_device->rad_info.chip_class <= GFX9) {
      if (is_stencil)
         base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
      else
         base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
   }
   si_set_mutable_tex_desc_fields(device, image,
                                  base_level_info,
                                  plane_id,
                                  iview->base_mip,
                                  iview->base_mip,
                                  blk_w, is_stencil, is_storage_image,
                                  is_storage_image || disable_compression,
                                  descriptor->plane_descriptors[descriptor_plane_id]);
}

static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return 2;
   default:
      return 0;
   }
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return image->planes[0].format;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return image->planes[1].format;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return image->planes[2].format;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_stencil_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return vk_format_depth_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_depth_only(image->vk_format);
   default:
      return image->vk_format;
   }
}
1558
1559 void
1560 radv_image_view_init(struct radv_image_view *iview,
1561 struct radv_device *device,
1562 const VkImageViewCreateInfo* pCreateInfo,
1563 const struct radv_image_view_extra_create_info* extra_create_info)
1564 {
1565 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1566 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1567
1568 switch (image->type) {
1569 case VK_IMAGE_TYPE_1D:
1570 case VK_IMAGE_TYPE_2D:
1571 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1572 break;
1573 case VK_IMAGE_TYPE_3D:
1574 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1575 <= radv_minify(image->info.depth, range->baseMipLevel));
1576 break;
1577 default:
1578 unreachable("bad VkImageType");
1579 }
    iview->image = image;
    iview->bo = image->bo;
    iview->type = pCreateInfo->viewType;
    iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
    iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
    iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;

    iview->vk_format = pCreateInfo->format;

    /* If the image has an Android external format, pCreateInfo->format
     * will be VK_FORMAT_UNDEFINED. */
    if (iview->vk_format == VK_FORMAT_UNDEFINED)
        iview->vk_format = image->vk_format;

    if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
        iview->vk_format = vk_format_stencil_only(iview->vk_format);
    } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
        iview->vk_format = vk_format_depth_only(iview->vk_format);
    }

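    /* GFX9+ descriptors are always programmed with the level-0 size and the
     * hardware minifies it per mip; older chips take the size of the base
     * level directly.
     */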
    if (device->physical_device->rad_info.chip_class >= GFX9) {
        iview->extent = (VkExtent3D) {
            .width = image->info.width,
            .height = image->info.height,
            .depth = image->info.depth,
        };
    } else {
        iview->extent = (VkExtent3D) {
            .width = radv_minify(image->info.width , range->baseMipLevel),
            .height = radv_minify(image->info.height, range->baseMipLevel),
            .depth = radv_minify(image->info.depth , range->baseMipLevel),
        };
    }

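    /* Example: a 22x22 BC1 image (4x4 blocks) viewed with an uncompressed
     * 1x1-block format covers 6x6 view texels, since
     * round_up_u32(22 * 1, 4) = 6.
     */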
    if (iview->vk_format != image->planes[iview->plane_id].format) {
        unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
        unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
        unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
        unsigned img_bh = vk_format_get_blockheight(image->vk_format);

        iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
        iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

        /* Comment ported from amdvlk -
         * If we have the following image:
         *          Uncompressed pixels   Compressed block sizes (4x4)
         *    mip0:     22 x 22                 6 x 6
         *    mip1:     11 x 11                 3 x 3
         *    mip2:      5 x  5                 2 x 2
         *    mip3:      2 x  2                 1 x 1
         *    mip4:      1 x  1                 1 x 1
         *
         * On GFX9 the descriptor is always programmed with the WIDTH and
         * HEIGHT of the base level and the HW is calculating the degradation
         * of the block sizes down the mip-chain as follows (straight-up
         * divide-by-two integer math):
         *    mip0:  6x6
         *    mip1:  3x3
         *    mip2:  1x1
         *    mip3:  1x1
         *
         * This means that mip2 will be missing texels.
         *
         * Fix this by calculating the base mip's width and height, then
         * convert that, and round it back up to get the level 0 size.
         * Clamp the converted size between the original values, and the next
         * power of two, which means we don't oversize the image.
         */
        if (device->physical_device->rad_info.chip_class >= GFX9 &&
            vk_format_is_compressed(image->vk_format) &&
            !vk_format_is_compressed(iview->vk_format)) {
            unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
        }
    }

    iview->base_layer = range->baseArrayLayer;
    iview->layer_count = radv_get_layerCount(image, range);
    iview->base_mip = range->baseMipLevel;
    iview->level_count = radv_get_levelCount(image, range);

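    /* Each plane gets two descriptors: a sampled-image variant and a
     * storage-image variant; the storage variant is always created with
     * metadata compression disabled.
     */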
    bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
    for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
        VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
        radv_image_view_make_descriptor(iview, device, format,
                                        &pCreateInfo->components,
                                        false, disable_compression,
                                        iview->plane_id + i, i);
        radv_image_view_make_descriptor(iview, device,
                                        format, &pCreateInfo->components,
                                        true, disable_compression,
                                        iview->plane_id + i, i);
    }
}

bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout,
                                     bool in_render_loop,
                                     unsigned queue_mask)
{
    if (radv_image_is_tc_compat_htile(image)) {
        if (layout == VK_IMAGE_LAYOUT_GENERAL &&
            !in_render_loop &&
            !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
            /* It should be safe to enable TC-compat HTILE with
             * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
             * loop and if the image doesn't have the storage bit
             * set. This improves performance for apps that use
             * GENERAL for the main depth pass because this allows
             * compression and this reduces the number of
             * decompressions from/to GENERAL.
             */
            return true;
        }

        return layout != VK_IMAGE_LAYOUT_GENERAL;
    }

    return radv_image_has_htile(image) &&
           (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
            layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
            layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
            (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
             queue_mask == (1u << RADV_QUEUE_GENERAL)));
}

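/* Fast clears are currently only allowed while the image is in
 * COLOR_ATTACHMENT_OPTIMAL; in_render_loop and queue_mask are unused here.
 */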
bool radv_layout_can_fast_clear(const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}

bool radv_layout_dcc_compressed(const struct radv_device *device,
                                const struct radv_image *image,
                                VkImageLayout layout,
                                bool in_render_loop,
                                unsigned queue_mask)
{
    /* Don't compress compute transfer dst, as image stores are not supported. */
    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
        (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
        return false;

    return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
}

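/* Compute the mask of queue families that may access an image. For an
 * exclusive image owned by VK_QUEUE_FAMILY_EXTERNAL or _FOREIGN_EXT we
 * can't know which family touched it last, so every family is assumed.
 */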
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
    if (!image->exclusive)
        return image->queue_family_mask;
    if (family == VK_QUEUE_FAMILY_EXTERNAL ||
        family == VK_QUEUE_FAMILY_FOREIGN_EXT)
        return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
    if (family == VK_QUEUE_FAMILY_IGNORED)
        return 1u << queue_family;
    return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device,
                 const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#ifdef ANDROID
    const VkNativeBufferANDROID *gralloc_info =
        vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

    if (gralloc_info)
        return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
                                       pAllocator, pImage);
#endif

    const struct wsi_image_create_info *wsi_info =
        vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
    bool scanout = wsi_info && wsi_info->scanout;

    return radv_image_create(device,
                             &(struct radv_image_create_info) {
                                 .vk_info = pCreateInfo,
                                 .scanout = scanout,
                             },
                             pAllocator,
                             pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
                  const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image, image, _image);

    if (!image)
        return;

    radv_destroy_image(device, pAllocator, image);
}

void radv_GetImageSubresourceLayout(
    VkDevice                                    _device,
    VkImage                                     _image,
    const VkImageSubresource*                   pSubresource,
    VkSubresourceLayout*                        pLayout)
{
    RADV_FROM_HANDLE(radv_image, image, _image);
    RADV_FROM_HANDLE(radv_device, device, _device);
    int level = pSubresource->mipLevel;
    int layer = pSubresource->arrayLayer;

    unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

    struct radv_image_plane *plane = &image->planes[plane_id];
    struct radeon_surf *surface = &plane->surface;

    if (device->physical_device->rad_info.chip_class >= GFX9) {
        uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

        pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
        if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SINT ||
            image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
            /* Adjust the number of bytes between each row because
             * the pitch is actually the number of components per
             * row.
             */
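            /* e.g. bpe is 12 for these 96-bit formats, so a surf_pitch
             * of 48 components (16 texels) gives a rowPitch of
             * 48 * 12 / 3 = 192 bytes.
             */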
            pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
        } else {
            uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

            assert(util_is_power_of_two_nonzero(surface->bpe));
            pLayout->rowPitch = pitch * surface->bpe;
        }

        pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
        pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
        pLayout->size = surface->u.gfx9.surf_slice_size;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    } else {
        pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
        pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
        pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
        if (image->type == VK_IMAGE_TYPE_3D)
            pLayout->size *= u_minify(image->info.depth, level);
    }
}

VkResult
radv_CreateImageView(VkDevice _device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkImageView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_image_view *view;

    view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (view == NULL)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &view->base,
                        VK_OBJECT_TYPE_IMAGE_VIEW);

    radv_image_view_init(view, device, pCreateInfo, NULL);

    *pView = radv_image_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
                      const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image_view, iview, _iview);

    if (!iview)
        return;

    vk_object_base_finish(&iview->base);
    vk_free2(&device->vk.alloc, pAllocator, iview);
}

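/* A VK_WHOLE_SIZE range resolves to the bytes remaining between the view's
 * offset and the end of the buffer.
 */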
void radv_buffer_view_init(struct radv_buffer_view *view,
                           struct radv_device *device,
                           const VkBufferViewCreateInfo* pCreateInfo)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

    view->bo = buffer->bo;
    view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
        buffer->size - pCreateInfo->offset : pCreateInfo->range;
    view->vk_format = pCreateInfo->format;

    radv_make_buffer_descriptor(device, buffer, view->vk_format,
                                pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
                      const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator,
                      VkBufferView *pView)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_buffer_view *view;

    view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (!view)
        return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

    vk_object_base_init(&device->vk, &view->base,
                        VK_OBJECT_TYPE_BUFFER_VIEW);

    radv_buffer_view_init(view, device, pCreateInfo);

    *pView = radv_buffer_view_to_handle(view);

    return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

    if (!view)
        return;

    vk_object_base_finish(&view->base);
    vk_free2(&device->vk.alloc, pAllocator, view);
}