radv: set BIG_PAGE to improve performance on GFX10.3
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "util/debug.h"
35 #include "util/u_atomic.h"
36 #include "vulkan/util/vk_format.h"
37
38 #include "gfx10_format_table.h"
39
40 static unsigned
41 radv_choose_tiling(struct radv_device *device,
42 const VkImageCreateInfo *pCreateInfo,
43 VkFormat format)
44 {
45 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
46 assert(pCreateInfo->samples <= 1);
47 return RADEON_SURF_MODE_LINEAR_ALIGNED;
48 }
49
50 if (!vk_format_is_compressed(format) &&
51 !vk_format_is_depth_or_stencil(format)
52 && device->physical_device->rad_info.chip_class <= GFX8) {
53 /* this causes hangs in some VK CTS tests on GFX9. */
54 /* Textures with a very small height are recommended to be linear. */
55 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
56 /* Only very thin and long 2D textures should benefit from
57 * linear_aligned. */
58 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
59 return RADEON_SURF_MODE_LINEAR_ALIGNED;
60 }
61
62 /* MSAA resources must be 2D tiled. */
63 if (pCreateInfo->samples > 1)
64 return RADEON_SURF_MODE_2D;
65
66 return RADEON_SURF_MODE_2D;
67 }
68
69 static bool
70 radv_use_tc_compat_htile_for_image(struct radv_device *device,
71 const VkImageCreateInfo *pCreateInfo,
72 VkFormat format)
73 {
74 /* TC-compat HTILE is only available for GFX8+. */
75 if (device->physical_device->rad_info.chip_class < GFX8)
76 return false;
77
78 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
79 return false;
80
81 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
82 return false;
83
84 if (pCreateInfo->mipLevels > 1)
85 return false;
86
87 /* Do not enable TC-compatible HTILE if the image isn't readable by a
88 * shader because no texture fetches will happen.
89 */
90 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
91 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
92 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
93 return false;
94
95 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
96 * tests - disable for now. On GFX10 D32_SFLOAT is affected as well.
97 */
98 if (pCreateInfo->samples >= 2 &&
99 (format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
100 (format == VK_FORMAT_D32_SFLOAT &&
101 device->physical_device->rad_info.chip_class >= GFX10)))
102 return false;
103
104 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
105 * supports 32-bit. Though, it's possible to enable TC-compat for
106 * 16-bit depth surfaces if no Z planes are compressed.
107 */
108 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
109 format != VK_FORMAT_D32_SFLOAT &&
110 format != VK_FORMAT_D16_UNORM)
111 return false;
112
113 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
114 const struct VkImageFormatListCreateInfo *format_list =
115 (const struct VkImageFormatListCreateInfo *)
116 vk_find_struct_const(pCreateInfo->pNext,
117 IMAGE_FORMAT_LIST_CREATE_INFO);
118
119 /* We have to ignore the existence of the list if viewFormatCount = 0 */
120 if (format_list && format_list->viewFormatCount) {
121 /* compatibility is transitive, so we only need to check
122 * one format with everything else.
123 */
124 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
125 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
126 continue;
127
128 if (format != format_list->pViewFormats[i])
129 return false;
130 }
131 } else {
132 return false;
133 }
134 }
135
136 return true;
137 }
138
139 static bool
140 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
141 {
142 if (info->bo_metadata) {
143 if (device->physical_device->rad_info.chip_class >= GFX9)
144 return info->bo_metadata->u.gfx9.scanout;
145 else
146 return info->bo_metadata->u.legacy.scanout;
147 }
148
149 return info->scanout;
150 }
151
152 static bool
153 radv_image_use_fast_clear_for_image(const struct radv_device *device,
154 const struct radv_image *image)
155 {
156 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
157 return true;
158
159 if (image->info.samples <= 1 &&
160 image->info.width * image->info.height <= 512 * 512) {
161 /* Do not enable CMASK or DCC for small surfaces where the cost
162 * of the eliminate pass can be higher than the benefit of fast
163 * clear. RadeonSI does this, but the image threshold is
164 * different.
165 */
166 return false;
167 }
168
169 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
170 (image->exclusive || image->queue_family_mask == 1);
171 }
172
173 static bool
174 radv_use_dcc_for_image(struct radv_device *device,
175 const struct radv_image *image,
176 const VkImageCreateInfo *pCreateInfo,
177 VkFormat format)
178 {
179 bool dcc_compatible_formats;
180 bool blendable;
181
182 /* DCC (Delta Color Compression) is only available for GFX8+. */
183 if (device->physical_device->rad_info.chip_class < GFX8)
184 return false;
185
186 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
187 return false;
188
189 if (image->shareable)
190 return false;
191
192 /* TODO: Enable DCC for storage images. */
193 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
194 return false;
195
196 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
197 return false;
198
199 if (vk_format_is_subsampled(format) ||
200 vk_format_get_plane_count(format) > 1)
201 return false;
202
203 if (!radv_image_use_fast_clear_for_image(device, image))
204 return false;
205
206 /* TODO: Enable DCC for mipmaps on GFX9+. */
207 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
208 device->physical_device->rad_info.chip_class >= GFX9)
209 return false;
210
211 /* Do not enable DCC for mipmapped arrays because performance is worse. */
212 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
213 return false;
214
215 /* FIXME: DCC for MSAA with 4x and 8x samples doesn't work yet, while
216 * 2x can be enabled with an option.
217 */
218 if (pCreateInfo->samples > 2 ||
219 (pCreateInfo->samples == 2 &&
220 !device->physical_device->dcc_msaa_allowed))
221 return false;
222
223 /* Determine if the formats are DCC compatible. */
224 dcc_compatible_formats =
225 radv_is_colorbuffer_format_supported(format,
226 &blendable);
227
228 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
229 const struct VkImageFormatListCreateInfo *format_list =
230 (const struct VkImageFormatListCreateInfo *)
231 vk_find_struct_const(pCreateInfo->pNext,
232 IMAGE_FORMAT_LIST_CREATE_INFO);
233
234 /* We have to ignore the existence of the list if viewFormatCount = 0 */
235 if (format_list && format_list->viewFormatCount) {
236 /* compatibility is transitive, so we only need to check
237 * one format with everything else. */
238 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
239 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
240 continue;
241
242 if (!radv_dcc_formats_compatible(format,
243 format_list->pViewFormats[i]))
244 dcc_compatible_formats = false;
245 }
246 } else {
247 dcc_compatible_formats = false;
248 }
249 }
250
251 if (!dcc_compatible_formats)
252 return false;
253
254 return true;
255 }
256
257 static inline bool
258 radv_use_fmask_for_image(const struct radv_device *device,
259 const struct radv_image *image)
260 {
261 return image->info.samples > 1 &&
262 ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
263 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
264 }
265
266 static inline bool
267 radv_use_htile_for_image(const struct radv_device *device,
268 const struct radv_image *image)
269 {
270 return image->info.levels == 1 &&
271 ((image->info.width * image->info.height >= 8 * 8) ||
272 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
273 }
274
275 static bool
276 radv_use_tc_compat_cmask_for_image(struct radv_device *device,
277 struct radv_image *image)
278 {
279 if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
280 return false;
281
282 /* TC-compat CMASK is only available for GFX8+. */
283 if (device->physical_device->rad_info.chip_class < GFX8)
284 return false;
285
286 if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
287 return false;
288
289 if (radv_image_has_dcc(image))
290 return false;
291
292 if (!radv_image_has_cmask(image))
293 return false;
294
295 return true;
296 }
297
298 static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
299 {
300 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
301 }
302
303 static bool
304 radv_is_valid_opaque_metadata(const struct radv_device *device,
305 const struct radeon_bo_metadata *md)
306 {
307 if (md->metadata[0] != 1 ||
308 md->metadata[1] != si_get_bo_metadata_word1(device))
309 return false;
310
311 if (md->size_metadata < 40)
312 return false;
313
314 return true;
315 }
316
317 static void
318 radv_patch_surface_from_metadata(struct radv_device *device,
319 struct radeon_surf *surface,
320 const struct radeon_bo_metadata *md)
321 {
322 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
323
324 if (device->physical_device->rad_info.chip_class >= GFX9) {
325 if (md->u.gfx9.swizzle_mode > 0)
326 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
327 else
328 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
329
330 surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
331 } else {
332 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
333 surface->u.legacy.bankw = md->u.legacy.bankw;
334 surface->u.legacy.bankh = md->u.legacy.bankh;
335 surface->u.legacy.tile_split = md->u.legacy.tile_split;
336 surface->u.legacy.mtilea = md->u.legacy.mtilea;
337 surface->u.legacy.num_banks = md->u.legacy.num_banks;
338
339 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
340 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
341 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
342 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
343 else
344 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
345
346 }
347 }
348
349 static VkResult
350 radv_patch_image_dimensions(struct radv_device *device,
351 struct radv_image *image,
352 const struct radv_image_create_info *create_info,
353 struct ac_surf_info *image_info)
354 {
355 unsigned width = image->info.width;
356 unsigned height = image->info.height;
357
358 /*
359 * minigbm sometimes allocates bigger images which is going to result in
360 * weird strides and other properties. Lets be lenient where possible and
361 * fail it on GFX10 (as we cannot cope there).
362 *
363 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
364 */
365 if (create_info->bo_metadata &&
366 radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
367 const struct radeon_bo_metadata *md = create_info->bo_metadata;
368
369 if (device->physical_device->rad_info.chip_class >= GFX10) {
370 width = G_00A004_WIDTH_LO(md->metadata[3]) +
371 (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
372 height = G_00A008_HEIGHT(md->metadata[4]) + 1;
373 } else {
374 width = G_008F18_WIDTH(md->metadata[4]) + 1;
375 height = G_008F18_HEIGHT(md->metadata[4]) + 1;
376 }
377 }
378
379 if (image->info.width == width && image->info.height == height)
380 return VK_SUCCESS;
381
382 if (width < image->info.width || height < image->info.height) {
383 fprintf(stderr,
384 "The imported image has smaller dimensions than the internal\n"
385 "dimensions. Using it is going to fail badly, so we reject\n"
386 "this import.\n"
387 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
388 image->info.width, image->info.height, width, height);
389 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
390 } else if (device->physical_device->rad_info.chip_class >= GFX10) {
391 fprintf(stderr,
392 "Tried to import an image with inconsistent width on GFX10.\n"
393 "As GFX10 has no separate stride fields we cannot cope with\n"
394 "an inconsistency in width and will fail this import.\n"
395 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
396 image->info.width, image->info.height, width, height);
397 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
398 } else {
399 fprintf(stderr,
400 "Tried to import an image with inconsistent width on pre-GFX10.\n"
401 "As GFX10 has no separate stride fields we cannot cope with\n"
402 "an inconsistency and would fail on GFX10.\n"
403 "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
404 image->info.width, image->info.height, width, height);
405 }
406 image_info->width = width;
407 image_info->height = height;
408
409 return VK_SUCCESS;
410 }
411
412 static VkResult
413 radv_patch_image_from_extra_info(struct radv_device *device,
414 struct radv_image *image,
415 const struct radv_image_create_info *create_info,
416 struct ac_surf_info *image_info)
417 {
418 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
419 if (result != VK_SUCCESS)
420 return result;
421
422 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
423 if (create_info->bo_metadata) {
424 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
425 create_info->bo_metadata);
426 }
427
428 if (radv_surface_has_scanout(device, create_info)) {
429 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
430 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
431
432 image->info.surf_index = NULL;
433 }
434 }
435 return VK_SUCCESS;
436 }
437
438 static uint32_t
439 radv_get_surface_flags(struct radv_device *device,
440 const struct radv_image *image,
441 unsigned plane_id,
442 const VkImageCreateInfo *pCreateInfo,
443 VkFormat image_format)
444 {
445 uint32_t flags;
446 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
447 VkFormat format = vk_format_get_plane_format(image_format, plane_id);
448 const struct vk_format_description *desc = vk_format_description(format);
449 bool is_depth, is_stencil;
450
451 is_depth = vk_format_has_depth(desc);
452 is_stencil = vk_format_has_stencil(desc);
453
454
455 flags = RADEON_SURF_SET(array_mode, MODE);
456
457 switch (pCreateInfo->imageType){
458 case VK_IMAGE_TYPE_1D:
459 if (pCreateInfo->arrayLayers > 1)
460 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
461 else
462 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
463 break;
464 case VK_IMAGE_TYPE_2D:
465 if (pCreateInfo->arrayLayers > 1)
466 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
467 else
468 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
469 break;
470 case VK_IMAGE_TYPE_3D:
471 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
472 break;
473 default:
474 unreachable("unhandled image type");
475 }
476
477 /* Required for clearing/initializing a specific layer on GFX8. */
478 flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
479
480 if (is_depth) {
481 flags |= RADEON_SURF_ZBUFFER;
482 if (!radv_use_htile_for_image(device, image) ||
483 (device->instance->debug_flags & RADV_DEBUG_NO_HIZ))
484 flags |= RADEON_SURF_NO_HTILE;
485 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
486 flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
487 }
488
489 if (is_stencil)
490 flags |= RADEON_SURF_SBUFFER;
491
492 if (device->physical_device->rad_info.chip_class >= GFX9 &&
493 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
494 vk_format_get_blocksizebits(image_format) == 128 &&
495 vk_format_is_compressed(image_format))
496 flags |= RADEON_SURF_NO_RENDER_TARGET;
497
498 if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
499 flags |= RADEON_SURF_DISABLE_DCC;
500
501 if (!radv_use_fmask_for_image(device, image))
502 flags |= RADEON_SURF_NO_FMASK;
503
504 return flags;
505 }
506
507 static inline unsigned
508 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
509 {
510 if (stencil)
511 return plane->surface.u.legacy.stencil_tiling_index[level];
512 else
513 return plane->surface.u.legacy.tiling_index[level];
514 }
515
516 static unsigned radv_map_swizzle(unsigned swizzle)
517 {
518 switch (swizzle) {
519 case VK_SWIZZLE_Y:
520 return V_008F0C_SQ_SEL_Y;
521 case VK_SWIZZLE_Z:
522 return V_008F0C_SQ_SEL_Z;
523 case VK_SWIZZLE_W:
524 return V_008F0C_SQ_SEL_W;
525 case VK_SWIZZLE_0:
526 return V_008F0C_SQ_SEL_0;
527 case VK_SWIZZLE_1:
528 return V_008F0C_SQ_SEL_1;
529 default: /* VK_SWIZZLE_X */
530 return V_008F0C_SQ_SEL_X;
531 }
532 }
533
534 static void
535 radv_make_buffer_descriptor(struct radv_device *device,
536 struct radv_buffer *buffer,
537 VkFormat vk_format,
538 unsigned offset,
539 unsigned range,
540 uint32_t *state)
541 {
542 const struct vk_format_description *desc;
543 unsigned stride;
544 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
545 uint64_t va = gpu_address + buffer->offset;
546 unsigned num_format, data_format;
547 int first_non_void;
548 desc = vk_format_description(vk_format);
549 first_non_void = vk_format_get_first_non_void_channel(vk_format);
550 stride = desc->block.bits / 8;
551
552 va += offset;
553 state[0] = va;
554 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
555 S_008F04_STRIDE(stride);
556
557 if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
558 range /= stride;
559 }
560
561 state[2] = range;
562 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
563 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
564 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
565 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3]));
566
567 if (device->physical_device->rad_info.chip_class >= GFX10) {
568 const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
569
570 /* OOB_SELECT chooses the out-of-bounds check:
571 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
572 * - 1: index >= NUM_RECORDS
573 * - 2: NUM_RECORDS == 0
574 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
575 * else: swizzle_address >= NUM_RECORDS
576 */
577 state[3] |= S_008F0C_FORMAT(fmt->img_format) |
578 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
579 S_008F0C_RESOURCE_LEVEL(1);
580 } else {
581 num_format = radv_translate_buffer_numformat(desc, first_non_void);
582 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
583
584 assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
585 assert(num_format != ~0);
586
587 state[3] |= S_008F0C_NUM_FORMAT(num_format) |
588 S_008F0C_DATA_FORMAT(data_format);
589 }
590 }
591
592 static void
593 si_set_mutable_tex_desc_fields(struct radv_device *device,
594 struct radv_image *image,
595 const struct legacy_surf_level *base_level_info,
596 unsigned plane_id,
597 unsigned base_level, unsigned first_level,
598 unsigned block_width, bool is_stencil,
599 bool is_storage_image, bool disable_compression,
600 uint32_t *state)
601 {
602 struct radv_image_plane *plane = &image->planes[plane_id];
603 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
604 uint64_t va = gpu_address + plane->offset;
605 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
606 uint64_t meta_va = 0;
607 if (chip_class >= GFX9) {
608 if (is_stencil)
609 va += plane->surface.u.gfx9.stencil_offset;
610 else
611 va += plane->surface.u.gfx9.surf_offset;
612 } else
613 va += base_level_info->offset;
614
615 state[0] = va >> 8;
616 if (chip_class >= GFX9 ||
617 base_level_info->mode == RADEON_SURF_MODE_2D)
618 state[0] |= plane->surface.tile_swizzle;
619 state[1] &= C_008F14_BASE_ADDRESS_HI;
620 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
621
622 if (chip_class >= GFX8) {
623 state[6] &= C_008F28_COMPRESSION_EN;
624 state[7] = 0;
625 if (!disable_compression && radv_dcc_enabled(image, first_level)) {
626 meta_va = gpu_address + plane->surface.dcc_offset;
627 if (chip_class <= GFX8)
628 meta_va += base_level_info->dcc_offset;
629
630 unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
631 dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
632 meta_va |= dcc_tile_swizzle;
633 } else if (!disable_compression &&
634 radv_image_is_tc_compat_htile(image)) {
635 meta_va = gpu_address + plane->surface.htile_offset;
636 }
637
638 if (meta_va) {
639 state[6] |= S_008F28_COMPRESSION_EN(1);
640 if (chip_class <= GFX9)
641 state[7] = meta_va >> 8;
642 }
643 }
644
645 if (chip_class >= GFX10) {
646 state[3] &= C_00A00C_SW_MODE;
647
648 if (is_stencil) {
649 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
650 } else {
651 state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
652 }
653
654 state[6] &= C_00A018_META_DATA_ADDRESS_LO &
655 C_00A018_META_PIPE_ALIGNED;
656
657 if (meta_va) {
658 struct gfx9_surf_meta_flags meta = {
659 .rb_aligned = 1,
660 .pipe_aligned = 1,
661 };
662
663 if (plane->surface.dcc_offset)
664 meta = plane->surface.u.gfx9.dcc;
665
666 state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
667 S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
668 }
669
670 state[7] = meta_va >> 16;
671 } else if (chip_class == GFX9) {
672 state[3] &= C_008F1C_SW_MODE;
673 state[4] &= C_008F20_PITCH;
674
675 if (is_stencil) {
676 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
677 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
678 } else {
679 state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
680 state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
681 }
682
683 state[5] &= C_008F24_META_DATA_ADDRESS &
684 C_008F24_META_PIPE_ALIGNED &
685 C_008F24_META_RB_ALIGNED;
686 if (meta_va) {
687 struct gfx9_surf_meta_flags meta = {
688 .rb_aligned = 1,
689 .pipe_aligned = 1,
690 };
691
692 if (plane->surface.dcc_offset)
693 meta = plane->surface.u.gfx9.dcc;
694
695 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
696 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
697 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
698 }
699 } else {
700 /* GFX6-GFX8 */
701 unsigned pitch = base_level_info->nblk_x * block_width;
702 unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
703
704 state[3] &= C_008F1C_TILING_INDEX;
705 state[3] |= S_008F1C_TILING_INDEX(index);
706 state[4] &= C_008F20_PITCH;
707 state[4] |= S_008F20_PITCH(pitch - 1);
708 }
709 }
710
711 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
712 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
713 {
714 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
715 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
716
717 /* GFX9 allocates 1D textures as 2D. */
718 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
719 image_type = VK_IMAGE_TYPE_2D;
720 switch (image_type) {
721 case VK_IMAGE_TYPE_1D:
722 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
723 case VK_IMAGE_TYPE_2D:
724 if (nr_samples > 1)
725 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
726 else
727 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
728 case VK_IMAGE_TYPE_3D:
729 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
730 return V_008F1C_SQ_RSRC_IMG_3D;
731 else
732 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
733 default:
734 unreachable("illegal image type");
735 }
736 }
737
738 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
739 {
740 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
741
742 if (swizzle[3] == VK_SWIZZLE_X) {
743 /* For the pre-defined border color values (white, opaque
744 * black, transparent black), the only thing that matters is
745 * that the alpha channel winds up in the correct place
746 * (because the RGB channels are all the same) so either of
747 * these enumerations will work.
748 */
749 if (swizzle[2] == VK_SWIZZLE_Y)
750 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
751 else
752 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
753 } else if (swizzle[0] == VK_SWIZZLE_X) {
754 if (swizzle[1] == VK_SWIZZLE_Y)
755 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
756 else
757 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
758 } else if (swizzle[1] == VK_SWIZZLE_X) {
759 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
760 } else if (swizzle[2] == VK_SWIZZLE_X) {
761 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
762 }
763
764 return bc_swizzle;
765 }
766
767 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
768 {
769 const struct vk_format_description *desc = vk_format_description(format);
770
771 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
772 return desc->swizzle[3] == VK_SWIZZLE_X;
773
774 return radv_translate_colorswap(format, false) <= 1;
775 }
776 /**
777 * Build the sampler view descriptor for a texture (GFX10).
778 */
779 static void
780 gfx10_make_texture_descriptor(struct radv_device *device,
781 struct radv_image *image,
782 bool is_storage_image,
783 VkImageViewType view_type,
784 VkFormat vk_format,
785 const VkComponentMapping *mapping,
786 unsigned first_level, unsigned last_level,
787 unsigned first_layer, unsigned last_layer,
788 unsigned width, unsigned height, unsigned depth,
789 uint32_t *state,
790 uint32_t *fmask_state)
791 {
792 const struct vk_format_description *desc;
793 enum vk_swizzle swizzle[4];
794 unsigned img_format;
795 unsigned type;
796
797 desc = vk_format_description(vk_format);
798 img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
799
800 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
801 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
802 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
803 } else {
804 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
805 }
806
807 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
808 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
809 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
810 height = 1;
811 depth = image->info.array_size;
812 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
813 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
814 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
815 depth = image->info.array_size;
816 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
817 depth = image->info.array_size / 6;
818
819 state[0] = 0;
820 state[1] = S_00A004_FORMAT(img_format) |
821 S_00A004_WIDTH_LO(width - 1);
822 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
823 S_00A008_HEIGHT(height - 1) |
824 S_00A008_RESOURCE_LEVEL(1);
825 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
826 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
827 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
828 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
829 S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
830 0 : first_level) |
831 S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
832 util_logbase2(image->info.samples) :
833 last_level) |
834 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
835 S_00A00C_TYPE(type);
836 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
837 * to know the total number of layers.
838 */
839 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
840 S_00A010_BASE_ARRAY(first_layer);
841 state[5] = S_00A014_ARRAY_PITCH(0) |
842 S_00A014_MAX_MIP(image->info.samples > 1 ?
843 util_logbase2(image->info.samples) :
844 image->info.levels - 1) |
845 S_00A014_PERF_MOD(4) |
846 S_00A014_BIG_PAGE(device->physical_device->rad_info.chip_class >= GFX10_3 &&
847 image->alignment % (64 * 1024) == 0);
848 state[6] = 0;
849 state[7] = 0;
850
851 if (radv_dcc_enabled(image, first_level)) {
852 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
853 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
854 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
855 }
856
857 /* Initialize the sampler view for FMASK. */
858 if (radv_image_has_fmask(image)) {
859 uint64_t gpu_address = radv_buffer_get_va(image->bo);
860 uint32_t format;
861 uint64_t va;
862
863 assert(image->plane_count == 1);
864
865 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
866
867 switch (image->info.samples) {
868 case 2:
869 format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
870 break;
871 case 4:
872 format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
873 break;
874 case 8:
875 format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
876 break;
877 default:
878 unreachable("invalid nr_samples");
879 }
880
881 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
882 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
883 S_00A004_FORMAT(format) |
884 S_00A004_WIDTH_LO(width - 1);
885 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
886 S_00A008_HEIGHT(height - 1) |
887 S_00A008_RESOURCE_LEVEL(1);
888 fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
889 S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
890 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
891 S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
892 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
893 S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
894 fmask_state[4] = S_00A010_DEPTH(last_layer) |
895 S_00A010_BASE_ARRAY(first_layer);
896 fmask_state[5] = 0;
897 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
898 fmask_state[7] = 0;
899 } else if (fmask_state)
900 memset(fmask_state, 0, 8 * 4);
901 }
902
903 /**
904 * Build the sampler view descriptor for a texture (SI-GFX9)
905 */
906 static void
907 si_make_texture_descriptor(struct radv_device *device,
908 struct radv_image *image,
909 bool is_storage_image,
910 VkImageViewType view_type,
911 VkFormat vk_format,
912 const VkComponentMapping *mapping,
913 unsigned first_level, unsigned last_level,
914 unsigned first_layer, unsigned last_layer,
915 unsigned width, unsigned height, unsigned depth,
916 uint32_t *state,
917 uint32_t *fmask_state)
918 {
919 const struct vk_format_description *desc;
920 enum vk_swizzle swizzle[4];
921 int first_non_void;
922 unsigned num_format, data_format, type;
923
924 desc = vk_format_description(vk_format);
925
926 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
927 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
928 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
929 } else {
930 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
931 }
932
933 first_non_void = vk_format_get_first_non_void_channel(vk_format);
934
935 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
936 if (num_format == ~0) {
937 num_format = 0;
938 }
939
940 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
941 if (data_format == ~0) {
942 data_format = 0;
943 }
944
945 /* S8 with either Z16 or Z32 HTILE need a special format. */
946 if (device->physical_device->rad_info.chip_class == GFX9 &&
947 vk_format == VK_FORMAT_S8_UINT &&
948 radv_image_is_tc_compat_htile(image)) {
949 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
950 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
951 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
952 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
953 }
954 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
955 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
956 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
957 height = 1;
958 depth = image->info.array_size;
959 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
960 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
961 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
962 depth = image->info.array_size;
963 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
964 depth = image->info.array_size / 6;
965
966 state[0] = 0;
967 state[1] = (S_008F14_DATA_FORMAT(data_format) |
968 S_008F14_NUM_FORMAT(num_format));
969 state[2] = (S_008F18_WIDTH(width - 1) |
970 S_008F18_HEIGHT(height - 1) |
971 S_008F18_PERF_MOD(4));
972 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
973 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
974 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
975 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
976 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
977 0 : first_level) |
978 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
979 util_logbase2(image->info.samples) :
980 last_level) |
981 S_008F1C_TYPE(type));
982 state[4] = 0;
983 state[5] = S_008F24_BASE_ARRAY(first_layer);
984 state[6] = 0;
985 state[7] = 0;
986
987 if (device->physical_device->rad_info.chip_class == GFX9) {
988 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
989
990 /* Depth is the last accessible layer on Gfx9.
991 * The hw doesn't need to know the total number of layers.
992 */
993 if (type == V_008F1C_SQ_RSRC_IMG_3D)
994 state[4] |= S_008F20_DEPTH(depth - 1);
995 else
996 state[4] |= S_008F20_DEPTH(last_layer);
997
998 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
999 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
1000 util_logbase2(image->info.samples) :
1001 image->info.levels - 1);
1002 } else {
1003 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1004 state[4] |= S_008F20_DEPTH(depth - 1);
1005 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1006 }
1007 if (image->planes[0].surface.dcc_offset) {
1008 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1009 } else {
1010 /* The last dword is unused by hw. The shader uses it to clear
1011 * bits in the first dword of sampler state.
1012 */
1013 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1014 if (first_level == last_level)
1015 state[7] = C_008F30_MAX_ANISO_RATIO;
1016 else
1017 state[7] = 0xffffffff;
1018 }
1019 }
1020
1021 /* Initialize the sampler view for FMASK. */
1022 if (radv_image_has_fmask(image)) {
1023 uint32_t fmask_format, num_format;
1024 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1025 uint64_t va;
1026
1027 assert(image->plane_count == 1);
1028
1029 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1030
1031 if (device->physical_device->rad_info.chip_class == GFX9) {
1032 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1033 switch (image->info.samples) {
1034 case 2:
1035 num_format = V_008F14_IMG_FMASK_8_2_2;
1036 break;
1037 case 4:
1038 num_format = V_008F14_IMG_FMASK_8_4_4;
1039 break;
1040 case 8:
1041 num_format = V_008F14_IMG_FMASK_32_8_8;
1042 break;
1043 default:
1044 unreachable("invalid nr_samples");
1045 }
1046 } else {
1047 switch (image->info.samples) {
1048 case 2:
1049 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1050 break;
1051 case 4:
1052 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1053 break;
1054 case 8:
1055 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1056 break;
1057 default:
1058 assert(0);
1059 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1060 }
1061 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1062 }
1063
1064 fmask_state[0] = va >> 8;
1065 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1066 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
1067 S_008F14_DATA_FORMAT(fmask_format) |
1068 S_008F14_NUM_FORMAT(num_format);
1069 fmask_state[2] = S_008F18_WIDTH(width - 1) |
1070 S_008F18_HEIGHT(height - 1);
1071 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
1072 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1073 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
1074 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1075 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1076 fmask_state[4] = 0;
1077 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1078 fmask_state[6] = 0;
1079 fmask_state[7] = 0;
1080
1081 if (device->physical_device->rad_info.chip_class == GFX9) {
1082 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
1083 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1084 S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
1085 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
1086 S_008F24_META_RB_ALIGNED(1);
1087
1088 if (radv_image_is_tc_compat_cmask(image)) {
1089 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1090
1091 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1092 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1093 fmask_state[7] |= va >> 8;
1094 }
1095 } else {
1096 fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
1097 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
1098 S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
1099 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1100
1101 if (radv_image_is_tc_compat_cmask(image)) {
1102 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1103
1104 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1105 fmask_state[7] |= va >> 8;
1106 }
1107 }
1108 } else if (fmask_state)
1109 memset(fmask_state, 0, 8 * 4);
1110 }
1111
1112 static void
1113 radv_make_texture_descriptor(struct radv_device *device,
1114 struct radv_image *image,
1115 bool is_storage_image,
1116 VkImageViewType view_type,
1117 VkFormat vk_format,
1118 const VkComponentMapping *mapping,
1119 unsigned first_level, unsigned last_level,
1120 unsigned first_layer, unsigned last_layer,
1121 unsigned width, unsigned height, unsigned depth,
1122 uint32_t *state,
1123 uint32_t *fmask_state)
1124 {
1125 if (device->physical_device->rad_info.chip_class >= GFX10) {
1126 gfx10_make_texture_descriptor(device, image, is_storage_image,
1127 view_type, vk_format, mapping,
1128 first_level, last_level,
1129 first_layer, last_layer,
1130 width, height, depth,
1131 state, fmask_state);
1132 } else {
1133 si_make_texture_descriptor(device, image, is_storage_image,
1134 view_type, vk_format, mapping,
1135 first_level, last_level,
1136 first_layer, last_layer,
1137 width, height, depth,
1138 state, fmask_state);
1139 }
1140 }
1141
1142 static void
1143 radv_query_opaque_metadata(struct radv_device *device,
1144 struct radv_image *image,
1145 struct radeon_bo_metadata *md)
1146 {
1147 static const VkComponentMapping fixedmapping;
1148 uint32_t desc[8], i;
1149
1150 assert(image->plane_count == 1);
1151
1152 /* Metadata image format format version 1:
1153 * [0] = 1 (metadata format identifier)
1154 * [1] = (VENDOR_ID << 16) | PCI_ID
1155 * [2:9] = image descriptor for the whole resource
1156 * [2] is always 0, because the base address is cleared
1157 * [9] is the DCC offset bits [39:8] from the beginning of
1158 * the buffer
1159 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
1160 */
1161 md->metadata[0] = 1; /* metadata image format version 1 */
1162
1163 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
1164 md->metadata[1] = si_get_bo_metadata_word1(device);
1165
1166
1167 radv_make_texture_descriptor(device, image, false,
1168 (VkImageViewType)image->type, image->vk_format,
1169 &fixedmapping, 0, image->info.levels - 1, 0,
1170 image->info.array_size - 1,
1171 image->info.width, image->info.height,
1172 image->info.depth,
1173 desc, NULL);
1174
1175 si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
1176 image->planes[0].surface.blk_w, false, false, false, desc);
1177
1178 /* Clear the base address and set the relative DCC offset. */
1179 desc[0] = 0;
1180 desc[1] &= C_008F14_BASE_ADDRESS_HI;
1181 desc[7] = image->planes[0].surface.dcc_offset >> 8;
1182
1183 /* Dwords [2:9] contain the image descriptor. */
1184 memcpy(&md->metadata[2], desc, sizeof(desc));
1185
1186 /* Dwords [10:..] contain the mipmap level offsets. */
1187 if (device->physical_device->rad_info.chip_class <= GFX8) {
1188 for (i = 0; i <= image->info.levels - 1; i++)
1189 md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
1190 md->size_metadata = (11 + image->info.levels - 1) * 4;
1191 } else
1192 md->size_metadata = 10 * 4;
1193 }
1194
1195 void
1196 radv_init_metadata(struct radv_device *device,
1197 struct radv_image *image,
1198 struct radeon_bo_metadata *metadata)
1199 {
1200 struct radeon_surf *surface = &image->planes[0].surface;
1201
1202 memset(metadata, 0, sizeof(*metadata));
1203
1204 if (device->physical_device->rad_info.chip_class >= GFX9) {
1205 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
1206 metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1207 } else {
1208 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
1209 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1210 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
1211 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
1212 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
1213 metadata->u.legacy.bankw = surface->u.legacy.bankw;
1214 metadata->u.legacy.bankh = surface->u.legacy.bankh;
1215 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
1216 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
1217 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
1218 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
1219 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
1220 }
1221 radv_query_opaque_metadata(device, image, metadata);
1222 }
1223
1224 void
1225 radv_image_override_offset_stride(struct radv_device *device,
1226 struct radv_image *image,
1227 uint64_t offset, uint32_t stride)
1228 {
1229 ac_surface_override_offset_stride(&device->physical_device->rad_info,
1230 &image->planes[0].surface,
1231 image->info.levels, offset, stride);
1232 }
1233
1234 static void
1235 radv_image_alloc_single_sample_cmask(const struct radv_device *device,
1236 const struct radv_image *image,
1237 struct radeon_surf *surf)
1238 {
1239 if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
1240 image->info.levels > 1 || image->info.depth > 1 ||
1241 radv_image_has_dcc(image) ||
1242 !radv_image_use_fast_clear_for_image(device, image))
1243 return;
1244
1245 assert(image->info.storage_samples == 1);
1246
1247 surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
1248 surf->total_size = surf->cmask_offset + surf->cmask_size;
1249 surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
1250 }
1251
1252 static void
1253 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
1254 {
1255 if (radv_image_has_dcc(image)) {
1256 image->fce_pred_offset = image->size;
1257 image->size += 8 * image->info.levels;
1258
1259 image->dcc_pred_offset = image->size;
1260 image->size += 8 * image->info.levels;
1261 }
1262
1263 if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
1264 radv_image_has_htile(image)) {
1265 image->clear_value_offset = image->size;
1266 image->size += 8 * image->info.levels;
1267 }
1268
1269 if (radv_image_is_tc_compat_htile(image) &&
1270 device->physical_device->rad_info.has_tc_compat_zrange_bug) {
1271 /* Metadata for the TC-compatible HTILE hardware bug which
1272 * have to be fixed by updating ZRANGE_PRECISION when doing
1273 * fast depth clears to 0.0f.
1274 */
1275 image->tc_compat_zrange_offset = image->size;
1276 image->size += image->info.levels * 4;
1277 }
1278 }
1279
1280
1281 static void
1282 radv_image_reset_layout(struct radv_image *image)
1283 {
1284 image->size = 0;
1285 image->alignment = 1;
1286
1287 image->tc_compatible_cmask = image->tc_compatible_htile = 0;
1288 image->fce_pred_offset = image->dcc_pred_offset = 0;
1289 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1290
1291 for (unsigned i = 0; i < image->plane_count; ++i) {
1292 VkFormat format = vk_format_get_plane_format(image->vk_format, i);
1293
1294 uint32_t flags = image->planes[i].surface.flags;
1295 memset(image->planes + i, 0, sizeof(image->planes[i]));
1296
1297 image->planes[i].surface.flags = flags;
1298 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1299 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1300 image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
1301
1302 /* align byte per element on dword */
1303 if (image->planes[i].surface.bpe == 3) {
1304 image->planes[i].surface.bpe = 4;
1305 }
1306 }
1307 }
1308
1309 VkResult
1310 radv_image_create_layout(struct radv_device *device,
1311 struct radv_image_create_info create_info,
1312 struct radv_image *image)
1313 {
1314 /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
1315 * common internal case. */
1316 create_info.vk_info = NULL;
1317
1318 struct ac_surf_info image_info = image->info;
1319 VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
1320 if (result != VK_SUCCESS)
1321 return result;
1322
1323 radv_image_reset_layout(image);
1324
1325 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1326 struct ac_surf_info info = image_info;
1327
1328 if (plane) {
1329 const struct vk_format_description *desc = vk_format_description(image->vk_format);
1330 assert(info.width % desc->width_divisor == 0);
1331 assert(info.height % desc->height_divisor == 0);
1332
1333 info.width /= desc->width_divisor;
1334 info.height /= desc->height_divisor;
1335 }
1336
1337 if (create_info.no_metadata_planes || image->plane_count > 1) {
1338 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
1339 RADEON_SURF_NO_FMASK |
1340 RADEON_SURF_NO_HTILE;
1341 }
1342
1343 device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
1344
1345 if (!create_info.no_metadata_planes && image->plane_count == 1)
1346 radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
1347
1348 image->planes[plane].offset = align(image->size, image->planes[plane].surface.alignment);
1349 image->size = image->planes[plane].offset + image->planes[plane].surface.total_size;
1350 image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);
1351
1352 image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
1353 }
1354
1355 image->tc_compatible_cmask = radv_image_has_cmask(image) &&
1356 radv_use_tc_compat_cmask_for_image(device, image);
1357
1358 image->tc_compatible_htile = radv_image_has_htile(image) &&
1359 image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
1360
1361 radv_image_alloc_values(device, image);
1362
1363 assert(image->planes[0].surface.surf_size);
1364 return VK_SUCCESS;
1365 }
1366
1367 static void
1368 radv_destroy_image(struct radv_device *device,
1369 const VkAllocationCallbacks *pAllocator,
1370 struct radv_image *image)
1371 {
1372 if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1373 device->ws->buffer_destroy(image->bo);
1374
1375 if (image->owned_memory != VK_NULL_HANDLE) {
1376 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1377 radv_free_memory(device, pAllocator, mem);
1378 }
1379
1380 vk_object_base_finish(&image->base);
1381 vk_free2(&device->vk.alloc, pAllocator, image);
1382 }
1383
1384 VkResult
1385 radv_image_create(VkDevice _device,
1386 const struct radv_image_create_info *create_info,
1387 const VkAllocationCallbacks* alloc,
1388 VkImage *pImage)
1389 {
1390 RADV_FROM_HANDLE(radv_device, device, _device);
1391 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1392 struct radv_image *image = NULL;
1393 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
1394 pCreateInfo->format);
1395 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1396
1397 const unsigned plane_count = vk_format_get_plane_count(format);
1398 const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1399
1400 radv_assert(pCreateInfo->mipLevels > 0);
1401 radv_assert(pCreateInfo->arrayLayers > 0);
1402 radv_assert(pCreateInfo->samples > 0);
1403 radv_assert(pCreateInfo->extent.width > 0);
1404 radv_assert(pCreateInfo->extent.height > 0);
1405 radv_assert(pCreateInfo->extent.depth > 0);
1406
1407 image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
1408 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1409 if (!image)
1410 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1411
1412 vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
1413
1414 image->type = pCreateInfo->imageType;
1415 image->info.width = pCreateInfo->extent.width;
1416 image->info.height = pCreateInfo->extent.height;
1417 image->info.depth = pCreateInfo->extent.depth;
1418 image->info.samples = pCreateInfo->samples;
1419 image->info.storage_samples = pCreateInfo->samples;
1420 image->info.array_size = pCreateInfo->arrayLayers;
1421 image->info.levels = pCreateInfo->mipLevels;
1422 image->info.num_channels = vk_format_get_nr_components(format);
1423
1424 image->vk_format = format;
1425 image->tiling = pCreateInfo->tiling;
1426 image->usage = pCreateInfo->usage;
1427 image->flags = pCreateInfo->flags;
1428 image->plane_count = plane_count;
1429
1430 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1431 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1432 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1433 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1434 pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1435 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1436 else
1437 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
1438 }
1439
1440 const VkExternalMemoryImageCreateInfo *external_info =
1441 vk_find_struct_const(pCreateInfo->pNext,
1442 EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
1443
1444 image->shareable = external_info;
1445 if (!vk_format_is_depth_or_stencil(format) && !image->shareable) {
1446 image->info.surf_index = &device->image_mrt_offset_counter;
1447 }
1448
1449 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
1450 image->planes[plane].surface.flags =
1451 radv_get_surface_flags(device, image, plane, pCreateInfo, format);
1452 }
1453
1454 bool delay_layout = external_info &&
1455 (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
1456
1457 if (delay_layout) {
1458 *pImage = radv_image_to_handle(image);
1459 assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1460 return VK_SUCCESS;
1461 }
1462
1463 ASSERTED VkResult result = radv_image_create_layout(device, *create_info, image);
1464 assert(result == VK_SUCCESS);
1465
1466 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1467 image->alignment = MAX2(image->alignment, 4096);
1468 image->size = align64(image->size, image->alignment);
1469 image->offset = 0;
1470
1471 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
1472 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
1473 if (!image->bo) {
1474 radv_destroy_image(device, alloc, image);
1475 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1476 }
1477 }
1478
1479 *pImage = radv_image_to_handle(image);
1480
1481 return VK_SUCCESS;
1482 }
1483
1484 static void
1485 radv_image_view_make_descriptor(struct radv_image_view *iview,
1486 struct radv_device *device,
1487 VkFormat vk_format,
1488 const VkComponentMapping *components,
1489 bool is_storage_image, bool disable_compression,
1490 unsigned plane_id, unsigned descriptor_plane_id)
1491 {
1492 struct radv_image *image = iview->image;
1493 struct radv_image_plane *plane = &image->planes[plane_id];
1494 const struct vk_format_description *format_desc = vk_format_description(image->vk_format);
1495 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1496 uint32_t blk_w;
1497 union radv_descriptor *descriptor;
1498 uint32_t hw_level = 0;
1499
1500 if (is_storage_image) {
1501 descriptor = &iview->storage_descriptor;
1502 } else {
1503 descriptor = &iview->descriptor;
1504 }
1505
1506 assert(vk_format_get_plane_count(vk_format) == 1);
1507 assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
1508 blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
1509
1510 if (device->physical_device->rad_info.chip_class >= GFX9)
1511 hw_level = iview->base_mip;
1512 radv_make_texture_descriptor(device, image, is_storage_image,
1513 iview->type,
1514 vk_format,
1515 components,
1516 hw_level, hw_level + iview->level_count - 1,
1517 iview->base_layer,
1518 iview->base_layer + iview->layer_count - 1,
1519 iview->extent.width / (plane_id ? format_desc->width_divisor : 1),
1520 iview->extent.height / (plane_id ? format_desc->height_divisor : 1),
1521 iview->extent.depth,
1522 descriptor->plane_descriptors[descriptor_plane_id],
1523 descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
1524
1525 const struct legacy_surf_level *base_level_info = NULL;
1526 if (device->physical_device->rad_info.chip_class <= GFX9) {
1527 if (is_stencil)
1528 base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
1529 else
1530 base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
1531 }
1532 si_set_mutable_tex_desc_fields(device, image,
1533 base_level_info,
1534 plane_id,
1535 iview->base_mip,
1536 iview->base_mip,
1537 blk_w, is_stencil, is_storage_image,
1538 is_storage_image || disable_compression,
1539 descriptor->plane_descriptors[descriptor_plane_id]);
1540 }
1541
1542 static unsigned
1543 radv_plane_from_aspect(VkImageAspectFlags mask)
1544 {
1545 switch(mask) {
1546 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1547 return 1;
1548 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1549 return 2;
1550 default:
1551 return 0;
1552 }
1553 }
1554
1555 VkFormat
1556 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1557 {
1558 switch(mask) {
1559 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1560 return image->planes[0].format;
1561 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1562 return image->planes[1].format;
1563 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1564 return image->planes[2].format;
1565 case VK_IMAGE_ASPECT_STENCIL_BIT:
1566 return vk_format_stencil_only(image->vk_format);
1567 case VK_IMAGE_ASPECT_DEPTH_BIT:
1568 return vk_format_depth_only(image->vk_format);
1569 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1570 return vk_format_depth_only(image->vk_format);
1571 default:
1572 return image->vk_format;
1573 }
1574 }
1575
1576 void
1577 radv_image_view_init(struct radv_image_view *iview,
1578 struct radv_device *device,
1579 const VkImageViewCreateInfo* pCreateInfo,
1580 const struct radv_image_view_extra_create_info* extra_create_info)
1581 {
1582 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1583 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1584
1585 switch (image->type) {
1586 case VK_IMAGE_TYPE_1D:
1587 case VK_IMAGE_TYPE_2D:
1588 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1589 break;
1590 case VK_IMAGE_TYPE_3D:
1591 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1592 <= radv_minify(image->info.depth, range->baseMipLevel));
1593 break;
1594 default:
1595 unreachable("bad VkImageType");
1596 }
1597 iview->image = image;
1598 iview->bo = image->bo;
1599 iview->type = pCreateInfo->viewType;
1600 iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
1601 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1602 iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
1603
1604 iview->vk_format = pCreateInfo->format;
1605
1606 /* If the image has an Android external format, pCreateInfo->format will be
1607 * VK_FORMAT_UNDEFINED. */
1608 if (iview->vk_format == VK_FORMAT_UNDEFINED)
1609 iview->vk_format = image->vk_format;
1610
1611 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1612 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1613 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1614 iview->vk_format = vk_format_depth_only(iview->vk_format);
1615 }
1616
1617 if (device->physical_device->rad_info.chip_class >= GFX9) {
1618 iview->extent = (VkExtent3D) {
1619 .width = image->info.width,
1620 .height = image->info.height,
1621 .depth = image->info.depth,
1622 };
1623 } else {
1624 iview->extent = (VkExtent3D) {
1625 .width = radv_minify(image->info.width , range->baseMipLevel),
1626 .height = radv_minify(image->info.height, range->baseMipLevel),
1627 .depth = radv_minify(image->info.depth , range->baseMipLevel),
1628 };
1629 }
1630
1631 if (iview->vk_format != image->planes[iview->plane_id].format) {
1632 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1633 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1634 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1635 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1636
1637 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
1638 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
1639
1640 /* Comment ported from amdvlk -
1641 * If we have the following image:
1642 * Uncompressed pixels Compressed block sizes (4x4)
1643 * mip0: 22 x 22 6 x 6
1644 * mip1: 11 x 11 3 x 3
1645 * mip2: 5 x 5 2 x 2
1646 * mip3: 2 x 2 1 x 1
1647 * mip4: 1 x 1 1 x 1
1648 *
1649 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1650 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1651 * divide-by-two integer math):
1652 * mip0: 6x6
1653 * mip1: 3x3
1654 * mip2: 1x1
1655 * mip3: 1x1
1656 *
1657 * This means that mip2 will be missing texels.
1658 *
1659 * Fix this by calculating the base mip's width and height, then convert that, and round it
1660 * back up to get the level 0 size.
1661 * Clamp the converted size between the original values, and next power of two, which
1662 * means we don't oversize the image.
1663 */
1664 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1665 vk_format_is_compressed(image->vk_format) &&
1666 !vk_format_is_compressed(iview->vk_format)) {
1667 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
1668 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
1669
1670 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
1671 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
1672
1673 lvl_width <<= range->baseMipLevel;
1674 lvl_height <<= range->baseMipLevel;
1675
1676 iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->planes[0].surface.u.gfx9.surf_pitch);
1677 iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->planes[0].surface.u.gfx9.surf_height);
1678 }
1679 }
1680
1681 iview->base_layer = range->baseArrayLayer;
1682 iview->layer_count = radv_get_layerCount(image, range);
1683 iview->base_mip = range->baseMipLevel;
1684 iview->level_count = radv_get_levelCount(image, range);
1685
1686 bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
1687 for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
1688 VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
1689 radv_image_view_make_descriptor(iview, device, format,
1690 &pCreateInfo->components,
1691 false, disable_compression,
1692 iview->plane_id + i, i);
1693 radv_image_view_make_descriptor(iview, device,
1694 format, &pCreateInfo->components,
1695 true, disable_compression,
1696 iview->plane_id + i, i);
1697 }
1698 }
1699
1700 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1701 VkImageLayout layout,
1702 bool in_render_loop,
1703 unsigned queue_mask)
1704 {
1705 if (radv_image_is_tc_compat_htile(image)) {
1706 if (layout == VK_IMAGE_LAYOUT_GENERAL &&
1707 !in_render_loop &&
1708 !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1709 /* It should be safe to enable TC-compat HTILE with
1710 * VK_IMAGE_LAYOUT_GENERAL if we are not in a render
1711 * loop and if the image doesn't have the storage bit
1712 * set. This improves performance for apps that use
1713 * GENERAL for the main depth pass because this allows
1714 * compression and this reduces the number of
1715 * decompressions from/to GENERAL.
1716 */
1717 return true;
1718 }
1719
1720 return layout != VK_IMAGE_LAYOUT_GENERAL;
1721 }
1722
1723 return radv_image_has_htile(image) &&
1724 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1725 layout == VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR ||
1726 layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR ||
1727 (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1728 queue_mask == (1u << RADV_QUEUE_GENERAL)));
1729 }
1730
1731 bool radv_layout_can_fast_clear(const struct radv_image *image,
1732 VkImageLayout layout,
1733 bool in_render_loop,
1734 unsigned queue_mask)
1735 {
1736 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
1737 queue_mask == (1u << RADV_QUEUE_GENERAL);
1738 }
1739
1740 bool radv_layout_dcc_compressed(const struct radv_device *device,
1741 const struct radv_image *image,
1742 VkImageLayout layout,
1743 bool in_render_loop,
1744 unsigned queue_mask)
1745 {
1746 /* Don't compress compute transfer dst, as image stores are not supported. */
1747 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1748 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1749 return false;
1750
1751 return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
1752 }
1753
1754
1755 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1756 {
1757 if (!image->exclusive)
1758 return image->queue_family_mask;
1759 if (family == VK_QUEUE_FAMILY_EXTERNAL ||
1760 family == VK_QUEUE_FAMILY_FOREIGN_EXT)
1761 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1762 if (family == VK_QUEUE_FAMILY_IGNORED)
1763 return 1u << queue_family;
1764 return 1u << family;
1765 }
1766
1767 VkResult
1768 radv_CreateImage(VkDevice device,
1769 const VkImageCreateInfo *pCreateInfo,
1770 const VkAllocationCallbacks *pAllocator,
1771 VkImage *pImage)
1772 {
1773 #ifdef ANDROID
1774 const VkNativeBufferANDROID *gralloc_info =
1775 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1776
1777 if (gralloc_info)
1778 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1779 pAllocator, pImage);
1780 #endif
1781
1782 const struct wsi_image_create_info *wsi_info =
1783 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1784 bool scanout = wsi_info && wsi_info->scanout;
1785
1786 return radv_image_create(device,
1787 &(struct radv_image_create_info) {
1788 .vk_info = pCreateInfo,
1789 .scanout = scanout,
1790 },
1791 pAllocator,
1792 pImage);
1793 }
1794
1795 void
1796 radv_DestroyImage(VkDevice _device, VkImage _image,
1797 const VkAllocationCallbacks *pAllocator)
1798 {
1799 RADV_FROM_HANDLE(radv_device, device, _device);
1800 RADV_FROM_HANDLE(radv_image, image, _image);
1801
1802 if (!image)
1803 return;
1804
1805 radv_destroy_image(device, pAllocator, image);
1806 }
1807
1808 void radv_GetImageSubresourceLayout(
1809 VkDevice _device,
1810 VkImage _image,
1811 const VkImageSubresource* pSubresource,
1812 VkSubresourceLayout* pLayout)
1813 {
1814 RADV_FROM_HANDLE(radv_image, image, _image);
1815 RADV_FROM_HANDLE(radv_device, device, _device);
1816 int level = pSubresource->mipLevel;
1817 int layer = pSubresource->arrayLayer;
1818
1819 unsigned plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
1820
1821 struct radv_image_plane *plane = &image->planes[plane_id];
1822 struct radeon_surf *surface = &plane->surface;
1823
1824 if (device->physical_device->rad_info.chip_class >= GFX9) {
1825 uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
1826
1827 pLayout->offset = plane->offset + level_offset + surface->u.gfx9.surf_slice_size * layer;
1828 if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1829 image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1830 image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1831 /* Adjust the number of bytes between each row because
1832 * the pitch is actually the number of components per
1833 * row.
1834 */
1835 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
1836 } else {
1837 uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
1838
1839 assert(util_is_power_of_two_nonzero(surface->bpe));
1840 pLayout->rowPitch = pitch * surface->bpe;
1841 }
1842
1843 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
1844 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
1845 pLayout->size = surface->u.gfx9.surf_slice_size;
1846 if (image->type == VK_IMAGE_TYPE_3D)
1847 pLayout->size *= u_minify(image->info.depth, level);
1848 } else {
1849 pLayout->offset = plane->offset + surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
1850 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
1851 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1852 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1853 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1854 if (image->type == VK_IMAGE_TYPE_3D)
1855 pLayout->size *= u_minify(image->info.depth, level);
1856 }
1857 }
1858
1859
1860 VkResult
1861 radv_CreateImageView(VkDevice _device,
1862 const VkImageViewCreateInfo *pCreateInfo,
1863 const VkAllocationCallbacks *pAllocator,
1864 VkImageView *pView)
1865 {
1866 RADV_FROM_HANDLE(radv_device, device, _device);
1867 struct radv_image_view *view;
1868
1869 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1870 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1871 if (view == NULL)
1872 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1873
1874 vk_object_base_init(&device->vk, &view->base,
1875 VK_OBJECT_TYPE_IMAGE_VIEW);
1876
1877 radv_image_view_init(view, device, pCreateInfo, NULL);
1878
1879 *pView = radv_image_view_to_handle(view);
1880
1881 return VK_SUCCESS;
1882 }
1883
1884 void
1885 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1886 const VkAllocationCallbacks *pAllocator)
1887 {
1888 RADV_FROM_HANDLE(radv_device, device, _device);
1889 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1890
1891 if (!iview)
1892 return;
1893
1894 vk_object_base_finish(&iview->base);
1895 vk_free2(&device->vk.alloc, pAllocator, iview);
1896 }
1897
1898 void radv_buffer_view_init(struct radv_buffer_view *view,
1899 struct radv_device *device,
1900 const VkBufferViewCreateInfo* pCreateInfo)
1901 {
1902 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1903
1904 view->bo = buffer->bo;
1905 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1906 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1907 view->vk_format = pCreateInfo->format;
1908
1909 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1910 pCreateInfo->offset, view->range, view->state);
1911 }
1912
1913 VkResult
1914 radv_CreateBufferView(VkDevice _device,
1915 const VkBufferViewCreateInfo *pCreateInfo,
1916 const VkAllocationCallbacks *pAllocator,
1917 VkBufferView *pView)
1918 {
1919 RADV_FROM_HANDLE(radv_device, device, _device);
1920 struct radv_buffer_view *view;
1921
1922 view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
1923 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1924 if (!view)
1925 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1926
1927 vk_object_base_init(&device->vk, &view->base,
1928 VK_OBJECT_TYPE_BUFFER_VIEW);
1929
1930 radv_buffer_view_init(view, device, pCreateInfo);
1931
1932 *pView = radv_buffer_view_to_handle(view);
1933
1934 return VK_SUCCESS;
1935 }
1936
1937 void
1938 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1939 const VkAllocationCallbacks *pAllocator)
1940 {
1941 RADV_FROM_HANDLE(radv_device, device, _device);
1942 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1943
1944 if (!view)
1945 return;
1946
1947 vk_object_base_finish(&view->base);
1948 vk_free2(&device->vk.alloc, pAllocator, view);
1949 }