radv: add radv_image_is_tc_compat_htile() helper
src/amd/vulkan/radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_debug.h"
#include "radv_private.h"
#include "vk_format.h"
#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"
#include "util/u_atomic.h"

static unsigned
radv_choose_tiling(struct radv_device *device,
		   const struct radv_image_create_info *create_info)
{
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
		assert(pCreateInfo->samples <= 1);
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	if (!vk_format_is_compressed(pCreateInfo->format) &&
	    !vk_format_is_depth_or_stencil(pCreateInfo->format) &&
	    device->physical_device->rad_info.chip_class <= VI) {
		/* Restricted to VI and older because this path causes hangs
		 * in some VK CTS tests on GFX9. */
		/* Textures with a very small height are recommended to be
		 * linear. */
		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
		    /* Only very thin and long 2D textures should benefit
		     * from linear_aligned. */
		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

	/* MSAA resources must be 2D tiled. */
	if (pCreateInfo->samples > 1)
		return RADEON_SURF_MODE_2D;

	return RADEON_SURF_MODE_2D;
}

static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device,
				   const VkImageCreateInfo *pCreateInfo)
{
	/* TC-compat HTILE is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < VI)
		return false;

	if (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)
		return false;

	if (pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
				  VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	if (pCreateInfo->mipLevels > 1)
		return false;

	/* FIXME: for some reason TC-compat with 2/4/8 samples breaks some
	 * CTS tests - disable for now. */
	if (pCreateInfo->samples >= 2 &&
	    pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
		return false;

	/* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8
	 * only supports 32-bit. However, it's possible to enable TC-compat
	 * for 16-bit depth surfaces if no Z planes are compressed.
	 */
	if (pCreateInfo->format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
	    pCreateInfo->format != VK_FORMAT_D32_SFLOAT &&
	    pCreateInfo->format != VK_FORMAT_D16_UNORM)
		return false;

	return true;
}

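/* radv_image_is_tc_compat_htile() is the helper this change introduces; its
 * definition lives in radv_private.h. A minimal sketch of what it is
 * expected to return, based on the tc_compatible_htile flag set in
 * radv_image_create() below (an assumption, not the verbatim definition):
 *
 *	static inline bool
 *	radv_image_is_tc_compat_htile(const struct radv_image *image)
 *	{
 *		return radv_image_has_htile(image) &&
 *		       image->tc_compatible_htile;
 *	}
 */
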
static bool
radv_use_dcc_for_image(struct radv_device *device,
		       const struct radv_image_create_info *create_info,
		       const VkImageCreateInfo *pCreateInfo)
{
	bool dcc_compatible_formats;
	bool blendable;

	/* DCC (Delta Color Compression) is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < VI)
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

	/* TODO: Enable DCC for storage images. */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR))
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

	/* TODO: Enable DCC for mipmaps and array layers. */
	if (pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1)
		return false;

	if (create_info->scanout)
		return false;

	/* TODO: Enable DCC for MSAA textures. */
	if (pCreateInfo->samples >= 2)
		return false;

	/* Determine if the formats are DCC compatible. */
	dcc_compatible_formats =
		radv_is_colorbuffer_format_supported(pCreateInfo->format,
						     &blendable);

	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
		const struct VkImageFormatListCreateInfoKHR *format_list =
			(const struct VkImageFormatListCreateInfoKHR *)
				vk_find_struct_const(pCreateInfo->pNext,
						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);

		/* We have to ignore the existence of the list if
		 * viewFormatCount = 0. */
		if (format_list && format_list->viewFormatCount) {
			/* Compatibility is transitive, so we only need to
			 * check one format with everything else. */
			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
				if (!radv_dcc_formats_compatible(pCreateInfo->format,
								 format_list->pViewFormats[i]))
					dcc_compatible_formats = false;
			}
		} else {
			dcc_compatible_formats = false;
		}
	}

	if (!dcc_compatible_formats)
		return false;

	return true;
}

static int
radv_init_surface(struct radv_device *device,
		  struct radeon_surf *surface,
		  const struct radv_image_create_info *create_info)
{
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	unsigned array_mode = radv_choose_tiling(device, create_info);
	const struct vk_format_description *desc =
		vk_format_description(pCreateInfo->format);
	bool is_depth, is_stencil;

	is_depth = vk_format_has_depth(desc);
	is_stencil = vk_format_has_stencil(desc);

	surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format);
	surface->blk_h = vk_format_get_blockheight(pCreateInfo->format);

	surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format));
	/* align bytes per element on dword */
	if (surface->bpe == 3) {
		surface->bpe = 4;
	}

	surface->flags = RADEON_SURF_SET(array_mode, MODE);

	switch (pCreateInfo->imageType) {
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
		break;
	case VK_IMAGE_TYPE_3D:
		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
		break;
	default:
		unreachable("unhandled image type");
	}

	if (is_depth) {
		surface->flags |= RADEON_SURF_ZBUFFER;
		if (radv_use_tc_compat_htile_for_image(device, pCreateInfo))
			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
	}

	if (is_stencil)
		surface->flags |= RADEON_SURF_SBUFFER;

	surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

	if (!radv_use_dcc_for_image(device, create_info, pCreateInfo))
		surface->flags |= RADEON_SURF_DISABLE_DCC;

	if (create_info->scanout)
		surface->flags |= RADEON_SURF_SCANOUT;

	return 0;
}

static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
	if (stencil)
		return image->surface.u.legacy.stencil_tiling_index[level];
	else
		return image->surface.u.legacy.tiling_index[level];
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
	case VK_SWIZZLE_Y:
		return V_008F0C_SQ_SEL_Y;
	case VK_SWIZZLE_Z:
		return V_008F0C_SQ_SEL_Z;
	case VK_SWIZZLE_W:
		return V_008F0C_SQ_SEL_W;
	case VK_SWIZZLE_0:
		return V_008F0C_SQ_SEL_0;
	case VK_SWIZZLE_1:
		return V_008F0C_SQ_SEL_1;
	default: /* VK_SWIZZLE_X */
		return V_008F0C_SQ_SEL_X;
	}
}

static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;

	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	num_format = radv_translate_buffer_numformat(desc, first_non_void);
	data_format = radv_translate_buffer_dataformat(desc, first_non_void);

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);

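	/* state[2] is NUM_RECORDS: a byte count on VI, but an element count
	 * (when a stride is set) on other generations, which is why the
	 * divide below skips VI. E.g. a 256-byte range of
	 * VK_FORMAT_R32G32B32A32_SFLOAT (stride 16) is programmed as 16
	 * records on non-VI parts.
	 */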
	if (device->physical_device->rad_info.chip_class != VI && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) |
		   S_008F0C_NUM_FORMAT(num_format) |
		   S_008F0C_DATA_FORMAT(data_format);
}

static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, uint32_t *state)
{
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;

	if (chip_class >= GFX9) {
		if (is_stencil)
			va += image->surface.u.gfx9.stencil_offset;
		else
			va += image->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= image->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= VI) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;

		if (!is_storage_image && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			if (chip_class <= VI)
				meta_va += base_level_info->dcc_offset;
		} else if (!is_storage_image &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			state[7] = meta_va >> 8;
			state[7] |= image->surface.tile_swizzle;
		}
	}

	if (chip_class >= GFX9) {
		state[3] &= C_008F1C_SW_MODE;
		state[4] &= C_008F20_PITCH_GFX9;

		if (is_stencil) {
			state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
			state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
		} else {
			state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
			state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
			struct gfx9_surf_meta_flags meta;

			if (image->dcc_offset)
				meta = image->surface.u.gfx9.dcc;
			else
				meta = image->surface.u.gfx9.htile;

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
		/* SI-CI-VI */
		unsigned pitch = base_level_info->nblk_x * block_width;
		unsigned index = si_tile_mode_index(image, base_level, is_stencil);

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
		state[4] &= C_008F20_PITCH_GFX6;
		state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
	}
}

static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;

	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (nr_samples > 1)
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		else
			return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		if (view_type == VK_IMAGE_VIEW_TYPE_3D)
			return V_008F1C_SQ_RSRC_IMG_3D;
		else
			return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}

static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

	if (swizzle[3] == VK_SWIZZLE_X) {
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
		if (swizzle[2] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
	} else if (swizzle[0] == VK_SWIZZLE_X) {
		if (swizzle[1] == VK_SWIZZLE_Y)
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
	} else if (swizzle[1] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
	} else if (swizzle[2] == VK_SWIZZLE_X) {
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}

/**
 * Build the sampler view descriptor for a texture.
 */
static void
si_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
			   bool is_storage_image,
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	const struct vk_format_description *desc;
	enum vk_swizzle swizzle[4];
	int first_non_void;
	unsigned num_format, data_format, type;

	desc = vk_format_description(vk_format);

	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}

	first_non_void = vk_format_get_first_non_void_channel(vk_format);

	num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
	if (num_format == ~0) {
		num_format = 0;
	}

	data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

	/* S8 with either Z16 or Z32 HTILE needs a special format. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    vk_format == VK_FORMAT_S8_UINT &&
	    radv_image_is_tc_compat_htile(image)) {
		if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
		else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
	}

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
		height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	state[0] = 0;
	state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
		    S_008F14_NUM_FORMAT_GFX6(num_format));
	state[2] = (S_008F18_WIDTH(width - 1) |
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		    S_008F1C_TYPE(type));
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
	state[6] = 0;
	state[7] = 0;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

		/* Depth is the last accessible layer on GFX9.
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
					     util_logbase2(image->info.samples) :
					     image->info.levels - 1);
	} else {
		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}

	if (image->dcc_offset) {
		unsigned swap = radv_translate_colorswap(vk_format, FALSE);

		state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
		if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
	if (radv_image_has_fmask(image)) {
		uint32_t fmask_format, num_format;
		uint64_t gpu_address = radv_buffer_get_va(image->bo);
		uint64_t va;

		va = gpu_address + image->offset + image->fmask.offset;

		if (device->physical_device->rad_info.chip_class >= GFX9) {
			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
			switch (image->info.samples) {
			case 2:
				num_format = V_008F14_IMG_FMASK_8_2_2;
				break;
			case 4:
				num_format = V_008F14_IMG_FMASK_8_4_4;
				break;
			case 8:
				num_format = V_008F14_IMG_FMASK_32_8_8;
				break;
			default:
				unreachable("invalid nr_samples");
			}
		} else {
			switch (image->info.samples) {
			case 2:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
				break;
			case 4:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
				break;
			case 8:
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
				break;
			default:
				assert(0);
				fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
			}
			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
		}

		fmask_state[0] = va >> 8;
		fmask_state[0] |= image->fmask.tile_swizzle;
		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				 S_008F14_DATA_FORMAT_GFX6(fmask_format) |
				 S_008F14_NUM_FORMAT_GFX6(num_format);
		fmask_state[2] = S_008F18_WIDTH(width - 1) |
				 S_008F18_HEIGHT(height - 1);
		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
		fmask_state[4] = 0;
		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
		fmask_state[6] = 0;
		fmask_state[7] = 0;

		if (device->physical_device->rad_info.chip_class >= GFX9) {
			fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
					  S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
					  S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
		} else {
			fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
					  S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
		}
	} else if (fmask_state)
		memset(fmask_state, 0, 8 * 4);
}

static void
radv_query_opaque_metadata(struct radv_device *device,
			   struct radv_image *image,
			   struct radeon_bo_metadata *md)
{
	static const VkComponentMapping fixedmapping;
	uint32_t desc[8], i;

	/* Metadata image format version 1:
	 * [0] = 1 (metadata format identifier)
	 * [1] = (VENDOR_ID << 16) | PCI_ID
	 * [2:9] = image descriptor for the whole resource
	 *         [2] is always 0, because the base address is cleared
	 *         [9] is the DCC offset bits [39:8] from the beginning of
	 *             the buffer
	 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
	 */
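	/* Worked example: for a VI image with 3 mip levels, metadata[10..12]
	 * hold the per-level offsets >> 8 and size_metadata ends up as
	 * (11 + 3 - 1) * 4 = 52 bytes (see the loop at the end of this
	 * function).
	 */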
	md->metadata[0] = 1; /* metadata image format version 1 */

	/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
	md->metadata[1] = si_get_bo_metadata_word1(device);

	si_make_texture_descriptor(device, image, false,
				   (VkImageViewType)image->type, image->vk_format,
				   &fixedmapping, 0, image->info.levels - 1, 0,
				   image->info.array_size,
				   image->info.width, image->info.height,
				   image->info.depth,
				   desc, NULL);

	si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
				       image->surface.blk_w, false, false, desc);

	/* Clear the base address and set the relative DCC offset. */
	desc[0] = 0;
	desc[1] &= C_008F14_BASE_ADDRESS_HI;
	desc[7] = image->dcc_offset >> 8;

	/* Dwords [2:9] contain the image descriptor. */
	memcpy(&md->metadata[2], desc, sizeof(desc));

	/* Dwords [10:..] contain the mipmap level offsets. */
	if (device->physical_device->rad_info.chip_class <= VI) {
		for (i = 0; i <= image->info.levels - 1; i++)
			md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
		md->size_metadata = (11 + image->info.levels - 1) * 4;
	}
}

void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata)
{
	struct radeon_surf *surface = &image->surface;

	memset(metadata, 0, sizeof(*metadata));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
	} else {
		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
		metadata->u.legacy.bankw = surface->u.legacy.bankw;
		metadata->u.legacy.bankh = surface->u.legacy.bankh;
		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
	}

	radv_query_opaque_metadata(device, image, metadata);
}

/* The number of samples can be specified independently of the texture. */
static void
radv_image_get_fmask_info(struct radv_device *device,
			  struct radv_image *image,
			  unsigned nr_samples,
			  struct radv_fmask_info *out)
{
	/* FMASK is allocated like an ordinary texture. */
	struct radeon_surf fmask = {};
	struct ac_surf_info info = image->info;

	memset(out, 0, sizeof(*out));

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		out->alignment = image->surface.u.gfx9.fmask_alignment;
		out->size = image->surface.u.gfx9.fmask_size;
		return;
	}

	fmask.blk_w = image->surface.blk_w;
	fmask.blk_h = image->surface.blk_h;
	info.samples = 1;
	fmask.flags = image->surface.flags | RADEON_SURF_FMASK;

	if (!image->shareable)
		info.surf_index = &device->fmask_mrt_offset_counter;

	/* Force 2D tiling if it wasn't set. This may occur when creating
	 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
	 * destination buffer must have an FMASK too. */
	fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
	fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);

	switch (nr_samples) {
	case 2:
	case 4:
		fmask.bpe = 1;
		break;
	case 8:
		fmask.bpe = 4;
		break;
	default:
		return;
	}

	device->ws->surface_init(device->ws, &info, &fmask);
	assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);

	out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;

	out->tile_mode_index = fmask.u.legacy.tiling_index[0];
	out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
	out->bank_height = fmask.u.legacy.bankh;
	out->tile_swizzle = fmask.tile_swizzle;
	out->alignment = MAX2(256, fmask.surf_alignment);
	out->size = fmask.surf_size;

	assert(!out->tile_swizzle || !image->shareable);
}

static void
radv_image_alloc_fmask(struct radv_device *device,
		       struct radv_image *image)
{
	radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);

	image->fmask.offset = align64(image->size, image->fmask.alignment);
	image->size = image->fmask.offset + image->fmask.size;
	image->alignment = MAX2(image->alignment, image->fmask.alignment);
}

static void
radv_image_get_cmask_info(struct radv_device *device,
			  struct radv_image *image,
			  struct radv_cmask_info *out)
{
	unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
	unsigned cl_width, cl_height;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		out->alignment = image->surface.u.gfx9.cmask_alignment;
		out->size = image->surface.u.gfx9.cmask_size;
		return;
	}

	switch (num_pipes) {
	case 2:
		cl_width = 32;
		cl_height = 16;
		break;
	case 4:
		cl_width = 32;
		cl_height = 32;
		break;
	case 8:
		cl_width = 64;
		cl_height = 32;
		break;
	case 16: /* Hawaii */
		cl_width = 64;
		cl_height = 64;
		break;
	default:
		assert(0);
		return;
	}

	unsigned base_align = num_pipes * pipe_interleave_bytes;

	unsigned width = align(image->info.width, cl_width*8);
	unsigned height = align(image->info.height, cl_height*8);
	unsigned slice_elements = (width * height) / (8*8);

	/* Each element of CMASK is a nibble. */
	unsigned slice_bytes = slice_elements / 2;
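	/* Worked example (illustrative numbers): with num_pipes = 4 and
	 * pipe_interleave_bytes = 256, a 1024x1024 image needs no padding
	 * (1024 is a multiple of cl_width*8 = 256), so slice_elements =
	 * 1024*1024/64 = 16384, slice_bytes = 8192, base_align = 1024, and
	 * one slice occupies align(8192, 1024) = 8192 bytes below.
	 */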

	out->slice_tile_max = (width * height) / (128*128);
	if (out->slice_tile_max)
		out->slice_tile_max -= 1;

	out->alignment = MAX2(256, base_align);
	out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) *
		    align(slice_bytes, base_align);
}

static void
radv_image_alloc_cmask(struct radv_device *device,
		       struct radv_image *image)
{
	uint32_t clear_value_size = 0;

	radv_image_get_cmask_info(device, image, &image->cmask);

	image->cmask.offset = align64(image->size, image->cmask.alignment);
	/* + 8 for storing the clear values */
	if (!image->clear_value_offset) {
		image->clear_value_offset = image->cmask.offset + image->cmask.size;
		clear_value_size = 8;
	}
	image->size = image->cmask.offset + image->cmask.size + clear_value_size;
	image->alignment = MAX2(image->alignment, image->cmask.alignment);
}

static void
radv_image_alloc_dcc(struct radv_image *image)
{
	image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
	/* + 16 for storing the clear values + dcc pred */
	image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
	image->dcc_pred_offset = image->clear_value_offset + 8;
	image->size = image->dcc_offset + image->surface.dcc_size + 16;
	image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
}
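
/* Resulting BO layout after radv_image_alloc_dcc(), as computed above:
 *
 *   [ surface | DCC metadata | clear values (8B) | DCC predicate (8B) ]
 *   offset 0   dcc_offset     clear_value_offset  dcc_pred_offset
 */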

static void
radv_image_alloc_htile(struct radv_image *image)
{
	image->htile_offset = align64(image->size, image->surface.htile_alignment);

	/* + 8 for storing the clear values */
	image->clear_value_offset = image->htile_offset + image->surface.htile_size;
	image->size = image->clear_value_offset + 8;
	image->alignment = align64(image->alignment, image->surface.htile_alignment);
}

static inline bool
radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
{
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
	       (image->exclusive || image->queue_family_mask == 1);
}

static inline bool
radv_image_can_enable_dcc(struct radv_image *image)
{
	return radv_image_can_enable_dcc_or_cmask(image) &&
	       radv_image_has_dcc(image);
}

static inline bool
radv_image_can_enable_cmask(struct radv_image *image)
{
	if (image->surface.bpe > 8 && image->info.samples == 1) {
		/* Do not enable CMASK for non-MSAA images (fast color clear)
		 * because 128 bit formats are not supported, but FMASK might
		 * still be used.
		 */
		return false;
	}

	return radv_image_can_enable_dcc_or_cmask(image) &&
	       image->info.levels == 1 &&
	       image->info.depth == 1 &&
	       !image->surface.is_linear;
}

static inline bool
radv_image_can_enable_fmask(struct radv_image *image)
{
	return image->info.samples > 1 && vk_format_is_color(image->vk_format);
}

static inline bool
radv_image_can_enable_htile(struct radv_image *image)
{
	return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
}

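/* Metadata selection below in radv_image_create(): try DCC first and fall
 * back to CMASK for color images; pick FMASK for multisampled images,
 * otherwise HTILE for single-mip depth surfaces.
 */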
VkResult
radv_image_create(VkDevice _device,
		  const struct radv_image_create_info *create_info,
		  const VkAllocationCallbacks* alloc,
		  VkImage *pImage)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
	struct radv_image *image = NULL;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

	radv_assert(pCreateInfo->mipLevels > 0);
	radv_assert(pCreateInfo->arrayLayers > 0);
	radv_assert(pCreateInfo->samples > 0);
	radv_assert(pCreateInfo->extent.width > 0);
	radv_assert(pCreateInfo->extent.height > 0);
	radv_assert(pCreateInfo->extent.depth > 0);

	image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!image)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	image->type = pCreateInfo->imageType;
	image->info.width = pCreateInfo->extent.width;
	image->info.height = pCreateInfo->extent.height;
	image->info.depth = pCreateInfo->extent.depth;
	image->info.samples = pCreateInfo->samples;
	image->info.array_size = pCreateInfo->arrayLayers;
	image->info.levels = pCreateInfo->mipLevels;

	image->vk_format = pCreateInfo->format;
	image->tiling = pCreateInfo->tiling;
	image->usage = pCreateInfo->usage;
	image->flags = pCreateInfo->flags;

	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR)
				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
			else
				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
	}

	image->shareable = vk_find_struct_const(pCreateInfo->pNext,
						EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
	if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
		image->info.surf_index = &device->image_mrt_offset_counter;
	}

	radv_init_surface(device, &image->surface, create_info);

	device->ws->surface_init(device->ws, &image->info, &image->surface);

	image->size = image->surface.surf_size;
	image->alignment = image->surface.surf_alignment;

	if (!create_info->no_metadata_planes) {
		/* Try to enable DCC first. */
		if (radv_image_can_enable_dcc(image)) {
			radv_image_alloc_dcc(image);
		} else {
			/* When DCC cannot be enabled, try CMASK. */
			image->surface.dcc_size = 0;
			if (radv_image_can_enable_cmask(image)) {
				radv_image_alloc_cmask(device, image);
			}
		}

		/* Try to enable FMASK for multisampled images. */
		if (radv_image_can_enable_fmask(image)) {
			radv_image_alloc_fmask(device, image);
		} else {
			/* Otherwise, try to enable HTILE for depth surfaces. */
			if (radv_image_can_enable_htile(image) &&
			    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
				radv_image_alloc_htile(image);
				image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
			} else {
				image->surface.htile_size = 0;
			}
		}
	} else {
		image->surface.dcc_size = 0;
		image->surface.htile_size = 0;
	}

	if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
		image->alignment = MAX2(image->alignment, 4096);
		image->size = align64(image->size, image->alignment);
		image->offset = 0;

		image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
						      0, RADEON_FLAG_VIRTUAL);
		if (!image->bo) {
			vk_free2(&device->alloc, alloc, image);
			return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pImage = radv_image_to_handle(image);

	return VK_SUCCESS;
}

static void
radv_image_view_make_descriptor(struct radv_image_view *iview,
				struct radv_device *device,
				const VkComponentMapping *components,
				bool is_storage_image)
{
	struct radv_image *image = iview->image;
	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
	uint32_t blk_w;
	uint32_t *descriptor;
	uint32_t hw_level = 0;

	if (is_storage_image) {
		descriptor = iview->storage_descriptor;
	} else {
		descriptor = iview->descriptor;
	}

	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);

	if (device->physical_device->rad_info.chip_class >= GFX9)
		hw_level = iview->base_mip;

	si_make_texture_descriptor(device, image, is_storage_image,
				   iview->type,
				   iview->vk_format,
				   components,
				   hw_level, hw_level + iview->level_count - 1,
				   iview->base_layer,
				   iview->base_layer + iview->layer_count - 1,
				   iview->extent.width,
				   iview->extent.height,
				   iview->extent.depth,
				   descriptor,
				   descriptor + 8);

	const struct legacy_surf_level *base_level_info = NULL;
	if (device->physical_device->rad_info.chip_class <= GFX9) {
		if (is_stencil)
			base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip];
		else
			base_level_info = &image->surface.u.legacy.level[iview->base_mip];
	}

	si_set_mutable_tex_desc_fields(device, image,
				       base_level_info,
				       iview->base_mip,
				       iview->base_mip,
				       blk_w, is_stencil, is_storage_image, descriptor);
}

void
radv_image_view_init(struct radv_image_view *iview,
		     struct radv_device *device,
		     const VkImageViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

	switch (image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
		break;
	case VK_IMAGE_TYPE_3D:
		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
		       <= radv_minify(image->info.depth, range->baseMipLevel));
		break;
	default:
		unreachable("bad VkImageType");
	}

	iview->image = image;
	iview->bo = image->bo;
	iview->type = pCreateInfo->viewType;
	iview->vk_format = pCreateInfo->format;
	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
		iview->vk_format = vk_format_stencil_only(iview->vk_format);
	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
		iview->vk_format = vk_format_depth_only(iview->vk_format);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		iview->extent = (VkExtent3D) {
			.width = image->info.width,
			.height = image->info.height,
			.depth = image->info.depth,
		};
	} else {
		iview->extent = (VkExtent3D) {
			.width = radv_minify(image->info.width, range->baseMipLevel),
			.height = radv_minify(image->info.height, range->baseMipLevel),
			.depth = radv_minify(image->info.depth, range->baseMipLevel),
		};
	}

	if (iview->vk_format != image->vk_format) {
		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
		unsigned img_bh = vk_format_get_blockheight(image->vk_format);

		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

		/* Comment ported from amdvlk -
		 * If we have the following image:
		 *         Uncompressed pixels   Compressed block sizes (4x4)
		 *   mip0:       22 x 22                 6 x 6
		 *   mip1:       11 x 11                 3 x 3
		 *   mip2:        5 x  5                 2 x 2
		 *   mip3:        2 x  2                 1 x 1
		 *   mip4:        1 x  1                 1 x 1
		 *
		 * On GFX9 the descriptor is always programmed with the WIDTH
		 * and HEIGHT of the base level, and the HW calculates the
		 * degradation of the block sizes down the mip chain as
		 * follows (straight-up divide-by-two integer math):
		 *   mip0:  6x6
		 *   mip1:  3x3
		 *   mip2:  1x1
		 *   mip3:  1x1
		 *
		 * This means that mip2 will be missing texels.
		 *
		 * Fix this by calculating the base mip's width and height,
		 * then converting that, and rounding it back up to get the
		 * level 0 size. Clamp the converted size between the
		 * original values and the next power of two, which means we
		 * don't oversize the image.
		 */
		if (device->physical_device->rad_info.chip_class >= GFX9 &&
		    vk_format_is_compressed(image->vk_format) &&
		    !vk_format_is_compressed(iview->vk_format)) {
			unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
			unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
			unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
			unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

			lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
			lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

			lvl_width <<= range->baseMipLevel;
			lvl_height <<= range->baseMipLevel;

			iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
			iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
		}
	}

	iview->base_layer = range->baseArrayLayer;
	iview->layer_count = radv_get_layerCount(image, range);
	iview->base_mip = range->baseMipLevel;
	iview->level_count = radv_get_levelCount(image, range);

	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false);
	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true);
}

bool radv_layout_has_htile(const struct radv_image *image,
			   VkImageLayout layout,
			   unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image))
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
	       queue_mask == (1u << RADV_QUEUE_GENERAL);
}

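/* Note: for now this is the same condition as radv_layout_has_htile();
 * the two entry points are kept separate so the answers can diverge later.
 */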
bool radv_layout_is_htile_compressed(const struct radv_image *image,
				     VkImageLayout layout,
				     unsigned queue_mask)
{
	if (radv_image_is_tc_compat_htile(image))
		return layout != VK_IMAGE_LAYOUT_GENERAL;

	return radv_image_has_htile(image) &&
	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
	       queue_mask == (1u << RADV_QUEUE_GENERAL);
}

bool radv_layout_can_fast_clear(const struct radv_image *image,
				VkImageLayout layout,
				unsigned queue_mask)
{
	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
	       queue_mask == (1u << RADV_QUEUE_GENERAL);
}

bool radv_layout_dcc_compressed(const struct radv_image *image,
				VkImageLayout layout,
				unsigned queue_mask)
{
	/* Don't compress compute transfer dst, as image stores are not supported. */
	if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
	    (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
		return false;

	return image->surface.num_dcc_levels > 0 && layout != VK_IMAGE_LAYOUT_GENERAL;
}

unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
	if (!image->exclusive)
		return image->queue_family_mask;
	if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR)
		return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
	if (family == VK_QUEUE_FAMILY_IGNORED)
		return 1u << queue_family;
	return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device,
		 const VkImageCreateInfo *pCreateInfo,
		 const VkAllocationCallbacks *pAllocator,
		 VkImage *pImage)
{
#ifdef ANDROID
	const VkNativeBufferANDROID *gralloc_info =
		vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

	if (gralloc_info)
		return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
					       pAllocator, pImage);
#endif

	const struct wsi_image_create_info *wsi_info =
		vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
	bool scanout = wsi_info && wsi_info->scanout;

	return radv_image_create(device,
				 &(struct radv_image_create_info) {
					 .vk_info = pCreateInfo,
					 .scanout = scanout,
				 },
				 pAllocator,
				 pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image,
		  const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (!image)
		return;

	if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(image->bo);

	if (image->owned_memory != VK_NULL_HANDLE)
		radv_FreeMemory(_device, image->owned_memory, pAllocator);

	vk_free2(&device->alloc, pAllocator, image);
}

void radv_GetImageSubresourceLayout(
	VkDevice                                    _device,
	VkImage                                     _image,
	const VkImageSubresource*                   pSubresource,
	VkSubresourceLayout*                        pLayout)
{
	RADV_FROM_HANDLE(radv_image, image, _image);
	RADV_FROM_HANDLE(radv_device, device, _device);
	int level = pSubresource->mipLevel;
	int layer = pSubresource->arrayLayer;
	struct radeon_surf *surface = &image->surface;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
		pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
		pLayout->size = surface->u.gfx9.surf_slice_size;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	} else {
		pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
		pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
		if (image->type == VK_IMAGE_TYPE_3D)
			pLayout->size *= u_minify(image->info.depth, level);
	}
}

VkResult
radv_CreateImageView(VkDevice _device,
		     const VkImageViewCreateInfo *pCreateInfo,
		     const VkAllocationCallbacks *pAllocator,
		     VkImageView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_image_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (view == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_image_view_init(view, device, pCreateInfo);

	*pView = radv_image_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview,
		      const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_image_view, iview, _iview);

	if (!iview)
		return;

	vk_free2(&device->alloc, pAllocator, iview);
}

void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

	view->bo = buffer->bo;
	view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
		buffer->size - pCreateInfo->offset : pCreateInfo->range;
	view->vk_format = pCreateInfo->format;

	radv_make_buffer_descriptor(device, buffer, view->vk_format,
				    pCreateInfo->offset, view->range, view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device,
		      const VkBufferViewCreateInfo *pCreateInfo,
		      const VkAllocationCallbacks *pAllocator,
		      VkBufferView *pView)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer_view *view;

	view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!view)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_buffer_view_init(view, device, pCreateInfo);

	*pView = radv_buffer_view_to_handle(view);

	return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
		       const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

	if (!view)
		return;

	vk_free2(&device->alloc, pAllocator, view);
}