radv: enable TC-compat HTILE for 16-bit depth surfaces on GFX8
[mesa.git] / src / amd / vulkan / radv_image.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "vk_format.h"
31 #include "vk_util.h"
32 #include "radv_radeon_winsys.h"
33 #include "sid.h"
34 #include "gfx9d.h"
35 #include "util/debug.h"
36 #include "util/u_atomic.h"
37 static unsigned
38 radv_choose_tiling(struct radv_device *device,
39 const struct radv_image_create_info *create_info)
40 {
41 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
42
43 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
44 assert(pCreateInfo->samples <= 1);
45 return RADEON_SURF_MODE_LINEAR_ALIGNED;
46 }
47
48 if (!vk_format_is_compressed(pCreateInfo->format) &&
49 !vk_format_is_depth_or_stencil(pCreateInfo->format)
50 && device->physical_device->rad_info.chip_class <= VI) {
51 /* this causes hangs in some VK CTS tests on GFX9. */
52 /* Textures with a very small height are recommended to be linear. */
53 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
54 /* Only very thin and long 2D textures should benefit from
55 * linear_aligned. */
56 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
57 return RADEON_SURF_MODE_LINEAR_ALIGNED;
58 }
59
60 /* MSAA resources must be 2D tiled. */
61 if (pCreateInfo->samples > 1)
62 return RADEON_SURF_MODE_2D;
63
64 return RADEON_SURF_MODE_2D;
65 }
66
67 static bool
68 radv_image_is_tc_compat_htile(struct radv_device *device,
69 const VkImageCreateInfo *pCreateInfo)
70 {
71 /* TC-compat HTILE is only available for GFX8+. */
72 if (device->physical_device->rad_info.chip_class < VI)
73 return false;
74
75 if (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)
76 return false;
77
78 if (pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
79 VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR))
80 return false;
81
82 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
83 return false;
84
85 if (pCreateInfo->mipLevels > 1)
86 return false;
87
88 /* FIXME: for some reason TC compat with 2/4/8 samples breaks some cts
89 * tests - disable for now */
90 if (pCreateInfo->samples >= 2 &&
91 pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
92 return false;
93
94 /* GFX9 supports both 32-bit and 16-bit depth surfaces, while GFX8 only
95 * supports 32-bit. Though, it's possible to enable TC-compat for
96 * 16-bit depth surfaces if no Z planes are compressed.
97 */
98 if (pCreateInfo->format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
99 pCreateInfo->format != VK_FORMAT_D32_SFLOAT &&
100 pCreateInfo->format != VK_FORMAT_D16_UNORM)
101 return false;
102
103 return true;
104 }
105
106 static int
107 radv_init_surface(struct radv_device *device,
108 struct radeon_surf *surface,
109 const struct radv_image_create_info *create_info)
110 {
111 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
112 unsigned array_mode = radv_choose_tiling(device, create_info);
113 const struct vk_format_description *desc =
114 vk_format_description(pCreateInfo->format);
115 bool is_depth, is_stencil, blendable;
116
117 is_depth = vk_format_has_depth(desc);
118 is_stencil = vk_format_has_stencil(desc);
119
120 surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format);
121 surface->blk_h = vk_format_get_blockheight(pCreateInfo->format);
122
123 surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format));
124 /* align byte per element on dword */
125 if (surface->bpe == 3) {
126 surface->bpe = 4;
127 }
128 surface->flags = RADEON_SURF_SET(array_mode, MODE);
129
130 switch (pCreateInfo->imageType){
131 case VK_IMAGE_TYPE_1D:
132 if (pCreateInfo->arrayLayers > 1)
133 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
134 else
135 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
136 break;
137 case VK_IMAGE_TYPE_2D:
138 if (pCreateInfo->arrayLayers > 1)
139 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
140 else
141 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
142 break;
143 case VK_IMAGE_TYPE_3D:
144 surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
145 break;
146 default:
147 unreachable("unhandled image type");
148 }
149
150 if (is_depth) {
151 surface->flags |= RADEON_SURF_ZBUFFER;
152 if (radv_image_is_tc_compat_htile(device, pCreateInfo))
153 surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
154 }
155
156 if (is_stencil)
157 surface->flags |= RADEON_SURF_SBUFFER;
158
159 surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
160
161 bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
162 if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
163 const struct VkImageFormatListCreateInfoKHR *format_list =
164 (const struct VkImageFormatListCreateInfoKHR *)
165 vk_find_struct_const(pCreateInfo->pNext,
166 IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
167
168 /* We have to ignore the existence of the list if viewFormatCount = 0 */
169 if (format_list && format_list->viewFormatCount) {
170 /* compatibility is transitive, so we only need to check
171 * one format with everything else. */
172 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
173 if (!radv_dcc_formats_compatible(pCreateInfo->format,
174 format_list->pViewFormats[i]))
175 dcc_compatible_formats = false;
176 }
177 } else {
178 dcc_compatible_formats = false;
179 }
180 }
181
182 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
183 (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR) ||
184 !dcc_compatible_formats ||
185 (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
186 pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
187 device->physical_device->rad_info.chip_class < VI ||
188 create_info->scanout || (device->instance->debug_flags & RADV_DEBUG_NO_DCC) ||
189 pCreateInfo->samples >= 2)
190 surface->flags |= RADEON_SURF_DISABLE_DCC;
191 if (create_info->scanout)
192 surface->flags |= RADEON_SURF_SCANOUT;
193 return 0;
194 }
195
196 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
197 {
198 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
199 }
200
201 static inline unsigned
202 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
203 {
204 if (stencil)
205 return image->surface.u.legacy.stencil_tiling_index[level];
206 else
207 return image->surface.u.legacy.tiling_index[level];
208 }
209
210 static unsigned radv_map_swizzle(unsigned swizzle)
211 {
212 switch (swizzle) {
213 case VK_SWIZZLE_Y:
214 return V_008F0C_SQ_SEL_Y;
215 case VK_SWIZZLE_Z:
216 return V_008F0C_SQ_SEL_Z;
217 case VK_SWIZZLE_W:
218 return V_008F0C_SQ_SEL_W;
219 case VK_SWIZZLE_0:
220 return V_008F0C_SQ_SEL_0;
221 case VK_SWIZZLE_1:
222 return V_008F0C_SQ_SEL_1;
223 default: /* VK_SWIZZLE_X */
224 return V_008F0C_SQ_SEL_X;
225 }
226 }
227
228 static void
229 radv_make_buffer_descriptor(struct radv_device *device,
230 struct radv_buffer *buffer,
231 VkFormat vk_format,
232 unsigned offset,
233 unsigned range,
234 uint32_t *state)
235 {
236 const struct vk_format_description *desc;
237 unsigned stride;
238 uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
239 uint64_t va = gpu_address + buffer->offset;
240 unsigned num_format, data_format;
241 int first_non_void;
242 desc = vk_format_description(vk_format);
243 first_non_void = vk_format_get_first_non_void_channel(vk_format);
244 stride = desc->block.bits / 8;
245
246 num_format = radv_translate_buffer_numformat(desc, first_non_void);
247 data_format = radv_translate_buffer_dataformat(desc, first_non_void);
248
249 va += offset;
250 state[0] = va;
251 state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
252 S_008F04_STRIDE(stride);
253
254 if (device->physical_device->rad_info.chip_class != VI && stride) {
255 range /= stride;
256 }
257
258 state[2] = range;
259 state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
260 S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
261 S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
262 S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) |
263 S_008F0C_NUM_FORMAT(num_format) |
264 S_008F0C_DATA_FORMAT(data_format);
265 }
266
267 static void
268 si_set_mutable_tex_desc_fields(struct radv_device *device,
269 struct radv_image *image,
270 const struct legacy_surf_level *base_level_info,
271 unsigned base_level, unsigned first_level,
272 unsigned block_width, bool is_stencil,
273 bool is_storage_image, uint32_t *state)
274 {
275 uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
276 uint64_t va = gpu_address;
277 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
278 uint64_t meta_va = 0;
279 if (chip_class >= GFX9) {
280 if (is_stencil)
281 va += image->surface.u.gfx9.stencil_offset;
282 else
283 va += image->surface.u.gfx9.surf_offset;
284 } else
285 va += base_level_info->offset;
286
287 state[0] = va >> 8;
288 if (chip_class >= GFX9 ||
289 base_level_info->mode == RADEON_SURF_MODE_2D)
290 state[0] |= image->surface.tile_swizzle;
291 state[1] &= C_008F14_BASE_ADDRESS_HI;
292 state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
293
294 if (chip_class >= VI) {
295 state[6] &= C_008F28_COMPRESSION_EN;
296 state[7] = 0;
297 if (!is_storage_image && radv_vi_dcc_enabled(image, first_level)) {
298 meta_va = gpu_address + image->dcc_offset;
299 if (chip_class <= VI)
300 meta_va += base_level_info->dcc_offset;
301 } else if(!is_storage_image && image->tc_compatible_htile &&
302 image->surface.htile_size) {
303 meta_va = gpu_address + image->htile_offset;
304 }
305
306 if (meta_va) {
307 state[6] |= S_008F28_COMPRESSION_EN(1);
308 state[7] = meta_va >> 8;
309 state[7] |= image->surface.tile_swizzle;
310 }
311 }
312
313 if (chip_class >= GFX9) {
314 state[3] &= C_008F1C_SW_MODE;
315 state[4] &= C_008F20_PITCH_GFX9;
316
317 if (is_stencil) {
318 state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
319 state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
320 } else {
321 state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
322 state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
323 }
324
325 state[5] &= C_008F24_META_DATA_ADDRESS &
326 C_008F24_META_PIPE_ALIGNED &
327 C_008F24_META_RB_ALIGNED;
328 if (meta_va) {
329 struct gfx9_surf_meta_flags meta;
330
331 if (image->dcc_offset)
332 meta = image->surface.u.gfx9.dcc;
333 else
334 meta = image->surface.u.gfx9.htile;
335
336 state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
337 S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
338 S_008F24_META_RB_ALIGNED(meta.rb_aligned);
339 }
340 } else {
341 /* SI-CI-VI */
342 unsigned pitch = base_level_info->nblk_x * block_width;
343 unsigned index = si_tile_mode_index(image, base_level, is_stencil);
344
345 state[3] &= C_008F1C_TILING_INDEX;
346 state[3] |= S_008F1C_TILING_INDEX(index);
347 state[4] &= C_008F20_PITCH_GFX6;
348 state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
349 }
350 }
351
352 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
353 unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
354 {
355 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
356 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
357
358 /* GFX9 allocates 1D textures as 2D. */
359 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
360 image_type = VK_IMAGE_TYPE_2D;
361 switch (image_type) {
362 case VK_IMAGE_TYPE_1D:
363 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
364 case VK_IMAGE_TYPE_2D:
365 if (nr_samples > 1)
366 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
367 else
368 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
369 case VK_IMAGE_TYPE_3D:
370 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
371 return V_008F1C_SQ_RSRC_IMG_3D;
372 else
373 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
374 default:
375 unreachable("illegale image type");
376 }
377 }
378
379 static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
380 {
381 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
382
383 if (swizzle[3] == VK_SWIZZLE_X) {
384 /* For the pre-defined border color values (white, opaque
385 * black, transparent black), the only thing that matters is
386 * that the alpha channel winds up in the correct place
387 * (because the RGB channels are all the same) so either of
388 * these enumerations will work.
389 */
390 if (swizzle[2] == VK_SWIZZLE_Y)
391 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
392 else
393 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
394 } else if (swizzle[0] == VK_SWIZZLE_X) {
395 if (swizzle[1] == VK_SWIZZLE_Y)
396 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
397 else
398 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
399 } else if (swizzle[1] == VK_SWIZZLE_X) {
400 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
401 } else if (swizzle[2] == VK_SWIZZLE_X) {
402 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
403 }
404
405 return bc_swizzle;
406 }
407
408 /**
409 * Build the sampler view descriptor for a texture.
410 */
411 static void
412 si_make_texture_descriptor(struct radv_device *device,
413 struct radv_image *image,
414 bool is_storage_image,
415 VkImageViewType view_type,
416 VkFormat vk_format,
417 const VkComponentMapping *mapping,
418 unsigned first_level, unsigned last_level,
419 unsigned first_layer, unsigned last_layer,
420 unsigned width, unsigned height, unsigned depth,
421 uint32_t *state,
422 uint32_t *fmask_state)
423 {
424 const struct vk_format_description *desc;
425 enum vk_swizzle swizzle[4];
426 int first_non_void;
427 unsigned num_format, data_format, type;
428
429 desc = vk_format_description(vk_format);
430
431 if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
432 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
433 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
434 } else {
435 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
436 }
437
438 first_non_void = vk_format_get_first_non_void_channel(vk_format);
439
440 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
441 if (num_format == ~0) {
442 num_format = 0;
443 }
444
445 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
446 if (data_format == ~0) {
447 data_format = 0;
448 }
449
450 /* S8 with either Z16 or Z32 HTILE need a special format. */
451 if (device->physical_device->rad_info.chip_class >= GFX9 &&
452 vk_format == VK_FORMAT_S8_UINT &&
453 image->tc_compatible_htile) {
454 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
455 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
456 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
457 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
458 }
459 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
460 is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
461 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
462 height = 1;
463 depth = image->info.array_size;
464 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
465 type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
466 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
467 depth = image->info.array_size;
468 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
469 depth = image->info.array_size / 6;
470
471 state[0] = 0;
472 state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
473 S_008F14_NUM_FORMAT_GFX6(num_format));
474 state[2] = (S_008F18_WIDTH(width - 1) |
475 S_008F18_HEIGHT(height - 1) |
476 S_008F18_PERF_MOD(4));
477 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
478 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
479 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
480 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
481 S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
482 0 : first_level) |
483 S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
484 util_logbase2(image->info.samples) :
485 last_level) |
486 S_008F1C_TYPE(type));
487 state[4] = 0;
488 state[5] = S_008F24_BASE_ARRAY(first_layer);
489 state[6] = 0;
490 state[7] = 0;
491
492 if (device->physical_device->rad_info.chip_class >= GFX9) {
493 unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
494
495 /* Depth is the the last accessible layer on Gfx9.
496 * The hw doesn't need to know the total number of layers.
497 */
498 if (type == V_008F1C_SQ_RSRC_IMG_3D)
499 state[4] |= S_008F20_DEPTH(depth - 1);
500 else
501 state[4] |= S_008F20_DEPTH(last_layer);
502
503 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
504 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
505 util_logbase2(image->info.samples) :
506 image->info.levels - 1);
507 } else {
508 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
509 state[4] |= S_008F20_DEPTH(depth - 1);
510 state[5] |= S_008F24_LAST_ARRAY(last_layer);
511 }
512 if (image->dcc_offset) {
513 unsigned swap = radv_translate_colorswap(vk_format, FALSE);
514
515 state[6] = S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
516 } else {
517 /* The last dword is unused by hw. The shader uses it to clear
518 * bits in the first dword of sampler state.
519 */
520 if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
521 if (first_level == last_level)
522 state[7] = C_008F30_MAX_ANISO_RATIO;
523 else
524 state[7] = 0xffffffff;
525 }
526 }
527
528 /* Initialize the sampler view for FMASK. */
529 if (image->fmask.size) {
530 uint32_t fmask_format, num_format;
531 uint64_t gpu_address = radv_buffer_get_va(image->bo);
532 uint64_t va;
533
534 va = gpu_address + image->offset + image->fmask.offset;
535
536 if (device->physical_device->rad_info.chip_class >= GFX9) {
537 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
538 switch (image->info.samples) {
539 case 2:
540 num_format = V_008F14_IMG_FMASK_8_2_2;
541 break;
542 case 4:
543 num_format = V_008F14_IMG_FMASK_8_4_4;
544 break;
545 case 8:
546 num_format = V_008F14_IMG_FMASK_32_8_8;
547 break;
548 default:
549 unreachable("invalid nr_samples");
550 }
551 } else {
552 switch (image->info.samples) {
553 case 2:
554 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
555 break;
556 case 4:
557 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
558 break;
559 case 8:
560 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
561 break;
562 default:
563 assert(0);
564 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
565 }
566 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
567 }
568
569 fmask_state[0] = va >> 8;
570 fmask_state[0] |= image->fmask.tile_swizzle;
571 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
572 S_008F14_DATA_FORMAT_GFX6(fmask_format) |
573 S_008F14_NUM_FORMAT_GFX6(num_format);
574 fmask_state[2] = S_008F18_WIDTH(width - 1) |
575 S_008F18_HEIGHT(height - 1);
576 fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
577 S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
578 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
579 S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
580 S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
581 fmask_state[4] = 0;
582 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
583 fmask_state[6] = 0;
584 fmask_state[7] = 0;
585
586 if (device->physical_device->rad_info.chip_class >= GFX9) {
587 fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
588 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
589 S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
590 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
591 S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
592 } else {
593 fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
594 fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
595 S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
596 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
597 }
598 } else if (fmask_state)
599 memset(fmask_state, 0, 8 * 4);
600 }
601
602 static void
603 radv_query_opaque_metadata(struct radv_device *device,
604 struct radv_image *image,
605 struct radeon_bo_metadata *md)
606 {
607 static const VkComponentMapping fixedmapping;
608 uint32_t desc[8], i;
609
610 /* Metadata image format format version 1:
611 * [0] = 1 (metadata format identifier)
612 * [1] = (VENDOR_ID << 16) | PCI_ID
613 * [2:9] = image descriptor for the whole resource
614 * [2] is always 0, because the base address is cleared
615 * [9] is the DCC offset bits [39:8] from the beginning of
616 * the buffer
617 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
618 */
619 md->metadata[0] = 1; /* metadata image format version 1 */
620
621 /* TILE_MODE_INDEX is ambiguous without a PCI ID. */
622 md->metadata[1] = si_get_bo_metadata_word1(device);
623
624
625 si_make_texture_descriptor(device, image, false,
626 (VkImageViewType)image->type, image->vk_format,
627 &fixedmapping, 0, image->info.levels - 1, 0,
628 image->info.array_size,
629 image->info.width, image->info.height,
630 image->info.depth,
631 desc, NULL);
632
633 si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
634 image->surface.blk_w, false, false, desc);
635
636 /* Clear the base address and set the relative DCC offset. */
637 desc[0] = 0;
638 desc[1] &= C_008F14_BASE_ADDRESS_HI;
639 desc[7] = image->dcc_offset >> 8;
640
641 /* Dwords [2:9] contain the image descriptor. */
642 memcpy(&md->metadata[2], desc, sizeof(desc));
643
644 /* Dwords [10:..] contain the mipmap level offsets. */
645 if (device->physical_device->rad_info.chip_class <= VI) {
646 for (i = 0; i <= image->info.levels - 1; i++)
647 md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
648 md->size_metadata = (11 + image->info.levels - 1) * 4;
649 }
650 }
651
652 void
653 radv_init_metadata(struct radv_device *device,
654 struct radv_image *image,
655 struct radeon_bo_metadata *metadata)
656 {
657 struct radeon_surf *surface = &image->surface;
658
659 memset(metadata, 0, sizeof(*metadata));
660
661 if (device->physical_device->rad_info.chip_class >= GFX9) {
662 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
663 } else {
664 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
665 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
666 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
667 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
668 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
669 metadata->u.legacy.bankw = surface->u.legacy.bankw;
670 metadata->u.legacy.bankh = surface->u.legacy.bankh;
671 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
672 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
673 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
674 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
675 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
676 }
677 radv_query_opaque_metadata(device, image, metadata);
678 }
679
680 /* The number of samples can be specified independently of the texture. */
681 static void
682 radv_image_get_fmask_info(struct radv_device *device,
683 struct radv_image *image,
684 unsigned nr_samples,
685 struct radv_fmask_info *out)
686 {
687 /* FMASK is allocated like an ordinary texture. */
688 struct radeon_surf fmask = {};
689 struct ac_surf_info info = image->info;
690 memset(out, 0, sizeof(*out));
691
692 if (device->physical_device->rad_info.chip_class >= GFX9) {
693 out->alignment = image->surface.u.gfx9.fmask_alignment;
694 out->size = image->surface.u.gfx9.fmask_size;
695 return;
696 }
697
698 fmask.blk_w = image->surface.blk_w;
699 fmask.blk_h = image->surface.blk_h;
700 info.samples = 1;
701 fmask.flags = image->surface.flags | RADEON_SURF_FMASK;
702
703 if (!image->shareable)
704 info.surf_index = &device->fmask_mrt_offset_counter;
705
706 /* Force 2D tiling if it wasn't set. This may occur when creating
707 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
708 * destination buffer must have an FMASK too. */
709 fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
710 fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
711
712 switch (nr_samples) {
713 case 2:
714 case 4:
715 fmask.bpe = 1;
716 break;
717 case 8:
718 fmask.bpe = 4;
719 break;
720 default:
721 return;
722 }
723
724 device->ws->surface_init(device->ws, &info, &fmask);
725 assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
726
727 out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
728 if (out->slice_tile_max)
729 out->slice_tile_max -= 1;
730
731 out->tile_mode_index = fmask.u.legacy.tiling_index[0];
732 out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
733 out->bank_height = fmask.u.legacy.bankh;
734 out->tile_swizzle = fmask.tile_swizzle;
735 out->alignment = MAX2(256, fmask.surf_alignment);
736 out->size = fmask.surf_size;
737
738 assert(!out->tile_swizzle || !image->shareable);
739 }
740
741 static void
742 radv_image_alloc_fmask(struct radv_device *device,
743 struct radv_image *image)
744 {
745 radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);
746
747 image->fmask.offset = align64(image->size, image->fmask.alignment);
748 image->size = image->fmask.offset + image->fmask.size;
749 image->alignment = MAX2(image->alignment, image->fmask.alignment);
750 }
751
752 static void
753 radv_image_get_cmask_info(struct radv_device *device,
754 struct radv_image *image,
755 struct radv_cmask_info *out)
756 {
757 unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
758 unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
759 unsigned cl_width, cl_height;
760
761 if (device->physical_device->rad_info.chip_class >= GFX9) {
762 out->alignment = image->surface.u.gfx9.cmask_alignment;
763 out->size = image->surface.u.gfx9.cmask_size;
764 return;
765 }
766
767 switch (num_pipes) {
768 case 2:
769 cl_width = 32;
770 cl_height = 16;
771 break;
772 case 4:
773 cl_width = 32;
774 cl_height = 32;
775 break;
776 case 8:
777 cl_width = 64;
778 cl_height = 32;
779 break;
780 case 16: /* Hawaii */
781 cl_width = 64;
782 cl_height = 64;
783 break;
784 default:
785 assert(0);
786 return;
787 }
788
789 unsigned base_align = num_pipes * pipe_interleave_bytes;
790
791 unsigned width = align(image->info.width, cl_width*8);
792 unsigned height = align(image->info.height, cl_height*8);
793 unsigned slice_elements = (width * height) / (8*8);
794
795 /* Each element of CMASK is a nibble. */
796 unsigned slice_bytes = slice_elements / 2;
797
798 out->slice_tile_max = (width * height) / (128*128);
799 if (out->slice_tile_max)
800 out->slice_tile_max -= 1;
801
802 out->alignment = MAX2(256, base_align);
803 out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) *
804 align(slice_bytes, base_align);
805 }
806
807 static void
808 radv_image_alloc_cmask(struct radv_device *device,
809 struct radv_image *image)
810 {
811 uint32_t clear_value_size = 0;
812 radv_image_get_cmask_info(device, image, &image->cmask);
813
814 image->cmask.offset = align64(image->size, image->cmask.alignment);
815 /* + 8 for storing the clear values */
816 if (!image->clear_value_offset) {
817 image->clear_value_offset = image->cmask.offset + image->cmask.size;
818 clear_value_size = 8;
819 }
820 image->size = image->cmask.offset + image->cmask.size + clear_value_size;
821 image->alignment = MAX2(image->alignment, image->cmask.alignment);
822 }
823
824 static void
825 radv_image_alloc_dcc(struct radv_image *image)
826 {
827 image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
828 /* + 16 for storing the clear values + dcc pred */
829 image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
830 image->dcc_pred_offset = image->clear_value_offset + 8;
831 image->size = image->dcc_offset + image->surface.dcc_size + 16;
832 image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
833 }
834
835 static void
836 radv_image_alloc_htile(struct radv_image *image)
837 {
838 image->htile_offset = align64(image->size, image->surface.htile_alignment);
839
840 /* + 8 for storing the clear values */
841 image->clear_value_offset = image->htile_offset + image->surface.htile_size;
842 image->size = image->clear_value_offset + 8;
843 image->alignment = align64(image->alignment, image->surface.htile_alignment);
844 }
845
846 static inline bool
847 radv_image_can_enable_dcc_or_cmask(struct radv_image *image)
848 {
849 if (image->info.samples <= 1 &&
850 image->info.width * image->info.height <= 512 * 512) {
851 /* Do not enable CMASK or DCC for small surfaces where the cost
852 * of the eliminate pass can be higher than the benefit of fast
853 * clear. RadeonSI does this, but the image threshold is
854 * different.
855 */
856 return false;
857 }
858
859 return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
860 (image->exclusive || image->queue_family_mask == 1);
861 }
862
863 static inline bool
864 radv_image_can_enable_dcc(struct radv_image *image)
865 {
866 return radv_image_can_enable_dcc_or_cmask(image) &&
867 image->surface.dcc_size;
868 }
869
870 static inline bool
871 radv_image_can_enable_cmask(struct radv_image *image)
872 {
873 if (image->surface.bpe > 8 && image->info.samples == 1) {
874 /* Do not enable CMASK for non-MSAA images (fast color clear)
875 * because 128 bit formats are not supported, but FMASK might
876 * still be used.
877 */
878 return false;
879 }
880
881 return radv_image_can_enable_dcc_or_cmask(image) &&
882 image->info.levels == 1 &&
883 image->info.depth == 1 &&
884 !image->surface.is_linear;
885 }
886
887 static inline bool
888 radv_image_can_enable_fmask(struct radv_image *image)
889 {
890 return image->info.samples > 1 && vk_format_is_color(image->vk_format);
891 }
892
893 static inline bool
894 radv_image_can_enable_htile(struct radv_image *image)
895 {
896 return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
897 }
898
899 VkResult
900 radv_image_create(VkDevice _device,
901 const struct radv_image_create_info *create_info,
902 const VkAllocationCallbacks* alloc,
903 VkImage *pImage)
904 {
905 RADV_FROM_HANDLE(radv_device, device, _device);
906 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
907 struct radv_image *image = NULL;
908 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
909
910 radv_assert(pCreateInfo->mipLevels > 0);
911 radv_assert(pCreateInfo->arrayLayers > 0);
912 radv_assert(pCreateInfo->samples > 0);
913 radv_assert(pCreateInfo->extent.width > 0);
914 radv_assert(pCreateInfo->extent.height > 0);
915 radv_assert(pCreateInfo->extent.depth > 0);
916
917 image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
918 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
919 if (!image)
920 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
921
922 image->type = pCreateInfo->imageType;
923 image->info.width = pCreateInfo->extent.width;
924 image->info.height = pCreateInfo->extent.height;
925 image->info.depth = pCreateInfo->extent.depth;
926 image->info.samples = pCreateInfo->samples;
927 image->info.array_size = pCreateInfo->arrayLayers;
928 image->info.levels = pCreateInfo->mipLevels;
929
930 image->vk_format = pCreateInfo->format;
931 image->tiling = pCreateInfo->tiling;
932 image->usage = pCreateInfo->usage;
933 image->flags = pCreateInfo->flags;
934
935 image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
936 if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
937 for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
938 if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHR)
939 image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
940 else
941 image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
942 }
943
944 image->shareable = vk_find_struct_const(pCreateInfo->pNext,
945 EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
946 if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
947 image->info.surf_index = &device->image_mrt_offset_counter;
948 }
949
950 radv_init_surface(device, &image->surface, create_info);
951
952 device->ws->surface_init(device->ws, &image->info, &image->surface);
953
954 image->size = image->surface.surf_size;
955 image->alignment = image->surface.surf_alignment;
956
957 if (!create_info->no_metadata_planes) {
958 /* Try to enable DCC first. */
959 if (radv_image_can_enable_dcc(image)) {
960 radv_image_alloc_dcc(image);
961 } else {
962 /* When DCC cannot be enabled, try CMASK. */
963 image->surface.dcc_size = 0;
964 if (radv_image_can_enable_cmask(image)) {
965 radv_image_alloc_cmask(device, image);
966 }
967 }
968
969 /* Try to enable FMASK for multisampled images. */
970 if (radv_image_can_enable_fmask(image)) {
971 radv_image_alloc_fmask(device, image);
972 } else {
973 /* Otherwise, try to enable HTILE for depth surfaces. */
974 if (radv_image_can_enable_htile(image) &&
975 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
976 radv_image_alloc_htile(image);
977 image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
978 } else {
979 image->surface.htile_size = 0;
980 }
981 }
982 } else {
983 image->surface.dcc_size = 0;
984 image->surface.htile_size = 0;
985 }
986
987 if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
988 image->alignment = MAX2(image->alignment, 4096);
989 image->size = align64(image->size, image->alignment);
990 image->offset = 0;
991
992 image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
993 0, RADEON_FLAG_VIRTUAL);
994 if (!image->bo) {
995 vk_free2(&device->alloc, alloc, image);
996 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
997 }
998 }
999
1000 *pImage = radv_image_to_handle(image);
1001
1002 return VK_SUCCESS;
1003 }
1004
1005 static void
1006 radv_image_view_make_descriptor(struct radv_image_view *iview,
1007 struct radv_device *device,
1008 const VkComponentMapping *components,
1009 bool is_storage_image)
1010 {
1011 struct radv_image *image = iview->image;
1012 bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
1013 uint32_t blk_w;
1014 uint32_t *descriptor;
1015 uint32_t hw_level = 0;
1016
1017 if (is_storage_image) {
1018 descriptor = iview->storage_descriptor;
1019 } else {
1020 descriptor = iview->descriptor;
1021 }
1022
1023 assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
1024 blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);
1025
1026 if (device->physical_device->rad_info.chip_class >= GFX9)
1027 hw_level = iview->base_mip;
1028 si_make_texture_descriptor(device, image, is_storage_image,
1029 iview->type,
1030 iview->vk_format,
1031 components,
1032 hw_level, hw_level + iview->level_count - 1,
1033 iview->base_layer,
1034 iview->base_layer + iview->layer_count - 1,
1035 iview->extent.width,
1036 iview->extent.height,
1037 iview->extent.depth,
1038 descriptor,
1039 descriptor + 8);
1040
1041 const struct legacy_surf_level *base_level_info = NULL;
1042 if (device->physical_device->rad_info.chip_class <= GFX9) {
1043 if (is_stencil)
1044 base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip];
1045 else
1046 base_level_info = &image->surface.u.legacy.level[iview->base_mip];
1047 }
1048 si_set_mutable_tex_desc_fields(device, image,
1049 base_level_info,
1050 iview->base_mip,
1051 iview->base_mip,
1052 blk_w, is_stencil, is_storage_image, descriptor);
1053 }
1054
1055 void
1056 radv_image_view_init(struct radv_image_view *iview,
1057 struct radv_device *device,
1058 const VkImageViewCreateInfo* pCreateInfo)
1059 {
1060 RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
1061 const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
1062
1063 switch (image->type) {
1064 case VK_IMAGE_TYPE_1D:
1065 case VK_IMAGE_TYPE_2D:
1066 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
1067 break;
1068 case VK_IMAGE_TYPE_3D:
1069 assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
1070 <= radv_minify(image->info.depth, range->baseMipLevel));
1071 break;
1072 default:
1073 unreachable("bad VkImageType");
1074 }
1075 iview->image = image;
1076 iview->bo = image->bo;
1077 iview->type = pCreateInfo->viewType;
1078 iview->vk_format = pCreateInfo->format;
1079 iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
1080
1081 if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1082 iview->vk_format = vk_format_stencil_only(iview->vk_format);
1083 } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
1084 iview->vk_format = vk_format_depth_only(iview->vk_format);
1085 }
1086
1087 if (device->physical_device->rad_info.chip_class >= GFX9) {
1088 iview->extent = (VkExtent3D) {
1089 .width = image->info.width,
1090 .height = image->info.height,
1091 .depth = image->info.depth,
1092 };
1093 } else {
1094 iview->extent = (VkExtent3D) {
1095 .width = radv_minify(image->info.width , range->baseMipLevel),
1096 .height = radv_minify(image->info.height, range->baseMipLevel),
1097 .depth = radv_minify(image->info.depth , range->baseMipLevel),
1098 };
1099 }
1100
1101 if (iview->vk_format != image->vk_format) {
1102 unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
1103 unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
1104 unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
1105 unsigned img_bh = vk_format_get_blockheight(image->vk_format);
1106
1107 iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
1108 iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
1109
1110 /* Comment ported from amdvlk -
1111 * If we have the following image:
1112 * Uncompressed pixels Compressed block sizes (4x4)
1113 * mip0: 22 x 22 6 x 6
1114 * mip1: 11 x 11 3 x 3
1115 * mip2: 5 x 5 2 x 2
1116 * mip3: 2 x 2 1 x 1
1117 * mip4: 1 x 1 1 x 1
1118 *
1119 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
1120 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
1121 * divide-by-two integer math):
1122 * mip0: 6x6
1123 * mip1: 3x3
1124 * mip2: 1x1
1125 * mip3: 1x1
1126 *
1127 * This means that mip2 will be missing texels.
1128 *
1129 * Fix this by calculating the base mip's width and height, then convert that, and round it
1130 * back up to get the level 0 size.
1131 * Clamp the converted size between the original values, and next power of two, which
1132 * means we don't oversize the image.
1133 */
1134 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1135 vk_format_is_compressed(image->vk_format) &&
1136 !vk_format_is_compressed(iview->vk_format)) {
1137 unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
1138 unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
1139 unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
1140 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
1141
1142 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
1143 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
1144
1145 lvl_width <<= range->baseMipLevel;
1146 lvl_height <<= range->baseMipLevel;
1147
1148 iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
1149 iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
1150 }
1151 }
1152
1153 iview->base_layer = range->baseArrayLayer;
1154 iview->layer_count = radv_get_layerCount(image, range);
1155 iview->base_mip = range->baseMipLevel;
1156 iview->level_count = radv_get_levelCount(image, range);
1157
1158 radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false);
1159 radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true);
1160 }
1161
1162 bool radv_layout_has_htile(const struct radv_image *image,
1163 VkImageLayout layout,
1164 unsigned queue_mask)
1165 {
1166 if (image->surface.htile_size && image->tc_compatible_htile)
1167 return layout != VK_IMAGE_LAYOUT_GENERAL;
1168
1169 return image->surface.htile_size &&
1170 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1171 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
1172 queue_mask == (1u << RADV_QUEUE_GENERAL);
1173 }
1174
1175 bool radv_layout_is_htile_compressed(const struct radv_image *image,
1176 VkImageLayout layout,
1177 unsigned queue_mask)
1178 {
1179 if (image->surface.htile_size && image->tc_compatible_htile)
1180 return layout != VK_IMAGE_LAYOUT_GENERAL;
1181
1182 return image->surface.htile_size &&
1183 (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
1184 layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
1185 queue_mask == (1u << RADV_QUEUE_GENERAL);
1186 }
1187
1188 bool radv_layout_can_fast_clear(const struct radv_image *image,
1189 VkImageLayout layout,
1190 unsigned queue_mask)
1191 {
1192 return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
1193 queue_mask == (1u << RADV_QUEUE_GENERAL);
1194 }
1195
1196 bool radv_layout_dcc_compressed(const struct radv_image *image,
1197 VkImageLayout layout,
1198 unsigned queue_mask)
1199 {
1200 /* Don't compress compute transfer dst, as image stores are not supported. */
1201 if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
1202 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1203 return false;
1204
1205 return image->surface.num_dcc_levels > 0 && layout != VK_IMAGE_LAYOUT_GENERAL;
1206 }
1207
1208
1209 unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
1210 {
1211 if (!image->exclusive)
1212 return image->queue_family_mask;
1213 if (family == VK_QUEUE_FAMILY_EXTERNAL_KHR)
1214 return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1215 if (family == VK_QUEUE_FAMILY_IGNORED)
1216 return 1u << queue_family;
1217 return 1u << family;
1218 }
1219
1220 VkResult
1221 radv_CreateImage(VkDevice device,
1222 const VkImageCreateInfo *pCreateInfo,
1223 const VkAllocationCallbacks *pAllocator,
1224 VkImage *pImage)
1225 {
1226 #ifdef ANDROID
1227 const VkNativeBufferANDROID *gralloc_info =
1228 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
1229
1230 if (gralloc_info)
1231 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
1232 pAllocator, pImage);
1233 #endif
1234
1235 const struct wsi_image_create_info *wsi_info =
1236 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
1237 bool scanout = wsi_info && wsi_info->scanout;
1238
1239 return radv_image_create(device,
1240 &(struct radv_image_create_info) {
1241 .vk_info = pCreateInfo,
1242 .scanout = scanout,
1243 },
1244 pAllocator,
1245 pImage);
1246 }
1247
1248 void
1249 radv_DestroyImage(VkDevice _device, VkImage _image,
1250 const VkAllocationCallbacks *pAllocator)
1251 {
1252 RADV_FROM_HANDLE(radv_device, device, _device);
1253 RADV_FROM_HANDLE(radv_image, image, _image);
1254
1255 if (!image)
1256 return;
1257
1258 if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
1259 device->ws->buffer_destroy(image->bo);
1260
1261 if (image->owned_memory != VK_NULL_HANDLE)
1262 radv_FreeMemory(_device, image->owned_memory, pAllocator);
1263
1264 vk_free2(&device->alloc, pAllocator, image);
1265 }
1266
1267 void radv_GetImageSubresourceLayout(
1268 VkDevice _device,
1269 VkImage _image,
1270 const VkImageSubresource* pSubresource,
1271 VkSubresourceLayout* pLayout)
1272 {
1273 RADV_FROM_HANDLE(radv_image, image, _image);
1274 RADV_FROM_HANDLE(radv_device, device, _device);
1275 int level = pSubresource->mipLevel;
1276 int layer = pSubresource->arrayLayer;
1277 struct radeon_surf *surface = &image->surface;
1278
1279 if (device->physical_device->rad_info.chip_class >= GFX9) {
1280 pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
1281 pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
1282 pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
1283 pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
1284 pLayout->size = surface->u.gfx9.surf_slice_size;
1285 if (image->type == VK_IMAGE_TYPE_3D)
1286 pLayout->size *= u_minify(image->info.depth, level);
1287 } else {
1288 pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
1289 pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
1290 pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1291 pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1292 pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
1293 if (image->type == VK_IMAGE_TYPE_3D)
1294 pLayout->size *= u_minify(image->info.depth, level);
1295 }
1296 }
1297
1298
1299 VkResult
1300 radv_CreateImageView(VkDevice _device,
1301 const VkImageViewCreateInfo *pCreateInfo,
1302 const VkAllocationCallbacks *pAllocator,
1303 VkImageView *pView)
1304 {
1305 RADV_FROM_HANDLE(radv_device, device, _device);
1306 struct radv_image_view *view;
1307
1308 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1309 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1310 if (view == NULL)
1311 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1312
1313 radv_image_view_init(view, device, pCreateInfo);
1314
1315 *pView = radv_image_view_to_handle(view);
1316
1317 return VK_SUCCESS;
1318 }
1319
1320 void
1321 radv_DestroyImageView(VkDevice _device, VkImageView _iview,
1322 const VkAllocationCallbacks *pAllocator)
1323 {
1324 RADV_FROM_HANDLE(radv_device, device, _device);
1325 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
1326
1327 if (!iview)
1328 return;
1329 vk_free2(&device->alloc, pAllocator, iview);
1330 }
1331
1332 void radv_buffer_view_init(struct radv_buffer_view *view,
1333 struct radv_device *device,
1334 const VkBufferViewCreateInfo* pCreateInfo)
1335 {
1336 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
1337
1338 view->bo = buffer->bo;
1339 view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
1340 buffer->size - pCreateInfo->offset : pCreateInfo->range;
1341 view->vk_format = pCreateInfo->format;
1342
1343 radv_make_buffer_descriptor(device, buffer, view->vk_format,
1344 pCreateInfo->offset, view->range, view->state);
1345 }
1346
1347 VkResult
1348 radv_CreateBufferView(VkDevice _device,
1349 const VkBufferViewCreateInfo *pCreateInfo,
1350 const VkAllocationCallbacks *pAllocator,
1351 VkBufferView *pView)
1352 {
1353 RADV_FROM_HANDLE(radv_device, device, _device);
1354 struct radv_buffer_view *view;
1355
1356 view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
1357 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1358 if (!view)
1359 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1360
1361 radv_buffer_view_init(view, device, pCreateInfo);
1362
1363 *pView = radv_buffer_view_to_handle(view);
1364
1365 return VK_SUCCESS;
1366 }
1367
1368 void
1369 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
1370 const VkAllocationCallbacks *pAllocator)
1371 {
1372 RADV_FROM_HANDLE(radv_device, device, _device);
1373 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
1374
1375 if (!view)
1376 return;
1377
1378 vk_free2(&device->alloc, pAllocator, view);
1379 }