/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "compiler/glsl_types.h"
#include "util/xmlpool.h"

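/* Derive the pipeline-cache UUID from the build ids of radv and LLVM plus
 * the chip family and pointer size, so on-disk shader caches are invalidated
 * whenever the driver or the compiler changes. */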
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	unsigned ptr_size = sizeof(void*);

	memset(uuid, 0, VK_UUID_SIZE);
	_mesa_sha1_init(&ctx);

	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
		return -1;

	_mesa_sha1_update(&ctx, &family, sizeof(family));
	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
	_mesa_sha1_final(&ctx, sha1);

	memcpy(uuid, sha1, VK_UUID_SIZE);
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
	case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
	case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
	case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
	case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
	return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
	return device->rad_info.vram_size - radv_get_visible_vram_size(device);
}

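/* Expose up to three Vulkan heaps: CPU-invisible VRAM, CPU-visible VRAM
 * (the slice of VRAM the CPU can map, typically limited by the PCIe BAR),
 * and GTT (system memory the GPU can access). */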
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0 && device->rad_info.has_dedicated_vram) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0 && !device->rad_info.has_dedicated_vram) {
		/* Put GTT after visible VRAM for GPUs without dedicated VRAM
		 * as they have identical property flags, and according to the
		 * spec, for types with identical flags, the one with greater
		 * performance must be given a lower index. */
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
					 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

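/* RADV_FORCE_FAMILY overrides the family/chip_class reported by the kernel;
 * accepted values are the LLVM processor names returned by
 * ac_get_llvm_processor_name(), e.g. "polaris10" (illustrative value). */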
static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
	const char *family = getenv("RADV_FORCE_FAMILY");
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			device->rad_info.family = i;

			if (i >= CHIP_NAVI10)
				device->rad_info.chip_class = GFX10;
			else if (i >= CHIP_VEGA10)
				device->rad_info.chip_class = GFX9;
			else if (i >= CHIP_TONGA)
				device->rad_info.chip_class = GFX8;
			else if (i >= CHIP_BONAIRE)
				device->rad_info.chip_class = GFX7;
			else
				device->rad_info.chip_class = GFX6;

			return;
		}
	}

	fprintf(stderr, "radv: Unknown family: %s\n", family);
	exit(1);
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;
	int master_fd = -1;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not open device '%s'", path);

		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
	}

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not get the kernel driver version for device '%s'", path);

		return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found compatible device '%s'.", path);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		goto fail;
	}

	if (instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_handle_env_var_force_family(device);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The gpu id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	if (device->rad_info.chip_class < GFX8 ||
	    device->rad_info.chip_class > GFX9)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

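	/* has_rbplus marks hardware that has the RB+ (render backend plus)
	 * feature; rbplus_allowed whitelists the chips where we actually
	 * enable it. */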
	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
					 device->rad_info.family == CHIP_VEGA12 ||
					 device->rad_info.family == CHIP_RAVEN ||
					 device->rad_info.family == CHIP_RAVEN2 ||
					 device->rad_info.family == CHIP_RENOIR;
	}

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= GFX8;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	device->has_tc_compat_zrange_bug = device->rad_info.chip_class < GFX10;

	/* Out-of-order primitive rasterization. */
	device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 &&
					device->rad_info.max_se >= 2;
	device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	/* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
	device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 ||
				       (device->rad_info.chip_class >= GFX8 &&
					device->rad_info.me_fw_feature >= 41);

	device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 ||
					  device->rad_info.family == CHIP_RENOIR ||
					  device->rad_info.chip_class >= GFX10;

	device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
				    device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;

	/* Determine the number of threads per wave for all stages. */
	device->cs_wave_size = 64;
	device->ps_wave_size = 64;
	device->ge_wave_size = 64;

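	/* GFX10 can execute shaders in wave32 or wave64; wave64 stays the
	 * default and wave32 is opt-in per stage via
	 * RADV_PERFTEST=cswave32/pswave32/gewave32. */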
	if (device->rad_info.chip_class >= GFX10) {
		if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
			device->cs_wave_size = 32;

		/* For pixel shaders, wave64 is recommended. */
		if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
			device->ps_wave_size = 32;

		if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
			device->ge_wave_size = 32;
	}

	radv_physical_device_init_mem_types(device);
	radv_fill_device_extension_table(device, &device->supported_extensions);

	device->bus_info = *drm_device->businfo.pci;

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	/* The WSI is structured as a layer on top of the driver, so this has
	 * to be the last part of initialization (at least until we get other
	 * semi-layers).
	 */
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		vk_error(instance, result);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
}

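/* Default allocation callbacks used when the application passes none. Note
 * that malloc/realloc ignore the requested align here; this relies on
 * malloc's natural alignment being sufficient for the driver's objects. */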
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
	{"nongg", RADV_DEBUG_NO_NGG},
	{"noshaderballot", RADV_DEBUG_NO_SHADER_BALLOT},
	{"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
	{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
	{"cswave32", RADV_PERFTEST_CS_WAVE_32},
	{"pswave32", RADV_PERFTEST_PS_WAVE_32},
	{"gewave32", RADV_PERFTEST_GE_WAVE_32},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
			/* Force enable LLVM sisched for Talos because it
			 * looks safe and it gives a few more FPS.
			 */
			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
		}
	} else if (!strcmp(name, "DOOM_VFR")) {
		/* Work around a Doom VFR game bug */
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
	} else if (!strcmp(name, "MonsterHunterWorld.exe")) {
		/* Workaround for a WaW hazard when LLVM moves/merges
		 * load/store memory operations.
		 * See https://reviews.llvm.org/D61313
		 */
		if (HAVE_LLVM < 0x900)
			instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT;
	} else if (!strcmp(name, "Wolfenstein: Youngblood")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SHADER_BALLOT)) {
			/* Force enable VK_AMD_shader_ballot because it looks
			 * safe and it gives a nice boost (+20% on Vega 56 at
			 * this time).
			 */
			instance->perftest_flags |= RADV_PERFTEST_SHADER_BALLOT;
		}
	}
}

static int radv_get_instance_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

static const char radv_dri_options_xml[] =
DRI_CONF_BEGIN
	DRI_CONF_SECTION_QUALITY
		DRI_CONF_ADAPTIVE_SYNC("true")
	DRI_CONF_SECTION_END
DRI_CONF_END;

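/* Parse the drirc configuration files for the "radv" driver; adaptive sync
 * (default on) is currently the only option exposed. */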
static void radv_init_dri_options(struct radv_instance *instance)
{
	driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml);
	driParseConfigFiles(&instance->dri_options,
			    &instance->available_dri_options,
			    0, "radv", NULL);
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_API_VERSION_1_0;
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

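	/* Both variables take comma-separated option lists from the tables
	 * above, e.g. RADV_DEBUG=info,shaders RADV_PERFTEST=dccmsaa ./app
	 * (illustrative values). */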
	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Created an instance");

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(instance, result);
	}

	_mesa_locale_init();
	glsl_type_singleton_init_or_ref();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	radv_init_dri_options(instance);
	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	glsl_type_singleton_decref();
	_mesa_locale_fini();

	driDestroyOptionCache(&instance->dri_options);
	driDestroyOptionInfo(&instance->available_dri_options);

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

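/* Walk the DRM device list (at most 8 nodes, see the TODO below) and create
 * a radv_physical_device for every AMD PCI device that exposes a render
 * node. */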
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

VkResult radv_EnumeratePhysicalDeviceGroups(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceGroupCount,
	VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDeviceGroupProperties) {
		*pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
			pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
			pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
			pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
		}
	}
	return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
									   : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = radv_device_supports_etc(pdevice),
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = pdevice->rad_info.chip_class >= GFX8,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2                  *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
			VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
			VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
			VkPhysicalDeviceShaderDrawParametersFeatures *features =
				(VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
			features->shaderDrawParameters = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
				(VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			features->protectedMemory = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
				(VkPhysicalDevice16BitStorageFeatures*)ext;
			bool enabled = pdevice->rad_info.chip_class >= GFX8;
			features->storageBuffer16BitAccess = enabled;
			features->uniformAndStorageBuffer16BitAccess = enabled;
			features->storagePushConstant16 = enabled;
			features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
				(VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			features->samplerYcbcrConversion = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
			features->shaderInputAttachmentArrayDynamicIndexing = true;
			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
			features->shaderUniformBufferArrayNonUniformIndexing = true;
			features->shaderSampledImageArrayNonUniformIndexing = true;
			features->shaderStorageBufferArrayNonUniformIndexing = true;
			features->shaderStorageImageArrayNonUniformIndexing = true;
			features->shaderInputAttachmentArrayNonUniformIndexing = true;
			features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
			features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
			features->descriptorBindingUniformBufferUpdateAfterBind = true;
			features->descriptorBindingSampledImageUpdateAfterBind = true;
			features->descriptorBindingStorageImageUpdateAfterBind = true;
			features->descriptorBindingStorageBufferUpdateAfterBind = true;
			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
			features->descriptorBindingUpdateUnusedWhilePending = true;
			features->descriptorBindingPartiallyBound = true;
			features->descriptorBindingVariableDescriptorCount = true;
			features->runtimeDescriptorArray = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
			features->transformFeedback = true;
			features->geometryStreams = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
			VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
				(VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
			features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
			features->memoryPriority = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
				(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
			features->bufferDeviceAddress = true;
			features->bufferDeviceAddressCaptureReplay = false;
			features->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
			features->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
			VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
				(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
			features->hostQueryReset = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
			VkPhysicalDevice8BitStorageFeaturesKHR *features =
				(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
			bool enabled = pdevice->rad_info.chip_class >= GFX8;
			features->storageBuffer8BitAccess = enabled;
			features->uniformAndStorageBuffer8BitAccess = enabled;
			features->storagePushConstant8 = enabled;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
			VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
				(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
			features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8;
			features->shaderInt8 = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
			VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
				(VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
			features->shaderBufferInt64Atomics = HAVE_LLVM >= 0x0900;
			features->shaderSharedInt64Atomics = HAVE_LLVM >= 0x0900;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
				(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

			features->inlineUniformBlock = true;
			features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
				(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
			features->computeDerivativeGroupQuads = false;
			features->computeDerivativeGroupLinear = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
				(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
			features->ycbcrImageArrays = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
			VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features =
				(VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext;
			features->uniformBufferStandardLayout = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
			VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
				(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
			features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR: {
			VkPhysicalDeviceImagelessFramebufferFeaturesKHR *features =
				(VkPhysicalDeviceImagelessFramebufferFeaturesKHR *)ext;
			features->imagelessFramebuffer = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
			VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
				(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
			features->pipelineExecutableInfo = true;
			break;
		}
		default:
			break;
		}
	}
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure the entire descriptor set is addressable with a signed
	 * 32-bit int, i.e. the sum of all limits scaled by descriptor size
	 * has to stay below 2 GiB. A combined image+sampler descriptor counts
	 * against both the sampler and the image limits. This is really a
	 * pipeline-layout limit, not a set-layout limit, but there is no
	 * per-set knob, so we just express it as a set limit; no app is
	 * likely to hit it soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
		(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
		 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
		 32 /* sampler, largest when combined with image */ +
		 64 /* sampled image */ +
		 64 /* storage image */);
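	/* The denominator sums to 224 bytes per descriptor, so this works out
	 * to roughly 2^31 / 224, i.e. about 9.5 million descriptors per
	 * stage, minus the small reservation for dynamic buffers. */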

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
		.maxVertexInputBindings = MAX_VBS,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 8,
		.subTexelPrecisionBits = 8,
		.mipmapPrecisionBits = 8,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 8,
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
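		/* clock_crystal_freq is reported in kHz, so 1e6 / kHz yields
		 * the timestamp tick period in nanoseconds, which is what
		 * Vulkan expects here. */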
1169 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1170 .maxClipDistances = 8,
1171 .maxCullDistances = 8,
1172 .maxCombinedClipAndCullDistances = 8,
1173 .discreteQueuePriorities = 2,
1174 .pointSizeRange = { 0.0, 8192.0 },
1175 .lineWidthRange = { 0.0, 7.9921875 },
1176 .pointSizeGranularity = (1.0 / 8.0),
1177 .lineWidthGranularity = (1.0 / 128.0),
1178 .strictLines = false, /* FINISHME */
1179 .standardSampleLocations = true,
1180 .optimalBufferCopyOffsetAlignment = 128,
1181 .optimalBufferCopyRowPitchAlignment = 128,
1182 .nonCoherentAtomSize = 64,
1183 };
1184
1185 *pProperties = (VkPhysicalDeviceProperties) {
1186 .apiVersion = radv_physical_device_api_version(pdevice),
1187 .driverVersion = vk_get_driver_version(),
1188 .vendorID = ATI_VENDOR_ID,
1189 .deviceID = pdevice->rad_info.pci_id,
1190 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1191 .limits = limits,
1192 .sparseProperties = {0},
1193 };
1194
1195 strcpy(pProperties->deviceName, pdevice->name);
1196 memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1197 }
1198
1199 void radv_GetPhysicalDeviceProperties2(
1200 VkPhysicalDevice physicalDevice,
1201 VkPhysicalDeviceProperties2 *pProperties)
1202 {
1203 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1204 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
1205
1206 vk_foreach_struct(ext, pProperties->pNext) {
1207 switch (ext->sType) {
1208 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
1209 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
1210 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
1211 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1212 break;
1213 }
1214 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
1215 VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
1216 memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1217 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1218 properties->deviceLUIDValid = false;
1219 break;
1220 }
1221 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
1222 VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
1223 properties->maxMultiviewViewCount = MAX_VIEWS;
1224 properties->maxMultiviewInstanceIndex = INT_MAX;
1225 break;
1226 }
1227 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
1228 VkPhysicalDevicePointClippingProperties *properties =
1229 (VkPhysicalDevicePointClippingProperties*)ext;
1230 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1231 break;
1232 }
1233 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
1234 VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
1235 (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
1236 properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1237 break;
1238 }
1239 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
1240 VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
1241 (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
1242 properties->minImportedHostPointerAlignment = 4096;
1243 break;
1244 }
1245 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
1246 VkPhysicalDeviceSubgroupProperties *properties =
1247 (VkPhysicalDeviceSubgroupProperties*)ext;
1248 properties->subgroupSize = 64;
1249 properties->supportedStages = VK_SHADER_STAGE_ALL;
1250 properties->supportedOperations =
1251 VK_SUBGROUP_FEATURE_BASIC_BIT |
1252 VK_SUBGROUP_FEATURE_BALLOT_BIT |
1253 VK_SUBGROUP_FEATURE_QUAD_BIT |
1254 VK_SUBGROUP_FEATURE_VOTE_BIT;
1255 if (pdevice->rad_info.chip_class >= GFX8) {
1256 properties->supportedOperations |=
1257 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
1258 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1259 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1260 }
1261 properties->quadOperationsInAllStages = true;
1262 break;
1263 }
1264 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
1265 VkPhysicalDeviceMaintenance3Properties *properties =
1266 (VkPhysicalDeviceMaintenance3Properties*)ext;
1267 /* Make sure everything is addressable by a signed 32-bit int, and
1268 * our largest descriptors are 96 bytes. */
1269 properties->maxPerSetDescriptors = (1ull << 31) / 96;
1270 /* Our buffer size fields allow only this much */
1271 properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
1272 break;
1273 }
1274 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
1275 VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
1276 (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
1277 /* GFX6-8 only support single channel min/max filter. */
1278 properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1279 properties->filterMinmaxSingleComponentFormats = true;
1280 break;
1281 }
1282 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
1283 VkPhysicalDeviceShaderCorePropertiesAMD *properties =
1284 (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
1285
1286 /* Shader engines. */
1287 properties->shaderEngineCount =
1288 pdevice->rad_info.max_se;
1289 properties->shaderArraysPerEngineCount =
1290 pdevice->rad_info.max_sh_per_se;
1291 properties->computeUnitsPerShaderArray =
1292 pdevice->rad_info.num_good_cu_per_sh;
1293 properties->simdPerComputeUnit = 4;
1294 properties->wavefrontsPerSimd =
1295 pdevice->rad_info.family == CHIP_TONGA ||
1296 pdevice->rad_info.family == CHIP_ICELAND ||
1297 pdevice->rad_info.family == CHIP_POLARIS10 ||
1298 pdevice->rad_info.family == CHIP_POLARIS11 ||
1299 pdevice->rad_info.family == CHIP_POLARIS12 ||
1300 pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
1301 properties->wavefrontSize = 64;
1302
1303 /* SGPR. */
1304 properties->sgprsPerSimd =
1305 ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
1306 properties->minSgprAllocation =
1307 pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
1308 properties->maxSgprAllocation =
1309 pdevice->rad_info.family == CHIP_TONGA ||
1310 pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
1311 properties->sgprAllocationGranularity =
1312 pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
1313
1314 /* VGPR. */
1315 properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
1316 properties->minVgprAllocation = 4;
1317 properties->maxVgprAllocation = 256;
1318 properties->vgprAllocationGranularity = 4;
1319 break;
1320 }
1321 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
1322 VkPhysicalDeviceShaderCoreProperties2AMD *properties =
1323 (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
1324
1325 properties->shaderCoreFeatures = 0;
1326 properties->activeComputeUnitCount =
1327 pdevice->rad_info.num_good_compute_units;
1328 break;
1329 }
1330 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
1331 VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
1332 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
1333 properties->maxVertexAttribDivisor = UINT32_MAX;
1334 break;
1335 }
1336 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
1337 VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
1338 (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
1339 properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1340 properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
1341 properties->shaderSampledImageArrayNonUniformIndexingNative = false;
1342 properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
1343 properties->shaderStorageImageArrayNonUniformIndexingNative = false;
1344 properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1345 properties->robustBufferAccessUpdateAfterBind = false;
1346 properties->quadDivergentImplicitLod = false;
1347
1348 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1349 MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1350 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1351 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1352 32 /* sampler, largest when combined with image */ +
1353 64 /* sampled image */ +
1354 64 /* storage image */);
1355 properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1356 properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1357 properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1358 properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1359 properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1360 properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1361 properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1362 properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1363 properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1364 properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1365 properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1366 properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1367 properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1368 properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1369 properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1370 break;
1371 }
1372 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
1373 VkPhysicalDeviceProtectedMemoryProperties *properties =
1374 (VkPhysicalDeviceProtectedMemoryProperties *)ext;
1375 properties->protectedNoFault = false;
1376 break;
1377 }
1378 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
1379 VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
1380 (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
1381 properties->primitiveOverestimationSize = 0;
1382 properties->maxExtraPrimitiveOverestimationSize = 0;
1383 properties->extraPrimitiveOverestimationSizeGranularity = 0;
1384 properties->primitiveUnderestimation = VK_FALSE;
1385 properties->conservativePointAndLineRasterization = VK_FALSE;
1386 properties->degenerateTrianglesRasterized = VK_FALSE;
1387 properties->degenerateLinesRasterized = VK_FALSE;
1388 properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
1389 properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
1390 break;
1391 }
1392 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
1393 VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
1394 (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
1395 properties->pciDomain = pdevice->bus_info.domain;
1396 properties->pciBus = pdevice->bus_info.bus;
1397 properties->pciDevice = pdevice->bus_info.dev;
1398 properties->pciFunction = pdevice->bus_info.func;
1399 break;
1400 }
1401 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
1402 VkPhysicalDeviceDriverPropertiesKHR *driver_props =
1403 (VkPhysicalDeviceDriverPropertiesKHR *) ext;
1404
1405 driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
1406 snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv");
1407 snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
1408 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
1409 " (LLVM " MESA_LLVM_VERSION_STRING ")");
1410
1411 driver_props->conformanceVersion = (VkConformanceVersionKHR) {
1412 .major = 1,
1413 .minor = 1,
1414 .subminor = 2,
1415 .patch = 0,
1416 };
1417 break;
1418 }
1419 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
1420 VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
1421 (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
1422 properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
1423 properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
1424 properties->maxTransformFeedbackBufferSize = UINT32_MAX;
1425 properties->maxTransformFeedbackStreamDataSize = 512;
1426 properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
1427 properties->maxTransformFeedbackBufferDataStride = 512;
1428 properties->transformFeedbackQueries = true;
1429 properties->transformFeedbackStreamsLinesTriangles = true;
1430 properties->transformFeedbackRasterizationStreamSelect = false;
1431 properties->transformFeedbackDraw = true;
1432 break;
1433 }
1434 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
1435 VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
1436 (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
1437
1438 props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1439 props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1440 props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1441 props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1442 props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1443 break;
1444 }
1445 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
1446 VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
1447 (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
1448 properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
1449 VK_SAMPLE_COUNT_4_BIT |
1450 VK_SAMPLE_COUNT_8_BIT;
1451 properties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
1452 properties->sampleLocationCoordinateRange[0] = 0.0f;
1453 properties->sampleLocationCoordinateRange[1] = 0.9375f;
1454 properties->sampleLocationSubPixelBits = 4;
1455 properties->variableSampleLocations = VK_FALSE;
1456 break;
1457 }
1458 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
1459 VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
1460 (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
1461
1462 /* We support all of the depth resolve modes */
1463 properties->supportedDepthResolveModes =
1464 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1465 VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
1466 VK_RESOLVE_MODE_MIN_BIT_KHR |
1467 VK_RESOLVE_MODE_MAX_BIT_KHR;
1468
1469 /* Average doesn't make sense for stencil so we don't support that */
1470 properties->supportedStencilResolveModes =
1471 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1472 VK_RESOLVE_MODE_MIN_BIT_KHR |
1473 VK_RESOLVE_MODE_MAX_BIT_KHR;
1474
1475 properties->independentResolveNone = VK_TRUE;
1476 properties->independentResolve = VK_TRUE;
1477 break;
1478 }
1479 default:
1480 break;
1481 }
1482 }
1483 }
1484
1485 static void radv_get_physical_device_queue_family_properties(
1486 struct radv_physical_device* pdevice,
1487 uint32_t* pCount,
1488 VkQueueFamilyProperties** pQueueFamilyProperties)
1489 {
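/* RADV exposes at most two queue families: the general family
 * (graphics + compute + transfer + sparse) and, when the kernel
 * reports compute rings and RADV_DEBUG_NO_COMPUTE_QUEUE is not set,
 * an additional compute-only family.
 */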
1490 int num_queue_families = 1;
1491 int idx;
1492 if (pdevice->rad_info.num_compute_rings > 0 &&
1493 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
1494 num_queue_families++;
1495
1496 if (pQueueFamilyProperties == NULL) {
1497 *pCount = num_queue_families;
1498 return;
1499 }
1500
1501 if (!*pCount)
1502 return;
1503
1504 idx = 0;
1505 if (*pCount >= 1) {
1506 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1507 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1508 VK_QUEUE_COMPUTE_BIT |
1509 VK_QUEUE_TRANSFER_BIT |
1510 VK_QUEUE_SPARSE_BINDING_BIT,
1511 .queueCount = 1,
1512 .timestampValidBits = 64,
1513 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1514 };
1515 idx++;
1516 }
1517
1518 if (pdevice->rad_info.num_compute_rings > 0 &&
1519 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
1520 if (*pCount > idx) {
1521 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1522 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1523 VK_QUEUE_TRANSFER_BIT |
1524 VK_QUEUE_SPARSE_BINDING_BIT,
1525 .queueCount = pdevice->rad_info.num_compute_rings,
1526 .timestampValidBits = 64,
1527 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1528 };
1529 idx++;
1530 }
1531 }
1532 *pCount = idx;
1533 }
1534
1535 void radv_GetPhysicalDeviceQueueFamilyProperties(
1536 VkPhysicalDevice physicalDevice,
1537 uint32_t* pCount,
1538 VkQueueFamilyProperties* pQueueFamilyProperties)
1539 {
1540 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1541 if (!pQueueFamilyProperties) {
1542 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1543 return;
1544 }
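/* The helper above writes through an array of per-entry pointers so
 * that this entry point and the ...Properties2 variant below can share
 * it; three entries is a safe upper bound, since at most two families
 * are exposed (asserted below).
 */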
1545 VkQueueFamilyProperties *properties[] = {
1546 pQueueFamilyProperties + 0,
1547 pQueueFamilyProperties + 1,
1548 pQueueFamilyProperties + 2,
1549 };
1550 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1551 assert(*pCount <= 3);
1552 }
1553
1554 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1555 VkPhysicalDevice physicalDevice,
1556 uint32_t* pCount,
1557 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1558 {
1559 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1560 if (!pQueueFamilyProperties) {
1561 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1562 return;
1563 }
1564 VkQueueFamilyProperties *properties[] = {
1565 &pQueueFamilyProperties[0].queueFamilyProperties,
1566 &pQueueFamilyProperties[1].queueFamilyProperties,
1567 &pQueueFamilyProperties[2].queueFamilyProperties,
1568 };
1569 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1570 assert(*pCount <= 3);
1571 }
1572
1573 void radv_GetPhysicalDeviceMemoryProperties(
1574 VkPhysicalDevice physicalDevice,
1575 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1576 {
1577 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1578
1579 *pMemoryProperties = physical_device->memory_properties;
1580 }
1581
1582 static void
1583 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
1584 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
1585 {
1586 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
1587 VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
1588 uint64_t visible_vram_size = radv_get_visible_vram_size(device);
1589 uint64_t vram_size = radv_get_vram_size(device);
1590 uint64_t gtt_size = device->rad_info.gart_size;
1591 uint64_t heap_budget, heap_usage;
1592
1593 /* For all memory heaps, the computation of the budget is as follows:
1594 * heap_budget = heap_size - global_heap_usage + app_heap_usage
1595 *
1596 * The Vulkan spec 1.1.97 says that the budget should include any
1597 * currently allocated device memory.
1598 *
1599 * Note that the application heap usages are not really accurate (e.g.
1600 * in the presence of shared buffers).
1601 */
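/* As a worked example of the formula above: with an 8 GiB VRAM heap,
 * 2 GiB used globally, and 0.5 GiB of that allocated by this
 * application, the reported budget would be 8 - 2 + 0.5 = 6.5 GiB.
 */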
1602 for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
1603 uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;
1604
1605 switch (device->mem_type_indices[i]) {
1606 case RADV_MEM_TYPE_VRAM:
1607 heap_usage = device->ws->query_value(device->ws,
1608 RADEON_ALLOCATED_VRAM);
1609
1610 heap_budget = vram_size -
1611 device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
1612 heap_usage;
1613
1614 memoryBudget->heapBudget[heap_index] = heap_budget;
1615 memoryBudget->heapUsage[heap_index] = heap_usage;
1616 break;
1617 case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
1618 heap_usage = device->ws->query_value(device->ws,
1619 RADEON_ALLOCATED_VRAM_VIS);
1620
1621 heap_budget = visible_vram_size -
1622 device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
1623 heap_usage;
1624
1625 memoryBudget->heapBudget[heap_index] = heap_budget;
1626 memoryBudget->heapUsage[heap_index] = heap_usage;
1627 break;
1628 case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
1629 heap_usage = device->ws->query_value(device->ws,
1630 RADEON_ALLOCATED_GTT);
1631
1632 heap_budget = gtt_size -
1633 device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
1634 heap_usage;
1635
1636 memoryBudget->heapBudget[heap_index] = heap_budget;
1637 memoryBudget->heapUsage[heap_index] = heap_usage;
1638 break;
1639 default:
1640 break;
1641 }
1642 }
1643
1644 /* The heapBudget and heapUsage values must be zero for array elements
1645 * greater than or equal to
1646 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
1647 */
1648 for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
1649 memoryBudget->heapBudget[i] = 0;
1650 memoryBudget->heapUsage[i] = 0;
1651 }
1652 }
1653
1654 void radv_GetPhysicalDeviceMemoryProperties2(
1655 VkPhysicalDevice physicalDevice,
1656 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1657 {
1658 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1659 &pMemoryProperties->memoryProperties);
1660
1661 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
1662 vk_find_struct(pMemoryProperties->pNext,
1663 PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
1664 if (memory_budget)
1665 radv_get_memory_budget_properties(physicalDevice, memory_budget);
1666 }
1667
1668 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1669 VkDevice _device,
1670 VkExternalMemoryHandleTypeFlagBits handleType,
1671 const void *pHostPointer,
1672 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
1673 {
1674 RADV_FROM_HANDLE(radv_device, device, _device);
1675
1676 switch (handleType)
1677 {
1678 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
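/* Only the cached GTT memory type is reported for host allocations,
 * presumably because imported host pointers must refer to CPU-cached
 * system memory.
 */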
1679 const struct radv_physical_device *physical_device = device->physical_device;
1680 uint32_t memoryTypeBits = 0;
1681 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1682 if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1683 memoryTypeBits = (1 << i);
1684 break;
1685 }
1686 }
1687 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1688 return VK_SUCCESS;
1689 }
1690 default:
1691 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1692 }
1693 }
1694
1695 static enum radeon_ctx_priority
1696 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1697 {
1698 /* Default to MEDIUM when a specific global priority isn't requested */
1699 if (!pObj)
1700 return RADEON_CTX_PRIORITY_MEDIUM;
1701
1702 switch(pObj->globalPriority) {
1703 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1704 return RADEON_CTX_PRIORITY_REALTIME;
1705 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1706 return RADEON_CTX_PRIORITY_HIGH;
1707 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1708 return RADEON_CTX_PRIORITY_MEDIUM;
1709 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1710 return RADEON_CTX_PRIORITY_LOW;
1711 default:
1712 unreachable("Illegal global priority value");
1713 return RADEON_CTX_PRIORITY_INVALID;
1714 }
1715 }
1716
1717 static int
1718 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1719 uint32_t queue_family_index, int idx,
1720 VkDeviceQueueCreateFlags flags,
1721 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1722 {
1723 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1724 queue->device = device;
1725 queue->queue_family_index = queue_family_index;
1726 queue->queue_idx = idx;
1727 queue->priority = radv_get_queue_global_priority(global_priority);
1728 queue->flags = flags;
1729
1730 queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1731 if (!queue->hw_ctx)
1732 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1733
1734 return VK_SUCCESS;
1735 }
1736
1737 static void
1738 radv_queue_finish(struct radv_queue *queue)
1739 {
1740 if (queue->hw_ctx)
1741 queue->device->ws->ctx_destroy(queue->hw_ctx);
1742
1743 if (queue->initial_full_flush_preamble_cs)
1744 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1745 if (queue->initial_preamble_cs)
1746 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1747 if (queue->continue_preamble_cs)
1748 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1749 if (queue->descriptor_bo)
1750 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1751 if (queue->scratch_bo)
1752 queue->device->ws->buffer_destroy(queue->scratch_bo);
1753 if (queue->esgs_ring_bo)
1754 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1755 if (queue->gsvs_ring_bo)
1756 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1757 if (queue->tess_rings_bo)
1758 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1759 if (queue->compute_scratch_bo)
1760 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1761 }
1762
1763 static void
1764 radv_bo_list_init(struct radv_bo_list *bo_list)
1765 {
1766 pthread_mutex_init(&bo_list->mutex, NULL);
1767 bo_list->list.count = bo_list->capacity = 0;
1768 bo_list->list.bos = NULL;
1769 }
1770
1771 static void
1772 radv_bo_list_finish(struct radv_bo_list *bo_list)
1773 {
1774 free(bo_list->list.bos);
1775 pthread_mutex_destroy(&bo_list->mutex);
1776 }
1777
1778 static VkResult radv_bo_list_add(struct radv_device *device,
1779 struct radeon_winsys_bo *bo)
1780 {
1781 struct radv_bo_list *bo_list = &device->bo_list;
1782
1783 if (bo->is_local)
1784 return VK_SUCCESS;
1785
1786 if (unlikely(!device->use_global_bo_list))
1787 return VK_SUCCESS;
1788
1789 pthread_mutex_lock(&bo_list->mutex);
1790 if (bo_list->list.count == bo_list->capacity) {
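/* Grow the array geometrically (doubling, starting at 4 entries) so
 * that repeated additions stay amortized O(1).
 */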
1791 unsigned capacity = MAX2(4, bo_list->capacity * 2);
1792 void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
1793
1794 if (!data) {
1795 pthread_mutex_unlock(&bo_list->mutex);
1796 return VK_ERROR_OUT_OF_HOST_MEMORY;
1797 }
1798
1799 bo_list->list.bos = (struct radeon_winsys_bo**)data;
1800 bo_list->capacity = capacity;
1801 }
1802
1803 bo_list->list.bos[bo_list->list.count++] = bo;
1804 pthread_mutex_unlock(&bo_list->mutex);
1805 return VK_SUCCESS;
1806 }
1807
1808 static void radv_bo_list_remove(struct radv_device *device,
1809 struct radeon_winsys_bo *bo)
1810 {
1811 struct radv_bo_list *bo_list = &device->bo_list;
1812
1813 if (bo->is_local)
1814 return;
1815
1816 if (unlikely(!device->use_global_bo_list))
1817 return;
1818
1819 pthread_mutex_lock(&bo_list->mutex);
1820 for(unsigned i = 0; i < bo_list->list.count; ++i) {
1821 if (bo_list->list.bos[i] == bo) {
1822 bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
1823 --bo_list->list.count;
1824 break;
1825 }
1826 }
1827 pthread_mutex_unlock(&bo_list->mutex);
1828 }
1829
1830 static void
1831 radv_device_init_gs_info(struct radv_device *device)
1832 {
1833 device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
1834 device->physical_device->rad_info.family);
1835 }
1836
1837 static int radv_get_device_extension_index(const char *name)
1838 {
1839 for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1840 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1841 return i;
1842 }
1843 return -1;
1844 }
1845
1846 static int
1847 radv_get_int_debug_option(const char *name, int default_value)
1848 {
1849 const char *str;
1850 int result;
1851
1852 str = getenv(name);
1853 if (!str) {
1854 result = default_value;
1855 } else {
1856 char *endptr;
1857
1858 result = strtol(str, &endptr, 0);
1859 if (str == endptr) {
1860 /* No digits found. */
1861 result = default_value;
1862 }
1863 }
1864
1865 return result;
1866 }
1867
1868 VkResult radv_CreateDevice(
1869 VkPhysicalDevice physicalDevice,
1870 const VkDeviceCreateInfo* pCreateInfo,
1871 const VkAllocationCallbacks* pAllocator,
1872 VkDevice* pDevice)
1873 {
1874 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1875 VkResult result;
1876 struct radv_device *device;
1877
1878 bool keep_shader_info = false;
1879
1880 /* Check enabled features */
1881 if (pCreateInfo->pEnabledFeatures) {
1882 VkPhysicalDeviceFeatures supported_features;
1883 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
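/* VkPhysicalDeviceFeatures consists solely of VkBool32 members, so it
 * can be walked as a flat array to compare each requested feature
 * against the supported set.
 */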
1884 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1885 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1886 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1887 for (uint32_t i = 0; i < num_features; i++) {
1888 if (enabled_feature[i] && !supported_feature[i])
1889 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1890 }
1891 }
1892
1893 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1894 sizeof(*device), 8,
1895 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1896 if (!device)
1897 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1898
1899 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1900 device->instance = physical_device->instance;
1901 device->physical_device = physical_device;
1902
1903 device->ws = physical_device->ws;
1904 if (pAllocator)
1905 device->alloc = *pAllocator;
1906 else
1907 device->alloc = physical_device->instance->alloc;
1908
1909 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1910 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1911 int index = radv_get_device_extension_index(ext_name);
1912 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1913 vk_free(&device->alloc, device);
1914 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1915 }
1916
1917 device->enabled_extensions.extensions[index] = true;
1918 }
1919
1920 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1921
1922 /* With update after bind we can't attach BOs to the command buffer
1923 * from the descriptor set anymore, so we have to use a global BO list.
1924 */
1925 device->use_global_bo_list =
1926 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
1927 device->enabled_extensions.EXT_descriptor_indexing ||
1928 device->enabled_extensions.EXT_buffer_device_address;
1929
1930 device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
1931 pCreateInfo->pEnabledFeatures->robustBufferAccess;
1932
1933 mtx_init(&device->shader_slab_mutex, mtx_plain);
1934 list_inithead(&device->shader_slabs);
1935
1936 radv_bo_list_init(&device->bo_list);
1937
1938 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1939 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1940 uint32_t qfi = queue_create->queueFamilyIndex;
1941 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1942 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1943
1944 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1945
1946 device->queues[qfi] = vk_alloc(&device->alloc,
1947 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1948 if (!device->queues[qfi]) {
1949 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1950 goto fail;
1951 }
1952
1953 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1954
1955 device->queue_count[qfi] = queue_create->queueCount;
1956
1957 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1958 result = radv_queue_init(device, &device->queues[qfi][q],
1959 qfi, q, queue_create->flags,
1960 global_priority);
1961 if (result != VK_SUCCESS)
1962 goto fail;
1963 }
1964 }
1965
1966 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1967 !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
1968
1969 device->dfsm_allowed = device->pbb_allowed &&
1970 (device->physical_device->rad_info.family == CHIP_RAVEN ||
1971 device->physical_device->rad_info.family == CHIP_RAVEN2 ||
1972 device->physical_device->rad_info.family == CHIP_RENOIR);
1973
1974 #ifdef ANDROID
1975 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1976 #endif
1977
1978 /* The maximum number of scratch waves. Scratch space isn't divided
1979 * evenly between CUs. The number is only a function of the number of CUs.
1980 * We can decrease the constant to decrease the scratch buffer size.
1981 *
1982 * device->scratch_waves must be >= the maximum possible size of
1983 * 1 threadgroup, so that the hw doesn't hang from being unable
1984 * to start any.
1985 *
1986 * The recommended value is 4 per CU at most. Higher numbers don't
1987 * bring much benefit, but they still occupy chip resources (think
1988 * async compute). I've seen ~2% performance difference between 4 and 32.
1989 */
1990 uint32_t max_threads_per_block = 2048;
1991 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1992 max_threads_per_block / 64);
1993
1994 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
1995 S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32);
1996
1997 if (device->physical_device->rad_info.chip_class >= GFX7) {
1998 /* If the KMD allows it (there is a KMD hw register for it),
1999 * allow launching waves out-of-order.
2000 */
2001 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
2002 }
2003
2004 radv_device_init_gs_info(device);
2005
2006 device->tess_offchip_block_dw_size =
2007 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
2008 device->has_distributed_tess =
2009 device->physical_device->rad_info.chip_class >= GFX8 &&
2010 device->physical_device->rad_info.max_se >= 2;
2011
2012 if (getenv("RADV_TRACE_FILE")) {
2013 const char *filename = getenv("RADV_TRACE_FILE");
2014
2015 keep_shader_info = true;
2016
2017 if (!radv_init_trace(device))
2018 goto fail;
2019
2020 fprintf(stderr, "*****************************************************************************\n");
2021 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
2022 fprintf(stderr, "*****************************************************************************\n");
2023
2024 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
2025 radv_dump_enabled_options(device, stderr);
2026 }
2027
2028 device->keep_shader_info = keep_shader_info;
2029
2030 result = radv_device_init_meta(device);
2031 if (result != VK_SUCCESS)
2032 goto fail;
2033
2034 radv_device_init_msaa(device);
2035
2036 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
2037 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
2038 switch (family) {
2039 case RADV_QUEUE_GENERAL:
2040 /* Since amdgpu DRM version 3.6.0, CONTEXT_CONTROL is emitted by the kernel */
2041 if (device->physical_device->rad_info.drm_minor < 6) {
2042 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
2043 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
2044 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
2045 }
2046 break;
2047 case RADV_QUEUE_COMPUTE:
2048 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
2049 radeon_emit(device->empty_cs[family], 0);
2050 break;
2051 }
2052 device->ws->cs_finalize(device->empty_cs[family]);
2053 }
2054
2055 if (device->physical_device->rad_info.chip_class >= GFX7)
2056 cik_create_gfx_config(device);
2057
2058 VkPipelineCacheCreateInfo ci;
2059 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
2060 ci.pNext = NULL;
2061 ci.flags = 0;
2062 ci.pInitialData = NULL;
2063 ci.initialDataSize = 0;
2064 VkPipelineCache pc;
2065 result = radv_CreatePipelineCache(radv_device_to_handle(device),
2066 &ci, NULL, &pc);
2067 if (result != VK_SUCCESS)
2068 goto fail_meta;
2069
2070 device->mem_cache = radv_pipeline_cache_from_handle(pc);
2071
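/* Debug override: setting e.g. RADV_TEX_ANISO=8 forces 8x anisotropic
 * filtering (clamped to 16x); the default of -1 leaves filtering under
 * application control.
 */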
2072 device->force_aniso =
2073 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
2074 if (device->force_aniso >= 0) {
2075 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
2076 1 << util_logbase2(device->force_aniso));
2077 }
2078
2079 *pDevice = radv_device_to_handle(device);
2080 return VK_SUCCESS;
2081
2082 fail_meta:
2083 radv_device_finish_meta(device);
2084 fail:
2085 radv_bo_list_finish(&device->bo_list);
2086
2087 if (device->trace_bo)
2088 device->ws->buffer_destroy(device->trace_bo);
2089
2090 if (device->gfx_init)
2091 device->ws->buffer_destroy(device->gfx_init);
2092
2093 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2094 for (unsigned q = 0; q < device->queue_count[i]; q++)
2095 radv_queue_finish(&device->queues[i][q]);
2096 if (device->queue_count[i])
2097 vk_free(&device->alloc, device->queues[i]);
2098 }
2099
2100 vk_free(&device->alloc, device);
2101 return result;
2102 }
2103
2104 void radv_DestroyDevice(
2105 VkDevice _device,
2106 const VkAllocationCallbacks* pAllocator)
2107 {
2108 RADV_FROM_HANDLE(radv_device, device, _device);
2109
2110 if (!device)
2111 return;
2112
2113 if (device->trace_bo)
2114 device->ws->buffer_destroy(device->trace_bo);
2115
2116 if (device->gfx_init)
2117 device->ws->buffer_destroy(device->gfx_init);
2118
2119 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2120 for (unsigned q = 0; q < device->queue_count[i]; q++)
2121 radv_queue_finish(&device->queues[i][q]);
2122 if (device->queue_count[i])
2123 vk_free(&device->alloc, device->queues[i]);
2124 if (device->empty_cs[i])
2125 device->ws->cs_destroy(device->empty_cs[i]);
2126 }
2127 radv_device_finish_meta(device);
2128
2129 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
2130 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
2131
2132 radv_destroy_shader_slabs(device);
2133
2134 radv_bo_list_finish(&device->bo_list);
2135 vk_free(&device->alloc, device);
2136 }
2137
2138 VkResult radv_EnumerateInstanceLayerProperties(
2139 uint32_t* pPropertyCount,
2140 VkLayerProperties* pProperties)
2141 {
2142 if (pProperties == NULL) {
2143 *pPropertyCount = 0;
2144 return VK_SUCCESS;
2145 }
2146
2147 /* None supported at this time */
2148 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2149 }
2150
2151 VkResult radv_EnumerateDeviceLayerProperties(
2152 VkPhysicalDevice physicalDevice,
2153 uint32_t* pPropertyCount,
2154 VkLayerProperties* pProperties)
2155 {
2156 if (pProperties == NULL) {
2157 *pPropertyCount = 0;
2158 return VK_SUCCESS;
2159 }
2160
2161 /* None supported at this time */
2162 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2163 }
2164
2165 void radv_GetDeviceQueue2(
2166 VkDevice _device,
2167 const VkDeviceQueueInfo2* pQueueInfo,
2168 VkQueue* pQueue)
2169 {
2170 RADV_FROM_HANDLE(radv_device, device, _device);
2171 struct radv_queue *queue;
2172
2173 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
2174 if (pQueueInfo->flags != queue->flags) {
2175 /* From the Vulkan 1.1.70 spec:
2176 *
2177 * "The queue returned by vkGetDeviceQueue2 must have the same
2178 * flags value from this structure as that used at device
2179 * creation time in a VkDeviceQueueCreateInfo instance. If no
2180 * matching flags were specified at device creation time then
2181 * pQueue will return VK_NULL_HANDLE."
2182 */
2183 *pQueue = VK_NULL_HANDLE;
2184 return;
2185 }
2186
2187 *pQueue = radv_queue_to_handle(queue);
2188 }
2189
2190 void radv_GetDeviceQueue(
2191 VkDevice _device,
2192 uint32_t queueFamilyIndex,
2193 uint32_t queueIndex,
2194 VkQueue* pQueue)
2195 {
2196 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
2197 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
2198 .queueFamilyIndex = queueFamilyIndex,
2199 .queueIndex = queueIndex
2200 };
2201
2202 radv_GetDeviceQueue2(_device, &info, pQueue);
2203 }
2204
2205 static void
2206 fill_geom_tess_rings(struct radv_queue *queue,
2207 uint32_t *map,
2208 bool add_sample_positions,
2209 uint32_t esgs_ring_size,
2210 struct radeon_winsys_bo *esgs_ring_bo,
2211 uint32_t gsvs_ring_size,
2212 struct radeon_winsys_bo *gsvs_ring_bo,
2213 uint32_t tess_factor_ring_size,
2214 uint32_t tess_offchip_ring_offset,
2215 uint32_t tess_offchip_ring_size,
2216 struct radeon_winsys_bo *tess_rings_bo)
2217 {
2218 uint32_t *desc = &map[4];
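/* map[0..3] hold the scratch buffer descriptor written by the caller
 * (two dwords plus two dwords of padding); the ring descriptors start
 * at map[4]. Each ring entry written below is a 4-dword buffer
 * resource descriptor.
 */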
2219
2220 if (esgs_ring_bo) {
2221 uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
2222
2223 /* stride 0, num records - size, add tid, swizzle, elsize4,
2224 index stride 64 */
2225 desc[0] = esgs_va;
2226 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
2227 S_008F04_SWIZZLE_ENABLE(true);
2228 desc[2] = esgs_ring_size;
2229 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2230 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2231 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2232 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2233 S_008F0C_INDEX_STRIDE(3) |
2234 S_008F0C_ADD_TID_ENABLE(1);
2235
2236 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2237 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2238 S_008F0C_OOB_SELECT(2) |
2239 S_008F0C_RESOURCE_LEVEL(1);
2240 } else {
2241 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2242 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2243 S_008F0C_ELEMENT_SIZE(1);
2244 }
2245
2246 /* GS entry for ES->GS ring */
2247 /* stride 0, num records - size, elsize0,
2248 index stride 0 */
2249 desc[4] = esgs_va;
2250 desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
2251 desc[6] = esgs_ring_size;
2252 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2253 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2254 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2255 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2256
2257 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2258 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2259 S_008F0C_OOB_SELECT(2) |
2260 S_008F0C_RESOURCE_LEVEL(1);
2261 } else {
2262 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2263 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2264 }
2265 }
2266
2267 desc += 8;
2268
2269 if (gsvs_ring_bo) {
2270 uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
2271
2272 /* VS entry for GS->VS ring */
2273 /* stride 0, num records - size, elsize0,
2274 index stride 0 */
2275 desc[0] = gsvs_va;
2276 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
2277 desc[2] = gsvs_ring_size;
2278 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2279 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2280 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2281 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2282
2283 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2284 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2285 S_008F0C_OOB_SELECT(2) |
2286 S_008F0C_RESOURCE_LEVEL(1);
2287 } else {
2288 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2289 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2290 }
2291
2292 /* stride gsvs_itemsize, num records 64
2293 elsize 4, index stride 16 */
2294 /* shader will patch stride and desc[2] */
2295 desc[4] = gsvs_va;
2296 desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
2297 S_008F04_SWIZZLE_ENABLE(1);
2298 desc[6] = 0;
2299 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2300 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2301 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2302 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2303 S_008F0C_INDEX_STRIDE(1) |
2304 S_008F0C_ADD_TID_ENABLE(true);
2305
2306 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2307 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2308 S_008F0C_OOB_SELECT(2) |
2309 S_008F0C_RESOURCE_LEVEL(1);
2310 } else {
2311 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2312 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2313 S_008F0C_ELEMENT_SIZE(1);
2314 }
2315
2316 }
2317
2318 desc += 8;
2319
2320 if (tess_rings_bo) {
2321 uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
2322 uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
2323
2324 desc[0] = tess_va;
2325 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
2326 desc[2] = tess_factor_ring_size;
2327 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2328 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2329 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2330 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2331
2332 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2333 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2334 S_008F0C_OOB_SELECT(3) |
2335 S_008F0C_RESOURCE_LEVEL(1);
2336 } else {
2337 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2338 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2339 }
2340
2341 desc[4] = tess_offchip_va;
2342 desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
2343 desc[6] = tess_offchip_ring_size;
2344 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2345 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2346 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2347 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2348
2349 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2350 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2351 S_008F0C_OOB_SELECT(3) |
2352 S_008F0C_RESOURCE_LEVEL(1);
2353 } else {
2354 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2355 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2356 }
2357 }
2358
2359 desc += 8;
2360
2361 if (add_sample_positions) {
2362 /* add sample positions after all rings */
2363 memcpy(desc, queue->device->sample_locations_1x, 8);
2364 desc += 2;
2365 memcpy(desc, queue->device->sample_locations_2x, 16);
2366 desc += 4;
2367 memcpy(desc, queue->device->sample_locations_4x, 32);
2368 desc += 8;
2369 memcpy(desc, queue->device->sample_locations_8x, 64);
2370 }
2371 }
2372
2373 static unsigned
2374 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
2375 {
2376 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
2377 device->physical_device->rad_info.family != CHIP_CARRIZO &&
2378 device->physical_device->rad_info.family != CHIP_STONEY;
2379 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
2380 unsigned max_offchip_buffers;
2381 unsigned offchip_granularity;
2382 unsigned hs_offchip_param;
2383
2384 /*
2385 * Per RadeonSI:
2386 * This must be one less than the maximum number due to a hw limitation.
2387 * Various hardware bugs need this.
2388 *
2389 * Per AMDVLK:
2390 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
2391 * Gfx7 should limit max_offchip_buffers to 508
2392 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
2393 *
2394 * Follow AMDVLK here.
2395 */
2396 if (device->physical_device->rad_info.chip_class >= GFX10) {
2397 max_offchip_buffers_per_se = 256;
2398 } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
2399 device->physical_device->rad_info.chip_class == GFX7 ||
2400 device->physical_device->rad_info.chip_class == GFX6)
2401 --max_offchip_buffers_per_se;
2402
2403 max_offchip_buffers = max_offchip_buffers_per_se *
2404 device->physical_device->rad_info.max_se;
2405
2406 /* Hawaii has a bug with offchip buffers > 256 that can be worked
2407 * around by setting 4K granularity.
2408 */
2409 if (device->tess_offchip_block_dw_size == 4096) {
2410 assert(device->physical_device->rad_info.family == CHIP_HAWAII);
2411 offchip_granularity = V_03093C_X_4K_DWORDS;
2412 } else {
2413 assert(device->tess_offchip_block_dw_size == 8192);
2414 offchip_granularity = V_03093C_X_8K_DWORDS;
2415 }
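/* The buffer count and granularity are packed into
 * VGT_HS_OFFCHIP_PARAM below; on GFX8+ the OFFCHIP_BUFFERING field
 * appears to encode count - 1, hence the extra decrement there.
 */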
2416
2417 switch (device->physical_device->rad_info.chip_class) {
2418 case GFX6:
2419 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
2420 break;
2421 case GFX7:
2422 case GFX8:
2423 case GFX9:
2424 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
2425 break;
2426 case GFX10:
2427 break;
2428 default:
2429 break;
2430 }
2431
2432 *max_offchip_buffers_p = max_offchip_buffers;
2433 if (device->physical_device->rad_info.chip_class >= GFX7) {
2434 if (device->physical_device->rad_info.chip_class >= GFX8)
2435 --max_offchip_buffers;
2436 hs_offchip_param =
2437 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
2438 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
2439 } else {
2440 hs_offchip_param =
2441 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
2442 }
2443 return hs_offchip_param;
2444 }
2445
2446 static void
2447 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2448 struct radeon_winsys_bo *esgs_ring_bo,
2449 uint32_t esgs_ring_size,
2450 struct radeon_winsys_bo *gsvs_ring_bo,
2451 uint32_t gsvs_ring_size)
2452 {
2453 if (!esgs_ring_bo && !gsvs_ring_bo)
2454 return;
2455
2456 if (esgs_ring_bo)
2457 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
2458
2459 if (gsvs_ring_bo)
2460 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
2461
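/* The ring size registers are programmed in units of 256 bytes, hence
 * the >> 8 below.
 */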
2462 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
2463 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2464 radeon_emit(cs, esgs_ring_size >> 8);
2465 radeon_emit(cs, gsvs_ring_size >> 8);
2466 } else {
2467 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2468 radeon_emit(cs, esgs_ring_size >> 8);
2469 radeon_emit(cs, gsvs_ring_size >> 8);
2470 }
2471 }
2472
2473 static void
2474 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2475 unsigned hs_offchip_param, unsigned tf_ring_size,
2476 struct radeon_winsys_bo *tess_rings_bo)
2477 {
2478 uint64_t tf_va;
2479
2480 if (!tess_rings_bo)
2481 return;
2482
2483 tf_va = radv_buffer_get_va(tess_rings_bo);
2484
2485 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
2486
2487 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
2488 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
2489 S_030938_SIZE(tf_ring_size / 4));
2490 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
2491 tf_va >> 8);
2492
2493 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2494 radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
2495 S_030984_BASE_HI(tf_va >> 40));
2496 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
2497 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
2498 S_030944_BASE_HI(tf_va >> 40));
2499 }
2500 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
2501 hs_offchip_param);
2502 } else {
2503 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
2504 S_008988_SIZE(tf_ring_size / 4));
2505 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
2506 tf_va >> 8);
2507 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
2508 hs_offchip_param);
2509 }
2510 }
2511
2512 static void
2513 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2514 struct radeon_winsys_bo *compute_scratch_bo)
2515 {
2516 uint64_t scratch_va;
2517
2518 if (!compute_scratch_bo)
2519 return;
2520
2521 scratch_va = radv_buffer_get_va(compute_scratch_bo);
2522
2523 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2524
2525 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2526 radeon_emit(cs, scratch_va);
2527 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2528 S_008F04_SWIZZLE_ENABLE(1));
2529 }
2530
2531 static void
2532 radv_emit_global_shader_pointers(struct radv_queue *queue,
2533 struct radeon_cmdbuf *cs,
2534 struct radeon_winsys_bo *descriptor_bo)
2535 {
2536 uint64_t va;
2537
2538 if (!descriptor_bo)
2539 return;
2540
2541 va = radv_buffer_get_va(descriptor_bo);
2542
2543 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2544
2545 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2546 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2547 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2548 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2549 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2550
2551 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2552 radv_emit_shader_pointer(queue->device, cs, regs[i],
2553 va, true);
2554 }
2555 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
2556 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2557 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2558 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2559 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2560
2561 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2562 radv_emit_shader_pointer(queue->device, cs, regs[i],
2563 va, true);
2564 }
2565 } else {
2566 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2567 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2568 R_00B230_SPI_SHADER_USER_DATA_GS_0,
2569 R_00B330_SPI_SHADER_USER_DATA_ES_0,
2570 R_00B430_SPI_SHADER_USER_DATA_HS_0,
2571 R_00B530_SPI_SHADER_USER_DATA_LS_0};
2572
2573 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2574 radv_emit_shader_pointer(queue->device, cs, regs[i],
2575 va, true);
2576 }
2577 }
2578 }
2579
2580 static void
2581 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2582 {
2583 struct radv_device *device = queue->device;
2584
2585 if (device->gfx_init) {
2586 uint64_t va = radv_buffer_get_va(device->gfx_init);
2587
2588 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
2589 radeon_emit(cs, va);
2590 radeon_emit(cs, va >> 32);
2591 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
2592
2593 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
2594 } else {
2595 struct radv_physical_device *physical_device = device->physical_device;
2596 si_emit_graphics(physical_device, cs);
2597 }
2598 }
2599
2600 static void
2601 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2602 {
2603 struct radv_physical_device *physical_device = queue->device->physical_device;
2604 si_emit_compute(physical_device, cs);
2605 }
2606
2607 static VkResult
2608 radv_get_preamble_cs(struct radv_queue *queue,
2609 uint32_t scratch_size,
2610 uint32_t compute_scratch_size,
2611 uint32_t esgs_ring_size,
2612 uint32_t gsvs_ring_size,
2613 bool needs_tess_rings,
2614 bool needs_sample_positions,
2615 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
2616 struct radeon_cmdbuf **initial_preamble_cs,
2617 struct radeon_cmdbuf **continue_preamble_cs)
2618 {
2619 struct radeon_winsys_bo *scratch_bo = NULL;
2620 struct radeon_winsys_bo *descriptor_bo = NULL;
2621 struct radeon_winsys_bo *compute_scratch_bo = NULL;
2622 struct radeon_winsys_bo *esgs_ring_bo = NULL;
2623 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
2624 struct radeon_winsys_bo *tess_rings_bo = NULL;
2625 struct radeon_cmdbuf *dest_cs[3] = {0};
2626 bool add_tess_rings = false, add_sample_positions = false;
2627 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
2628 unsigned max_offchip_buffers;
2629 unsigned hs_offchip_param = 0;
2630 unsigned tess_offchip_ring_offset;
2631 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
2632 if (!queue->has_tess_rings) {
2633 if (needs_tess_rings)
2634 add_tess_rings = true;
2635 }
2636 if (!queue->has_sample_positions) {
2637 if (needs_sample_positions)
2638 add_sample_positions = true;
2639 }
2640 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
2641 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
2642 &max_offchip_buffers);
2643 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
2644 tess_offchip_ring_size = max_offchip_buffers *
2645 queue->device->tess_offchip_block_dw_size * 4;
2646
2647 if (scratch_size <= queue->scratch_size &&
2648 compute_scratch_size <= queue->compute_scratch_size &&
2649 esgs_ring_size <= queue->esgs_ring_size &&
2650 gsvs_ring_size <= queue->gsvs_ring_size &&
2651 !add_tess_rings && !add_sample_positions &&
2652 queue->initial_preamble_cs) {
2653 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2654 *initial_preamble_cs = queue->initial_preamble_cs;
2655 *continue_preamble_cs = queue->continue_preamble_cs;
2656 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2657 *continue_preamble_cs = NULL;
2658 return VK_SUCCESS;
2659 }
2660
2661 if (scratch_size > queue->scratch_size) {
2662 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2663 scratch_size,
2664 4096,
2665 RADEON_DOMAIN_VRAM,
2666 ring_bo_flags,
2667 RADV_BO_PRIORITY_SCRATCH);
2668 if (!scratch_bo)
2669 goto fail;
2670 } else
2671 scratch_bo = queue->scratch_bo;
2672
2673 if (compute_scratch_size > queue->compute_scratch_size) {
2674 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2675 compute_scratch_size,
2676 4096,
2677 RADEON_DOMAIN_VRAM,
2678 ring_bo_flags,
2679 RADV_BO_PRIORITY_SCRATCH);
2680 if (!compute_scratch_bo)
2681 goto fail;
2682
2683 } else
2684 compute_scratch_bo = queue->compute_scratch_bo;
2685
2686 if (esgs_ring_size > queue->esgs_ring_size) {
2687 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2688 esgs_ring_size,
2689 4096,
2690 RADEON_DOMAIN_VRAM,
2691 ring_bo_flags,
2692 RADV_BO_PRIORITY_SCRATCH);
2693 if (!esgs_ring_bo)
2694 goto fail;
2695 } else {
2696 esgs_ring_bo = queue->esgs_ring_bo;
2697 esgs_ring_size = queue->esgs_ring_size;
2698 }
2699
2700 if (gsvs_ring_size > queue->gsvs_ring_size) {
2701 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2702 gsvs_ring_size,
2703 4096,
2704 RADEON_DOMAIN_VRAM,
2705 ring_bo_flags,
2706 RADV_BO_PRIORITY_SCRATCH);
2707 if (!gsvs_ring_bo)
2708 goto fail;
2709 } else {
2710 gsvs_ring_bo = queue->gsvs_ring_bo;
2711 gsvs_ring_size = queue->gsvs_ring_size;
2712 }
2713
2714 if (add_tess_rings) {
2715 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
2716 tess_offchip_ring_offset + tess_offchip_ring_size,
2717 256,
2718 RADEON_DOMAIN_VRAM,
2719 ring_bo_flags,
2720 RADV_BO_PRIORITY_SCRATCH);
2721 if (!tess_rings_bo)
2722 goto fail;
2723 } else {
2724 tess_rings_bo = queue->tess_rings_bo;
2725 }
2726
2727 if (scratch_bo != queue->scratch_bo ||
2728 esgs_ring_bo != queue->esgs_ring_bo ||
2729 gsvs_ring_bo != queue->gsvs_ring_bo ||
2730 tess_rings_bo != queue->tess_rings_bo ||
2731 add_sample_positions) {
2732 uint32_t size = 0;
2733 if (gsvs_ring_bo || esgs_ring_bo ||
2734 tess_rings_bo || add_sample_positions) {
2735 size = 112; /* scratch (2 dwords) + 2 dwords padding + 6 descriptors * 4 dwords = 112 bytes */
2736 if (add_sample_positions)
2737 size += 128; /* sample positions: 8+16+32+64 = 120 bytes, padded to 128 */
2738 }
2739 else if (scratch_bo)
2740 size = 8; /* 2 dword */
2741
2742 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
2743 size,
2744 4096,
2745 RADEON_DOMAIN_VRAM,
2746 RADEON_FLAG_CPU_ACCESS |
2747 RADEON_FLAG_NO_INTERPROCESS_SHARING |
2748 RADEON_FLAG_READ_ONLY,
2749 RADV_BO_PRIORITY_DESCRIPTOR);
2750 if (!descriptor_bo)
2751 goto fail;
2752 } else
2753 descriptor_bo = queue->descriptor_bo;
2754
2755 if (descriptor_bo != queue->descriptor_bo) {
2756 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2757
2758 if (scratch_bo) {
2759 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2760 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2761 S_008F04_SWIZZLE_ENABLE(1);
2762 map[0] = scratch_va;
2763 map[1] = rsrc1;
2764 }
2765
2766 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
2767 fill_geom_tess_rings(queue, map, add_sample_positions,
2768 esgs_ring_size, esgs_ring_bo,
2769 gsvs_ring_size, gsvs_ring_bo,
2770 tess_factor_ring_size,
2771 tess_offchip_ring_offset,
2772 tess_offchip_ring_size,
2773 tess_rings_bo);
2774
2775 queue->device->ws->buffer_unmap(descriptor_bo);
2776 }
2777
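/* Build the three preamble variants that are attached to the queue
 * below: dest_cs[0] is the initial preamble with a full cache flush,
 * dest_cs[1] the initial preamble with a lighter flush, and dest_cs[2]
 * the continue preamble, which flushes nothing.
 */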
2778 for(int i = 0; i < 3; ++i) {
2779 struct radeon_cmdbuf *cs = NULL;
2780 cs = queue->device->ws->cs_create(queue->device->ws,
2781 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2782 if (!cs)
2783 goto fail;
2784
2785 dest_cs[i] = cs;
2786
2787 if (scratch_bo)
2788 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
2789
2790 /* Emit initial configuration. */
2791 switch (queue->queue_family_index) {
2792 case RADV_QUEUE_GENERAL:
2793 radv_init_graphics_state(cs, queue);
2794 break;
2795 case RADV_QUEUE_COMPUTE:
2796 radv_init_compute_state(cs, queue);
2797 break;
2798 case RADV_QUEUE_TRANSFER:
2799 break;
2800 }
2801
2802 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
2803 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2804 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2805
2806 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2807 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2808 }
2809
2810 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
2811 gsvs_ring_bo, gsvs_ring_size);
2812 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
2813 tess_factor_ring_size, tess_rings_bo);
2814 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
2815 radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
2816
2817 if (i == 0) {
2818 si_cs_emit_cache_flush(cs,
2819 queue->device->physical_device->rad_info.chip_class,
2820 NULL, 0,
2821 queue->queue_family_index == RING_COMPUTE &&
2822 queue->device->physical_device->rad_info.chip_class >= GFX7,
2823 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2824 RADV_CMD_FLAG_INV_ICACHE |
2825 RADV_CMD_FLAG_INV_SCACHE |
2826 RADV_CMD_FLAG_INV_VCACHE |
2827 RADV_CMD_FLAG_INV_L2 |
2828 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2829 } else if (i == 1) {
2830 si_cs_emit_cache_flush(cs,
2831 queue->device->physical_device->rad_info.chip_class,
2832 NULL, 0,
2833 queue->queue_family_index == RING_COMPUTE &&
2834 queue->device->physical_device->rad_info.chip_class >= GFX7,
2835 RADV_CMD_FLAG_INV_ICACHE |
2836 RADV_CMD_FLAG_INV_SCACHE |
2837 RADV_CMD_FLAG_INV_VCACHE |
2838 RADV_CMD_FLAG_INV_L2 |
2839 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2840 }
2841
2842 if (!queue->device->ws->cs_finalize(cs))
2843 goto fail;
2844 }
2845
2846 if (queue->initial_full_flush_preamble_cs)
2847 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2848
2849 if (queue->initial_preamble_cs)
2850 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2851
2852 if (queue->continue_preamble_cs)
2853 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2854
2855 queue->initial_full_flush_preamble_cs = dest_cs[0];
2856 queue->initial_preamble_cs = dest_cs[1];
2857 queue->continue_preamble_cs = dest_cs[2];
2858
2859 if (scratch_bo != queue->scratch_bo) {
2860 if (queue->scratch_bo)
2861 queue->device->ws->buffer_destroy(queue->scratch_bo);
2862 queue->scratch_bo = scratch_bo;
2863 queue->scratch_size = scratch_size;
2864 }
2865
2866 if (compute_scratch_bo != queue->compute_scratch_bo) {
2867 if (queue->compute_scratch_bo)
2868 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2869 queue->compute_scratch_bo = compute_scratch_bo;
2870 queue->compute_scratch_size = compute_scratch_size;
2871 }
2872
2873 if (esgs_ring_bo != queue->esgs_ring_bo) {
2874 if (queue->esgs_ring_bo)
2875 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2876 queue->esgs_ring_bo = esgs_ring_bo;
2877 queue->esgs_ring_size = esgs_ring_size;
2878 }
2879
2880 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2881 if (queue->gsvs_ring_bo)
2882 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2883 queue->gsvs_ring_bo = gsvs_ring_bo;
2884 queue->gsvs_ring_size = gsvs_ring_size;
2885 }
2886
2887 if (tess_rings_bo != queue->tess_rings_bo) {
2888 queue->tess_rings_bo = tess_rings_bo;
2889 queue->has_tess_rings = true;
2890 }
2891
2892 if (descriptor_bo != queue->descriptor_bo) {
2893 if (queue->descriptor_bo)
2894 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2895
2896 queue->descriptor_bo = descriptor_bo;
2897 }
2898
2899 if (add_sample_positions)
2900 queue->has_sample_positions = true;
2901
2902 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2903 *initial_preamble_cs = queue->initial_preamble_cs;
2904 *continue_preamble_cs = queue->continue_preamble_cs;
2905 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2906 *continue_preamble_cs = NULL;
2907 return VK_SUCCESS;
2908 fail:
2909 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2910 if (dest_cs[i])
2911 queue->device->ws->cs_destroy(dest_cs[i]);
2912 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2913 queue->device->ws->buffer_destroy(descriptor_bo);
2914 if (scratch_bo && scratch_bo != queue->scratch_bo)
2915 queue->device->ws->buffer_destroy(scratch_bo);
2916 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2917 queue->device->ws->buffer_destroy(compute_scratch_bo);
2918 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2919 queue->device->ws->buffer_destroy(esgs_ring_bo);
2920 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2921 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2922 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2923 queue->device->ws->buffer_destroy(tess_rings_bo);
2924 return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2925 }
2926
2927 static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
2928 struct radv_winsys_sem_counts *counts,
2929 int num_sems,
2930 const VkSemaphore *sems,
2931 VkFence _fence,
2932 bool reset_temp)
2933 {
2934 int syncobj_idx = 0, sem_idx = 0;
2935
2936 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2937 return VK_SUCCESS;
2938
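/* First pass: count syncobj-backed and legacy winsys semaphores so
 * the arrays can be sized up front; a second pass below fills them in.
 */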
2939 for (uint32_t i = 0; i < num_sems; i++) {
2940 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2941
2942 if (sem->temp_syncobj || sem->syncobj)
2943 counts->syncobj_count++;
2944 else
2945 counts->sem_count++;
2946 }
2947
2948 if (_fence != VK_NULL_HANDLE) {
2949 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2950 if (fence->temp_syncobj || fence->syncobj)
2951 counts->syncobj_count++;
2952 }
2953
2954 if (counts->syncobj_count) {
2955 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2956 if (!counts->syncobj)
2957 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2958 }
2959
2960 if (counts->sem_count) {
2961 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2962 if (!counts->sem) {
2963 free(counts->syncobj);
2964 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2965 }
2966 }
2967
2968 for (uint32_t i = 0; i < num_sems; i++) {
2969 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2970
2971 if (sem->temp_syncobj) {
2972 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2973 }
2974 else if (sem->syncobj)
2975 counts->syncobj[syncobj_idx++] = sem->syncobj;
2976 else {
2977 assert(sem->sem);
2978 counts->sem[sem_idx++] = sem->sem;
2979 }
2980 }
2981
2982 if (_fence != VK_NULL_HANDLE) {
2983 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2984 if (fence->temp_syncobj)
2985 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2986 else if (fence->syncobj)
2987 counts->syncobj[syncobj_idx++] = fence->syncobj;
2988 }
2989
2990 return VK_SUCCESS;
2991 }
2992
2993 static void
2994 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2995 {
2996 free(sem_info->wait.syncobj);
2997 free(sem_info->wait.sem);
2998 free(sem_info->signal.syncobj);
2999 free(sem_info->signal.sem);
3000 }
3001
3002
3003 static void radv_free_temp_syncobjs(struct radv_device *device,
3004 int num_sems,
3005 const VkSemaphore *sems)
3006 {
3007 for (uint32_t i = 0; i < num_sems; i++) {
3008 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
3009
3010 if (sem->temp_syncobj) {
3011 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
3012 sem->temp_syncobj = 0;
3013 }
3014 }
3015 }
3016
3017 static VkResult
3018 radv_alloc_sem_info(struct radv_instance *instance,
3019 struct radv_winsys_sem_info *sem_info,
3020 int num_wait_sems,
3021 const VkSemaphore *wait_sems,
3022 int num_signal_sems,
3023 const VkSemaphore *signal_sems,
3024 VkFence fence)
3025 {
3026 VkResult ret;
3027 memset(sem_info, 0, sizeof(*sem_info));
3028
3029 ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
3030 if (ret)
3031 return ret;
3032 ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
3033 if (ret)
3034 radv_free_sem_info(sem_info);
3035
3036 /* caller can override these */
3037 sem_info->cs_emit_wait = true;
3038 sem_info->cs_emit_signal = true;
3039 return ret;
3040 }
3041
3042 /* Signals the fence as soon as all the work currently queued is done. */
3043 static VkResult radv_signal_fence(struct radv_queue *queue,
3044 struct radv_fence *fence)
3045 {
3046 int ret;
3047 VkResult result;
3048 struct radv_winsys_sem_info sem_info;
3049
3050 result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
3051 radv_fence_to_handle(fence));
3052 if (result != VK_SUCCESS)
3053 return result;
3054
3055 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3056 &queue->device->empty_cs[queue->queue_family_index],
3057 1, NULL, NULL, &sem_info, NULL,
3058 false, fence->fence);
3059 radv_free_sem_info(&sem_info);
3060
3061 if (ret)
3062 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
3063
3064 return VK_SUCCESS;
3065 }
3066
3067 VkResult radv_QueueSubmit(
3068 VkQueue _queue,
3069 uint32_t submitCount,
3070 const VkSubmitInfo* pSubmits,
3071 VkFence _fence)
3072 {
3073 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3074 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3075 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3076 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
3077 int ret;
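/* With a trace buffer active, submit one IB at a time, presumably so
 * that a GPU hang can be pinned to a single command buffer.
 */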
3078 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
3079 uint32_t scratch_size = 0;
3080 uint32_t compute_scratch_size = 0;
3081 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
3082 struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
3083 VkResult result;
3084 bool fence_emitted = false;
3085 bool tess_rings_needed = false;
3086 bool sample_positions_needed = false;
3087
3088 /* Do this first so failing to allocate scratch buffers can't result in
3089 * partially executed submissions. */
3090 for (uint32_t i = 0; i < submitCount; i++) {
3091 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
3092 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
3093 pSubmits[i].pCommandBuffers[j]);
3094
3095 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
3096 compute_scratch_size = MAX2(compute_scratch_size,
3097 cmd_buffer->compute_scratch_size_needed);
3098 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
3099 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
3100 tess_rings_needed |= cmd_buffer->tess_rings_needed;
3101 sample_positions_needed |= cmd_buffer->sample_positions_needed;
3102 }
3103 }
3104
3105 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
3106 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
3107 sample_positions_needed, &initial_flush_preamble_cs,
3108 &initial_preamble_cs, &continue_preamble_cs);
3109 if (result != VK_SUCCESS)
3110 return result;
3111
3112 for (uint32_t i = 0; i < submitCount; i++) {
3113 struct radeon_cmdbuf **cs_array;
3114 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
3115 bool can_patch = true;
3116 uint32_t advance;
3117 struct radv_winsys_sem_info sem_info;
3118
3119 result = radv_alloc_sem_info(queue->device->instance,
3120 &sem_info,
3121 pSubmits[i].waitSemaphoreCount,
3122 pSubmits[i].pWaitSemaphores,
3123 pSubmits[i].signalSemaphoreCount,
3124 pSubmits[i].pSignalSemaphores,
3125 _fence);
3126 if (result != VK_SUCCESS)
3127 return result;
3128
3129 if (!pSubmits[i].commandBufferCount) {
3130 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
3131 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
3132 &queue->device->empty_cs[queue->queue_family_index],
3133 1, NULL, NULL,
3134 &sem_info, NULL,
3135 false, base_fence);
3136 if (ret) {
3137 radv_loge("failed to submit CS %d\n", i);
3138 abort();
3139 }
3140 fence_emitted = true;
3141 }
3142 radv_free_sem_info(&sem_info);
3143 continue;
3144 }
3145
3146 		cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
3147 				  (pSubmits[i].commandBufferCount));
		if (!cs_array) {
			radv_free_sem_info(&sem_info);
			return vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
3148
3149 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
3150 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
3151 pSubmits[i].pCommandBuffers[j]);
3152 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3153
3154 cs_array[j] = cmd_buffer->cs;
3155 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
3156 can_patch = false;
3157
3158 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
3159 }
3160
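		/* Submit the command buffers in chunks of at most
		 * max_cs_submission IBs; with a trace BO this is one IB at a
		 * time so that a hang can be pinned to a single command
		 * buffer by radv_check_gpu_hangs() below.
		 */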
3161 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
3162 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
3163 const struct radv_winsys_bo_list *bo_list = NULL;
3164
3165 advance = MIN2(max_cs_submission,
3166 pSubmits[i].commandBufferCount - j);
3167
3168 if (queue->device->trace_bo)
3169 *queue->device->trace_id_ptr = 0;
3170
3171 sem_info.cs_emit_wait = j == 0;
3172 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
3173
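			/* With the global BO list, hold the list lock across
			 * the submit so the list cannot change while the
			 * winsys walks it.
			 */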
3174 if (unlikely(queue->device->use_global_bo_list)) {
3175 pthread_mutex_lock(&queue->device->bo_list.mutex);
3176 bo_list = &queue->device->bo_list.list;
3177 }
3178
3179 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
3180 advance, initial_preamble, continue_preamble_cs,
3181 &sem_info, bo_list,
3182 can_patch, base_fence);
3183
3184 if (unlikely(queue->device->use_global_bo_list))
3185 pthread_mutex_unlock(&queue->device->bo_list.mutex);
3186
3187 if (ret) {
3188 radv_loge("failed to submit CS %d\n", i);
3189 abort();
3190 }
3191 fence_emitted = true;
3192 if (queue->device->trace_bo) {
3193 radv_check_gpu_hangs(queue, cs_array[j]);
3194 }
3195 }
3196
3197 radv_free_temp_syncobjs(queue->device,
3198 pSubmits[i].waitSemaphoreCount,
3199 pSubmits[i].pWaitSemaphores);
3200 radv_free_sem_info(&sem_info);
3201 free(cs_array);
3202 }
3203
3204 if (fence) {
3205 if (!fence_emitted) {
3206 result = radv_signal_fence(queue, fence);
3207 if (result != VK_SUCCESS)
3208 return result;
3209 }
3210 }
3211
3212 return VK_SUCCESS;
3213 }
3214
3215 VkResult radv_QueueWaitIdle(
3216 VkQueue _queue)
3217 {
3218 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3219
3220 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
3221 radv_queue_family_to_ring(queue->queue_family_index),
3222 queue->queue_idx);
3223 return VK_SUCCESS;
3224 }
3225
3226 VkResult radv_DeviceWaitIdle(
3227 VkDevice _device)
3228 {
3229 RADV_FROM_HANDLE(radv_device, device, _device);
3230
3231 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3232 for (unsigned q = 0; q < device->queue_count[i]; q++) {
3233 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
3234 }
3235 }
3236 return VK_SUCCESS;
3237 }
3238
3239 VkResult radv_EnumerateInstanceExtensionProperties(
3240 const char* pLayerName,
3241 uint32_t* pPropertyCount,
3242 VkExtensionProperties* pProperties)
3243 {
3244 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
3245
3246 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
3247 if (radv_supported_instance_extensions.extensions[i]) {
3248 vk_outarray_append(&out, prop) {
3249 *prop = radv_instance_extensions[i];
3250 }
3251 }
3252 }
3253
3254 return vk_outarray_status(&out);
3255 }
3256
3257 VkResult radv_EnumerateDeviceExtensionProperties(
3258 VkPhysicalDevice physicalDevice,
3259 const char* pLayerName,
3260 uint32_t* pPropertyCount,
3261 VkExtensionProperties* pProperties)
3262 {
3263 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
3264 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
3265
3266 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
3267 if (device->supported_extensions.extensions[i]) {
3268 vk_outarray_append(&out, prop) {
3269 *prop = radv_device_extensions[i];
3270 }
3271 }
3272 }
3273
3274 return vk_outarray_status(&out);
3275 }
3276
3277 PFN_vkVoidFunction radv_GetInstanceProcAddr(
3278 VkInstance _instance,
3279 const char* pName)
3280 {
3281 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3282 bool unchecked = instance ? instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS : false;
3283
3284 if (unchecked) {
3285 return radv_lookup_entrypoint_unchecked(pName);
3286 } else {
3287 return radv_lookup_entrypoint_checked(pName,
3288 instance ? instance->apiVersion : 0,
3289 instance ? &instance->enabled_extensions : NULL,
3290 NULL);
3291 }
3292 }
3293
3294 /* The loader wants us to expose a second GetInstanceProcAddr function
3295 * to work around certain LD_PRELOAD issues seen in apps.
3296 */
3297 PUBLIC
3298 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
3299 VkInstance instance,
3300 const char* pName);
3301
3302 PUBLIC
3303 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
3304 VkInstance instance,
3305 const char* pName)
3306 {
3307 return radv_GetInstanceProcAddr(instance, pName);
3308 }
3309
3310 PUBLIC
3311 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
3312 VkInstance _instance,
3313 const char* pName);
3314
3315 PUBLIC
3316 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
3317 VkInstance _instance,
3318 const char* pName)
3319 {
3320 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3321
3322 return radv_lookup_physical_device_entrypoint_checked(pName,
3323 instance ? instance->apiVersion : 0,
3324 instance ? &instance->enabled_extensions : NULL);
3325 }
3326
3327 PFN_vkVoidFunction radv_GetDeviceProcAddr(
3328 VkDevice _device,
3329 const char* pName)
3330 {
3331 RADV_FROM_HANDLE(radv_device, device, _device);
3332 bool unchecked = device ? device->instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS : false;
3333
3334 if (unchecked) {
3335 return radv_lookup_entrypoint_unchecked(pName);
3336 } else {
3337 return radv_lookup_entrypoint_checked(pName,
3338 device->instance->apiVersion,
3339 &device->instance->enabled_extensions,
3340 &device->enabled_extensions);
3341 }
3342 }
3343
3344 bool radv_get_memory_fd(struct radv_device *device,
3345 struct radv_device_memory *memory,
3346 int *pFD)
3347 {
3348 struct radeon_bo_metadata metadata;
3349
3350 if (memory->image) {
3351 radv_init_metadata(device, memory->image, &metadata);
3352 device->ws->buffer_set_metadata(memory->bo, &metadata);
3353 }
3354
3355 return device->ws->buffer_get_fd(device->ws, memory->bo,
3356 pFD);
3357 }
3358
3359 static VkResult radv_alloc_memory(struct radv_device *device,
3360 const VkMemoryAllocateInfo* pAllocateInfo,
3361 const VkAllocationCallbacks* pAllocator,
3362 VkDeviceMemory* pMem)
3363 {
3364 struct radv_device_memory *mem;
3365 VkResult result;
3366 enum radeon_bo_domain domain;
3367 uint32_t flags = 0;
3368 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
3369
3370 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3371
3372 if (pAllocateInfo->allocationSize == 0) {
3373 	/* Some applications allocate zero bytes; tolerate this and return a null handle. */
3374 *pMem = VK_NULL_HANDLE;
3375 return VK_SUCCESS;
3376 }
3377
3378 const VkImportMemoryFdInfoKHR *import_info =
3379 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
3380 const VkMemoryDedicatedAllocateInfo *dedicate_info =
3381 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
3382 const VkExportMemoryAllocateInfo *export_info =
3383 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
3384 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
3385 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
3386
3387 const struct wsi_memory_allocate_info *wsi_info =
3388 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
3389
3390 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
3391 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3392 if (mem == NULL)
3393 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3394
3395 if (wsi_info && wsi_info->implicit_sync)
3396 flags |= RADEON_FLAG_IMPLICIT_SYNC;
3397
3398 if (dedicate_info) {
3399 mem->image = radv_image_from_handle(dedicate_info->image);
3400 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
3401 } else {
3402 mem->image = NULL;
3403 mem->buffer = NULL;
3404 }
3405
3406 float priority_float = 0.5;
3407 const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
3408 vk_find_struct_const(pAllocateInfo->pNext,
3409 MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
3410 if (priority_ext)
3411 priority_float = priority_ext->priority;
3412
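	/* Map the [0.0, 1.0] priority onto the winsys range; the clamp
	 * keeps a priority of 1.0 at RADV_BO_PRIORITY_APPLICATION_MAX - 1.
	 */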
3413 unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
3414 (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
3415
3416 mem->user_ptr = NULL;
3417
3418 if (import_info) {
3419 assert(import_info->handleType ==
3420 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3421 import_info->handleType ==
3422 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3423 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
3424 priority, NULL, NULL);
3425 if (!mem->bo) {
3426 result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3427 goto fail;
3428 } else {
3429 close(import_info->fd);
3430 }
3431 } else if (host_ptr_info) {
3432 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3433 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
3434 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
3435 pAllocateInfo->allocationSize,
3436 priority);
3437 if (!mem->bo) {
3438 result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3439 goto fail;
3440 } else {
3441 mem->user_ptr = host_ptr_info->pHostPointer;
3442 }
3443 } else {
3444 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
3445 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
3446 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
3447 domain = RADEON_DOMAIN_GTT;
3448 else
3449 domain = RADEON_DOMAIN_VRAM;
3450
3451 if (mem_type_index == RADV_MEM_TYPE_VRAM)
3452 flags |= RADEON_FLAG_NO_CPU_ACCESS;
3453 else
3454 flags |= RADEON_FLAG_CPU_ACCESS;
3455
3456 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
3457 flags |= RADEON_FLAG_GTT_WC;
3458
3459 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
3460 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
3461 if (device->use_global_bo_list) {
3462 flags |= RADEON_FLAG_PREFER_LOCAL_BO;
3463 }
3464 }
3465
3466 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
3467 domain, flags, priority);
3468
3469 if (!mem->bo) {
3470 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
3471 goto fail;
3472 }
3473 mem->type_index = mem_type_index;
3474 }
3475
3476 result = radv_bo_list_add(device, mem->bo);
3477 if (result != VK_SUCCESS)
3478 goto fail_bo;
3479
3480 *pMem = radv_device_memory_to_handle(mem);
3481
3482 return VK_SUCCESS;
3483
3484 fail_bo:
3485 device->ws->buffer_destroy(mem->bo);
3486 fail:
3487 vk_free2(&device->alloc, pAllocator, mem);
3488
3489 return result;
3490 }
3491
3492 VkResult radv_AllocateMemory(
3493 VkDevice _device,
3494 const VkMemoryAllocateInfo* pAllocateInfo,
3495 const VkAllocationCallbacks* pAllocator,
3496 VkDeviceMemory* pMem)
3497 {
3498 RADV_FROM_HANDLE(radv_device, device, _device);
3499 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
3500 }
3501
3502 void radv_FreeMemory(
3503 VkDevice _device,
3504 VkDeviceMemory _mem,
3505 const VkAllocationCallbacks* pAllocator)
3506 {
3507 RADV_FROM_HANDLE(radv_device, device, _device);
3508 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
3509
3510 if (mem == NULL)
3511 return;
3512
3513 radv_bo_list_remove(device, mem->bo);
3514 device->ws->buffer_destroy(mem->bo);
3515 mem->bo = NULL;
3516
3517 vk_free2(&device->alloc, pAllocator, mem);
3518 }
3519
3520 VkResult radv_MapMemory(
3521 VkDevice _device,
3522 VkDeviceMemory _memory,
3523 VkDeviceSize offset,
3524 VkDeviceSize size,
3525 VkMemoryMapFlags flags,
3526 void** ppData)
3527 {
3528 RADV_FROM_HANDLE(radv_device, device, _device);
3529 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3530
3531 if (mem == NULL) {
3532 *ppData = NULL;
3533 return VK_SUCCESS;
3534 }
3535
3536 if (mem->user_ptr)
3537 *ppData = mem->user_ptr;
3538 else
3539 *ppData = device->ws->buffer_map(mem->bo);
3540
3541 if (*ppData) {
3542 *ppData += offset;
3543 return VK_SUCCESS;
3544 }
3545
3546 return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
3547 }
3548
3549 void radv_UnmapMemory(
3550 VkDevice _device,
3551 VkDeviceMemory _memory)
3552 {
3553 RADV_FROM_HANDLE(radv_device, device, _device);
3554 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3555
3556 if (mem == NULL)
3557 return;
3558
3559 if (mem->user_ptr == NULL)
3560 device->ws->buffer_unmap(mem->bo);
3561 }
3562
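/* The exposed host-visible memory types are all host-coherent, so
 * flushes and invalidations can be no-ops.
 */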
3563 VkResult radv_FlushMappedMemoryRanges(
3564 VkDevice _device,
3565 uint32_t memoryRangeCount,
3566 const VkMappedMemoryRange* pMemoryRanges)
3567 {
3568 return VK_SUCCESS;
3569 }
3570
3571 VkResult radv_InvalidateMappedMemoryRanges(
3572 VkDevice _device,
3573 uint32_t memoryRangeCount,
3574 const VkMappedMemoryRange* pMemoryRanges)
3575 {
3576 return VK_SUCCESS;
3577 }
3578
3579 void radv_GetBufferMemoryRequirements(
3580 VkDevice _device,
3581 VkBuffer _buffer,
3582 VkMemoryRequirements* pMemoryRequirements)
3583 {
3584 RADV_FROM_HANDLE(radv_device, device, _device);
3585 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3586
3587 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3588
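	/* Sparse buffers must be bound at page granularity; everything
	 * else only needs a small fixed alignment.
	 */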
3589 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3590 pMemoryRequirements->alignment = 4096;
3591 else
3592 pMemoryRequirements->alignment = 16;
3593
3594 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
3595 }
3596
3597 void radv_GetBufferMemoryRequirements2(
3598 VkDevice device,
3599 const VkBufferMemoryRequirementsInfo2 *pInfo,
3600 VkMemoryRequirements2 *pMemoryRequirements)
3601 {
3602 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
3603 &pMemoryRequirements->memoryRequirements);
3604 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
3605 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3606 switch (ext->sType) {
3607 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3608 VkMemoryDedicatedRequirements *req =
3609 (VkMemoryDedicatedRequirements *) ext;
3610 req->requiresDedicatedAllocation = buffer->shareable;
3611 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3612 break;
3613 }
3614 default:
3615 break;
3616 }
3617 }
3618 }
3619
3620 void radv_GetImageMemoryRequirements(
3621 VkDevice _device,
3622 VkImage _image,
3623 VkMemoryRequirements* pMemoryRequirements)
3624 {
3625 RADV_FROM_HANDLE(radv_device, device, _device);
3626 RADV_FROM_HANDLE(radv_image, image, _image);
3627
3628 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3629
3630 pMemoryRequirements->size = image->size;
3631 pMemoryRequirements->alignment = image->alignment;
3632 }
3633
3634 void radv_GetImageMemoryRequirements2(
3635 VkDevice device,
3636 const VkImageMemoryRequirementsInfo2 *pInfo,
3637 VkMemoryRequirements2 *pMemoryRequirements)
3638 {
3639 radv_GetImageMemoryRequirements(device, pInfo->image,
3640 &pMemoryRequirements->memoryRequirements);
3641
3642 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3643
3644 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3645 switch (ext->sType) {
3646 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3647 VkMemoryDedicatedRequirements *req =
3648 (VkMemoryDedicatedRequirements *) ext;
3649 req->requiresDedicatedAllocation = image->shareable;
3650 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3651 break;
3652 }
3653 default:
3654 break;
3655 }
3656 }
3657 }
3658
3659 void radv_GetImageSparseMemoryRequirements(
3660 VkDevice device,
3661 VkImage image,
3662 uint32_t* pSparseMemoryRequirementCount,
3663 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
3664 {
3665 stub();
3666 }
3667
3668 void radv_GetImageSparseMemoryRequirements2(
3669 VkDevice device,
3670 const VkImageSparseMemoryRequirementsInfo2 *pInfo,
3671 uint32_t* pSparseMemoryRequirementCount,
3672 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
3673 {
3674 stub();
3675 }
3676
3677 void radv_GetDeviceMemoryCommitment(
3678 VkDevice device,
3679 VkDeviceMemory memory,
3680 VkDeviceSize* pCommittedMemoryInBytes)
3681 {
3682 *pCommittedMemoryInBytes = 0;
3683 }
3684
3685 VkResult radv_BindBufferMemory2(VkDevice device,
3686 uint32_t bindInfoCount,
3687 const VkBindBufferMemoryInfo *pBindInfos)
3688 {
3689 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3690 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3691 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3692
3693 if (mem) {
3694 buffer->bo = mem->bo;
3695 buffer->offset = pBindInfos[i].memoryOffset;
3696 } else {
3697 buffer->bo = NULL;
3698 }
3699 }
3700 return VK_SUCCESS;
3701 }
3702
3703 VkResult radv_BindBufferMemory(
3704 VkDevice device,
3705 VkBuffer buffer,
3706 VkDeviceMemory memory,
3707 VkDeviceSize memoryOffset)
3708 {
3709 const VkBindBufferMemoryInfo info = {
3710 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
3711 .buffer = buffer,
3712 .memory = memory,
3713 .memoryOffset = memoryOffset
3714 };
3715
3716 return radv_BindBufferMemory2(device, 1, &info);
3717 }
3718
3719 VkResult radv_BindImageMemory2(VkDevice device,
3720 uint32_t bindInfoCount,
3721 const VkBindImageMemoryInfo *pBindInfos)
3722 {
3723 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3724 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3725 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3726
3727 if (mem) {
3728 image->bo = mem->bo;
3729 image->offset = pBindInfos[i].memoryOffset;
3730 } else {
3731 image->bo = NULL;
3732 image->offset = 0;
3733 }
3734 }
3735 return VK_SUCCESS;
3736 }
3737
3738
3739 VkResult radv_BindImageMemory(
3740 VkDevice device,
3741 VkImage image,
3742 VkDeviceMemory memory,
3743 VkDeviceSize memoryOffset)
3744 {
3745 const VkBindImageMemoryInfo info = {
3746 		.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
3747 .image = image,
3748 .memory = memory,
3749 .memoryOffset = memoryOffset
3750 };
3751
3752 return radv_BindImageMemory2(device, 1, &info);
3753 }
3754
3755
3756 static void
3757 radv_sparse_buffer_bind_memory(struct radv_device *device,
3758 const VkSparseBufferMemoryBindInfo *bind)
3759 {
3760 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3761
3762 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3763 struct radv_device_memory *mem = NULL;
3764
3765 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3766 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3767
3768 device->ws->buffer_virtual_bind(buffer->bo,
3769 bind->pBinds[i].resourceOffset,
3770 bind->pBinds[i].size,
3771 mem ? mem->bo : NULL,
3772 bind->pBinds[i].memoryOffset);
3773 }
3774 }
3775
3776 static void
3777 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3778 const VkSparseImageOpaqueMemoryBindInfo *bind)
3779 {
3780 RADV_FROM_HANDLE(radv_image, image, bind->image);
3781
3782 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3783 struct radv_device_memory *mem = NULL;
3784
3785 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3786 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3787
3788 device->ws->buffer_virtual_bind(image->bo,
3789 bind->pBinds[i].resourceOffset,
3790 bind->pBinds[i].size,
3791 mem ? mem->bo : NULL,
3792 bind->pBinds[i].memoryOffset);
3793 }
3794 }
3795
3796 VkResult radv_QueueBindSparse(
3797 VkQueue _queue,
3798 uint32_t bindInfoCount,
3799 const VkBindSparseInfo* pBindInfo,
3800 VkFence _fence)
3801 {
3802 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3803 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3804 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3805 bool fence_emitted = false;
3806 VkResult result;
3807 int ret;
3808
3809 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3810 struct radv_winsys_sem_info sem_info;
3811 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3812 radv_sparse_buffer_bind_memory(queue->device,
3813 pBindInfo[i].pBufferBinds + j);
3814 }
3815
3816 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3817 radv_sparse_image_opaque_bind_memory(queue->device,
3818 pBindInfo[i].pImageOpaqueBinds + j);
3819 }
3820
3822 		result = radv_alloc_sem_info(queue->device->instance,
3823 &sem_info,
3824 pBindInfo[i].waitSemaphoreCount,
3825 pBindInfo[i].pWaitSemaphores,
3826 pBindInfo[i].signalSemaphoreCount,
3827 pBindInfo[i].pSignalSemaphores,
3828 _fence);
3829 if (result != VK_SUCCESS)
3830 return result;
3831
3832 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3833 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3834 &queue->device->empty_cs[queue->queue_family_index],
3835 1, NULL, NULL,
3836 &sem_info, NULL,
3837 false, base_fence);
3838 if (ret) {
3839 radv_loge("failed to submit CS %d\n", i);
3840 abort();
3841 }
3842
3843 fence_emitted = true;
3844 }
3845
3846 radv_free_sem_info(&sem_info);
3848 }
3849
3850 if (fence) {
3851 if (!fence_emitted) {
3852 result = radv_signal_fence(queue, fence);
3853 if (result != VK_SUCCESS)
3854 return result;
3855 }
3856 }
3857
3858 return VK_SUCCESS;
3859 }
3860
3861 VkResult radv_CreateFence(
3862 VkDevice _device,
3863 const VkFenceCreateInfo* pCreateInfo,
3864 const VkAllocationCallbacks* pAllocator,
3865 VkFence* pFence)
3866 {
3867 RADV_FROM_HANDLE(radv_device, device, _device);
3868 const VkExportFenceCreateInfo *export =
3869 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
3870 VkExternalFenceHandleTypeFlags handleTypes =
3871 export ? export->handleTypes : 0;
3872
3873 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3874 sizeof(*fence), 8,
3875 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3876
3877 if (!fence)
3878 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3879
3880 fence->fence_wsi = NULL;
3881 fence->temp_syncobj = 0;
3882 if (device->always_use_syncobj || handleTypes) {
3883 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3884 if (ret) {
3885 vk_free2(&device->alloc, pAllocator, fence);
3886 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3887 }
3888 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3889 device->ws->signal_syncobj(device->ws, fence->syncobj);
3890 }
3891 fence->fence = NULL;
3892 } else {
3893 fence->fence = device->ws->create_fence();
3894 if (!fence->fence) {
3895 vk_free2(&device->alloc, pAllocator, fence);
3896 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3897 }
3898 fence->syncobj = 0;
3899 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
3900 device->ws->signal_fence(fence->fence);
3901 }
3902
3903 *pFence = radv_fence_to_handle(fence);
3904
3905 return VK_SUCCESS;
3906 }
3907
3908 void radv_DestroyFence(
3909 VkDevice _device,
3910 VkFence _fence,
3911 const VkAllocationCallbacks* pAllocator)
3912 {
3913 RADV_FROM_HANDLE(radv_device, device, _device);
3914 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3915
3916 if (!fence)
3917 return;
3918
3919 if (fence->temp_syncobj)
3920 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3921 if (fence->syncobj)
3922 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3923 if (fence->fence)
3924 device->ws->destroy_fence(fence->fence);
3925 if (fence->fence_wsi)
3926 fence->fence_wsi->destroy(fence->fence_wsi);
3927 vk_free2(&device->alloc, pAllocator, fence);
3928 }
3929
3930
3931 uint64_t radv_get_current_time(void)
3932 {
3933 struct timespec tv;
3934 clock_gettime(CLOCK_MONOTONIC, &tv);
3935 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3936 }
3937
3938 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3939 {
3940 uint64_t current_time = radv_get_current_time();
3941
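	/* Clamp so that current_time + timeout cannot overflow; a timeout
	 * of UINT64_MAX therefore still means "wait forever".
	 */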
3942 timeout = MIN2(UINT64_MAX - current_time, timeout);
3943
3944 return current_time + timeout;
3945 }
3946
3947
3948 static bool radv_all_fences_plain_and_submitted(struct radv_device *device,
3949 uint32_t fenceCount, const VkFence *pFences)
3950 {
3951 for (uint32_t i = 0; i < fenceCount; ++i) {
3952 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3953 if (fence->fence == NULL || fence->syncobj ||
3954 fence->temp_syncobj || fence->fence_wsi ||
3955 (!device->ws->is_fence_waitable(fence->fence)))
3956 return false;
3957 }
3958 return true;
3959 }
3960
3961 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3962 {
3963 for (uint32_t i = 0; i < fenceCount; ++i) {
3964 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3965 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3966 return false;
3967 }
3968 return true;
3969 }
3970
3971 VkResult radv_WaitForFences(
3972 VkDevice _device,
3973 uint32_t fenceCount,
3974 const VkFence* pFences,
3975 VkBool32 waitAll,
3976 uint64_t timeout)
3977 {
3978 RADV_FROM_HANDLE(radv_device, device, _device);
3979 timeout = radv_get_absolute_timeout(timeout);
3980
3981 if (device->always_use_syncobj &&
3982 radv_all_fences_syncobj(fenceCount, pFences))
3983 {
3984 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3985 if (!handles)
3986 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3987
3988 for (uint32_t i = 0; i < fenceCount; ++i) {
3989 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3990 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3991 }
3992
3993 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3994
3995 free(handles);
3996 return success ? VK_SUCCESS : VK_TIMEOUT;
3997 }
3998
3999 if (!waitAll && fenceCount > 1) {
4000 	/* Not done by default for waitAll, because it would require allocating twice. */
4001 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(device, fenceCount, pFences)) {
4002 uint32_t wait_count = 0;
4003 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
4004 if (!fences)
4005 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4006
4007 for (uint32_t i = 0; i < fenceCount; ++i) {
4008 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
4009
4010 if (device->ws->fence_wait(device->ws, fence->fence, false, 0)) {
4011 free(fences);
4012 return VK_SUCCESS;
4013 }
4014
4015 fences[wait_count++] = fence->fence;
4016 }
4017
4018 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
4019 waitAll, timeout - radv_get_current_time());
4020
4021 free(fences);
4022 return success ? VK_SUCCESS : VK_TIMEOUT;
4023 }
4024
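		/* Fallback: poll each fence until one signals or the
		 * absolute timeout passes.
		 */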
4025 while(radv_get_current_time() <= timeout) {
4026 for (uint32_t i = 0; i < fenceCount; ++i) {
4027 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
4028 return VK_SUCCESS;
4029 }
4030 }
4031 return VK_TIMEOUT;
4032 }
4033
4034 for (uint32_t i = 0; i < fenceCount; ++i) {
4035 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
4036 bool expired = false;
4037
4038 if (fence->temp_syncobj) {
4039 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
4040 return VK_TIMEOUT;
4041 continue;
4042 }
4043
4044 if (fence->syncobj) {
4045 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
4046 return VK_TIMEOUT;
4047 continue;
4048 }
4049
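		/* The fence may not have been submitted yet; spin until it
		 * becomes waitable (or the timeout passes) before doing the
		 * real wait.
		 */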
4050 if (fence->fence) {
4051 if (!device->ws->is_fence_waitable(fence->fence)) {
4052 while(!device->ws->is_fence_waitable(fence->fence) &&
4053 radv_get_current_time() <= timeout)
4054 /* Do nothing */;
4055 }
4056
4057 expired = device->ws->fence_wait(device->ws,
4058 fence->fence,
4059 true, timeout);
4060 if (!expired)
4061 return VK_TIMEOUT;
4062 }
4063
4064 if (fence->fence_wsi) {
4065 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
4066 if (result != VK_SUCCESS)
4067 return result;
4068 }
4069 }
4070
4071 return VK_SUCCESS;
4072 }
4073
4074 VkResult radv_ResetFences(VkDevice _device,
4075 uint32_t fenceCount,
4076 const VkFence *pFences)
4077 {
4078 RADV_FROM_HANDLE(radv_device, device, _device);
4079
4080 for (unsigned i = 0; i < fenceCount; ++i) {
4081 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
4082 if (fence->fence)
4083 device->ws->reset_fence(fence->fence);
4084
4085 /* Per spec, we first restore the permanent payload, and then reset, so
4086 * having a temp syncobj should not skip resetting the permanent syncobj. */
4087 if (fence->temp_syncobj) {
4088 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
4089 fence->temp_syncobj = 0;
4090 }
4091
4092 if (fence->syncobj) {
4093 device->ws->reset_syncobj(device->ws, fence->syncobj);
4094 }
4095 }
4096
4097 return VK_SUCCESS;
4098 }
4099
4100 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
4101 {
4102 RADV_FROM_HANDLE(radv_device, device, _device);
4103 RADV_FROM_HANDLE(radv_fence, fence, _fence);
4104
4105 if (fence->temp_syncobj) {
4106 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
4107 return success ? VK_SUCCESS : VK_NOT_READY;
4108 }
4109
4110 if (fence->syncobj) {
4111 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
4112 return success ? VK_SUCCESS : VK_NOT_READY;
4113 }
4114
4115 if (fence->fence) {
4116 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
4117 return VK_NOT_READY;
4118 }
4119 if (fence->fence_wsi) {
4120 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
4121
4122 if (result != VK_SUCCESS) {
4123 if (result == VK_TIMEOUT)
4124 return VK_NOT_READY;
4125 return result;
4126 }
4127 }
4128 return VK_SUCCESS;
4129 }
4130
4131
4132 // Queue semaphore functions
4133
4134 VkResult radv_CreateSemaphore(
4135 VkDevice _device,
4136 const VkSemaphoreCreateInfo* pCreateInfo,
4137 const VkAllocationCallbacks* pAllocator,
4138 VkSemaphore* pSemaphore)
4139 {
4140 RADV_FROM_HANDLE(radv_device, device, _device);
4141 const VkExportSemaphoreCreateInfo *export =
4142 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
4143 VkExternalSemaphoreHandleTypeFlags handleTypes =
4144 export ? export->handleTypes : 0;
4145
4146 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
4147 sizeof(*sem), 8,
4148 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4149 if (!sem)
4150 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4151
4152 sem->temp_syncobj = 0;
4153 	/* Create a syncobj if this semaphore will be exported or the device always uses syncobjs. */
4154 if (device->always_use_syncobj || handleTypes) {
4155 assert (device->physical_device->rad_info.has_syncobj);
4156 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
4157 if (ret) {
4158 vk_free2(&device->alloc, pAllocator, sem);
4159 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4160 }
4161 sem->sem = NULL;
4162 } else {
4163 sem->sem = device->ws->create_sem(device->ws);
4164 if (!sem->sem) {
4165 vk_free2(&device->alloc, pAllocator, sem);
4166 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4167 }
4168 sem->syncobj = 0;
4169 }
4170
4171 *pSemaphore = radv_semaphore_to_handle(sem);
4172 return VK_SUCCESS;
4173 }
4174
4175 void radv_DestroySemaphore(
4176 VkDevice _device,
4177 VkSemaphore _semaphore,
4178 const VkAllocationCallbacks* pAllocator)
4179 {
4180 RADV_FROM_HANDLE(radv_device, device, _device);
4181 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
4182 if (!_semaphore)
4183 return;
4184
4185 if (sem->syncobj)
4186 device->ws->destroy_syncobj(device->ws, sem->syncobj);
4187 else
4188 device->ws->destroy_sem(sem->sem);
4189 vk_free2(&device->alloc, pAllocator, sem);
4190 }
4191
4192 VkResult radv_CreateEvent(
4193 VkDevice _device,
4194 const VkEventCreateInfo* pCreateInfo,
4195 const VkAllocationCallbacks* pAllocator,
4196 VkEvent* pEvent)
4197 {
4198 RADV_FROM_HANDLE(radv_device, device, _device);
4199 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
4200 sizeof(*event), 8,
4201 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4202
4203 if (!event)
4204 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4205
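	/* Back the event with a small CPU-mappable GTT buffer; the host
	 * (Set/Reset/GetEventStatus) and the GPU both access this single
	 * 64-bit word.
	 */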
4206 event->bo = device->ws->buffer_create(device->ws, 8, 8,
4207 RADEON_DOMAIN_GTT,
4208 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
4209 RADV_BO_PRIORITY_FENCE);
4210 if (!event->bo) {
4211 vk_free2(&device->alloc, pAllocator, event);
4212 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4213 }
4214
4215 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
4216
4217 *pEvent = radv_event_to_handle(event);
4218
4219 return VK_SUCCESS;
4220 }
4221
4222 void radv_DestroyEvent(
4223 VkDevice _device,
4224 VkEvent _event,
4225 const VkAllocationCallbacks* pAllocator)
4226 {
4227 RADV_FROM_HANDLE(radv_device, device, _device);
4228 RADV_FROM_HANDLE(radv_event, event, _event);
4229
4230 if (!event)
4231 return;
4232 device->ws->buffer_destroy(event->bo);
4233 vk_free2(&device->alloc, pAllocator, event);
4234 }
4235
4236 VkResult radv_GetEventStatus(
4237 VkDevice _device,
4238 VkEvent _event)
4239 {
4240 RADV_FROM_HANDLE(radv_event, event, _event);
4241
4242 if (*event->map == 1)
4243 return VK_EVENT_SET;
4244 return VK_EVENT_RESET;
4245 }
4246
4247 VkResult radv_SetEvent(
4248 VkDevice _device,
4249 VkEvent _event)
4250 {
4251 RADV_FROM_HANDLE(radv_event, event, _event);
4252 *event->map = 1;
4253
4254 return VK_SUCCESS;
4255 }
4256
4257 VkResult radv_ResetEvent(
4258 VkDevice _device,
4259 VkEvent _event)
4260 {
4261 RADV_FROM_HANDLE(radv_event, event, _event);
4262 *event->map = 0;
4263
4264 return VK_SUCCESS;
4265 }
4266
4267 VkResult radv_CreateBuffer(
4268 VkDevice _device,
4269 const VkBufferCreateInfo* pCreateInfo,
4270 const VkAllocationCallbacks* pAllocator,
4271 VkBuffer* pBuffer)
4272 {
4273 RADV_FROM_HANDLE(radv_device, device, _device);
4274 struct radv_buffer *buffer;
4275
4276 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
4277
4278 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
4279 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4280 if (buffer == NULL)
4281 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4282
4283 buffer->size = pCreateInfo->size;
4284 buffer->usage = pCreateInfo->usage;
4285 buffer->bo = NULL;
4286 buffer->offset = 0;
4287 buffer->flags = pCreateInfo->flags;
4288
4289 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
4290 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
4291
4292 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
4293 buffer->bo = device->ws->buffer_create(device->ws,
4294 align64(buffer->size, 4096),
4295 4096, 0, RADEON_FLAG_VIRTUAL,
4296 RADV_BO_PRIORITY_VIRTUAL);
4297 if (!buffer->bo) {
4298 vk_free2(&device->alloc, pAllocator, buffer);
4299 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4300 }
4301 }
4302
4303 *pBuffer = radv_buffer_to_handle(buffer);
4304
4305 return VK_SUCCESS;
4306 }
4307
4308 void radv_DestroyBuffer(
4309 VkDevice _device,
4310 VkBuffer _buffer,
4311 const VkAllocationCallbacks* pAllocator)
4312 {
4313 RADV_FROM_HANDLE(radv_device, device, _device);
4314 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
4315
4316 if (!buffer)
4317 return;
4318
4319 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
4320 device->ws->buffer_destroy(buffer->bo);
4321
4322 vk_free2(&device->alloc, pAllocator, buffer);
4323 }
4324
4325 VkDeviceAddress radv_GetBufferDeviceAddressEXT(
4326 VkDevice device,
4327 const VkBufferDeviceAddressInfoEXT* pInfo)
4328 {
4329 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
4330 return radv_buffer_get_va(buffer->bo) + buffer->offset;
4331 }
4332
4333
4334 static inline unsigned
4335 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
4336 {
4337 if (stencil)
4338 return plane->surface.u.legacy.stencil_tiling_index[level];
4339 else
4340 return plane->surface.u.legacy.tiling_index[level];
4341 }
4342
4343 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
4344 {
4345 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
4346 }
4347
4348 static uint32_t
4349 radv_init_dcc_control_reg(struct radv_device *device,
4350 struct radv_image_view *iview)
4351 {
4352 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
4353 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
4354 unsigned max_compressed_block_size;
4355 unsigned independent_128b_blocks;
4356 unsigned independent_64b_blocks;
4357
4358 if (!radv_dcc_enabled(iview->image, iview->base_mip))
4359 return 0;
4360
4361 if (!device->physical_device->rad_info.has_dedicated_vram) {
4362 /* amdvlk: [min-compressed-block-size] should be set to 32 for
4363 * dGPU and 64 for APU because all of our APUs to date use
4364 * DIMMs which have a request granularity size of 64B while all
4365 * other chips have a 32B request size.
4366 */
4367 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
4368 }
4369
4370 if (device->physical_device->rad_info.chip_class >= GFX10) {
4371 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
4372 independent_64b_blocks = 0;
4373 independent_128b_blocks = 1;
4374 } else {
4375 independent_128b_blocks = 0;
4376
4377 if (iview->image->info.samples > 1) {
4378 if (iview->image->planes[0].surface.bpe == 1)
4379 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4380 else if (iview->image->planes[0].surface.bpe == 2)
4381 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
4382 }
4383
4384 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
4385 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
4386 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
4387 /* If this DCC image is potentially going to be used in texture
4388 * fetches, we need some special settings.
4389 */
4390 independent_64b_blocks = 1;
4391 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4392 } else {
4393 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
4394 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
4395 * big as possible for better compression state.
4396 */
4397 independent_64b_blocks = 0;
4398 max_compressed_block_size = max_uncompressed_block_size;
4399 }
4400 }
4401
4402 return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
4403 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
4404 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
4405 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
4406 S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
4407 }
4408
4409 void
4410 radv_initialise_color_surface(struct radv_device *device,
4411 struct radv_color_buffer_info *cb,
4412 struct radv_image_view *iview)
4413 {
4414 const struct vk_format_description *desc;
4415 unsigned ntype, format, swap, endian;
4416 unsigned blend_clamp = 0, blend_bypass = 0;
4417 uint64_t va;
4418 const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
4419 const struct radeon_surf *surf = &plane->surface;
4420
4421 desc = vk_format_description(iview->vk_format);
4422
4423 memset(cb, 0, sizeof(*cb));
4424
4425 /* Intensity is implemented as Red, so treat it that way. */
4426 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
4427
4428 va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;
4429
4430 cb->cb_color_base = va >> 8;
4431
4432 if (device->physical_device->rad_info.chip_class >= GFX9) {
4433 struct gfx9_surf_meta_flags meta;
4434 if (iview->image->dcc_offset)
4435 meta = surf->u.gfx9.dcc;
4436 else
4437 meta = surf->u.gfx9.cmask;
4438
4439 if (device->physical_device->rad_info.chip_class >= GFX10) {
4440 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
4441 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
4442 S_028EE0_CMASK_PIPE_ALIGNED(surf->u.gfx9.cmask.pipe_aligned) |
4443 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
4444 } else {
4445 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
4446 S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
4447 S_028C74_RB_ALIGNED(meta.rb_aligned) |
4448 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
4449 cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
4450 }
4451
4452 cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
4453 cb->cb_color_base |= surf->tile_swizzle;
4454 } else {
4455 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
4456 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
4457
4458 cb->cb_color_base += level_info->offset >> 8;
4459 if (level_info->mode == RADEON_SURF_MODE_2D)
4460 cb->cb_color_base |= surf->tile_swizzle;
4461
4462 pitch_tile_max = level_info->nblk_x / 8 - 1;
4463 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
4464 tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
4465
4466 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
4467 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
4468 cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
4469
4470 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
4471
4472 if (radv_image_has_fmask(iview->image)) {
4473 if (device->physical_device->rad_info.chip_class >= GFX7)
4474 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
4475 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
4476 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
4477 } else {
4478 /* This must be set for fast clear to work without FMASK. */
4479 if (device->physical_device->rad_info.chip_class >= GFX7)
4480 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
4481 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
4482 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
4483 }
4484 }
4485
4486 /* CMASK variables */
4487 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4488 va += iview->image->cmask_offset;
4489 cb->cb_color_cmask = va >> 8;
4490
4491 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4492 va += iview->image->dcc_offset;
4493
4494 if (radv_dcc_enabled(iview->image, iview->base_mip) &&
4495 device->physical_device->rad_info.chip_class <= GFX8)
4496 va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
4497
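	/* Keep only the swizzle bits that fit inside the DCC alignment. */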
4498 unsigned dcc_tile_swizzle = surf->tile_swizzle;
4499 dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
4500
4501 cb->cb_dcc_base = va >> 8;
4502 cb->cb_dcc_base |= dcc_tile_swizzle;
4503
4504 /* GFX10 field has the same base shift as the GFX6 field. */
4505 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4506 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
4507 S_028C6C_SLICE_MAX_GFX10(max_slice);
4508
4509 if (iview->image->info.samples > 1) {
4510 unsigned log_samples = util_logbase2(iview->image->info.samples);
4511
4512 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
4513 S_028C74_NUM_FRAGMENTS(log_samples);
4514 }
4515
4516 if (radv_image_has_fmask(iview->image)) {
4517 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask_offset;
4518 cb->cb_color_fmask = va >> 8;
4519 cb->cb_color_fmask |= surf->fmask_tile_swizzle;
4520 } else {
4521 cb->cb_color_fmask = cb->cb_color_base;
4522 }
4523
4524 ntype = radv_translate_color_numformat(iview->vk_format,
4525 desc,
4526 vk_format_get_first_non_void_channel(iview->vk_format));
4527 format = radv_translate_colorformat(iview->vk_format);
4528 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
4529 radv_finishme("Illegal color\n");
4530 swap = radv_translate_colorswap(iview->vk_format, false);
4531 endian = radv_colorformat_endian_swap(format);
4532
4533 /* blend clamp should be set for all NORM/SRGB types */
4534 if (ntype == V_028C70_NUMBER_UNORM ||
4535 ntype == V_028C70_NUMBER_SNORM ||
4536 ntype == V_028C70_NUMBER_SRGB)
4537 blend_clamp = 1;
4538
4539 	/* Set blend bypass, per the docs, for SINT/UINT and the
4540 	 * 8/24 COLOR variants. */
4541 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
4542 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
4543 format == V_028C70_COLOR_X24_8_32_FLOAT) {
4544 blend_clamp = 0;
4545 blend_bypass = 1;
4546 }
4547 #if 0
4548 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
4549 (format == V_028C70_COLOR_8 ||
4550 format == V_028C70_COLOR_8_8 ||
4551 format == V_028C70_COLOR_8_8_8_8))
4552 ->color_is_int8 = true;
4553 #endif
4554 cb->cb_color_info = S_028C70_FORMAT(format) |
4555 S_028C70_COMP_SWAP(swap) |
4556 S_028C70_BLEND_CLAMP(blend_clamp) |
4557 S_028C70_BLEND_BYPASS(blend_bypass) |
4558 S_028C70_SIMPLE_FLOAT(1) |
4559 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
4560 ntype != V_028C70_NUMBER_SNORM &&
4561 ntype != V_028C70_NUMBER_SRGB &&
4562 format != V_028C70_COLOR_8_24 &&
4563 format != V_028C70_COLOR_24_8) |
4564 S_028C70_NUMBER_TYPE(ntype) |
4565 S_028C70_ENDIAN(endian);
4566 if (radv_image_has_fmask(iview->image)) {
4567 cb->cb_color_info |= S_028C70_COMPRESSION(1);
4568 if (device->physical_device->rad_info.chip_class == GFX6) {
4569 unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
4570 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
4571 }
4572
4573 if (radv_image_is_tc_compat_cmask(iview->image)) {
4574 /* Allow the texture block to read FMASK directly
4575 * without decompressing it. This bit must be cleared
4576 * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
4577 * otherwise the operation doesn't happen.
4578 */
4579 cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
4580
4581 /* Set CMASK into a tiling format that allows the
4582 * texture block to read it.
4583 */
4584 cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
4585 }
4586 }
4587
4588 if (radv_image_has_cmask(iview->image) &&
4589 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
4590 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
4591
4592 if (radv_dcc_enabled(iview->image, iview->base_mip))
4593 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
4594
4595 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
4596
4597 /* This must be set for fast clear to work without FMASK. */
4598 if (!radv_image_has_fmask(iview->image) &&
4599 device->physical_device->rad_info.chip_class == GFX6) {
4600 unsigned bankh = util_logbase2(surf->u.legacy.bankh);
4601 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
4602 }
4603
4604 if (device->physical_device->rad_info.chip_class >= GFX9) {
4605 const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);
4606
4607 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
4608 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
4609 unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
4610 unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);
4611
4612 if (device->physical_device->rad_info.chip_class >= GFX10) {
4613 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
4614
4615 cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
4616 S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
4617 S_028EE0_RESOURCE_LEVEL(1);
4618 } else {
4619 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
4620 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
4621 S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
4622 }
4623
4624 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
4625 S_028C68_MIP0_HEIGHT(height - 1) |
4626 S_028C68_MAX_MIP(iview->image->info.levels - 1);
4627 }
4628 }
4629
4630 static unsigned
4631 radv_calc_decompress_on_z_planes(struct radv_device *device,
4632 struct radv_image_view *iview)
4633 {
4634 unsigned max_zplanes = 0;
4635
4636 assert(radv_image_is_tc_compat_htile(iview->image));
4637
4638 if (device->physical_device->rad_info.chip_class >= GFX9) {
4639 /* Default value for 32-bit depth surfaces. */
4640 max_zplanes = 4;
4641
4642 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
4643 iview->image->info.samples > 1)
4644 max_zplanes = 2;
4645
4646 max_zplanes = max_zplanes + 1;
4647 } else {
4648 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
4649 /* Do not enable Z plane compression for 16-bit depth
4650 			 * surfaces because it isn't supported on GFX8; only
4651 			 * 32-bit depth surfaces are supported by the hardware.
4652 			 * This maintains shader compatibility and reduces the
4653 			 * number of depth decompressions.
4654 */
4655 max_zplanes = 1;
4656 } else {
4657 if (iview->image->info.samples <= 1)
4658 max_zplanes = 5;
4659 else if (iview->image->info.samples <= 4)
4660 max_zplanes = 3;
4661 else
4662 max_zplanes = 2;
4663 }
4664 }
4665
4666 return max_zplanes;
4667 }
4668
4669 void
4670 radv_initialise_ds_surface(struct radv_device *device,
4671 struct radv_ds_buffer_info *ds,
4672 struct radv_image_view *iview)
4673 {
4674 unsigned level = iview->base_mip;
4675 unsigned format, stencil_format;
4676 uint64_t va, s_offs, z_offs;
4677 bool stencil_only = false;
4678 const struct radv_image_plane *plane = &iview->image->planes[0];
4679 const struct radeon_surf *surf = &plane->surface;
4680
4681 assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
4682
4683 memset(ds, 0, sizeof(*ds));
4684 switch (iview->image->vk_format) {
4685 case VK_FORMAT_D24_UNORM_S8_UINT:
4686 case VK_FORMAT_X8_D24_UNORM_PACK32:
4687 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
4688 ds->offset_scale = 2.0f;
4689 break;
4690 case VK_FORMAT_D16_UNORM:
4691 case VK_FORMAT_D16_UNORM_S8_UINT:
4692 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
4693 ds->offset_scale = 4.0f;
4694 break;
4695 case VK_FORMAT_D32_SFLOAT:
4696 case VK_FORMAT_D32_SFLOAT_S8_UINT:
4697 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
4698 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
4699 ds->offset_scale = 1.0f;
4700 break;
4701 case VK_FORMAT_S8_UINT:
4702 stencil_only = true;
4703 break;
4704 default:
4705 break;
4706 }
4707
4708 format = radv_translate_dbformat(iview->image->vk_format);
4709 stencil_format = surf->has_stencil ?
4710 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
4711
4712 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4713 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
4714 S_028008_SLICE_MAX(max_slice);
4715 if (device->physical_device->rad_info.chip_class >= GFX10) {
4716 ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
4717 S_028008_SLICE_MAX_HI(max_slice >> 11);
4718 }
4719
4720 ds->db_htile_data_base = 0;
4721 ds->db_htile_surface = 0;
4722
4723 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4724 s_offs = z_offs = va;
4725
4726 if (device->physical_device->rad_info.chip_class >= GFX9) {
4727 assert(surf->u.gfx9.surf_offset == 0);
4728 s_offs += surf->u.gfx9.stencil_offset;
4729
4730 ds->db_z_info = S_028038_FORMAT(format) |
4731 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
4732 S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
4733 S_028038_MAXMIP(iview->image->info.levels - 1) |
4734 S_028038_ZRANGE_PRECISION(1);
4735 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
4736 S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
4737
4738 if (device->physical_device->rad_info.chip_class == GFX9) {
4739 ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
4740 ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
4741 }
4742
4743 ds->db_depth_view |= S_028008_MIPID(level);
4744 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
4745 S_02801C_Y_MAX(iview->image->info.height - 1);
4746
4747 if (radv_htile_enabled(iview->image, level)) {
4748 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
4749
4750 if (radv_image_is_tc_compat_htile(iview->image)) {
4751 unsigned max_zplanes =
4752 radv_calc_decompress_on_z_planes(device, iview);
4753
4754 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4755
4756 if (device->physical_device->rad_info.chip_class >= GFX10) {
4757 ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
4758 ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
4759 } else {
4760 ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
4761 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
4762 }
4763 }
4764
4765 if (!surf->has_stencil)
4766 /* Use all of the htile_buffer for depth if there's no stencil. */
4767 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
4768 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4769 iview->image->htile_offset;
4770 ds->db_htile_data_base = va >> 8;
4771 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
4772 S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned);
4773
4774 if (device->physical_device->rad_info.chip_class == GFX9) {
4775 ds->db_htile_surface |= S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
4776 }
4777 }
4778 } else {
4779 const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
4780
4781 if (stencil_only)
4782 level_info = &surf->u.legacy.stencil_level[level];
4783
4784 z_offs += surf->u.legacy.level[level].offset;
4785 s_offs += surf->u.legacy.stencil_level[level].offset;
4786
4787 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
4788 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
4789 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
4790
4791 if (iview->image->info.samples > 1)
4792 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
4793
4794 if (device->physical_device->rad_info.chip_class >= GFX7) {
4795 struct radeon_info *info = &device->physical_device->rad_info;
4796 unsigned tiling_index = surf->u.legacy.tiling_index[level];
4797 unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
4798 unsigned macro_index = surf->u.legacy.macro_tile_index;
4799 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
4800 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
4801 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
4802
4803 if (stencil_only)
4804 tile_mode = stencil_tile_mode;
4805
4806 ds->db_depth_info |=
4807 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
4808 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
4809 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
4810 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
4811 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
4812 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
4813 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
4814 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
4815 } else {
4816 unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
4817 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4818 tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
4819 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
4820 if (stencil_only)
4821 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4822 }
4823
4824 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
4825 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
4826 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
4827
4828 if (radv_htile_enabled(iview->image, level)) {
4829 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
4830
4831 if (!surf->has_stencil &&
4832 !radv_image_is_tc_compat_htile(iview->image))
4833 /* Use all of the htile_buffer for depth if there's no stencil. */
4834 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
4835
4836 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4837 iview->image->htile_offset;
4838 ds->db_htile_data_base = va >> 8;
4839 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
4840
4841 if (radv_image_is_tc_compat_htile(iview->image)) {
4842 unsigned max_zplanes =
4843 radv_calc_decompress_on_z_planes(device, iview);
4844
4845 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4846 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4847 }
4848 }
4849 }
4850
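/* DB base address registers are programmed in units of 256 bytes, hence
 * the >> 8 on the depth/stencil (and HTILE) offsets. */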
4851 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4852 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4853 }
4854
4855 VkResult radv_CreateFramebuffer(
4856 VkDevice _device,
4857 const VkFramebufferCreateInfo* pCreateInfo,
4858 const VkAllocationCallbacks* pAllocator,
4859 VkFramebuffer* pFramebuffer)
4860 {
4861 RADV_FROM_HANDLE(radv_device, device, _device);
4862 struct radv_framebuffer *framebuffer;
4863 const VkFramebufferAttachmentsCreateInfoKHR *imageless_create_info =
4864 vk_find_struct_const(pCreateInfo->pNext,
4865 FRAMEBUFFER_ATTACHMENTS_CREATE_INFO_KHR);
4866
4867 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4868
4869 size_t size = sizeof(*framebuffer);
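/* Imageless framebuffers carry no image views, so the trailing attachment
 * array is only allocated when real views are provided. */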
4870 if (!imageless_create_info)
4871 size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
4872 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4873 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4874 if (framebuffer == NULL)
4875 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4876
4877 framebuffer->attachment_count = pCreateInfo->attachmentCount;
4878 framebuffer->width = pCreateInfo->width;
4879 framebuffer->height = pCreateInfo->height;
4880 framebuffer->layers = pCreateInfo->layers;
4881 if (imageless_create_info) {
4882 for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
4883 const VkFramebufferAttachmentImageInfoKHR *attachment =
4884 imageless_create_info->pAttachmentImageInfos + i;
4885 framebuffer->width = MIN2(framebuffer->width, attachment->width);
4886 framebuffer->height = MIN2(framebuffer->height, attachment->height);
4887 framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
4888 }
4889 } else {
4890 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4891 VkImageView _iview = pCreateInfo->pAttachments[i];
4892 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4893 framebuffer->attachments[i] = iview;
4894 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4895 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4896 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4897 }
4898 }
4899
4900 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4901 return VK_SUCCESS;
4902 }
4903
4904 void radv_DestroyFramebuffer(
4905 VkDevice _device,
4906 VkFramebuffer _fb,
4907 const VkAllocationCallbacks* pAllocator)
4908 {
4909 RADV_FROM_HANDLE(radv_device, device, _device);
4910 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4911
4912 if (!fb)
4913 return;
4914 vk_free2(&device->alloc, pAllocator, fb);
4915 }
4916
4917 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4918 {
4919 switch (address_mode) {
4920 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4921 return V_008F30_SQ_TEX_WRAP;
4922 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4923 return V_008F30_SQ_TEX_MIRROR;
4924 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4925 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4926 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4927 return V_008F30_SQ_TEX_CLAMP_BORDER;
4928 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4929 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4930 default:
4931 unreachable("illegal tex wrap mode");
4932 break;
4933 }
4934 }
4935
4936 static unsigned
4937 radv_tex_compare(VkCompareOp op)
4938 {
4939 switch (op) {
4940 case VK_COMPARE_OP_NEVER:
4941 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4942 case VK_COMPARE_OP_LESS:
4943 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4944 case VK_COMPARE_OP_EQUAL:
4945 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4946 case VK_COMPARE_OP_LESS_OR_EQUAL:
4947 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4948 case VK_COMPARE_OP_GREATER:
4949 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4950 case VK_COMPARE_OP_NOT_EQUAL:
4951 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4952 case VK_COMPARE_OP_GREATER_OR_EQUAL:
4953 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4954 case VK_COMPARE_OP_ALWAYS:
4955 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4956 default:
4957 unreachable("illegal compare mode");
4958 break;
4959 }
4960 }
4961
4962 static unsigned
4963 radv_tex_filter(VkFilter filter, unsigned max_aniso)
4964 {
4965 switch (filter) {
4966 case VK_FILTER_NEAREST:
4967 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4968 V_008F38_SQ_TEX_XY_FILTER_POINT);
4969 case VK_FILTER_LINEAR:
4970 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4971 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4972 case VK_FILTER_CUBIC_IMG:
4973 default:
4974 fprintf(stderr, "illegal texture filter\n");
4975 return 0;
4976 }
4977 }
4978
4979 static unsigned
4980 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4981 {
4982 switch (mode) {
4983 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4984 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4985 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4986 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4987 default:
4988 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4989 }
4990 }
4991
4992 static unsigned
4993 radv_tex_bordercolor(VkBorderColor bcolor)
4994 {
4995 switch (bcolor) {
4996 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4997 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4998 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4999 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
5000 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
5001 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
5002 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
5003 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
5004 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
5005 default:
5006 break;
5007 }
5008 return 0;
5009 }
5010
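/* Map a maxAnisotropy value to the hardware's log2 ratio field:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4. */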
5011 static unsigned
5012 radv_tex_aniso_filter(unsigned filter)
5013 {
5014 if (filter < 2)
5015 return 0;
5016 if (filter < 4)
5017 return 1;
5018 if (filter < 8)
5019 return 2;
5020 if (filter < 16)
5021 return 3;
5022 return 4;
5023 }
5024
5025 static unsigned
5026 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
5027 {
5028 switch (mode) {
5029 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
5030 return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
5031 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
5032 return V_008F30_SQ_IMG_FILTER_MODE_MIN;
5033 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
5034 return V_008F30_SQ_IMG_FILTER_MODE_MAX;
5035 default:
5036 break;
5037 }
5038 return 0;
5039 }
5040
5041 static uint32_t
5042 radv_get_max_anisotropy(struct radv_device *device,
5043 const VkSamplerCreateInfo *pCreateInfo)
5044 {
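/* force_aniso < 0 means no override; it is set from the RADV_TEX_ANISO
 * debug option at device creation. */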
5045 if (device->force_aniso >= 0)
5046 return device->force_aniso;
5047
5048 if (pCreateInfo->anisotropyEnable &&
5049 pCreateInfo->maxAnisotropy > 1.0f)
5050 return (uint32_t)pCreateInfo->maxAnisotropy;
5051
5052 return 0;
5053 }
5054
5055 static void
5056 radv_init_sampler(struct radv_device *device,
5057 struct radv_sampler *sampler,
5058 const VkSamplerCreateInfo *pCreateInfo)
5059 {
5060 uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
5061 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
5062 bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
5063 device->physical_device->rad_info.chip_class == GFX9;
5064 unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
5065
5066 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
5067 vk_find_struct_const(pCreateInfo->pNext,
5068 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
5069 if (sampler_reduction)
5070 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
5071
5072 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
5073 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
5074 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
5075 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
5076 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
5077 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
5078 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
5079 S_008F30_ANISO_BIAS(max_aniso_ratio) |
5080 S_008F30_DISABLE_CUBE_WRAP(0) |
5081 S_008F30_COMPAT_MODE(compat_mode) |
5082 S_008F30_FILTER_MODE(filter_mode));
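/* LOD values are clamped to [0, 15] and converted to unsigned fixed
 * point with 8 fractional bits, as the hardware expects. */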
5083 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
5084 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
5085 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
5086 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
5087 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
5088 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
5089 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
5090 S_008F38_MIP_POINT_PRECLAMP(0));
5091 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
5092 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
5093
5094 if (device->physical_device->rad_info.chip_class >= GFX10) {
5095 sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
5096 } else {
5097 sampler->state[2] |=
5098 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
5099 S_008F38_FILTER_PREC_FIX(1) |
5100 S_008F38_ANISO_OVERRIDE_GFX6(device->physical_device->rad_info.chip_class >= GFX8);
5101 }
5102 }
5103
5104 VkResult radv_CreateSampler(
5105 VkDevice _device,
5106 const VkSamplerCreateInfo* pCreateInfo,
5107 const VkAllocationCallbacks* pAllocator,
5108 VkSampler* pSampler)
5109 {
5110 RADV_FROM_HANDLE(radv_device, device, _device);
5111 struct radv_sampler *sampler;
5112
5113 const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
5114 vk_find_struct_const(pCreateInfo->pNext,
5115 SAMPLER_YCBCR_CONVERSION_INFO);
5116
5117 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
5118
5119 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
5120 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5121 if (!sampler)
5122 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5123
5124 radv_init_sampler(device, sampler, pCreateInfo);
5125
5126 sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
5127 *pSampler = radv_sampler_to_handle(sampler);
5128
5129 return VK_SUCCESS;
5130 }
5131
5132 void radv_DestroySampler(
5133 VkDevice _device,
5134 VkSampler _sampler,
5135 const VkAllocationCallbacks* pAllocator)
5136 {
5137 RADV_FROM_HANDLE(radv_device, device, _device);
5138 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
5139
5140 if (!sampler)
5141 return;
5142 vk_free2(&device->alloc, pAllocator, sampler);
5143 }
5144
5145 /* vk_icd.h does not declare this function, so we declare it here to
5146 * suppress -Wmissing-prototypes.
5147 */
5148 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
5149 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
5150
5151 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
5152 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
5153 {
5154 /* For the full details on loader interface versioning, see
5155 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
5156 * What follows is a condensed summary, to help you navigate the large and
5157 * confusing official doc.
5158 *
5159 * - Loader interface v0 is incompatible with later versions. We don't
5160 * support it.
5161 *
5162 * - In loader interface v1:
5163 * - The first ICD entrypoint called by the loader is
5164 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
5165 * entrypoint.
5166 * - The ICD must statically expose no other Vulkan symbol unless it is
5167 * linked with -Bsymbolic.
5168 * - Each dispatchable Vulkan handle created by the ICD must be
5169 * a pointer to a struct whose first member is VK_LOADER_DATA. The
5170 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
5171 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
5172 * vkDestroySurfaceKHR(). The ICD must be capable of working with
5173 * such loader-managed surfaces.
5174 *
5175 * - Loader interface v2 differs from v1 in:
5176 * - The first ICD entrypoint called by the loader is
5177 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
5178 * statically expose this entrypoint.
5179 *
5180 * - Loader interface v3 differs from v2 in:
5181 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
5182 * vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
5183 * because the loader no longer does so.
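*
* - Loader interface v4 differs from v3 in:
*     - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().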
5184 */
5185 *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
5186 return VK_SUCCESS;
5187 }
5188
5189 VkResult radv_GetMemoryFdKHR(VkDevice _device,
5190 const VkMemoryGetFdInfoKHR *pGetFdInfo,
5191 int *pFD)
5192 {
5193 RADV_FROM_HANDLE(radv_device, device, _device);
5194 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
5195
5196 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
5197
5198 /* At the moment, we only support the handle types listed below. */
5199 assert(pGetFdInfo->handleType ==
5200 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5201 pGetFdInfo->handleType ==
5202 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5203
5204 bool ret = radv_get_memory_fd(device, memory, pFD);
5205 if (!ret)
5206 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
5207 return VK_SUCCESS;
5208 }
5209
5210 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
5211 VkExternalMemoryHandleTypeFlagBits handleType,
5212 int fd,
5213 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
5214 {
5215 RADV_FROM_HANDLE(radv_device, device, _device);
5216
5217 switch (handleType) {
5218 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
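/* Imported dma-bufs are currently allowed in every memory type,
 * so advertise all of them. */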
5219 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
5220 return VK_SUCCESS;
5221
5222 default:
5223 /* The valid usage section for this function says:
5224 *
5225 * "handleType must not be one of the handle types defined as
5226 * opaque."
5227 *
5228 * So opaque handle types fall into the default "unsupported" case.
5229 */
5230 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5231 }
5232 }
5233
5234 static VkResult radv_import_opaque_fd(struct radv_device *device,
5235 int fd,
5236 uint32_t *syncobj)
5237 {
5238 uint32_t syncobj_handle = 0;
5239 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
5240 if (ret != 0)
5241 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5242
5243 if (*syncobj)
5244 device->ws->destroy_syncobj(device->ws, *syncobj);
5245
5246 *syncobj = syncobj_handle;
5247 close(fd);
5248
5249 return VK_SUCCESS;
5250 }
5251
5252 static VkResult radv_import_sync_fd(struct radv_device *device,
5253 int fd,
5254 uint32_t *syncobj)
5255 {
5256 /* If we need to create a syncobj, create it locally so that on error we
5257 * don't leave the caller's syncobj in an undetermined state. */
5258 uint32_t syncobj_handle = *syncobj;
5259 if (!syncobj_handle) {
5260 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
5261 if (ret) {
5262 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5263 }
5264 }
5265
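/* Per the spec, a sync fd of -1 is treated as an already-signaled
 * payload, so just signal the syncobj directly. */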
5266 if (fd == -1) {
5267 device->ws->signal_syncobj(device->ws, syncobj_handle);
5268 } else {
5269 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
5270 if (ret != 0)
5271 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5272 }
5273
5274 *syncobj = syncobj_handle;
5275 if (fd != -1)
5276 close(fd);
5277
5278 return VK_SUCCESS;
5279 }
5280
5281 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
5282 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
5283 {
5284 RADV_FROM_HANDLE(radv_device, device, _device);
5285 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
5286 uint32_t *syncobj_dst = NULL;
5287
5288 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
5289 syncobj_dst = &sem->temp_syncobj;
5290 } else {
5291 syncobj_dst = &sem->syncobj;
5292 }
5293
5294 switch(pImportSemaphoreFdInfo->handleType) {
5295 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
5296 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
5297 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
5298 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
5299 default:
5300 unreachable("Unhandled semaphore handle type");
5301 }
5302 }
5303
5304 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
5305 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
5306 int *pFd)
5307 {
5308 RADV_FROM_HANDLE(radv_device, device, _device);
5309 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
5310 int ret;
5311 uint32_t syncobj_handle;
5312
5313 if (sem->temp_syncobj)
5314 syncobj_handle = sem->temp_syncobj;
5315 else
5316 syncobj_handle = sem->syncobj;
5317
5318 switch(pGetFdInfo->handleType) {
5319 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
5320 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
5321 break;
5322 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
5323 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
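/* Exporting a sync fd transfers the payload out of the semaphore,
 * so reset the syncobj (or drop the temporary one) afterwards. */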
5324 if (!ret) {
5325 if (sem->temp_syncobj) {
5326 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
5327 sem->temp_syncobj = 0;
5328 } else {
5329 device->ws->reset_syncobj(device->ws, syncobj_handle);
5330 }
5331 }
5332 break;
5333 default:
5334 unreachable("Unhandled semaphore handle type");
5335 }
5336
5337 if (ret)
5338 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5339 return VK_SUCCESS;
5340 }
5341
5342 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
5343 VkPhysicalDevice physicalDevice,
5344 const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
5345 VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
5346 {
5347 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
5348
5349 /* Require has_syncobj_wait_for_submit because the syncobj signal ioctl was introduced at virtually the same time. */
5350 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
5351 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
5352 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
5353 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
5354 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
5355 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
5356 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5357 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
5358 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
5359 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
5360 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
5361 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5362 } else {
5363 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
5364 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
5365 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
5366 }
5367 }
5368
5369 VkResult radv_ImportFenceFdKHR(VkDevice _device,
5370 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
5371 {
5372 RADV_FROM_HANDLE(radv_device, device, _device);
5373 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
5374 uint32_t *syncobj_dst = NULL;
5375
5377 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
5378 syncobj_dst = &fence->temp_syncobj;
5379 } else {
5380 syncobj_dst = &fence->syncobj;
5381 }
5382
5383 switch(pImportFenceFdInfo->handleType) {
5384 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
5385 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
5386 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
5387 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
5388 default:
5389 unreachable("Unhandled fence handle type");
5390 }
5391 }
5392
5393 VkResult radv_GetFenceFdKHR(VkDevice _device,
5394 const VkFenceGetFdInfoKHR *pGetFdInfo,
5395 int *pFd)
5396 {
5397 RADV_FROM_HANDLE(radv_device, device, _device);
5398 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
5399 int ret;
5400 uint32_t syncobj_handle;
5401
5402 if (fence->temp_syncobj)
5403 syncobj_handle = fence->temp_syncobj;
5404 else
5405 syncobj_handle = fence->syncobj;
5406
5407 switch(pGetFdInfo->handleType) {
5408 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
5409 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
5410 break;
5411 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
5412 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
5413 if (!ret) {
5414 if (fence->temp_syncobj) {
5415 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
5416 fence->temp_syncobj = 0;
5417 } else {
5418 device->ws->reset_syncobj(device->ws, syncobj_handle);
5419 }
5420 }
5421 break;
5422 default:
5423 unreachable("Unhandled fence handle type");
5424 }
5425
5426 if (ret)
5427 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5428 return VK_SUCCESS;
5429 }
5430
5431 void radv_GetPhysicalDeviceExternalFenceProperties(
5432 VkPhysicalDevice physicalDevice,
5433 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
5434 VkExternalFenceProperties *pExternalFenceProperties)
5435 {
5436 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
5437
5438 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
5439 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
5440 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
5441 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
5442 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
5443 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
5444 VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
5445 } else {
5446 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
5447 pExternalFenceProperties->compatibleHandleTypes = 0;
5448 pExternalFenceProperties->externalFenceFeatures = 0;
5449 }
5450 }
5451
5452 VkResult
5453 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
5454 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
5455 const VkAllocationCallbacks* pAllocator,
5456 VkDebugReportCallbackEXT* pCallback)
5457 {
5458 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5459 return vk_create_debug_report_callback(&instance->debug_report_callbacks,
5460 pCreateInfo, pAllocator, &instance->alloc,
5461 pCallback);
5462 }
5463
5464 void
5465 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
5466 VkDebugReportCallbackEXT _callback,
5467 const VkAllocationCallbacks* pAllocator)
5468 {
5469 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5470 vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
5471 _callback, pAllocator, &instance->alloc);
5472 }
5473
5474 void
5475 radv_DebugReportMessageEXT(VkInstance _instance,
5476 VkDebugReportFlagsEXT flags,
5477 VkDebugReportObjectTypeEXT objectType,
5478 uint64_t object,
5479 size_t location,
5480 int32_t messageCode,
5481 const char* pLayerPrefix,
5482 const char* pMessage)
5483 {
5484 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5485 vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
5486 object, location, messageCode, pLayerPrefix, pMessage);
5487 }
5488
5489 void
5490 radv_GetDeviceGroupPeerMemoryFeatures(
5491 VkDevice device,
5492 uint32_t heapIndex,
5493 uint32_t localDeviceIndex,
5494 uint32_t remoteDeviceIndex,
5495 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
5496 {
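/* RADV only exposes single-device groups, so the local and remote
 * device are always the same. */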
5497 assert(localDeviceIndex == remoteDeviceIndex);
5498
5499 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
5500 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
5501 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
5502 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
5503 }
5504
5505 static const VkTimeDomainEXT radv_time_domains[] = {
5506 VK_TIME_DOMAIN_DEVICE_EXT,
5507 VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
5508 VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
5509 };
5510
5511 VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
5512 VkPhysicalDevice physicalDevice,
5513 uint32_t *pTimeDomainCount,
5514 VkTimeDomainEXT *pTimeDomains)
5515 {
5516 int d;
5517 VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
5518
5519 for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
5520 vk_outarray_append(&out, i) {
5521 *i = radv_time_domains[d];
5522 }
5523 }
5524
5525 return vk_outarray_status(&out);
5526 }
5527
5528 static uint64_t
5529 radv_clock_gettime(clockid_t clock_id)
5530 {
5531 struct timespec current;
5532 int ret;
5533
5534 ret = clock_gettime(clock_id, &current);
5535 if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
5536 ret = clock_gettime(CLOCK_MONOTONIC, &current);
5537 if (ret < 0)
5538 return 0;
5539
5540 return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
5541 }
5542
5543 VkResult radv_GetCalibratedTimestampsEXT(
5544 VkDevice _device,
5545 uint32_t timestampCount,
5546 const VkCalibratedTimestampInfoEXT *pTimestampInfos,
5547 uint64_t *pTimestamps,
5548 uint64_t *pMaxDeviation)
5549 {
5550 RADV_FROM_HANDLE(radv_device, device, _device);
5551 uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
5552 int d;
5553 uint64_t begin, end;
5554 uint64_t max_clock_period = 0;
5555
5556 begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
5557
5558 for (d = 0; d < timestampCount; d++) {
5559 switch (pTimestampInfos[d].timeDomain) {
5560 case VK_TIME_DOMAIN_DEVICE_EXT:
5561 pTimestamps[d] = device->ws->query_value(device->ws,
5562 RADEON_TIMESTAMP);
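/* clock_crystal_freq is in kHz, so this is the device tick period
 * in nanoseconds. */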
5563 uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
5564 max_clock_period = MAX2(max_clock_period, device_period);
5565 break;
5566 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
5567 pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
5568 max_clock_period = MAX2(max_clock_period, 1);
5569 break;
5570
5571 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
5572 pTimestamps[d] = begin;
5573 break;
5574 default:
5575 pTimestamps[d] = 0;
5576 break;
5577 }
5578 }
5579
5580 end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
5581
5582 /*
5583 * The maximum deviation is the sum of the interval over which we
5584 * perform the sampling and the maximum period of any sampled
5585 * clock. That's because the maximum skew between any two sampled
5586 * clock edges is when the sampled clock with the largest period is
5587 * sampled at the end of that period but right at the beginning of the
5588 * sampling interval and some other clock is sampled right at the
5589 * beginning of its sampling period and right at the end of the
5590 * sampling interval. Let's assume the GPU has the longest clock
5591 * period and that the application is sampling GPU and monotonic:
5592 *
5593 * s e
5594 * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
5595 * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
5596 *
5597 * g
5598 * 0 1 2 3
5599 * GPU -----_____-----_____-----_____-----_____
5600 *
5601 * m
5602 * x y z 0 1 2 3 4 5 6 7 8 9 a b c
5603 * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
5604 *
5605 * Interval <----------------->
5606 * Deviation <-------------------------->
5607 *
5608 * s = read(raw) 2
5609 * g = read(GPU) 1
5610 * m = read(monotonic) 2
5611 * e = read(raw) b
5612 *
5613 * We round the sample interval up by one tick to cover sampling error
5614 * in the interval clock
5615 */
5616
5617 uint64_t sample_interval = end - begin + 1;
5618
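/* Worked example with hypothetical values: begin = 2000ns, end = 3000ns
 * and a 38ns device tick period give
 * *pMaxDeviation = (3000 - 2000 + 1) + 38 = 1039ns. */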
5619 *pMaxDeviation = sample_interval + max_clock_period;
5620
5621 return VK_SUCCESS;
5622 }
5623
5624 void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
5625 VkPhysicalDevice physicalDevice,
5626 VkSampleCountFlagBits samples,
5627 VkMultisamplePropertiesEXT* pMultisampleProperties)
5628 {
5629 if (samples & (VK_SAMPLE_COUNT_2_BIT |
5630 VK_SAMPLE_COUNT_4_BIT |
5631 VK_SAMPLE_COUNT_8_BIT)) {
5632 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
5633 } else {
5634 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
5635 }
5636 }