radv: add has_clear_state and enable it on CIK+ only
[mesa.git] src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "amdgpu_id.h"
43 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
44 #include "ac_llvm_util.h"
45 #include "vk_format.h"
46 #include "sid.h"
47 #include "gfx9d.h"
48 #include "util/debug.h"
49
50 static int
51 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
52 {
53 uint32_t mesa_timestamp, llvm_timestamp;
54 uint16_t f = family;
55 memset(uuid, 0, VK_UUID_SIZE);
56 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
57 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
58 return -1;
59
60 memcpy(uuid, &mesa_timestamp, 4);
61 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
62 memcpy((char*)uuid + 8, &f, 2);
63 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
64 return 0;
65 }
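/* Resulting 16-byte cache UUID layout (derived from the writes above):
 *   bytes  0..3   Mesa build timestamp
 *   bytes  4..7   LLVM build timestamp
 *   bytes  8..9   radeon_family as a uint16_t
 *   bytes 10..15  "radv" plus NUL padding from the initial memset
 */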
66
67 static void
68 radv_get_driver_uuid(void *uuid)
69 {
70 ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
71 }
72
73 static void
74 radv_get_device_uuid(struct radeon_info *info, void *uuid)
75 {
76 ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
77 }
78
79 static const VkExtensionProperties instance_extensions[] = {
80 {
81 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
82 .specVersion = 25,
83 },
84 #ifdef VK_USE_PLATFORM_XCB_KHR
85 {
86 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
87 .specVersion = 6,
88 },
89 #endif
90 #ifdef VK_USE_PLATFORM_XLIB_KHR
91 {
92 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
93 .specVersion = 6,
94 },
95 #endif
96 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
97 {
98 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
99 .specVersion = 6,
100 },
101 #endif
102 {
103 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
104 .specVersion = 1,
105 },
106 {
107 .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
112 .specVersion = 1,
113 },
114 };
115
116 static const VkExtensionProperties common_device_extensions[] = {
117 {
118 .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
119 .specVersion = 1,
120 },
121 {
122 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
123 .specVersion = 1,
124 },
125 {
126 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
127 .specVersion = 1,
128 },
129 {
130 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
131 .specVersion = 1,
132 },
133 {
134 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
135 .specVersion = 1,
136 },
137 {
138 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
139 .specVersion = 68,
140 },
141 {
142 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
143 .specVersion = 1,
144 },
145 {
146 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
147 .specVersion = 1,
148 },
149 {
150 .extensionName = VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
151 .specVersion = 1,
152 },
153 {
154 .extensionName = VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,
155 .specVersion = 1,
156 },
157 {
158 .extensionName = VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
159 .specVersion = 1,
160 },
161 {
162 .extensionName = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
163 .specVersion = 1,
164 },
165 {
166 .extensionName = VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
167 .specVersion = 1,
168 },
169 {
170 .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
171 .specVersion = 1,
172 },
173 {
174 .extensionName = VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME,
175 .specVersion = 1,
176 },
177 {
178 .extensionName = VK_KHR_BIND_MEMORY_2_EXTENSION_NAME,
179 .specVersion = 1,
180 },
181 {
182 .extensionName = VK_KHR_MAINTENANCE2_EXTENSION_NAME,
183 .specVersion = 1,
184 },
185 {
186 .extensionName = VK_KHR_RELAXED_BLOCK_LAYOUT_EXTENSION_NAME,
187 .specVersion = 1,
188 },
189 };
190
191 static const VkExtensionProperties rasterization_order_extension[] ={
192 {
193 .extensionName = VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME,
194 .specVersion = 1,
195 },
196 };
197
198 static const VkExtensionProperties ext_sema_device_extensions[] = {
199 {
200 .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
201 .specVersion = 1,
202 },
203 {
204 .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
205 .specVersion = 1,
206 },
207 {
208 .extensionName = VK_KHX_MULTIVIEW_EXTENSION_NAME,
209 .specVersion = 1,
210 },
211 };
212
213 static VkResult
214 radv_extensions_register(struct radv_instance *instance,
215 struct radv_extensions *extensions,
216 const VkExtensionProperties *new_ext,
217 uint32_t num_ext)
218 {
219 size_t new_size;
220 VkExtensionProperties *new_ptr;
221
222 assert(new_ext && num_ext > 0);
223
224 if (!new_ext)
225 return VK_ERROR_INITIALIZATION_FAILED;
226
227 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
228 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
229 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
230
231 /* Old array continues to be valid, update nothing */
232 if (!new_ptr)
233 return VK_ERROR_OUT_OF_HOST_MEMORY;
234
235 memcpy(&new_ptr[extensions->num_ext], new_ext,
236 num_ext * sizeof(VkExtensionProperties));
237 extensions->ext_array = new_ptr;
238 extensions->num_ext += num_ext;
239
240 return VK_SUCCESS;
241 }
242
243 static void
244 radv_extensions_finish(struct radv_instance *instance,
245 struct radv_extensions *extensions)
246 {
247 assert(extensions);
248
249 if (!extensions)
250 radv_loge("Attempted to free invalid extension struct\n");
251
252 if (extensions->ext_array)
253 vk_free(&instance->alloc, extensions->ext_array);
254 }
255
256 static bool
257 is_extension_enabled(const VkExtensionProperties *extensions,
258 size_t num_ext,
259 const char *name)
260 {
261 assert(extensions && name);
262
263 for (uint32_t i = 0; i < num_ext; i++) {
264 if (strcmp(name, extensions[i].extensionName) == 0)
265 return true;
266 }
267
268 return false;
269 }
270
271 static const char *
272 get_chip_name(enum radeon_family family)
273 {
274 switch (family) {
275 case CHIP_TAHITI: return "AMD RADV TAHITI";
276 case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
277 case CHIP_VERDE: return "AMD RADV CAPE VERDE";
278 case CHIP_OLAND: return "AMD RADV OLAND";
279 case CHIP_HAINAN: return "AMD RADV HAINAN";
280 case CHIP_BONAIRE: return "AMD RADV BONAIRE";
281 case CHIP_KAVERI: return "AMD RADV KAVERI";
282 case CHIP_KABINI: return "AMD RADV KABINI";
283 case CHIP_HAWAII: return "AMD RADV HAWAII";
284 case CHIP_MULLINS: return "AMD RADV MULLINS";
285 case CHIP_TONGA: return "AMD RADV TONGA";
286 case CHIP_ICELAND: return "AMD RADV ICELAND";
287 case CHIP_CARRIZO: return "AMD RADV CARRIZO";
288 case CHIP_FIJI: return "AMD RADV FIJI";
289 case CHIP_POLARIS10: return "AMD RADV POLARIS10";
290 case CHIP_POLARIS11: return "AMD RADV POLARIS11";
291 case CHIP_POLARIS12: return "AMD RADV POLARIS12";
292 case CHIP_STONEY: return "AMD RADV STONEY";
293 case CHIP_VEGA10: return "AMD RADV VEGA";
294 case CHIP_RAVEN: return "AMD RADV RAVEN";
295 default: return "AMD RADV unknown";
296 }
297 }
298
299 static VkResult
300 radv_physical_device_init(struct radv_physical_device *device,
301 struct radv_instance *instance,
302 drmDevicePtr drm_device)
303 {
304 const char *path = drm_device->nodes[DRM_NODE_RENDER];
305 VkResult result;
306 drmVersionPtr version;
307 int fd;
308
309 fd = open(path, O_RDWR | O_CLOEXEC);
310 if (fd < 0)
311 return VK_ERROR_INCOMPATIBLE_DRIVER;
312
313 version = drmGetVersion(fd);
314 if (!version) {
315 close(fd);
316 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
317 "failed to get version %s: %m", path);
318 }
319
320 if (strcmp(version->name, "amdgpu")) {
321 drmFreeVersion(version);
322 close(fd);
323 return VK_ERROR_INCOMPATIBLE_DRIVER;
324 }
325 drmFreeVersion(version);
326
327 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
328 device->instance = instance;
329 assert(strlen(path) < ARRAY_SIZE(device->path));
330 strncpy(device->path, path, ARRAY_SIZE(device->path));
331
332 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
333 instance->perftest_flags);
334 if (!device->ws) {
335 result = VK_ERROR_INCOMPATIBLE_DRIVER;
336 goto fail;
337 }
338
339 device->local_fd = fd;
340 device->ws->query_info(device->ws, &device->rad_info);
341 result = radv_init_wsi(device);
342 if (result != VK_SUCCESS) {
343 device->ws->destroy(device->ws);
344 goto fail;
345 }
346
347 if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
348 radv_finish_wsi(device);
349 device->ws->destroy(device->ws);
350 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
351 "cannot generate UUID");
352 goto fail;
353 }
354
355 /* These flags affect shader compilation. */
356 uint64_t shader_env_flags =
357 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
358 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
359
360 /* The GPU ID is already embedded in the UUID, so we just pass "radv"
361 * when creating the cache.
362 */
363 char buf[VK_UUID_SIZE + 1];
364 disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE);
365 device->disk_cache = disk_cache_create("radv", buf, shader_env_flags);
366
367 result = radv_extensions_register(instance,
368 &device->extensions,
369 common_device_extensions,
370 ARRAY_SIZE(common_device_extensions));
371 if (result != VK_SUCCESS)
372 goto fail;
373
374 if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2) {
375 result = radv_extensions_register(instance,
376 &device->extensions,
377 rasterization_order_extension,
378 ARRAY_SIZE(rasterization_order_extension));
379 if (result != VK_SUCCESS)
380 goto fail;
381 }
382
383 if (device->rad_info.has_syncobj) {
384 result = radv_extensions_register(instance,
385 &device->extensions,
386 ext_sema_device_extensions,
387 ARRAY_SIZE(ext_sema_device_extensions));
388 if (result != VK_SUCCESS)
389 goto fail;
390 }
391
392 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
393 device->name = get_chip_name(device->rad_info.family);
394
395 radv_get_driver_uuid(&device->driver_uuid);
396 radv_get_device_uuid(&device->rad_info, &device->device_uuid);
397
398 if (device->rad_info.family == CHIP_STONEY ||
399 device->rad_info.chip_class >= GFX9) {
400 device->has_rbplus = true;
401 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
402 }
403
404 /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
405 * on SI.
406 */
407 device->has_clear_state = device->rad_info.chip_class >= CIK;
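/* A sketch of how this flag is intended to be consumed (the emit site is
 * elsewhere in the driver; this snippet is illustrative, not code from
 * this file): on chips where it is safe, the GFX preamble can reset the
 * context registers with a single packet, e.g.
 *
 *     if (device->has_clear_state) {
 *         radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
 *         radeon_emit(cs, 0);
 *     }
 */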
408
409 return VK_SUCCESS;
410
411 fail:
412 close(fd);
413 return result;
414 }
415
416 static void
417 radv_physical_device_finish(struct radv_physical_device *device)
418 {
419 radv_extensions_finish(device->instance, &device->extensions);
420 radv_finish_wsi(device);
421 device->ws->destroy(device->ws);
422 disk_cache_destroy(device->disk_cache);
423 close(device->local_fd);
424 }
425
426 static void *
427 default_alloc_func(void *pUserData, size_t size, size_t align,
428 VkSystemAllocationScope allocationScope)
429 {
430 return malloc(size);
431 }
432
433 static void *
434 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
435 size_t align, VkSystemAllocationScope allocationScope)
436 {
437 return realloc(pOriginal, size);
438 }
439
440 static void
441 default_free_func(void *pUserData, void *pMemory)
442 {
443 free(pMemory);
444 }
445
446 static const VkAllocationCallbacks default_alloc = {
447 .pUserData = NULL,
448 .pfnAllocation = default_alloc_func,
449 .pfnReallocation = default_realloc_func,
450 .pfnFree = default_free_func,
451 };
452
453 static const struct debug_control radv_debug_options[] = {
454 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
455 {"nodcc", RADV_DEBUG_NO_DCC},
456 {"shaders", RADV_DEBUG_DUMP_SHADERS},
457 {"nocache", RADV_DEBUG_NO_CACHE},
458 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
459 {"nohiz", RADV_DEBUG_NO_HIZ},
460 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
461 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
462 {"allbos", RADV_DEBUG_ALL_BOS},
463 {"noibs", RADV_DEBUG_NO_IBS},
464 {"spirv", RADV_DEBUG_DUMP_SPIRV},
465 {"vmfaults", RADV_DEBUG_VM_FAULTS},
466 {"zerovram", RADV_DEBUG_ZERO_VRAM},
467 {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
468 {NULL, 0}
469 };
470
471 const char *
472 radv_get_debug_option_name(int id)
473 {
474 assert(id < ARRAY_SIZE(radv_debug_options) - 1);
475 return radv_debug_options[id].string;
476 }
477
478 static const struct debug_control radv_perftest_options[] = {
479 {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
480 {"sisched", RADV_PERFTEST_SISCHED},
481 {NULL, 0}
482 };
483
484 const char *
485 radv_get_perftest_option_name(int id)
486 {
487 assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
488 return radv_perftest_options[id].string;
489 }
490
491 VkResult radv_CreateInstance(
492 const VkInstanceCreateInfo* pCreateInfo,
493 const VkAllocationCallbacks* pAllocator,
494 VkInstance* pInstance)
495 {
496 struct radv_instance *instance;
497
498 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
499
500 uint32_t client_version;
501 if (pCreateInfo->pApplicationInfo &&
502 pCreateInfo->pApplicationInfo->apiVersion != 0) {
503 client_version = pCreateInfo->pApplicationInfo->apiVersion;
504 } else {
505 client_version = VK_MAKE_VERSION(1, 0, 0);
506 }
507
508 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
509 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
510 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
511 "Client requested version %d.%d.%d",
512 VK_VERSION_MAJOR(client_version),
513 VK_VERSION_MINOR(client_version),
514 VK_VERSION_PATCH(client_version));
515 }
516
517 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
518 if (!is_extension_enabled(instance_extensions,
519 ARRAY_SIZE(instance_extensions),
520 pCreateInfo->ppEnabledExtensionNames[i]))
521 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
522 }
523
524 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
525 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
526 if (!instance)
527 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
528
529 memset(instance, 0, sizeof(*instance));
530
531 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
532
533 if (pAllocator)
534 instance->alloc = *pAllocator;
535 else
536 instance->alloc = default_alloc;
537
538 instance->apiVersion = client_version;
539 instance->physicalDeviceCount = -1;
540
541 _mesa_locale_init();
542
543 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
544
545 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
546 radv_debug_options);
547
548 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
549 radv_perftest_options);
550
551 *pInstance = radv_instance_to_handle(instance);
552
553 return VK_SUCCESS;
554 }
555
556 void radv_DestroyInstance(
557 VkInstance _instance,
558 const VkAllocationCallbacks* pAllocator)
559 {
560 RADV_FROM_HANDLE(radv_instance, instance, _instance);
561
562 if (!instance)
563 return;
564
565 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
566 radv_physical_device_finish(instance->physicalDevices + i);
567 }
568
569 VG(VALGRIND_DESTROY_MEMPOOL(instance));
570
571 _mesa_locale_fini();
572
573 vk_free(&instance->alloc, instance);
574 }
575
576 static VkResult
577 radv_enumerate_devices(struct radv_instance *instance)
578 {
579 /* TODO: Check for more devices? */
580 drmDevicePtr devices[8];
581 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
582 int max_devices;
583
584 instance->physicalDeviceCount = 0;
585
586 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
587 if (max_devices < 1)
588 return VK_ERROR_INCOMPATIBLE_DRIVER;
589
590 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
591 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
592 devices[i]->bustype == DRM_BUS_PCI &&
593 devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
594
595 result = radv_physical_device_init(instance->physicalDevices +
596 instance->physicalDeviceCount,
597 instance,
598 devices[i]);
599 if (result == VK_SUCCESS)
600 ++instance->physicalDeviceCount;
601 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
602 break;
603 }
604 }
605 drmFreeDevices(devices, max_devices);
606
607 return result;
608 }
609
610 VkResult radv_EnumeratePhysicalDevices(
611 VkInstance _instance,
612 uint32_t* pPhysicalDeviceCount,
613 VkPhysicalDevice* pPhysicalDevices)
614 {
615 RADV_FROM_HANDLE(radv_instance, instance, _instance);
616 VkResult result;
617
618 if (instance->physicalDeviceCount < 0) {
619 result = radv_enumerate_devices(instance);
620 if (result != VK_SUCCESS &&
621 result != VK_ERROR_INCOMPATIBLE_DRIVER)
622 return result;
623 }
624
625 if (!pPhysicalDevices) {
626 *pPhysicalDeviceCount = instance->physicalDeviceCount;
627 } else {
628 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
629 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
630 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
631 }
632
633 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
634 : VK_SUCCESS;
635 }
636
637 void radv_GetPhysicalDeviceFeatures(
638 VkPhysicalDevice physicalDevice,
639 VkPhysicalDeviceFeatures* pFeatures)
640 {
641 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
642 bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
643 memset(pFeatures, 0, sizeof(*pFeatures));
644
645 *pFeatures = (VkPhysicalDeviceFeatures) {
646 .robustBufferAccess = true,
647 .fullDrawIndexUint32 = true,
648 .imageCubeArray = true,
649 .independentBlend = true,
650 .geometryShader = !is_gfx9,
651 .tessellationShader = !is_gfx9,
652 .sampleRateShading = true,
653 .dualSrcBlend = true,
654 .logicOp = true,
655 .multiDrawIndirect = true,
656 .drawIndirectFirstInstance = true,
657 .depthClamp = true,
658 .depthBiasClamp = true,
659 .fillModeNonSolid = true,
660 .depthBounds = true,
661 .wideLines = true,
662 .largePoints = true,
663 .alphaToOne = true,
664 .multiViewport = true,
665 .samplerAnisotropy = true,
666 .textureCompressionETC2 = false,
667 .textureCompressionASTC_LDR = false,
668 .textureCompressionBC = true,
669 .occlusionQueryPrecise = true,
670 .pipelineStatisticsQuery = true,
671 .vertexPipelineStoresAndAtomics = true,
672 .fragmentStoresAndAtomics = true,
673 .shaderTessellationAndGeometryPointSize = true,
674 .shaderImageGatherExtended = true,
675 .shaderStorageImageExtendedFormats = true,
676 .shaderStorageImageMultisample = false,
677 .shaderUniformBufferArrayDynamicIndexing = true,
678 .shaderSampledImageArrayDynamicIndexing = true,
679 .shaderStorageBufferArrayDynamicIndexing = true,
680 .shaderStorageImageArrayDynamicIndexing = true,
681 .shaderStorageImageReadWithoutFormat = true,
682 .shaderStorageImageWriteWithoutFormat = true,
683 .shaderClipDistance = true,
684 .shaderCullDistance = true,
685 .shaderFloat64 = true,
686 .shaderInt64 = true,
687 .shaderInt16 = false,
688 .sparseBinding = true,
689 .variableMultisampleRate = true,
690 .inheritedQueries = true,
691 };
692 }
693
694 void radv_GetPhysicalDeviceFeatures2KHR(
695 VkPhysicalDevice physicalDevice,
696 VkPhysicalDeviceFeatures2KHR *pFeatures)
697 {
698 vk_foreach_struct(ext, pFeatures->pNext) {
699 switch (ext->sType) {
700 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
701 VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
702 features->variablePointersStorageBuffer = true;
703 features->variablePointers = false;
704 break;
705 }
706 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
707 VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
708 features->multiview = true;
709 features->multiviewGeometryShader = true;
710 features->multiviewTessellationShader = true;
711 break;
712 }
713 default:
714 break;
715 }
716 }
717 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
718 }
719
720 void radv_GetPhysicalDeviceProperties(
721 VkPhysicalDevice physicalDevice,
722 VkPhysicalDeviceProperties* pProperties)
723 {
724 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
725 VkSampleCountFlags sample_counts = 0xf;
726
727 /* Make sure that the entire descriptor set is addressable with a signed
728 * 32-bit int, i.e. the sum of all limits scaled by descriptor size must
729 * be at most 2 GiB. A combined image & sampler object counts as one of
730 * each. This limit is for the pipeline layout, not for the set layout, but
731 * there is no set limit, so we just impose a pipeline limit. No app is
732 * likely to hit this any time soon. */
733 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
734 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
735 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
736 32 /* sampler, largest when combined with image */ +
737 64 /* sampled image */ +
738 64 /* storage image */);
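/* Rough sanity check (illustrative arithmetic, not a spec value):
 * ignoring the small dynamic-buffer term, this is about
 * 2 GiB / (32+32+32+64+64) = 2 GiB / 224 bytes, i.e. roughly 9.5 million
 * descriptors per stage. */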
739
740 VkPhysicalDeviceLimits limits = {
741 .maxImageDimension1D = (1 << 14),
742 .maxImageDimension2D = (1 << 14),
743 .maxImageDimension3D = (1 << 11),
744 .maxImageDimensionCube = (1 << 14),
745 .maxImageArrayLayers = (1 << 11),
746 .maxTexelBufferElements = 128 * 1024 * 1024,
747 .maxUniformBufferRange = UINT32_MAX,
748 .maxStorageBufferRange = UINT32_MAX,
749 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
750 .maxMemoryAllocationCount = UINT32_MAX,
751 .maxSamplerAllocationCount = 64 * 1024,
752 .bufferImageGranularity = 64, /* A cache line */
753 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
754 .maxBoundDescriptorSets = MAX_SETS,
755 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
756 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
757 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
758 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
759 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
760 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
761 .maxPerStageResources = max_descriptor_set_size,
762 .maxDescriptorSetSamplers = max_descriptor_set_size,
763 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
764 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
765 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
766 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
767 .maxDescriptorSetSampledImages = max_descriptor_set_size,
768 .maxDescriptorSetStorageImages = max_descriptor_set_size,
769 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
770 .maxVertexInputAttributes = 32,
771 .maxVertexInputBindings = 32,
772 .maxVertexInputAttributeOffset = 2047,
773 .maxVertexInputBindingStride = 2048,
774 .maxVertexOutputComponents = 128,
775 .maxTessellationGenerationLevel = 64,
776 .maxTessellationPatchSize = 32,
777 .maxTessellationControlPerVertexInputComponents = 128,
778 .maxTessellationControlPerVertexOutputComponents = 128,
779 .maxTessellationControlPerPatchOutputComponents = 120,
780 .maxTessellationControlTotalOutputComponents = 4096,
781 .maxTessellationEvaluationInputComponents = 128,
782 .maxTessellationEvaluationOutputComponents = 128,
783 .maxGeometryShaderInvocations = 127,
784 .maxGeometryInputComponents = 64,
785 .maxGeometryOutputComponents = 128,
786 .maxGeometryOutputVertices = 256,
787 .maxGeometryTotalOutputComponents = 1024,
788 .maxFragmentInputComponents = 128,
789 .maxFragmentOutputAttachments = 8,
790 .maxFragmentDualSrcAttachments = 1,
791 .maxFragmentCombinedOutputResources = 8,
792 .maxComputeSharedMemorySize = 32768,
793 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
794 .maxComputeWorkGroupInvocations = 2048,
795 .maxComputeWorkGroupSize = {
796 2048,
797 2048,
798 2048
799 },
800 .subPixelPrecisionBits = 4 /* FIXME */,
801 .subTexelPrecisionBits = 4 /* FIXME */,
802 .mipmapPrecisionBits = 4 /* FIXME */,
803 .maxDrawIndexedIndexValue = UINT32_MAX,
804 .maxDrawIndirectCount = UINT32_MAX,
805 .maxSamplerLodBias = 16,
806 .maxSamplerAnisotropy = 16,
807 .maxViewports = MAX_VIEWPORTS,
808 .maxViewportDimensions = { (1 << 14), (1 << 14) },
809 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
810 .viewportSubPixelBits = 13, /* We take a float? */
811 .minMemoryMapAlignment = 4096, /* A page */
812 .minTexelBufferOffsetAlignment = 1,
813 .minUniformBufferOffsetAlignment = 4,
814 .minStorageBufferOffsetAlignment = 4,
815 .minTexelOffset = -32,
816 .maxTexelOffset = 31,
817 .minTexelGatherOffset = -32,
818 .maxTexelGatherOffset = 31,
819 .minInterpolationOffset = -2,
820 .maxInterpolationOffset = 2,
821 .subPixelInterpolationOffsetBits = 8,
822 .maxFramebufferWidth = (1 << 14),
823 .maxFramebufferHeight = (1 << 14),
824 .maxFramebufferLayers = (1 << 10),
825 .framebufferColorSampleCounts = sample_counts,
826 .framebufferDepthSampleCounts = sample_counts,
827 .framebufferStencilSampleCounts = sample_counts,
828 .framebufferNoAttachmentsSampleCounts = sample_counts,
829 .maxColorAttachments = MAX_RTS,
830 .sampledImageColorSampleCounts = sample_counts,
831 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
832 .sampledImageDepthSampleCounts = sample_counts,
833 .sampledImageStencilSampleCounts = sample_counts,
834 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
835 .maxSampleMaskWords = 1,
836 .timestampComputeAndGraphics = true,
837 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
838 .maxClipDistances = 8,
839 .maxCullDistances = 8,
840 .maxCombinedClipAndCullDistances = 8,
841 .discreteQueuePriorities = 1,
842 .pointSizeRange = { 0.125, 255.875 },
843 .lineWidthRange = { 0.0, 7.9921875 },
844 .pointSizeGranularity = (1.0 / 8.0),
845 .lineWidthGranularity = (1.0 / 128.0),
846 .strictLines = false, /* FINISHME */
847 .standardSampleLocations = true,
848 .optimalBufferCopyOffsetAlignment = 128,
849 .optimalBufferCopyRowPitchAlignment = 128,
850 .nonCoherentAtomSize = 64,
851 };
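/* Note on timestampPeriod above: clock_crystal_freq is reported in kHz,
 * so 1000000.0 / freq yields nanoseconds per timestamp tick. Assuming a
 * typical 27 MHz reference clock (27000 kHz), that is about 37 ns. */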
852
853 *pProperties = (VkPhysicalDeviceProperties) {
854 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
855 .driverVersion = vk_get_driver_version(),
856 .vendorID = ATI_VENDOR_ID,
857 .deviceID = pdevice->rad_info.pci_id,
858 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
859 .limits = limits,
860 .sparseProperties = {0},
861 };
862
863 strcpy(pProperties->deviceName, pdevice->name);
864 memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
865 }
866
867 void radv_GetPhysicalDeviceProperties2KHR(
868 VkPhysicalDevice physicalDevice,
869 VkPhysicalDeviceProperties2KHR *pProperties)
870 {
871 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
872 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
873
874 vk_foreach_struct(ext, pProperties->pNext) {
875 switch (ext->sType) {
876 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
877 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
878 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
879 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
880 break;
881 }
882 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
883 VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
884 memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
885 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
886 properties->deviceLUIDValid = false;
887 break;
888 }
889 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
890 VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
891 properties->maxMultiviewViewCount = MAX_VIEWS;
892 properties->maxMultiviewInstanceIndex = INT_MAX;
893 break;
894 }
895 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
896 VkPhysicalDevicePointClippingPropertiesKHR *properties =
897 (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
898 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
899 break;
900 }
901 default:
902 break;
903 }
904 }
905 }
906
907 static void radv_get_physical_device_queue_family_properties(
908 struct radv_physical_device* pdevice,
909 uint32_t* pCount,
910 VkQueueFamilyProperties** pQueueFamilyProperties)
911 {
912 int num_queue_families = 1;
913 int idx;
914 if (pdevice->rad_info.num_compute_rings > 0 &&
915 pdevice->rad_info.chip_class >= CIK &&
916 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
917 num_queue_families++;
918
919 if (pQueueFamilyProperties == NULL) {
920 *pCount = num_queue_families;
921 return;
922 }
923
924 if (!*pCount)
925 return;
926
927 idx = 0;
928 if (*pCount >= 1) {
929 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
930 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
931 VK_QUEUE_COMPUTE_BIT |
932 VK_QUEUE_TRANSFER_BIT |
933 VK_QUEUE_SPARSE_BINDING_BIT,
934 .queueCount = 1,
935 .timestampValidBits = 64,
936 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
937 };
938 idx++;
939 }
940
941 if (pdevice->rad_info.num_compute_rings > 0 &&
942 pdevice->rad_info.chip_class >= CIK &&
943 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
944 if (*pCount > idx) {
945 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
946 .queueFlags = VK_QUEUE_COMPUTE_BIT |
947 VK_QUEUE_TRANSFER_BIT |
948 VK_QUEUE_SPARSE_BINDING_BIT,
949 .queueCount = pdevice->rad_info.num_compute_rings,
950 .timestampValidBits = 64,
951 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
952 };
953 idx++;
954 }
955 }
956 *pCount = idx;
957 }
958
959 void radv_GetPhysicalDeviceQueueFamilyProperties(
960 VkPhysicalDevice physicalDevice,
961 uint32_t* pCount,
962 VkQueueFamilyProperties* pQueueFamilyProperties)
963 {
964 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
965 if (!pQueueFamilyProperties) {
966 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
967 return;
968 }
969 VkQueueFamilyProperties *properties[] = {
970 pQueueFamilyProperties + 0,
971 pQueueFamilyProperties + 1,
972 pQueueFamilyProperties + 2,
973 };
974 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
975 assert(*pCount <= 3);
976 }
977
978 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
979 VkPhysicalDevice physicalDevice,
980 uint32_t* pCount,
981 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
982 {
983 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
984 if (!pQueueFamilyProperties) {
985 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
986 return;
987 }
988 VkQueueFamilyProperties *properties[] = {
989 &pQueueFamilyProperties[0].queueFamilyProperties,
990 &pQueueFamilyProperties[1].queueFamilyProperties,
991 &pQueueFamilyProperties[2].queueFamilyProperties,
992 };
993 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
994 assert(*pCount <= 3);
995 }
996
997 void radv_GetPhysicalDeviceMemoryProperties(
998 VkPhysicalDevice physicalDevice,
999 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1000 {
1001 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1002
1003 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
1004
1005 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
1006 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
1007 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1008 .heapIndex = RADV_MEM_HEAP_VRAM,
1009 };
1010 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
1011 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1012 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1013 .heapIndex = RADV_MEM_HEAP_GTT,
1014 };
1015 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
1016 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1017 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1018 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1019 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
1020 };
1021 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
1022 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1023 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1024 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1025 .heapIndex = RADV_MEM_HEAP_GTT,
1026 };
1027
1028 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
1029 uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
1030 physical_device->rad_info.vram_vis_size);
1031
1032 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
1033 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
1034 .size = physical_device->rad_info.vram_size -
1035 visible_vram_size,
1036 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1037 };
1038 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
1039 .size = visible_vram_size,
1040 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1041 };
1042 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
1043 .size = physical_device->rad_info.gart_size,
1044 .flags = 0,
1045 };
1046 }
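/* Illustrative split (example numbers, not taken from this code): an
 * 8 GiB card with a 256 MiB CPU-visible aperture ends up with a
 * 7.75 GiB device-local VRAM heap, a 256 MiB device-local + host-visible
 * heap, and a GTT heap the size of the GART. */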
1047
1048 void radv_GetPhysicalDeviceMemoryProperties2KHR(
1049 VkPhysicalDevice physicalDevice,
1050 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
1051 {
1052 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1053 &pMemoryProperties->memoryProperties);
1054 }
1055
1056 static int
1057 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1058 int queue_family_index, int idx)
1059 {
1060 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1061 queue->device = device;
1062 queue->queue_family_index = queue_family_index;
1063 queue->queue_idx = idx;
1064
1065 queue->hw_ctx = device->ws->ctx_create(device->ws);
1066 if (!queue->hw_ctx)
1067 return VK_ERROR_OUT_OF_HOST_MEMORY;
1068
1069 return VK_SUCCESS;
1070 }
1071
1072 static void
1073 radv_queue_finish(struct radv_queue *queue)
1074 {
1075 if (queue->hw_ctx)
1076 queue->device->ws->ctx_destroy(queue->hw_ctx);
1077
1078 if (queue->initial_full_flush_preamble_cs)
1079 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1080 if (queue->initial_preamble_cs)
1081 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1082 if (queue->continue_preamble_cs)
1083 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1084 if (queue->descriptor_bo)
1085 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1086 if (queue->scratch_bo)
1087 queue->device->ws->buffer_destroy(queue->scratch_bo);
1088 if (queue->esgs_ring_bo)
1089 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1090 if (queue->gsvs_ring_bo)
1091 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1092 if (queue->tess_factor_ring_bo)
1093 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
1094 if (queue->tess_offchip_ring_bo)
1095 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
1096 if (queue->compute_scratch_bo)
1097 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1098 }
1099
1100 static void
1101 radv_device_init_gs_info(struct radv_device *device)
1102 {
1103 switch (device->physical_device->rad_info.family) {
1104 case CHIP_OLAND:
1105 case CHIP_HAINAN:
1106 case CHIP_KAVERI:
1107 case CHIP_KABINI:
1108 case CHIP_MULLINS:
1109 case CHIP_ICELAND:
1110 case CHIP_CARRIZO:
1111 case CHIP_STONEY:
1112 device->gs_table_depth = 16;
1113 return;
1114 case CHIP_TAHITI:
1115 case CHIP_PITCAIRN:
1116 case CHIP_VERDE:
1117 case CHIP_BONAIRE:
1118 case CHIP_HAWAII:
1119 case CHIP_TONGA:
1120 case CHIP_FIJI:
1121 case CHIP_POLARIS10:
1122 case CHIP_POLARIS11:
1123 case CHIP_POLARIS12:
1124 case CHIP_VEGA10:
1125 case CHIP_RAVEN:
1126 device->gs_table_depth = 32;
1127 return;
1128 default:
1129 unreachable("unknown GPU");
1130 }
1131 }
1132
1133 VkResult radv_CreateDevice(
1134 VkPhysicalDevice physicalDevice,
1135 const VkDeviceCreateInfo* pCreateInfo,
1136 const VkAllocationCallbacks* pAllocator,
1137 VkDevice* pDevice)
1138 {
1139 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1140 VkResult result;
1141 struct radv_device *device;
1142
1143 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1144 if (!is_extension_enabled(physical_device->extensions.ext_array,
1145 physical_device->extensions.num_ext,
1146 pCreateInfo->ppEnabledExtensionNames[i]))
1147 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1148 }
1149
1150 /* Check enabled features */
1151 if (pCreateInfo->pEnabledFeatures) {
1152 VkPhysicalDeviceFeatures supported_features;
1153 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1154 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1155 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1156 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1157 for (uint32_t i = 0; i < num_features; i++) {
1158 if (enabled_feature[i] && !supported_feature[i])
1159 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1160 }
1161 }
1162
1163 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1164 sizeof(*device), 8,
1165 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1166 if (!device)
1167 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1168
1169 memset(device, 0, sizeof(*device));
1170
1171 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1172 device->instance = physical_device->instance;
1173 device->physical_device = physical_device;
1174
1175 device->ws = physical_device->ws;
1176 if (pAllocator)
1177 device->alloc = *pAllocator;
1178 else
1179 device->alloc = physical_device->instance->alloc;
1180
1181 mtx_init(&device->shader_slab_mutex, mtx_plain);
1182 list_inithead(&device->shader_slabs);
1183
1184 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1185 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1186 uint32_t qfi = queue_create->queueFamilyIndex;
1187
1188 device->queues[qfi] = vk_alloc(&device->alloc,
1189 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1190 if (!device->queues[qfi]) {
1191 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1192 goto fail;
1193 }
1194
1195 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1196
1197 device->queue_count[qfi] = queue_create->queueCount;
1198
1199 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1200 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1201 if (result != VK_SUCCESS)
1202 goto fail;
1203 }
1204 }
1205
1206 #if HAVE_LLVM < 0x0400
1207 device->llvm_supports_spill = false;
1208 #else
1209 device->llvm_supports_spill = true;
1210 #endif
1211
1212 /* The maximum number of scratch waves. Scratch space isn't divided
1213 * evenly between CUs. The number is only a function of the number of CUs.
1214 * We can decrease the constant to decrease the scratch buffer size.
1215 *
1216 * device->scratch_waves must be >= the maximum possible size of
1217 * 1 threadgroup, so that the hw doesn't hang from being unable
1218 * to start any.
1219 *
1220 * The recommended value is 4 per CU at most. Higher numbers don't
1221 * bring much benefit, but they still occupy chip resources (think
1222 * async compute). I've seen ~2% performance difference between 4 and 32.
1223 */
1224 uint32_t max_threads_per_block = 2048;
1225 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1226 max_threads_per_block / 64);
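/* Illustrative example: a 64-CU part gets MAX2(32 * 64, 2048 / 64) = 2048
 * scratch waves. The second argument (32 waves) only matters on very
 * small parts, where it guarantees one full 2048-thread threadgroup of
 * 64-lane waves can always launch. */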
1227
1228 radv_device_init_gs_info(device);
1229
1230 device->tess_offchip_block_dw_size =
1231 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1232 device->has_distributed_tess =
1233 device->physical_device->rad_info.chip_class >= VI &&
1234 device->physical_device->rad_info.max_se >= 2;
1235
1236 if (getenv("RADV_TRACE_FILE")) {
1237 if (!radv_init_trace(device))
1238 goto fail;
1239 }
1240
1241 result = radv_device_init_meta(device);
1242 if (result != VK_SUCCESS)
1243 goto fail;
1244
1245 radv_device_init_msaa(device);
1246
1247 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1248 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1249 switch (family) {
1250 case RADV_QUEUE_GENERAL:
1251 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1252 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1253 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1254 break;
1255 case RADV_QUEUE_COMPUTE:
1256 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1257 radeon_emit(device->empty_cs[family], 0);
1258 break;
1259 }
1260 device->ws->cs_finalize(device->empty_cs[family]);
1261 }
1262
1263 if (device->physical_device->rad_info.chip_class >= CIK)
1264 cik_create_gfx_config(device);
1265
1266 VkPipelineCacheCreateInfo ci;
1267 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1268 ci.pNext = NULL;
1269 ci.flags = 0;
1270 ci.pInitialData = NULL;
1271 ci.initialDataSize = 0;
1272 VkPipelineCache pc;
1273 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1274 &ci, NULL, &pc);
1275 if (result != VK_SUCCESS)
1276 goto fail;
1277
1278 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1279
1280 *pDevice = radv_device_to_handle(device);
1281 return VK_SUCCESS;
1282
1283 fail:
1284 if (device->trace_bo)
1285 device->ws->buffer_destroy(device->trace_bo);
1286
1287 if (device->gfx_init)
1288 device->ws->buffer_destroy(device->gfx_init);
1289
1290 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1291 for (unsigned q = 0; q < device->queue_count[i]; q++)
1292 radv_queue_finish(&device->queues[i][q]);
1293 if (device->queue_count[i])
1294 vk_free(&device->alloc, device->queues[i]);
1295 }
1296
1297 vk_free(&device->alloc, device);
1298 return result;
1299 }
1300
1301 void radv_DestroyDevice(
1302 VkDevice _device,
1303 const VkAllocationCallbacks* pAllocator)
1304 {
1305 RADV_FROM_HANDLE(radv_device, device, _device);
1306
1307 if (!device)
1308 return;
1309
1310 if (device->trace_bo)
1311 device->ws->buffer_destroy(device->trace_bo);
1312
1313 if (device->gfx_init)
1314 device->ws->buffer_destroy(device->gfx_init);
1315
1316 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1317 for (unsigned q = 0; q < device->queue_count[i]; q++)
1318 radv_queue_finish(&device->queues[i][q]);
1319 if (device->queue_count[i])
1320 vk_free(&device->alloc, device->queues[i]);
1321 if (device->empty_cs[i])
1322 device->ws->cs_destroy(device->empty_cs[i]);
1323 }
1324 radv_device_finish_meta(device);
1325
1326 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1327 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1328
1329 radv_destroy_shader_slabs(device);
1330
1331 vk_free(&device->alloc, device);
1332 }
1333
1334 VkResult radv_EnumerateInstanceExtensionProperties(
1335 const char* pLayerName,
1336 uint32_t* pPropertyCount,
1337 VkExtensionProperties* pProperties)
1338 {
1339 if (pProperties == NULL) {
1340 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1341 return VK_SUCCESS;
1342 }
1343
1344 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1345 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1346
1347 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1348 return VK_INCOMPLETE;
1349
1350 return VK_SUCCESS;
1351 }
1352
1353 VkResult radv_EnumerateDeviceExtensionProperties(
1354 VkPhysicalDevice physicalDevice,
1355 const char* pLayerName,
1356 uint32_t* pPropertyCount,
1357 VkExtensionProperties* pProperties)
1358 {
1359 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1360
1361 if (pProperties == NULL) {
1362 *pPropertyCount = pdevice->extensions.num_ext;
1363 return VK_SUCCESS;
1364 }
1365
1366 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1367 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1368
1369 if (*pPropertyCount < pdevice->extensions.num_ext)
1370 return VK_INCOMPLETE;
1371
1372 return VK_SUCCESS;
1373 }
1374
1375 VkResult radv_EnumerateInstanceLayerProperties(
1376 uint32_t* pPropertyCount,
1377 VkLayerProperties* pProperties)
1378 {
1379 if (pProperties == NULL) {
1380 *pPropertyCount = 0;
1381 return VK_SUCCESS;
1382 }
1383
1384 /* None supported at this time */
1385 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1386 }
1387
1388 VkResult radv_EnumerateDeviceLayerProperties(
1389 VkPhysicalDevice physicalDevice,
1390 uint32_t* pPropertyCount,
1391 VkLayerProperties* pProperties)
1392 {
1393 if (pProperties == NULL) {
1394 *pPropertyCount = 0;
1395 return VK_SUCCESS;
1396 }
1397
1398 /* None supported at this time */
1399 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1400 }
1401
1402 void radv_GetDeviceQueue(
1403 VkDevice _device,
1404 uint32_t queueFamilyIndex,
1405 uint32_t queueIndex,
1406 VkQueue* pQueue)
1407 {
1408 RADV_FROM_HANDLE(radv_device, device, _device);
1409
1410 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1411 }
1412
1413 static void
1414 fill_geom_tess_rings(struct radv_queue *queue,
1415 uint32_t *map,
1416 bool add_sample_positions,
1417 uint32_t esgs_ring_size,
1418 struct radeon_winsys_bo *esgs_ring_bo,
1419 uint32_t gsvs_ring_size,
1420 struct radeon_winsys_bo *gsvs_ring_bo,
1421 uint32_t tess_factor_ring_size,
1422 struct radeon_winsys_bo *tess_factor_ring_bo,
1423 uint32_t tess_offchip_ring_size,
1424 struct radeon_winsys_bo *tess_offchip_ring_bo)
1425 {
1426 uint64_t esgs_va = 0, gsvs_va = 0;
1427 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1428 uint32_t *desc = &map[4];
1429
1430 if (esgs_ring_bo)
1431 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1432 if (gsvs_ring_bo)
1433 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1434 if (tess_factor_ring_bo)
1435 tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
1436 if (tess_offchip_ring_bo)
1437 tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);
1438
1439 /* stride 0, num records - size, add tid, swizzle, elsize4,
1440 index stride 64 */
1441 desc[0] = esgs_va;
1442 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1443 S_008F04_STRIDE(0) |
1444 S_008F04_SWIZZLE_ENABLE(true);
1445 desc[2] = esgs_ring_size;
1446 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1447 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1448 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1449 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1450 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1451 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1452 S_008F0C_ELEMENT_SIZE(1) |
1453 S_008F0C_INDEX_STRIDE(3) |
1454 S_008F0C_ADD_TID_ENABLE(true);
1455
1456 desc += 4;
1457 /* GS entry for ES->GS ring */
1458 /* stride 0, num records - size, elsize0,
1459 index stride 0 */
1460 desc[0] = esgs_va;
1461 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1462 S_008F04_STRIDE(0) |
1463 S_008F04_SWIZZLE_ENABLE(false);
1464 desc[2] = esgs_ring_size;
1465 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1466 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1467 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1468 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1469 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1470 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1471 S_008F0C_ELEMENT_SIZE(0) |
1472 S_008F0C_INDEX_STRIDE(0) |
1473 S_008F0C_ADD_TID_ENABLE(false);
1474
1475 desc += 4;
1476 /* VS entry for GS->VS ring */
1477 /* stride 0, num records - size, elsize0,
1478 index stride 0 */
1479 desc[0] = gsvs_va;
1480 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1481 S_008F04_STRIDE(0) |
1482 S_008F04_SWIZZLE_ENABLE(false);
1483 desc[2] = gsvs_ring_size;
1484 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1485 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1486 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1487 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1488 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1489 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1490 S_008F0C_ELEMENT_SIZE(0) |
1491 S_008F0C_INDEX_STRIDE(0) |
1492 S_008F0C_ADD_TID_ENABLE(false);
1493 desc += 4;
1494
1495 /* stride gsvs_itemsize, num records 64
1496 elsize 4, index stride 16 */
1497 /* shader will patch stride and desc[2] */
1498 desc[0] = gsvs_va;
1499 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1500 S_008F04_STRIDE(0) |
1501 S_008F04_SWIZZLE_ENABLE(true);
1502 desc[2] = 0;
1503 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1504 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1505 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1506 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1507 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1508 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1509 S_008F0C_ELEMENT_SIZE(1) |
1510 S_008F0C_INDEX_STRIDE(1) |
1511 S_008F0C_ADD_TID_ENABLE(true);
1512 desc += 4;
1513
1514 desc[0] = tess_factor_va;
1515 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1516 S_008F04_STRIDE(0) |
1517 S_008F04_SWIZZLE_ENABLE(false);
1518 desc[2] = tess_factor_ring_size;
1519 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1520 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1521 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1522 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1523 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1524 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1525 S_008F0C_ELEMENT_SIZE(0) |
1526 S_008F0C_INDEX_STRIDE(0) |
1527 S_008F0C_ADD_TID_ENABLE(false);
1528 desc += 4;
1529
1530 desc[0] = tess_offchip_va;
1531 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1532 S_008F04_STRIDE(0) |
1533 S_008F04_SWIZZLE_ENABLE(false);
1534 desc[2] = tess_offchip_ring_size;
1535 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1536 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1537 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1538 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1539 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1540 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1541 S_008F0C_ELEMENT_SIZE(0) |
1542 S_008F0C_INDEX_STRIDE(0) |
1543 S_008F0C_ADD_TID_ENABLE(false);
1544 desc += 4;
1545
1546 /* add sample positions after all rings */
1547 memcpy(desc, queue->device->sample_locations_1x, 8);
1548 desc += 2;
1549 memcpy(desc, queue->device->sample_locations_2x, 16);
1550 desc += 4;
1551 memcpy(desc, queue->device->sample_locations_4x, 32);
1552 desc += 8;
1553 memcpy(desc, queue->device->sample_locations_8x, 64);
1554 desc += 16;
1555 memcpy(desc, queue->device->sample_locations_16x, 128);
1556 }
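/* Summary of the layout written above, starting at map[4]: six 4-dword
 * buffer descriptors (ES->GS write, ES->GS read, GS->VS read, GS->VS
 * write, tess factor ring, tess offchip ring), followed by the 1x, 2x,
 * 4x, 8x and 16x sample position tables (8+16+32+64+128 bytes). */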
1557
1558 static unsigned
1559 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1560 {
1561 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1562 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1563 device->physical_device->rad_info.family != CHIP_STONEY;
1564 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1565 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1566 device->physical_device->rad_info.max_se;
1567 unsigned offchip_granularity;
1568 unsigned hs_offchip_param;
1569 switch (device->tess_offchip_block_dw_size) {
1570 default:
1571 assert(0);
1572 /* fall through */
1573 case 8192:
1574 offchip_granularity = V_03093C_X_8K_DWORDS;
1575 break;
1576 case 4096:
1577 offchip_granularity = V_03093C_X_4K_DWORDS;
1578 break;
1579 }
1580
1581 switch (device->physical_device->rad_info.chip_class) {
1582 case SI:
1583 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1584 break;
1585 case CIK:
1586 case VI:
1587 case GFX9:
1588 default:
1589 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1590 break;
1591 }
1592
1593 *max_offchip_buffers_p = max_offchip_buffers;
1594 if (device->physical_device->rad_info.chip_class >= CIK) {
1595 if (device->physical_device->rad_info.chip_class >= VI)
1596 --max_offchip_buffers;
1597 hs_offchip_param =
1598 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1599 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1600 } else {
1601 hs_offchip_param =
1602 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1603 }
1604 return hs_offchip_param;
1605 }
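/* Illustrative example, assuming a 4-SE VI part: double buffering gives
 * 128 * 4 = 512 buffers, clamped to 508. The extra decrement on VI+
 * reflects that the OFFCHIP_BUFFERING field appears to be biased by one
 * there, so the register encodes 507 while *max_offchip_buffers_p still
 * reports 508 to the caller. */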
1606
1607 static VkResult
1608 radv_get_preamble_cs(struct radv_queue *queue,
1609 uint32_t scratch_size,
1610 uint32_t compute_scratch_size,
1611 uint32_t esgs_ring_size,
1612 uint32_t gsvs_ring_size,
1613 bool needs_tess_rings,
1614 bool needs_sample_positions,
1615 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1616 struct radeon_winsys_cs **initial_preamble_cs,
1617 struct radeon_winsys_cs **continue_preamble_cs)
1618 {
1619 struct radeon_winsys_bo *scratch_bo = NULL;
1620 struct radeon_winsys_bo *descriptor_bo = NULL;
1621 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1622 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1623 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1624 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1625 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1626 struct radeon_winsys_cs *dest_cs[3] = {0};
1627 bool add_tess_rings = false, add_sample_positions = false;
1628 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1629 unsigned max_offchip_buffers;
1630 unsigned hs_offchip_param = 0;
1631 if (!queue->has_tess_rings) {
1632 if (needs_tess_rings)
1633 add_tess_rings = true;
1634 }
1635 if (!queue->has_sample_positions) {
1636 if (needs_sample_positions)
1637 add_sample_positions = true;
1638 }
1639 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1640 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1641 &max_offchip_buffers);
1642 tess_offchip_ring_size = max_offchip_buffers *
1643 queue->device->tess_offchip_block_dw_size * 4;
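/* Illustrative sizes for a 4-SE chip with 8K-dword offchip blocks:
 * tess_factor_ring_size = 32768 * 4 = 128 KiB, and with 508 offchip
 * buffers the offchip ring is 508 * 8192 dwords * 4 bytes ~= 15.9 MiB. */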
1644
1645 if (scratch_size <= queue->scratch_size &&
1646 compute_scratch_size <= queue->compute_scratch_size &&
1647 esgs_ring_size <= queue->esgs_ring_size &&
1648 gsvs_ring_size <= queue->gsvs_ring_size &&
1649 !add_tess_rings && !add_sample_positions &&
1650 queue->initial_preamble_cs) {
1651 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1652 *initial_preamble_cs = queue->initial_preamble_cs;
1653 *continue_preamble_cs = queue->continue_preamble_cs;
1654 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1655 *continue_preamble_cs = NULL;
1656 return VK_SUCCESS;
1657 }
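/* Note (derived from the checks above): per-queue scratch and ring sizes
 * only ever grow, so any request that fits within the cached sizes can
 * reuse the previously built preamble command streams. */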
1658
1659 if (scratch_size > queue->scratch_size) {
1660 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1661 scratch_size,
1662 4096,
1663 RADEON_DOMAIN_VRAM,
1664 RADEON_FLAG_NO_CPU_ACCESS);
1665 if (!scratch_bo)
1666 goto fail;
1667 } else
1668 scratch_bo = queue->scratch_bo;
1669
1670 if (compute_scratch_size > queue->compute_scratch_size) {
1671 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1672 compute_scratch_size,
1673 4096,
1674 RADEON_DOMAIN_VRAM,
1675 RADEON_FLAG_NO_CPU_ACCESS);
1676 if (!compute_scratch_bo)
1677 goto fail;
1678
1679 } else
1680 compute_scratch_bo = queue->compute_scratch_bo;
1681
1682 if (esgs_ring_size > queue->esgs_ring_size) {
1683 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1684 esgs_ring_size,
1685 4096,
1686 RADEON_DOMAIN_VRAM,
1687 RADEON_FLAG_NO_CPU_ACCESS);
1688 if (!esgs_ring_bo)
1689 goto fail;
1690 } else {
1691 esgs_ring_bo = queue->esgs_ring_bo;
1692 esgs_ring_size = queue->esgs_ring_size;
1693 }
1694
1695 if (gsvs_ring_size > queue->gsvs_ring_size) {
1696 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1697 gsvs_ring_size,
1698 4096,
1699 RADEON_DOMAIN_VRAM,
1700 RADEON_FLAG_NO_CPU_ACCESS);
1701 if (!gsvs_ring_bo)
1702 goto fail;
1703 } else {
1704 gsvs_ring_bo = queue->gsvs_ring_bo;
1705 gsvs_ring_size = queue->gsvs_ring_size;
1706 }
1707
1708 if (add_tess_rings) {
1709 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1710 tess_factor_ring_size,
1711 256,
1712 RADEON_DOMAIN_VRAM,
1713 RADEON_FLAG_NO_CPU_ACCESS);
1714 if (!tess_factor_ring_bo)
1715 goto fail;
1716 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1717 tess_offchip_ring_size,
1718 256,
1719 RADEON_DOMAIN_VRAM,
1720 RADEON_FLAG_NO_CPU_ACCESS);
1721 if (!tess_offchip_ring_bo)
1722 goto fail;
1723 } else {
1724 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1725 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1726 }
1727
1728 if (scratch_bo != queue->scratch_bo ||
1729 esgs_ring_bo != queue->esgs_ring_bo ||
1730 gsvs_ring_bo != queue->gsvs_ring_bo ||
1731 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1732 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1733 uint32_t size = 0;
1734 if (gsvs_ring_bo || esgs_ring_bo ||
1735 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1736 size = 112; /* (2 + 2 padding + 6 * 4) dwords = 112 bytes */
1737 if (add_sample_positions)
1738 size += 256; /* 1+2+4+8+16 sample positions * 2 floats * 4 bytes = 248 bytes */
1739 } else if (scratch_bo)
1741 size = 8; /* 2 dword */
1742
1743 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1744 size,
1745 4096,
1746 RADEON_DOMAIN_VRAM,
1747 RADEON_FLAG_CPU_ACCESS);
1748 if (!descriptor_bo)
1749 goto fail;
1750 } else
1751 descriptor_bo = queue->descriptor_bo;
1752
1753 for (int i = 0; i < 3; ++i) {
1754 struct radeon_winsys_cs *cs = NULL;
1755 cs = queue->device->ws->cs_create(queue->device->ws,
1756 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1757 if (!cs)
1758 goto fail;
1759
1760 dest_cs[i] = cs;
1761
1762 if (scratch_bo)
1763 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1764
1765 if (esgs_ring_bo)
1766 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1767
1768 if (gsvs_ring_bo)
1769 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1770
1771 if (tess_factor_ring_bo)
1772 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1773
1774 if (tess_offchip_ring_bo)
1775 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1776
1777 if (descriptor_bo)
1778 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1779
1780 if (descriptor_bo != queue->descriptor_bo) {
1781 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1782
1783 if (scratch_bo) {
1784 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1785 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1786 S_008F04_SWIZZLE_ENABLE(1);
1787 map[0] = scratch_va;
1788 map[1] = rsrc1;
1789 }
1790
1791 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1792 add_sample_positions)
1793 fill_geom_tess_rings(queue, map, add_sample_positions,
1794 esgs_ring_size, esgs_ring_bo,
1795 gsvs_ring_size, gsvs_ring_bo,
1796 tess_factor_ring_size, tess_factor_ring_bo,
1797 tess_offchip_ring_size, tess_offchip_ring_bo);
1798
1799 queue->device->ws->buffer_unmap(descriptor_bo);
1800 }
1801
1802 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1803 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1804 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1805 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1806 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1807 }
1808
1809 if (esgs_ring_bo || gsvs_ring_bo) {
1810 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1811 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1812 radeon_emit(cs, esgs_ring_size >> 8);
1813 radeon_emit(cs, gsvs_ring_size >> 8);
1814 } else {
1815 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1816 radeon_emit(cs, esgs_ring_size >> 8);
1817 radeon_emit(cs, gsvs_ring_size >> 8);
1818 }
1819 }
1820
1821 if (tess_factor_ring_bo) {
1822 uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1823 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1824 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1825 S_030938_SIZE(tess_factor_ring_size / 4));
1826 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1827 tf_va >> 8);
1828 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1829 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1830 tf_va >> 40);
1831 }
1832 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1833 } else {
1834 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1835 S_008988_SIZE(tess_factor_ring_size / 4));
1836 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1837 tf_va >> 8);
1838 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1839 hs_offchip_param);
1840 }
1841 }
1842
1843 if (descriptor_bo) {
1844 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1845 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1846 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1847 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1848 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1849 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1850
1851 uint64_t va = radv_buffer_get_va(descriptor_bo);
1852
1853 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1854 radeon_set_sh_reg_seq(cs, regs[i], 2);
1855 radeon_emit(cs, va);
1856 radeon_emit(cs, va >> 32);
1857 }
1858 }
1859
1860 if (compute_scratch_bo) {
1861 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1862 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1863 S_008F04_SWIZZLE_ENABLE(1);
1864
1865 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1866
1867 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1868 radeon_emit(cs, scratch_va);
1869 radeon_emit(cs, rsrc1);
1870 }
1871
1872 if (i == 0) {
1873 si_cs_emit_cache_flush(cs,
1874 false,
1875 queue->device->physical_device->rad_info.chip_class,
1876 NULL, 0,
1877 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1878 queue->device->physical_device->rad_info.chip_class >= CIK,
1879 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1880 RADV_CMD_FLAG_INV_ICACHE |
1881 RADV_CMD_FLAG_INV_SMEM_L1 |
1882 RADV_CMD_FLAG_INV_VMEM_L1 |
1883 RADV_CMD_FLAG_INV_GLOBAL_L2);
1884 } else if (i == 1) {
1885 si_cs_emit_cache_flush(cs,
1886 false,
1887 queue->device->physical_device->rad_info.chip_class,
1888 NULL, 0,
1889 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1890 queue->device->physical_device->rad_info.chip_class >= CIK,
1891 RADV_CMD_FLAG_INV_ICACHE |
1892 RADV_CMD_FLAG_INV_SMEM_L1 |
1893 RADV_CMD_FLAG_INV_VMEM_L1 |
1894 RADV_CMD_FLAG_INV_GLOBAL_L2);
1895 }
1896
1897 if (!queue->device->ws->cs_finalize(cs))
1898 goto fail;
1899 }
1900
1901 if (queue->initial_full_flush_preamble_cs)
1902 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1903
1904 if (queue->initial_preamble_cs)
1905 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1906
1907 if (queue->continue_preamble_cs)
1908 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1909
1910 queue->initial_full_flush_preamble_cs = dest_cs[0];
1911 queue->initial_preamble_cs = dest_cs[1];
1912 queue->continue_preamble_cs = dest_cs[2];
1913
1914 if (scratch_bo != queue->scratch_bo) {
1915 if (queue->scratch_bo)
1916 queue->device->ws->buffer_destroy(queue->scratch_bo);
1917 queue->scratch_bo = scratch_bo;
1918 queue->scratch_size = scratch_size;
1919 }
1920
1921 if (compute_scratch_bo != queue->compute_scratch_bo) {
1922 if (queue->compute_scratch_bo)
1923 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1924 queue->compute_scratch_bo = compute_scratch_bo;
1925 queue->compute_scratch_size = compute_scratch_size;
1926 }
1927
1928 if (esgs_ring_bo != queue->esgs_ring_bo) {
1929 if (queue->esgs_ring_bo)
1930 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1931 queue->esgs_ring_bo = esgs_ring_bo;
1932 queue->esgs_ring_size = esgs_ring_size;
1933 }
1934
1935 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1936 if (queue->gsvs_ring_bo)
1937 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1938 queue->gsvs_ring_bo = gsvs_ring_bo;
1939 queue->gsvs_ring_size = gsvs_ring_size;
1940 }
1941
1942 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1943 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1944 }
1945
1946 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1947 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1948 queue->has_tess_rings = true;
1949 }
1950
1951 if (descriptor_bo != queue->descriptor_bo) {
1952 if (queue->descriptor_bo)
1953 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1954
1955 queue->descriptor_bo = descriptor_bo;
1956 }
1957
1958 if (add_sample_positions)
1959 queue->has_sample_positions = true;
1960
1961 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1962 *initial_preamble_cs = queue->initial_preamble_cs;
1963 *continue_preamble_cs = queue->continue_preamble_cs;
1964 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1965 *continue_preamble_cs = NULL;
1966 return VK_SUCCESS;
1967 fail:
1968 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1969 if (dest_cs[i])
1970 queue->device->ws->cs_destroy(dest_cs[i]);
1971 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1972 queue->device->ws->buffer_destroy(descriptor_bo);
1973 if (scratch_bo && scratch_bo != queue->scratch_bo)
1974 queue->device->ws->buffer_destroy(scratch_bo);
1975 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1976 queue->device->ws->buffer_destroy(compute_scratch_bo);
1977 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1978 queue->device->ws->buffer_destroy(esgs_ring_bo);
1979 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1980 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1981 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1982 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1983 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1984 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1985 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1986 }
1987
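/* Partition an array of semaphores into kernel syncobjs and legacy
 * winsys semaphores so the winsys can process them separately. With
 * reset_temp set, a temporary (imported) syncobj is consumed: it is
 * queued for the wait and then dropped from the semaphore.
 */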
1988 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1989 int num_sems,
1990 const VkSemaphore *sems,
1991 bool reset_temp)
1992 {
1993 int syncobj_idx = 0, sem_idx = 0;
1994
1995 if (num_sems == 0)
1996 return VK_SUCCESS;
1997 for (uint32_t i = 0; i < num_sems; i++) {
1998 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1999
2000 if (sem->temp_syncobj || sem->syncobj)
2001 counts->syncobj_count++;
2002 else
2003 counts->sem_count++;
2004 }
2005
2006 if (counts->syncobj_count) {
2007 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2008 if (!counts->syncobj)
2009 return VK_ERROR_OUT_OF_HOST_MEMORY;
2010 }
2011
2012 if (counts->sem_count) {
2013 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2014 if (!counts->sem) {
2015 free(counts->syncobj);
2016 return VK_ERROR_OUT_OF_HOST_MEMORY;
2017 }
2018 }
2019
2020 for (uint32_t i = 0; i < num_sems; i++) {
2021 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2022
2023 if (sem->temp_syncobj) {
2024 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2025 if (reset_temp) {
2026 /* after we wait on a temp import - drop it */
2027 sem->temp_syncobj = 0;
2028 }
2029 } else if (sem->syncobj)
2031 counts->syncobj[syncobj_idx++] = sem->syncobj;
2032 else {
2033 assert(sem->sem);
2034 counts->sem[sem_idx++] = sem->sem;
2035 }
2036 }
2037
2038 return VK_SUCCESS;
2039 }
2040
2041 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2042 {
2043 free(sem_info->wait.syncobj);
2044 free(sem_info->wait.sem);
2045 free(sem_info->signal.syncobj);
2046 free(sem_info->signal.sem);
2047 }
2048
2049 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2050 int num_wait_sems,
2051 const VkSemaphore *wait_sems,
2052 int num_signal_sems,
2053 const VkSemaphore *signal_sems)
2054 {
2055 VkResult ret;
2056 memset(sem_info, 0, sizeof(*sem_info));
2057
2058 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
2059 if (ret)
2060 return ret;
2061 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
2062 if (ret)
2063 radv_free_sem_info(sem_info);
2064
2065 /* caller can override these */
2066 sem_info->cs_emit_wait = true;
2067 sem_info->cs_emit_signal = true;
2068 return ret;
2069 }
2070
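/* vkQueueSubmit. Command buffers are submitted in chunks of at most
 * max_cs_submission IBs; with a trace BO active the chunk size is 1 so
 * a hang can be pinned to a single CS. Semaphore waits are emitted only
 * with the first chunk and signals only with the last, and the first
 * chunk of the first submission (or of any submission that waits on
 * semaphore stages) gets the full-flush preamble.
 */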
2071 VkResult radv_QueueSubmit(
2072 VkQueue _queue,
2073 uint32_t submitCount,
2074 const VkSubmitInfo* pSubmits,
2075 VkFence _fence)
2076 {
2077 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2078 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2079 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2080 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2081 int ret;
2082 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2083 uint32_t scratch_size = 0;
2084 uint32_t compute_scratch_size = 0;
2085 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2086 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2087 VkResult result;
2088 bool fence_emitted = false;
2089 bool tess_rings_needed = false;
2090 bool sample_positions_needed = false;
2091
2092 /* Do this first so failing to allocate scratch buffers can't result in
2093 * partially executed submissions. */
2094 for (uint32_t i = 0; i < submitCount; i++) {
2095 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2096 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2097 pSubmits[i].pCommandBuffers[j]);
2098
2099 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2100 compute_scratch_size = MAX2(compute_scratch_size,
2101 cmd_buffer->compute_scratch_size_needed);
2102 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2103 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2104 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2105 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2106 }
2107 }
2108
2109 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2110 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2111 sample_positions_needed, &initial_flush_preamble_cs,
2112 &initial_preamble_cs, &continue_preamble_cs);
2113 if (result != VK_SUCCESS)
2114 return result;
2115
2116 for (uint32_t i = 0; i < submitCount; i++) {
2117 struct radeon_winsys_cs **cs_array;
2118 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2119 bool can_patch = true;
2120 uint32_t advance;
2121 struct radv_winsys_sem_info sem_info;
2122
2123 result = radv_alloc_sem_info(&sem_info,
2124 pSubmits[i].waitSemaphoreCount,
2125 pSubmits[i].pWaitSemaphores,
2126 pSubmits[i].signalSemaphoreCount,
2127 pSubmits[i].pSignalSemaphores);
2128 if (result != VK_SUCCESS)
2129 return result;
2130
2131 if (!pSubmits[i].commandBufferCount) {
2132 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2133 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2134 &queue->device->empty_cs[queue->queue_family_index],
2135 1, NULL, NULL,
2136 &sem_info,
2137 false, base_fence);
2138 if (ret) {
2139 radv_loge("failed to submit CS %d\n", i);
2140 abort();
2141 }
2142 fence_emitted = true;
2143 }
2144 radv_free_sem_info(&sem_info);
2145 continue;
2146 }
2147
2148 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2149 (pSubmits[i].commandBufferCount));
if (!cs_array) {
/* Don't leak the semaphore arrays built above for this submission. */
radv_free_sem_info(&sem_info);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
2150
2151 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2152 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2153 pSubmits[i].pCommandBuffers[j]);
2154 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2155
2156 cs_array[j] = cmd_buffer->cs;
2157 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2158 can_patch = false;
2159 }
2160
2161 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2162 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2163 advance = MIN2(max_cs_submission,
2164 pSubmits[i].commandBufferCount - j);
2165
2166 if (queue->device->trace_bo)
2167 *queue->device->trace_id_ptr = 0;
2168
2169 sem_info.cs_emit_wait = j == 0;
2170 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2171
2172 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2173 advance, initial_preamble, continue_preamble_cs,
2174 &sem_info,
2175 can_patch, base_fence);
2176
2177 if (ret) {
2178 radv_loge("failed to submit CS %d\n", i);
2179 abort();
2180 }
2181 fence_emitted = true;
2182 if (queue->device->trace_bo) {
2183 radv_check_gpu_hangs(queue, cs_array[j]);
2184 }
2185 }
2186
2187 radv_free_sem_info(&sem_info);
2188 free(cs_array);
2189 }
2190
2191 if (fence) {
2192 if (!fence_emitted) {
2193 struct radv_winsys_sem_info sem_info = {0};
2194 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2195 &queue->device->empty_cs[queue->queue_family_index],
2196 1, NULL, NULL, &sem_info,
2197 false, base_fence);
2198 }
2199 fence->submitted = true;
2200 }
2201
2202 return VK_SUCCESS;
2203 }
2204
2205 VkResult radv_QueueWaitIdle(
2206 VkQueue _queue)
2207 {
2208 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2209
2210 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2211 radv_queue_family_to_ring(queue->queue_family_index),
2212 queue->queue_idx);
2213 return VK_SUCCESS;
2214 }
2215
2216 VkResult radv_DeviceWaitIdle(
2217 VkDevice _device)
2218 {
2219 RADV_FROM_HANDLE(radv_device, device, _device);
2220
2221 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2222 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2223 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2224 }
2225 }
2226 return VK_SUCCESS;
2227 }
2228
2229 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2230 VkInstance instance,
2231 const char* pName)
2232 {
2233 return radv_lookup_entrypoint(pName);
2234 }
2235
2236 /* The loader wants us to expose a second GetInstanceProcAddr function
2237 * to work around certain LD_PRELOAD issues seen in apps.
2238 */
2239 PUBLIC
2240 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2241 VkInstance instance,
2242 const char* pName);
2243
2244 PUBLIC
2245 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2246 VkInstance instance,
2247 const char* pName)
2248 {
2249 return radv_GetInstanceProcAddr(instance, pName);
2250 }
2251
2252 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2253 VkDevice device,
2254 const char* pName)
2255 {
2256 return radv_lookup_entrypoint(pName);
2257 }
2258
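/* Export a memory object as a file descriptor. For dedicated image
 * allocations the tiling metadata is attached to the BO first so the
 * importing process can interpret the surface layout.
 */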
2259 bool radv_get_memory_fd(struct radv_device *device,
2260 struct radv_device_memory *memory,
2261 int *pFD)
2262 {
2263 struct radeon_bo_metadata metadata;
2264
2265 if (memory->image) {
2266 radv_init_metadata(device, memory->image, &metadata);
2267 device->ws->buffer_set_metadata(memory->bo, &metadata);
2268 }
2269
2270 return device->ws->buffer_get_fd(device->ws, memory->bo,
2271 pFD);
2272 }
2273
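/* vkAllocateMemory. An import of an opaque FD wraps the existing BO and
 * consumes the descriptor on success; otherwise the memory type index
 * selects the domain (GTT vs. VRAM) and the CPU-access/write-combine
 * flags for a fresh allocation.
 */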
2274 VkResult radv_AllocateMemory(
2275 VkDevice _device,
2276 const VkMemoryAllocateInfo* pAllocateInfo,
2277 const VkAllocationCallbacks* pAllocator,
2278 VkDeviceMemory* pMem)
2279 {
2280 RADV_FROM_HANDLE(radv_device, device, _device);
2281 struct radv_device_memory *mem;
2282 VkResult result;
2283 enum radeon_bo_domain domain;
2284 uint32_t flags = 0;
2285
2286 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2287
2288 if (pAllocateInfo->allocationSize == 0) {
2289 /* Apparently, this is allowed */
2290 *pMem = VK_NULL_HANDLE;
2291 return VK_SUCCESS;
2292 }
2293
2294 const VkImportMemoryFdInfoKHR *import_info =
2295 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2296 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2297 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2298
2299 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2300 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2301 if (mem == NULL)
2302 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2303
2304 if (dedicate_info) {
2305 mem->image = radv_image_from_handle(dedicate_info->image);
2306 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2307 } else {
2308 mem->image = NULL;
2309 mem->buffer = NULL;
2310 }
2311
2312 if (import_info) {
2313 assert(import_info->handleType ==
2314 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2315 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2316 NULL, NULL);
2317 if (!mem->bo) {
2318 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2319 goto fail;
2320 } else {
2321 close(import_info->fd);
2322 goto out_success;
2323 }
2324 }
2325
2326 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2327 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2328 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2329 domain = RADEON_DOMAIN_GTT;
2330 else
2331 domain = RADEON_DOMAIN_VRAM;
2332
2333 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2334 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2335 else
2336 flags |= RADEON_FLAG_CPU_ACCESS;
2337
2338 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2339 flags |= RADEON_FLAG_GTT_WC;
2340
2341 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2342 domain, flags);
2343
2344 if (!mem->bo) {
2345 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2346 goto fail;
2347 }
2348 mem->type_index = pAllocateInfo->memoryTypeIndex;
2349 out_success:
2350 *pMem = radv_device_memory_to_handle(mem);
2351
2352 return VK_SUCCESS;
2353
2354 fail:
2355 vk_free2(&device->alloc, pAllocator, mem);
2356
2357 return result;
2358 }
2359
2360 void radv_FreeMemory(
2361 VkDevice _device,
2362 VkDeviceMemory _mem,
2363 const VkAllocationCallbacks* pAllocator)
2364 {
2365 RADV_FROM_HANDLE(radv_device, device, _device);
2366 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2367
2368 if (mem == NULL)
2369 return;
2370
2371 device->ws->buffer_destroy(mem->bo);
2372 mem->bo = NULL;
2373
2374 vk_free2(&device->alloc, pAllocator, mem);
2375 }
2376
2377 VkResult radv_MapMemory(
2378 VkDevice _device,
2379 VkDeviceMemory _memory,
2380 VkDeviceSize offset,
2381 VkDeviceSize size,
2382 VkMemoryMapFlags flags,
2383 void** ppData)
2384 {
2385 RADV_FROM_HANDLE(radv_device, device, _device);
2386 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2387
2388 if (mem == NULL) {
2389 *ppData = NULL;
2390 return VK_SUCCESS;
2391 }
2392
2393 *ppData = device->ws->buffer_map(mem->bo);
2394 if (*ppData) {
2395 *ppData += offset;
2396 return VK_SUCCESS;
2397 }
2398
2399 return VK_ERROR_MEMORY_MAP_FAILED;
2400 }
2401
2402 void radv_UnmapMemory(
2403 VkDevice _device,
2404 VkDeviceMemory _memory)
2405 {
2406 RADV_FROM_HANDLE(radv_device, device, _device);
2407 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2408
2409 if (mem == NULL)
2410 return;
2411
2412 device->ws->buffer_unmap(mem->bo);
2413 }
2414
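/* Flush/invalidate of mapped ranges are intentional no-ops: all
 * host-visible memory types radv advertises are also host-coherent.
 */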
2415 VkResult radv_FlushMappedMemoryRanges(
2416 VkDevice _device,
2417 uint32_t memoryRangeCount,
2418 const VkMappedMemoryRange* pMemoryRanges)
2419 {
2420 return VK_SUCCESS;
2421 }
2422
2423 VkResult radv_InvalidateMappedMemoryRanges(
2424 VkDevice _device,
2425 uint32_t memoryRangeCount,
2426 const VkMappedMemoryRange* pMemoryRanges)
2427 {
2428 return VK_SUCCESS;
2429 }
2430
2431 void radv_GetBufferMemoryRequirements(
2432 VkDevice device,
2433 VkBuffer _buffer,
2434 VkMemoryRequirements* pMemoryRequirements)
2435 {
2436 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2437
2438 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2439
2440 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2441 pMemoryRequirements->alignment = 4096;
2442 else
2443 pMemoryRequirements->alignment = 16;
2444
2445 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2446 }
2447
2448 void radv_GetBufferMemoryRequirements2KHR(
2449 VkDevice device,
2450 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2451 VkMemoryRequirements2KHR* pMemoryRequirements)
2452 {
2453 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2454 &pMemoryRequirements->memoryRequirements);
2455
2456 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2457 switch (ext->sType) {
2458 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2459 VkMemoryDedicatedRequirementsKHR *req =
2460 (VkMemoryDedicatedRequirementsKHR *) ext;
2461 req->requiresDedicatedAllocation = false;
2462 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2463 break;
2464 }
2465 default:
2466 break;
2467 }
2468 }
2469 }
2470
2471 void radv_GetImageMemoryRequirements(
2472 VkDevice device,
2473 VkImage _image,
2474 VkMemoryRequirements* pMemoryRequirements)
2475 {
2476 RADV_FROM_HANDLE(radv_image, image, _image);
2477
2478 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2479
2480 pMemoryRequirements->size = image->size;
2481 pMemoryRequirements->alignment = image->alignment;
2482 }
2483
2484 void radv_GetImageMemoryRequirements2KHR(
2485 VkDevice device,
2486 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2487 VkMemoryRequirements2KHR* pMemoryRequirements)
2488 {
2489 radv_GetImageMemoryRequirements(device, pInfo->image,
2490 &pMemoryRequirements->memoryRequirements);
2491
2492 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2493
2494 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2495 switch (ext->sType) {
2496 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2497 VkMemoryDedicatedRequirementsKHR *req =
2498 (VkMemoryDedicatedRequirementsKHR *) ext;
2499 req->requiresDedicatedAllocation = image->shareable;
2500 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2501 break;
2502 }
2503 default:
2504 break;
2505 }
2506 }
2507 }
2508
2509 void radv_GetImageSparseMemoryRequirements(
2510 VkDevice device,
2511 VkImage image,
2512 uint32_t* pSparseMemoryRequirementCount,
2513 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2514 {
2515 stub();
2516 }
2517
2518 void radv_GetImageSparseMemoryRequirements2KHR(
2519 VkDevice device,
2520 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2521 uint32_t* pSparseMemoryRequirementCount,
2522 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2523 {
2524 stub();
2525 }
2526
2527 void radv_GetDeviceMemoryCommitment(
2528 VkDevice device,
2529 VkDeviceMemory memory,
2530 VkDeviceSize* pCommittedMemoryInBytes)
2531 {
2532 *pCommittedMemoryInBytes = 0;
2533 }
2534
2535 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2536 uint32_t bindInfoCount,
2537 const VkBindBufferMemoryInfoKHR *pBindInfos)
2538 {
2539 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2540 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2541 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2542
2543 if (mem) {
2544 buffer->bo = mem->bo;
2545 buffer->offset = pBindInfos[i].memoryOffset;
2546 } else {
2547 buffer->bo = NULL;
2548 }
2549 }
2550 return VK_SUCCESS;
2551 }
2552
2553 VkResult radv_BindBufferMemory(
2554 VkDevice device,
2555 VkBuffer buffer,
2556 VkDeviceMemory memory,
2557 VkDeviceSize memoryOffset)
2558 {
2559 const VkBindBufferMemoryInfoKHR info = {
2560 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2561 .buffer = buffer,
2562 .memory = memory,
2563 .memoryOffset = memoryOffset
2564 };
2565
2566 return radv_BindBufferMemory2KHR(device, 1, &info);
2567 }
2568
2569 VkResult radv_BindImageMemory2KHR(VkDevice device,
2570 uint32_t bindInfoCount,
2571 const VkBindImageMemoryInfoKHR *pBindInfos)
2572 {
2573 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2574 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2575 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2576
2577 if (mem) {
2578 image->bo = mem->bo;
2579 image->offset = pBindInfos[i].memoryOffset;
2580 } else {
2581 image->bo = NULL;
2582 image->offset = 0;
2583 }
2584 }
2585 return VK_SUCCESS;
2586 }
2587
2588
2589 VkResult radv_BindImageMemory(
2590 VkDevice device,
2591 VkImage image,
2592 VkDeviceMemory memory,
2593 VkDeviceSize memoryOffset)
2594 {
2595 const VkBindImageMemoryInfoKHR info = {
2596 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2597 .image = image,
2598 .memory = memory,
2599 .memoryOffset = memoryOffset
2600 };
2601
2602 return radv_BindImageMemory2KHR(device, 1, &info);
2603 }
2604
2605
2606 static void
2607 radv_sparse_buffer_bind_memory(struct radv_device *device,
2608 const VkSparseBufferMemoryBindInfo *bind)
2609 {
2610 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2611
2612 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2613 struct radv_device_memory *mem = NULL;
2614
2615 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2616 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2617
2618 device->ws->buffer_virtual_bind(buffer->bo,
2619 bind->pBinds[i].resourceOffset,
2620 bind->pBinds[i].size,
2621 mem ? mem->bo : NULL,
2622 bind->pBinds[i].memoryOffset);
2623 }
2624 }
2625
2626 static void
2627 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2628 const VkSparseImageOpaqueMemoryBindInfo *bind)
2629 {
2630 RADV_FROM_HANDLE(radv_image, image, bind->image);
2631
2632 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2633 struct radv_device_memory *mem = NULL;
2634
2635 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2636 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2637
2638 device->ws->buffer_virtual_bind(image->bo,
2639 bind->pBinds[i].resourceOffset,
2640 bind->pBinds[i].size,
2641 mem ? mem->bo : NULL,
2642 bind->pBinds[i].memoryOffset);
2643 }
2644 }
2645
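/* vkQueueBindSparse. Buffer and opaque image binds are applied
 * immediately through the winsys virtual-bind hook; a CS is submitted
 * only when semaphores have to be waited on or signalled.
 */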
2646 VkResult radv_QueueBindSparse(
2647 VkQueue _queue,
2648 uint32_t bindInfoCount,
2649 const VkBindSparseInfo* pBindInfo,
2650 VkFence _fence)
2651 {
2652 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2653 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2654 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2655 bool fence_emitted = false;
2656
2657 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2658 struct radv_winsys_sem_info sem_info;
2659 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2660 radv_sparse_buffer_bind_memory(queue->device,
2661 pBindInfo[i].pBufferBinds + j);
2662 }
2663
2664 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2665 radv_sparse_image_opaque_bind_memory(queue->device,
2666 pBindInfo[i].pImageOpaqueBinds + j);
2667 }
2668
2669 VkResult result;
2670 result = radv_alloc_sem_info(&sem_info,
2671 pBindInfo[i].waitSemaphoreCount,
2672 pBindInfo[i].pWaitSemaphores,
2673 pBindInfo[i].signalSemaphoreCount,
2674 pBindInfo[i].pSignalSemaphores);
2675 if (result != VK_SUCCESS)
2676 return result;
2677
2678 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2679 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2680 &queue->device->empty_cs[queue->queue_family_index],
2681 1, NULL, NULL,
2682 &sem_info,
2683 false, base_fence);
2684 fence_emitted = true;
2685 if (fence)
2686 fence->submitted = true;
2687 }
2688
2689 radv_free_sem_info(&sem_info);
2690
2691 }
2692
2693 if (fence && !fence_emitted) {
2694 fence->signalled = true;
2695 }
2696
2697 return VK_SUCCESS;
2698 }
2699
2700 VkResult radv_CreateFence(
2701 VkDevice _device,
2702 const VkFenceCreateInfo* pCreateInfo,
2703 const VkAllocationCallbacks* pAllocator,
2704 VkFence* pFence)
2705 {
2706 RADV_FROM_HANDLE(radv_device, device, _device);
2707 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2708 sizeof(*fence), 8,
2709 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2710
2711 if (!fence)
2712 return VK_ERROR_OUT_OF_HOST_MEMORY;
2713
2714 memset(fence, 0, sizeof(*fence));
2715 fence->submitted = false;
2716 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2717 fence->fence = device->ws->create_fence();
2718 if (!fence->fence) {
2719 vk_free2(&device->alloc, pAllocator, fence);
2720 return VK_ERROR_OUT_OF_HOST_MEMORY;
2721 }
2722
2723 *pFence = radv_fence_to_handle(fence);
2724
2725 return VK_SUCCESS;
2726 }
2727
2728 void radv_DestroyFence(
2729 VkDevice _device,
2730 VkFence _fence,
2731 const VkAllocationCallbacks* pAllocator)
2732 {
2733 RADV_FROM_HANDLE(radv_device, device, _device);
2734 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2735
2736 if (!fence)
2737 return;
2738 device->ws->destroy_fence(fence->fence);
2739 vk_free2(&device->alloc, pAllocator, fence);
2740 }
2741
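/* Turn a relative timeout into an absolute CLOCK_MONOTONIC deadline,
 * clamping first so the addition cannot overflow UINT64_MAX.
 */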
2742 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2743 {
2744 uint64_t current_time;
2745 struct timespec tv;
2746
2747 clock_gettime(CLOCK_MONOTONIC, &tv);
2748 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2749
2750 timeout = MIN2(UINT64_MAX - current_time, timeout);
2751
2752 return current_time + timeout;
2753 }
2754
2755 VkResult radv_WaitForFences(
2756 VkDevice _device,
2757 uint32_t fenceCount,
2758 const VkFence* pFences,
2759 VkBool32 waitAll,
2760 uint64_t timeout)
2761 {
2762 RADV_FROM_HANDLE(radv_device, device, _device);
2763 timeout = radv_get_absolute_timeout(timeout);
2764
2765 if (!waitAll && fenceCount > 1) {
2766 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2767 }
2768
2769 for (uint32_t i = 0; i < fenceCount; ++i) {
2770 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2771 bool expired = false;
2772
2773 if (fence->signalled)
2774 continue;
2775
2776 if (!fence->submitted)
2777 return VK_TIMEOUT;
2778
2779 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2780 if (!expired)
2781 return VK_TIMEOUT;
2782
2783 fence->signalled = true;
2784 }
2785
2786 return VK_SUCCESS;
2787 }
2788
2789 VkResult radv_ResetFences(VkDevice device,
2790 uint32_t fenceCount,
2791 const VkFence *pFences)
2792 {
2793 for (unsigned i = 0; i < fenceCount; ++i) {
2794 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2795 fence->submitted = fence->signalled = false;
2796 }
2797
2798 return VK_SUCCESS;
2799 }
2800
2801 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2802 {
2803 RADV_FROM_HANDLE(radv_device, device, _device);
2804 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2805
2806 if (fence->signalled)
2807 return VK_SUCCESS;
2808 if (!fence->submitted)
2809 return VK_NOT_READY;
2810
2811 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2812 return VK_NOT_READY;
2813
2814 return VK_SUCCESS;
2815 }
2816
2817
2818 // Queue semaphore functions
2819
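/* A semaphore is backed either by a kernel DRM syncobj (when it may be
 * exported) or by a legacy winsys semaphore.
 */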
2820 VkResult radv_CreateSemaphore(
2821 VkDevice _device,
2822 const VkSemaphoreCreateInfo* pCreateInfo,
2823 const VkAllocationCallbacks* pAllocator,
2824 VkSemaphore* pSemaphore)
2825 {
2826 RADV_FROM_HANDLE(radv_device, device, _device);
2827 const VkExportSemaphoreCreateInfoKHR *export =
2828 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
2829 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
2830 export ? export->handleTypes : 0;
2831
2832 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
2833 sizeof(*sem), 8,
2834 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2835 if (!sem)
2836 return VK_ERROR_OUT_OF_HOST_MEMORY;
2837
2838 sem->temp_syncobj = 0;
2839 /* create a syncobject if we are going to export this semaphore */
2840 if (handleTypes) {
2841 assert(device->physical_device->rad_info.has_syncobj);
2842 assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2843 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
2844 if (ret) {
2845 vk_free2(&device->alloc, pAllocator, sem);
2846 return VK_ERROR_OUT_OF_HOST_MEMORY;
2847 }
2848 sem->sem = NULL;
2849 } else {
2850 sem->sem = device->ws->create_sem(device->ws);
2851 if (!sem->sem) {
2852 vk_free2(&device->alloc, pAllocator, sem);
2853 return VK_ERROR_OUT_OF_HOST_MEMORY;
2854 }
2855 sem->syncobj = 0;
2856 }
2857
2858 *pSemaphore = radv_semaphore_to_handle(sem);
2859 return VK_SUCCESS;
2860 }
2861
2862 void radv_DestroySemaphore(
2863 VkDevice _device,
2864 VkSemaphore _semaphore,
2865 const VkAllocationCallbacks* pAllocator)
2866 {
2867 RADV_FROM_HANDLE(radv_device, device, _device);
2868 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
2869 if (!_semaphore)
2870 return;
2871
/* A pending temporary import owns a syncobj of its own; free it too. */
if (sem->temp_syncobj)
device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2872 if (sem->syncobj)
2873 device->ws->destroy_syncobj(device->ws, sem->syncobj);
2874 else
2875 device->ws->destroy_sem(sem->sem);
2876 vk_free2(&device->alloc, pAllocator, sem);
2877 }
2878
2879 VkResult radv_CreateEvent(
2880 VkDevice _device,
2881 const VkEventCreateInfo* pCreateInfo,
2882 const VkAllocationCallbacks* pAllocator,
2883 VkEvent* pEvent)
2884 {
2885 RADV_FROM_HANDLE(radv_device, device, _device);
2886 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2887 sizeof(*event), 8,
2888 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2889
2890 if (!event)
2891 return VK_ERROR_OUT_OF_HOST_MEMORY;
2892
2893 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2894 RADEON_DOMAIN_GTT,
2895 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS);
2896 if (!event->bo) {
2897 vk_free2(&device->alloc, pAllocator, event);
2898 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2899 }
2900
2901 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2902
2903 *pEvent = radv_event_to_handle(event);
2904
2905 return VK_SUCCESS;
2906 }
2907
2908 void radv_DestroyEvent(
2909 VkDevice _device,
2910 VkEvent _event,
2911 const VkAllocationCallbacks* pAllocator)
2912 {
2913 RADV_FROM_HANDLE(radv_device, device, _device);
2914 RADV_FROM_HANDLE(radv_event, event, _event);
2915
2916 if (!event)
2917 return;
2918 device->ws->buffer_destroy(event->bo);
2919 vk_free2(&device->alloc, pAllocator, event);
2920 }
2921
2922 VkResult radv_GetEventStatus(
2923 VkDevice _device,
2924 VkEvent _event)
2925 {
2926 RADV_FROM_HANDLE(radv_event, event, _event);
2927
2928 if (*event->map == 1)
2929 return VK_EVENT_SET;
2930 return VK_EVENT_RESET;
2931 }
2932
2933 VkResult radv_SetEvent(
2934 VkDevice _device,
2935 VkEvent _event)
2936 {
2937 RADV_FROM_HANDLE(radv_event, event, _event);
2938 *event->map = 1;
2939
2940 return VK_SUCCESS;
2941 }
2942
2943 VkResult radv_ResetEvent(
2944 VkDevice _device,
2945 VkEvent _event)
2946 {
2947 RADV_FROM_HANDLE(radv_event, event, _event);
2948 *event->map = 0;
2949
2950 return VK_SUCCESS;
2951 }
2952
2953 VkResult radv_CreateBuffer(
2954 VkDevice _device,
2955 const VkBufferCreateInfo* pCreateInfo,
2956 const VkAllocationCallbacks* pAllocator,
2957 VkBuffer* pBuffer)
2958 {
2959 RADV_FROM_HANDLE(radv_device, device, _device);
2960 struct radv_buffer *buffer;
2961
2962 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2963
2964 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2965 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2966 if (buffer == NULL)
2967 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2968
2969 buffer->size = pCreateInfo->size;
2970 buffer->usage = pCreateInfo->usage;
2971 buffer->bo = NULL;
2972 buffer->offset = 0;
2973 buffer->flags = pCreateInfo->flags;
2974
2975 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2976 buffer->bo = device->ws->buffer_create(device->ws,
2977 align64(buffer->size, 4096),
2978 4096, 0, RADEON_FLAG_VIRTUAL);
2979 if (!buffer->bo) {
2980 vk_free2(&device->alloc, pAllocator, buffer);
2981 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2982 }
2983 }
2984
2985 *pBuffer = radv_buffer_to_handle(buffer);
2986
2987 return VK_SUCCESS;
2988 }
2989
2990 void radv_DestroyBuffer(
2991 VkDevice _device,
2992 VkBuffer _buffer,
2993 const VkAllocationCallbacks* pAllocator)
2994 {
2995 RADV_FROM_HANDLE(radv_device, device, _device);
2996 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2997
2998 if (!buffer)
2999 return;
3000
3001 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3002 device->ws->buffer_destroy(buffer->bo);
3003
3004 vk_free2(&device->alloc, pAllocator, buffer);
3005 }
3006
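/* Look up the legacy (pre-GFX9) tiling index for a mip level, using the
 * stencil table when the stencil aspect is requested.
 */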
3007 static inline unsigned
3008 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3009 {
3010 if (stencil)
3011 return image->surface.u.legacy.stencil_tiling_index[level];
3012 else
3013 return image->surface.u.legacy.tiling_index[level];
3014 }
3015
3016 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
3017 {
3018 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
3019 }
3020
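/* Fill the CB_COLOR* register values for a color attachment. Addressing
 * differs by generation: GFX9 uses swizzle modes plus an epitch, older
 * chips use tile-mode indices with pitch/slice tile maxima. CMASK, FMASK
 * and DCC state is folded in when the image carries that metadata.
 */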
3021 static void
3022 radv_initialise_color_surface(struct radv_device *device,
3023 struct radv_color_buffer_info *cb,
3024 struct radv_image_view *iview)
3025 {
3026 const struct vk_format_description *desc;
3027 unsigned ntype, format, swap, endian;
3028 unsigned blend_clamp = 0, blend_bypass = 0;
3029 uint64_t va;
3030 const struct radeon_surf *surf = &iview->image->surface;
3031
3032 desc = vk_format_description(iview->vk_format);
3033
3034 memset(cb, 0, sizeof(*cb));
3035
3036 /* Intensity is implemented as Red, so treat it that way. */
3037 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3038
3039 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3040
3041 cb->cb_color_base = va >> 8;
3042
3043 if (device->physical_device->rad_info.chip_class >= GFX9) {
3044 struct gfx9_surf_meta_flags meta;
3045 if (iview->image->dcc_offset)
3046 meta = iview->image->surface.u.gfx9.dcc;
3047 else
3048 meta = iview->image->surface.u.gfx9.cmask;
3049
3050 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3051 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3052 S_028C74_RB_ALIGNED(meta.rb_aligned) |
3053 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3054
3055 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3056 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3057 } else {
3058 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3059 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3060
3061 cb->cb_color_base += level_info->offset >> 8;
3062 if (level_info->mode == RADEON_SURF_MODE_2D)
3063 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3064
3065 pitch_tile_max = level_info->nblk_x / 8 - 1;
3066 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3067 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3068
3069 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3070 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3071 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3072
3073 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3074 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
3075
3076 if (iview->image->fmask.size) {
3077 if (device->physical_device->rad_info.chip_class >= CIK)
3078 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3079 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3080 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3081 } else {
3082 /* This must be set for fast clear to work without FMASK. */
3083 if (device->physical_device->rad_info.chip_class >= CIK)
3084 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3085 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3086 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3087 }
3088 }
3089
3090 /* CMASK variables */
3091 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3092 va += iview->image->cmask.offset;
3093 cb->cb_color_cmask = va >> 8;
3094
3095 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3096 va += iview->image->dcc_offset;
3097 cb->cb_dcc_base = va >> 8;
3098 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3099
3100 uint32_t max_slice = radv_surface_layer_count(iview);
3101 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3102 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
3103
3104 if (iview->image->info.samples > 1) {
3105 unsigned log_samples = util_logbase2(iview->image->info.samples);
3106
3107 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3108 S_028C74_NUM_FRAGMENTS(log_samples);
3109 }
3110
3111 if (iview->image->fmask.size) {
3112 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3113 cb->cb_color_fmask = va >> 8;
3114 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3115 } else {
3116 cb->cb_color_fmask = cb->cb_color_base;
3117 }
3118
3119 ntype = radv_translate_color_numformat(iview->vk_format,
3120 desc,
3121 vk_format_get_first_non_void_channel(iview->vk_format));
3122 format = radv_translate_colorformat(iview->vk_format);
3123 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3124 radv_finishme("Illegal color\n");
3125 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3126 endian = radv_colorformat_endian_swap(format);
3127
3128 /* blend clamp should be set for all NORM/SRGB types */
3129 if (ntype == V_028C70_NUMBER_UNORM ||
3130 ntype == V_028C70_NUMBER_SNORM ||
3131 ntype == V_028C70_NUMBER_SRGB)
3132 blend_clamp = 1;
3133
3134 /* set blend bypass according to docs if SINT/UINT or
3135 8/24 COLOR variants */
3136 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3137 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3138 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3139 blend_clamp = 0;
3140 blend_bypass = 1;
3141 }
3142 #if 0
3143 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3144 (format == V_028C70_COLOR_8 ||
3145 format == V_028C70_COLOR_8_8 ||
3146 format == V_028C70_COLOR_8_8_8_8))
3147 ->color_is_int8 = true;
3148 #endif
3149 cb->cb_color_info = S_028C70_FORMAT(format) |
3150 S_028C70_COMP_SWAP(swap) |
3151 S_028C70_BLEND_CLAMP(blend_clamp) |
3152 S_028C70_BLEND_BYPASS(blend_bypass) |
3153 S_028C70_SIMPLE_FLOAT(1) |
3154 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3155 ntype != V_028C70_NUMBER_SNORM &&
3156 ntype != V_028C70_NUMBER_SRGB &&
3157 format != V_028C70_COLOR_8_24 &&
3158 format != V_028C70_COLOR_24_8) |
3159 S_028C70_NUMBER_TYPE(ntype) |
3160 S_028C70_ENDIAN(endian);
3161 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3162 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3163 if (device->physical_device->rad_info.chip_class == SI) {
3164 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3165 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3166 }
3167 }
3168
3169 if (iview->image->cmask.size &&
3170 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3171 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3172
3173 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3174 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3175
3176 if (device->physical_device->rad_info.chip_class >= VI) {
3177 unsigned max_uncompressed_block_size = 2;
3178 if (iview->image->info.samples > 1) {
3179 if (iview->image->surface.bpe == 1)
3180 max_uncompressed_block_size = 0;
3181 else if (iview->image->surface.bpe == 2)
3182 max_uncompressed_block_size = 1;
3183 }
3184
3185 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3186 S_028C78_INDEPENDENT_64B_BLOCKS(1);
3187 }
3188
3189 /* This must be set for fast clear to work without FMASK. */
3190 if (!iview->image->fmask.size &&
3191 device->physical_device->rad_info.chip_class == SI) {
3192 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3193 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3194 }
3195
3196 if (device->physical_device->rad_info.chip_class >= GFX9) {
3197 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3198 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3199
3200 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3201 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3202 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3203 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3204 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3205 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3206
3207 cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3208
3209 }
3210 }
3211
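/* Fill the DB_* register values for a depth/stencil attachment: the
 * polygon-offset scale implied by the depth format, the per-generation
 * tiling state, and the HTILE setup (TC-compatible where available).
 */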
3212 static void
3213 radv_initialise_ds_surface(struct radv_device *device,
3214 struct radv_ds_buffer_info *ds,
3215 struct radv_image_view *iview)
3216 {
3217 unsigned level = iview->base_mip;
3218 unsigned format, stencil_format;
3219 uint64_t va, s_offs, z_offs;
3220 bool stencil_only = false;
3221 memset(ds, 0, sizeof(*ds));
3222 switch (iview->image->vk_format) {
3223 case VK_FORMAT_D24_UNORM_S8_UINT:
3224 case VK_FORMAT_X8_D24_UNORM_PACK32:
3225 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3226 ds->offset_scale = 2.0f;
3227 break;
3228 case VK_FORMAT_D16_UNORM:
3229 case VK_FORMAT_D16_UNORM_S8_UINT:
3230 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3231 ds->offset_scale = 4.0f;
3232 break;
3233 case VK_FORMAT_D32_SFLOAT:
3234 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3235 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3236 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3237 ds->offset_scale = 1.0f;
3238 break;
3239 case VK_FORMAT_S8_UINT:
3240 stencil_only = true;
3241 break;
3242 default:
3243 break;
3244 }
3245
3246 format = radv_translate_dbformat(iview->image->vk_format);
3247 stencil_format = iview->image->surface.has_stencil ?
3248 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3249
3250 uint32_t max_slice = radv_surface_layer_count(iview);
3251 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3252 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
3253
3254 ds->db_htile_data_base = 0;
3255 ds->db_htile_surface = 0;
3256
3257 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3258 s_offs = z_offs = va;
3259
3260 if (device->physical_device->rad_info.chip_class >= GFX9) {
3261 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3262 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3263
3264 ds->db_z_info = S_028038_FORMAT(format) |
3265 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3266 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3267 S_028038_MAXMIP(iview->image->info.levels - 1);
3268 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3269 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3270
3271 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3272 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3273 ds->db_depth_view |= S_028008_MIPID(level);
3274
3275 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3276 S_02801C_Y_MAX(iview->image->info.height - 1);
3277
3278 if (radv_htile_enabled(iview->image, level)) {
3279 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3280
3281 if (iview->image->tc_compatible_htile) {
3282 unsigned max_zplanes = 4;
3283
3284 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3285 iview->image->info.samples > 1)
3286 max_zplanes = 2;
3287
3288 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3289 S_028038_ITERATE_FLUSH(1);
3290 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3291 }
3292
3293 if (!iview->image->surface.has_stencil)
3294 /* Use all of the htile_buffer for depth if there's no stencil. */
3295 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3296 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3297 iview->image->htile_offset;
3298 ds->db_htile_data_base = va >> 8;
3299 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3300 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3301 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3302 }
3303 } else {
3304 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3305
3306 if (stencil_only)
3307 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3308
3309 z_offs += iview->image->surface.u.legacy.level[level].offset;
3310 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3311
3312 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3313 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3314 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3315
3316 if (iview->image->info.samples > 1)
3317 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3318
3319 if (device->physical_device->rad_info.chip_class >= CIK) {
3320 struct radeon_info *info = &device->physical_device->rad_info;
3321 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3322 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3323 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3324 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3325 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3326 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3327
3328 if (stencil_only)
3329 tile_mode = stencil_tile_mode;
3330
3331 ds->db_depth_info |=
3332 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3333 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3334 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3335 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3336 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3337 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3338 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3339 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3340 } else {
3341 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3342 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3343 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3344 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3345 if (stencil_only)
3346 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3347 }
3348
3349 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3350 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3351 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3352
3353 if (radv_htile_enabled(iview->image, level)) {
3354 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3355
3356 if (!iview->image->surface.has_stencil &&
3357 !iview->image->tc_compatible_htile)
3358 /* Use all of the htile_buffer for depth if there's no stencil. */
3359 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3360
3361 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3362 iview->image->htile_offset;
3363 ds->db_htile_data_base = va >> 8;
3364 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3365
3366 if (iview->image->tc_compatible_htile) {
3367 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3368
3369 if (iview->image->info.samples <= 1)
3370 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3371 else if (iview->image->info.samples <= 4)
3372 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3373 else
3374 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3375 }
3376 }
3377 }
3378
3379 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3380 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3381 }
3382
3383 VkResult radv_CreateFramebuffer(
3384 VkDevice _device,
3385 const VkFramebufferCreateInfo* pCreateInfo,
3386 const VkAllocationCallbacks* pAllocator,
3387 VkFramebuffer* pFramebuffer)
3388 {
3389 RADV_FROM_HANDLE(radv_device, device, _device);
3390 struct radv_framebuffer *framebuffer;
3391
3392 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3393
3394 size_t size = sizeof(*framebuffer) +
3395 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3396 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3397 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3398 if (framebuffer == NULL)
3399 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3400
3401 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3402 framebuffer->width = pCreateInfo->width;
3403 framebuffer->height = pCreateInfo->height;
3404 framebuffer->layers = pCreateInfo->layers;
3405 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3406 VkImageView _iview = pCreateInfo->pAttachments[i];
3407 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3408 framebuffer->attachments[i].attachment = iview;
3409 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3410 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3411 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3412 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3413 }
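/* Clamp the framebuffer dimensions to the attachment extents so that
 * rendering never addresses pixels outside the smallest attachment. */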
3414 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3415 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3416 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3417 }
3418
3419 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3420 return VK_SUCCESS;
3421 }
3422
3423 void radv_DestroyFramebuffer(
3424 VkDevice _device,
3425 VkFramebuffer _fb,
3426 const VkAllocationCallbacks* pAllocator)
3427 {
3428 RADV_FROM_HANDLE(radv_device, device, _device);
3429 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3430
3431 if (!fb)
3432 return;
3433 vk_free2(&device->alloc, pAllocator, fb);
3434 }
3435
3436 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3437 {
3438 switch (address_mode) {
3439 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3440 return V_008F30_SQ_TEX_WRAP;
3441 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3442 return V_008F30_SQ_TEX_MIRROR;
3443 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3444 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3445 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3446 return V_008F30_SQ_TEX_CLAMP_BORDER;
3447 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3448 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3449 default:
3450 unreachable("illegal tex wrap mode");
3451 break;
3452 }
3453 }
3454
3455 static unsigned
3456 radv_tex_compare(VkCompareOp op)
3457 {
3458 switch (op) {
3459 case VK_COMPARE_OP_NEVER:
3460 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3461 case VK_COMPARE_OP_LESS:
3462 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3463 case VK_COMPARE_OP_EQUAL:
3464 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3465 case VK_COMPARE_OP_LESS_OR_EQUAL:
3466 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3467 case VK_COMPARE_OP_GREATER:
3468 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3469 case VK_COMPARE_OP_NOT_EQUAL:
3470 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3471 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3472 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3473 case VK_COMPARE_OP_ALWAYS:
3474 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3475 default:
3476 unreachable("illegal compare mode");
3477 break;
3478 }
3479 }
3480
3481 static unsigned
3482 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3483 {
3484 switch (filter) {
3485 case VK_FILTER_NEAREST:
3486 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3487 V_008F38_SQ_TEX_XY_FILTER_POINT);
3488 case VK_FILTER_LINEAR:
3489 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3490 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3491 case VK_FILTER_CUBIC_IMG:
3492 default:
3493 fprintf(stderr, "illegal texture filter\n");
3494 return 0;
3495 }
3496 }
3497
3498 static unsigned
3499 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3500 {
3501 switch (mode) {
3502 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3503 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3504 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3505 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3506 default:
3507 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3508 }
3509 }
3510
3511 static unsigned
3512 radv_tex_bordercolor(VkBorderColor bcolor)
3513 {
3514 switch (bcolor) {
3515 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3516 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3517 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3518 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3519 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3520 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3521 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3522 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3523 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3524 default:
3525 break;
3526 }
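/* Anything else (e.g. a custom border color) falls through to 0, which
 * matches the transparent-black index. */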
3527 return 0;
3528 }
3529
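/* Map VkSamplerCreateInfo::maxAnisotropy onto the hardware's log2-encoded
 * anisotropy ratio: 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4. */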
3530 static unsigned
3531 radv_tex_aniso_filter(unsigned filter)
3532 {
3533 if (filter < 2)
3534 return 0;
3535 if (filter < 4)
3536 return 1;
3537 if (filter < 8)
3538 return 2;
3539 if (filter < 16)
3540 return 3;
3541 return 4;
3542 }
3543
3544 static void
3545 radv_init_sampler(struct radv_device *device,
3546 struct radv_sampler *sampler,
3547 const VkSamplerCreateInfo *pCreateInfo)
3548 {
3549 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3550 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3551 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3552 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3553
3554 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3555 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3556 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3557 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3558 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3559 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3560 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3561 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3562 S_008F30_DISABLE_CUBE_WRAP(0) |
3563 S_008F30_COMPAT_MODE(is_vi));
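/* The min/max LOD fields appear to be unsigned 4.8 fixed point (hence
 * S_FIXED(x, 8) with the value clamped to [0, 15]); LOD_BIAS below is the
 * signed equivalent, clamped to [-16, 16]. */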
3564 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3565 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3566 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3567 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3568 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3569 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3570 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3571 S_008F38_MIP_POINT_PRECLAMP(0) |
3572 S_008F38_DISABLE_LSB_CEIL(1) |
3573 S_008F38_FILTER_PREC_FIX(1) |
3574 S_008F38_ANISO_OVERRIDE(is_vi));
3575 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3576 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3577 }
3578
3579 VkResult radv_CreateSampler(
3580 VkDevice _device,
3581 const VkSamplerCreateInfo* pCreateInfo,
3582 const VkAllocationCallbacks* pAllocator,
3583 VkSampler* pSampler)
3584 {
3585 RADV_FROM_HANDLE(radv_device, device, _device);
3586 struct radv_sampler *sampler;
3587
3588 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3589
3590 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3591 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3592 if (!sampler)
3593 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3594
3595 radv_init_sampler(device, sampler, pCreateInfo);
3596 *pSampler = radv_sampler_to_handle(sampler);
3597
3598 return VK_SUCCESS;
3599 }
3600
3601 void radv_DestroySampler(
3602 VkDevice _device,
3603 VkSampler _sampler,
3604 const VkAllocationCallbacks* pAllocator)
3605 {
3606 RADV_FROM_HANDLE(radv_device, device, _device);
3607 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3608
3609 if (!sampler)
3610 return;
3611 vk_free2(&device->alloc, pAllocator, sampler);
3612 }
3613
3614 /* vk_icd.h does not declare this function, so we declare it here to
3615 * suppress -Wmissing-prototypes.
3616 */
3617 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3618 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3619
3620 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3621 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3622 {
3623 /* For the full details on loader interface versioning, see
3624 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3625 * What follows is a condensed summary, to help you navigate the large and
3626 * confusing official doc.
3627 *
3628 * - Loader interface v0 is incompatible with later versions. We don't
3629 * support it.
3630 *
3631 * - In loader interface v1:
3632 * - The first ICD entrypoint called by the loader is
3633 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3634 * entrypoint.
3635 * - The ICD must statically expose no other Vulkan symbol unless it is
3636 * linked with -Bsymbolic.
3637 * - Each dispatchable Vulkan handle created by the ICD must be
3638 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3639 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3640 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3641 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3642 * such loader-managed surfaces.
3643 *
3644 * - Loader interface v2 differs from v1 in:
3645 * - The first ICD entrypoint called by the loader is
3646 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3647 * statically expose this entrypoint.
3648 *
3649 * - Loader interface v3 differs from v2 in:
3650 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3651 * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
3652 * because the loader no longer does so.
3653 */
3654 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3655 return VK_SUCCESS;
3656 }
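/* Illustrative sketch only: a loader that itself supports interface v4 or
 * later would be negotiated down to the v3 advertised above, e.g.:
 *
 *     uint32_t version = 4;
 *     vk_icdNegotiateLoaderICDInterfaceVersion(&version);
 *     assert(version == 3);
 */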
3657
3658 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3659 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3660 int *pFD)
3661 {
3662 RADV_FROM_HANDLE(radv_device, device, _device);
3663 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3664
3665 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3666
3667 /* We support only one handle type. */
3668 assert(pGetFdInfo->handleType ==
3669 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3670
3671 bool ret = radv_get_memory_fd(device, memory, pFD);
3672 if (!ret)
3673 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3674 return VK_SUCCESS;
3675 }
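/* For reference, the application-side usage this entrypoint serves looks
 * roughly like the following (handles are hypothetical):
 *
 *     VkMemoryGetFdInfoKHR info = {
 *         .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *         .memory = mem,
 *         .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
 *     };
 *     int fd = -1;
 *     vkGetMemoryFdKHR(device, &info, &fd);
 */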
3676
3677 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3678 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3679 int fd,
3680 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3681 {
3682 /* The valid usage section for this function says:
3683 *
3684 * "handleType must not be one of the handle types defined as opaque."
3685 *
3686 * Since we only handle opaque handles for now, there are no FD properties.
3687 */
3688 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3689 }
3690
3691 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
3692 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
3693 {
3694 RADV_FROM_HANDLE(radv_device, device, _device);
3695 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
3696 uint32_t syncobj_handle = 0;
3697 assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3698
3699 int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
3700 if (ret != 0)
3701 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3702
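/* Per the Vulkan external semaphore semantics, a temporary import only
 * replaces the payload until it is consumed (e.g. by the next wait), so
 * keep it separate from the permanent syncobj. */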
3703 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
3704 sem->temp_syncobj = syncobj_handle;
3705 } else {
3706 sem->syncobj = syncobj_handle;
3707 }
3708 close(pImportSemaphoreFdInfo->fd);
3709 return VK_SUCCESS;
3710 }
3711
3712 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
3713 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
3714 int *pFd)
3715 {
3716 RADV_FROM_HANDLE(radv_device, device, _device);
3717 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
3718 int ret;
3719 uint32_t syncobj_handle;
3720
3721 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3722 if (sem->temp_syncobj)
3723 syncobj_handle = sem->temp_syncobj;
3724 else
3725 syncobj_handle = sem->syncobj;
3726 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3727 if (ret)
3728 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3729 return VK_SUCCESS;
3730 }
3731
3732 void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
3733 VkPhysicalDevice physicalDevice,
3734 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
3735 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
3736 {
3737 if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
3738 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3739 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3740 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
3741 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
3742 } else {
3743 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
3744 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
3745 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
3746 }
3747 }