radv: Add trivial device group implementation.
[mesa.git] / src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "util/debug.h"
48
49 static int
50 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
51 {
52 uint32_t mesa_timestamp, llvm_timestamp;
53 uint16_t f = family;
54 memset(uuid, 0, VK_UUID_SIZE);
55 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
56 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
57 return -1;
58
59 memcpy(uuid, &mesa_timestamp, 4);
60 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
61 memcpy((char*)uuid + 8, &f, 2);
62 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
63 return 0;
64 }
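/* A minimal sketch (example only, not compiled into the driver) of how the
 * UUID built above could be unpacked again, assuming VK_UUID_SIZE == 16 and
 * the layout bytes 0-3: Mesa timestamp, 4-7: LLVM timestamp, 8-9: family,
 * 10+: the "radv" tag. The helper name is hypothetical.
 */
#if 0
static void
example_unpack_cache_uuid(const uint8_t uuid[VK_UUID_SIZE],
                          uint32_t *mesa_timestamp,
                          uint32_t *llvm_timestamp,
                          uint16_t *family)
{
	memcpy(mesa_timestamp, uuid, 4);      /* bytes 0-3 */
	memcpy(llvm_timestamp, uuid + 4, 4);  /* bytes 4-7 */
	memcpy(family, uuid + 8, 2);          /* bytes 8-9 */
}
#endif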
65
66 static void
67 radv_get_driver_uuid(void *uuid)
68 {
69 ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
70 }
71
72 static void
73 radv_get_device_uuid(struct radeon_info *info, void *uuid)
74 {
75 ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
76 }
77
78 static void
79 radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
80 {
81 const char *chip_string;
82 char llvm_string[32] = {};
83
84 switch (family) {
85 case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
86 case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
87 case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
88 case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
89 case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
90 case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
91 case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
92 case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
93 case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
94 case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
95 case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
96 case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
97 case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
98 case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
99 case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
100 case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
101 case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
102 case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
103 case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
104 case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
105 default: chip_string = "AMD RADV unknown"; break;
106 }
107
108 if (HAVE_LLVM > 0) {
109 snprintf(llvm_string, sizeof(llvm_string),
110 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
111 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
112 }
113
114 snprintf(name, name_len, "%s%s", chip_string, llvm_string);
115 }
116
117 static void
118 radv_physical_device_init_mem_types(struct radv_physical_device *device)
119 {
120 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
121 uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
122 device->rad_info.vram_vis_size);
123
124 int vram_index = -1, visible_vram_index = -1, gart_index = -1;
125 device->memory_properties.memoryHeapCount = 0;
126 if (device->rad_info.vram_size - visible_vram_size > 0) {
127 vram_index = device->memory_properties.memoryHeapCount++;
128 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
129 .size = device->rad_info.vram_size - visible_vram_size,
130 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
131 };
132 }
133 if (visible_vram_size) {
134 visible_vram_index = device->memory_properties.memoryHeapCount++;
135 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
136 .size = visible_vram_size,
137 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
138 };
139 }
140 if (device->rad_info.gart_size > 0) {
141 gart_index = device->memory_properties.memoryHeapCount++;
142 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
143 .size = device->rad_info.gart_size,
144 .flags = 0,
145 };
146 }
147
148 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
149 unsigned type_count = 0;
150 if (vram_index >= 0) {
151 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
152 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
153 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
154 .heapIndex = vram_index,
155 };
156 }
157 if (gart_index >= 0) {
158 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
159 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
160 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
161 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
162 .heapIndex = gart_index,
163 };
164 }
165 if (visible_vram_index >= 0) {
166 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
167 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
168 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
169 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
170 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
171 .heapIndex = visible_vram_index,
172 };
173 }
174 if (gart_index >= 0) {
175 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
176 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
177 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
178 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
179 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
180 .heapIndex = gart_index,
181 };
182 }
183 device->memory_properties.memoryTypeCount = type_count;
184 }
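/* Example only: a minimal client-side sketch of how the memory types
 * advertised above are typically consumed, picking the first type that is
 * allowed by a VkMemoryRequirements bitmask and carries the desired
 * property flags. The helper name is hypothetical.
 */
#if 0
static int32_t
example_find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
                         uint32_t type_bits, VkMemoryPropertyFlags wanted)
{
	for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
		/* Type must be allowed for the resource and have all wanted flags. */
		if ((type_bits & (1u << i)) &&
		    (props->memoryTypes[i].propertyFlags & wanted) == wanted)
			return (int32_t)i;
	}
	return -1; /* no suitable type */
}
#endif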
185
186 static void
187 radv_handle_env_var_force_family(struct radv_physical_device *device)
188 {
189 const char *family = getenv("RADV_FORCE_FAMILY");
190 unsigned i;
191
192 if (!family)
193 return;
194
195 for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
196 if (!strcmp(family, ac_get_llvm_processor_name(i))) {
197 /* Override family and chip_class. */
198 device->rad_info.family = i;
199
200 if (i >= CHIP_VEGA10)
201 device->rad_info.chip_class = GFX9;
202 else if (i >= CHIP_TONGA)
203 device->rad_info.chip_class = VI;
204 else if (i >= CHIP_BONAIRE)
205 device->rad_info.chip_class = CIK;
206 else
207 device->rad_info.chip_class = SI;
208
209 return;
210 }
211 }
212
213 fprintf(stderr, "radv: Unknown family: %s\n", family);
214 exit(1);
215 }
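/* Example usage (assuming a valid LLVM processor name for the target, and a
 * hypothetical application binary):
 *
 *   RADV_FORCE_FAMILY=polaris10 ./my_vulkan_app
 *
 * This only overrides the family/chip_class the driver reports to itself;
 * it is intended for compile-testing shaders for other GPUs, not for
 * running on mismatched hardware.
 */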
216
217 static VkResult
218 radv_physical_device_init(struct radv_physical_device *device,
219 struct radv_instance *instance,
220 drmDevicePtr drm_device)
221 {
222 const char *path = drm_device->nodes[DRM_NODE_RENDER];
223 VkResult result;
224 drmVersionPtr version;
225 int fd;
226
227 fd = open(path, O_RDWR | O_CLOEXEC);
228 if (fd < 0)
229 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
230
231 version = drmGetVersion(fd);
232 if (!version) {
233 close(fd);
234 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
235 "failed to get version %s: %m", path);
236 }
237
238 if (strcmp(version->name, "amdgpu")) {
239 drmFreeVersion(version);
240 close(fd);
241 return VK_ERROR_INCOMPATIBLE_DRIVER;
242 }
243 drmFreeVersion(version);
244
245 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
246 device->instance = instance;
247 assert(strlen(path) < ARRAY_SIZE(device->path));
248 strncpy(device->path, path, ARRAY_SIZE(device->path));
249
250 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
251 instance->perftest_flags);
252 if (!device->ws) {
253 result = VK_ERROR_INCOMPATIBLE_DRIVER;
254 goto fail;
255 }
256
257 device->local_fd = fd;
258 device->ws->query_info(device->ws, &device->rad_info);
259
260 radv_handle_env_var_force_family(device);
261
262 radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
263
264 if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
265 device->ws->destroy(device->ws);
266 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
267 "cannot generate UUID");
268 goto fail;
269 }
270
271 /* These flags affect shader compilation. */
272 uint64_t shader_env_flags =
273 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
274 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
275
276 /* The GPU id is already embedded in the uuid so we just pass "radv"
277 * when creating the cache.
278 */
279 char buf[VK_UUID_SIZE * 2 + 1];
280 disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
281 device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
282
283 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
284
285 radv_get_driver_uuid(&device->driver_uuid);
286 radv_get_device_uuid(&device->rad_info, &device->device_uuid);
287
288 if (device->rad_info.family == CHIP_STONEY ||
289 device->rad_info.chip_class >= GFX9) {
290 device->has_rbplus = true;
291 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
292 }
293
294 /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
295 * on SI.
296 */
297 device->has_clear_state = device->rad_info.chip_class >= CIK;
298
299 device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
300
301 /* Vega10/Raven need a special workaround for a hardware bug. */
302 device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
303 device->rad_info.family == CHIP_RAVEN;
304
305 radv_physical_device_init_mem_types(device);
306 radv_fill_device_extension_table(device, &device->supported_extensions);
307
308 result = radv_init_wsi(device);
309 if (result != VK_SUCCESS) {
310 device->ws->destroy(device->ws);
311 goto fail;
312 }
313
314 return VK_SUCCESS;
315
316 fail:
317 close(fd);
318 return result;
319 }
320
321 static void
322 radv_physical_device_finish(struct radv_physical_device *device)
323 {
324 radv_finish_wsi(device);
325 device->ws->destroy(device->ws);
326 disk_cache_destroy(device->disk_cache);
327 close(device->local_fd);
328 }
329
330 static void *
331 default_alloc_func(void *pUserData, size_t size, size_t align,
332 VkSystemAllocationScope allocationScope)
333 {
334 return malloc(size);
335 }
336
337 static void *
338 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
339 size_t align, VkSystemAllocationScope allocationScope)
340 {
341 return realloc(pOriginal, size);
342 }
343
344 static void
345 default_free_func(void *pUserData, void *pMemory)
346 {
347 free(pMemory);
348 }
349
350 static const VkAllocationCallbacks default_alloc = {
351 .pUserData = NULL,
352 .pfnAllocation = default_alloc_func,
353 .pfnReallocation = default_realloc_func,
354 .pfnFree = default_free_func,
355 };
356
357 static const struct debug_control radv_debug_options[] = {
358 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
359 {"nodcc", RADV_DEBUG_NO_DCC},
360 {"shaders", RADV_DEBUG_DUMP_SHADERS},
361 {"nocache", RADV_DEBUG_NO_CACHE},
362 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
363 {"nohiz", RADV_DEBUG_NO_HIZ},
364 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
365 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
366 {"allbos", RADV_DEBUG_ALL_BOS},
367 {"noibs", RADV_DEBUG_NO_IBS},
368 {"spirv", RADV_DEBUG_DUMP_SPIRV},
369 {"vmfaults", RADV_DEBUG_VM_FAULTS},
370 {"zerovram", RADV_DEBUG_ZERO_VRAM},
371 {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
372 {"nosisched", RADV_DEBUG_NO_SISCHED},
373 {"preoptir", RADV_DEBUG_PREOPTIR},
374 {NULL, 0}
375 };
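/* Example usage: the table above is parsed from the RADV_DEBUG environment
 * variable as a comma-separated list, e.g. (hypothetical invocation):
 *
 *   RADV_DEBUG=nodcc,shaders ./my_vulkan_app
 */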
376
377 const char *
378 radv_get_debug_option_name(int id)
379 {
380 assert(id < ARRAY_SIZE(radv_debug_options) - 1);
381 return radv_debug_options[id].string;
382 }
383
384 static const struct debug_control radv_perftest_options[] = {
385 {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
386 {"sisched", RADV_PERFTEST_SISCHED},
387 {"localbos", RADV_PERFTEST_LOCAL_BOS},
388 {"binning", RADV_PERFTEST_BINNING},
389 {NULL, 0}
390 };
391
392 const char *
393 radv_get_perftest_option_name(int id)
394 {
395 assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
396 return radv_perftest_options[id].string;
397 }
398
399 static void
400 radv_handle_per_app_options(struct radv_instance *instance,
401 const VkApplicationInfo *info)
402 {
403 const char *name = info ? info->pApplicationName : NULL;
404
405 if (!name)
406 return;
407
408 if (!strcmp(name, "Talos - Linux - 32bit") ||
409 !strcmp(name, "Talos - Linux - 64bit")) {
410 /* Force-enable LLVM sisched for Talos because it looks safe
411 * and it gives a few more FPS.
412 */
413 instance->perftest_flags |= RADV_PERFTEST_SISCHED;
414 }
415 }
416
417 static int radv_get_instance_extension_index(const char *name)
418 {
419 for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
420 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
421 return i;
422 }
423 return -1;
424 }
425
426
427 VkResult radv_CreateInstance(
428 const VkInstanceCreateInfo* pCreateInfo,
429 const VkAllocationCallbacks* pAllocator,
430 VkInstance* pInstance)
431 {
432 struct radv_instance *instance;
433 VkResult result;
434
435 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
436
437 uint32_t client_version;
438 if (pCreateInfo->pApplicationInfo &&
439 pCreateInfo->pApplicationInfo->apiVersion != 0) {
440 client_version = pCreateInfo->pApplicationInfo->apiVersion;
441 } else {
442 client_version = VK_MAKE_VERSION(1, 0, 0);
443 }
444
445 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
446 client_version > VK_MAKE_VERSION(1, 1, 0xfff)) {
447 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
448 "Client requested version %d.%d.%d",
449 VK_VERSION_MAJOR(client_version),
450 VK_VERSION_MINOR(client_version),
451 VK_VERSION_PATCH(client_version));
452 }
453
454 instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
455 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
456 if (!instance)
457 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
458
459 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
460
461 if (pAllocator)
462 instance->alloc = *pAllocator;
463 else
464 instance->alloc = default_alloc;
465
466 instance->apiVersion = client_version;
467 instance->physicalDeviceCount = -1;
468
469 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
470 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
471 int index = radv_get_instance_extension_index(ext_name);
472
473 if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
474 vk_free2(&default_alloc, pAllocator, instance);
475 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
476 }
477
478 instance->enabled_extensions.extensions[index] = true;
479 }
480
481 result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
482 if (result != VK_SUCCESS) {
483 vk_free2(&default_alloc, pAllocator, instance);
484 return vk_error(result);
485 }
486
487 _mesa_locale_init();
488
489 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
490
491 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
492 radv_debug_options);
493
494 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
495 radv_perftest_options);
496
497 radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
498
499 if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
500 /* Disable sisched when the user requests it; this is mostly
501 * useful when the driver force-enables sisched for the given
502 * application.
503 */
504 instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
505 }
506
507 *pInstance = radv_instance_to_handle(instance);
508
509 return VK_SUCCESS;
510 }
511
512 void radv_DestroyInstance(
513 VkInstance _instance,
514 const VkAllocationCallbacks* pAllocator)
515 {
516 RADV_FROM_HANDLE(radv_instance, instance, _instance);
517
518 if (!instance)
519 return;
520
521 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
522 radv_physical_device_finish(instance->physicalDevices + i);
523 }
524
525 VG(VALGRIND_DESTROY_MEMPOOL(instance));
526
527 _mesa_locale_fini();
528
529 vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
530
531 vk_free(&instance->alloc, instance);
532 }
533
534 static VkResult
535 radv_enumerate_devices(struct radv_instance *instance)
536 {
537 /* TODO: Check for more devices? */
538 drmDevicePtr devices[8];
539 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
540 int max_devices;
541
542 instance->physicalDeviceCount = 0;
543
544 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
545 if (max_devices < 1)
546 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
547
548 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
549 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
550 devices[i]->bustype == DRM_BUS_PCI &&
551 devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
552
553 result = radv_physical_device_init(instance->physicalDevices +
554 instance->physicalDeviceCount,
555 instance,
556 devices[i]);
557 if (result == VK_SUCCESS)
558 ++instance->physicalDeviceCount;
559 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
560 break;
561 }
562 }
563 drmFreeDevices(devices, max_devices);
564
565 return result;
566 }
567
568 VkResult radv_EnumeratePhysicalDevices(
569 VkInstance _instance,
570 uint32_t* pPhysicalDeviceCount,
571 VkPhysicalDevice* pPhysicalDevices)
572 {
573 RADV_FROM_HANDLE(radv_instance, instance, _instance);
574 VkResult result;
575
576 if (instance->physicalDeviceCount < 0) {
577 result = radv_enumerate_devices(instance);
578 if (result != VK_SUCCESS &&
579 result != VK_ERROR_INCOMPATIBLE_DRIVER)
580 return result;
581 }
582
583 if (!pPhysicalDevices) {
584 *pPhysicalDeviceCount = instance->physicalDeviceCount;
585 } else {
586 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
587 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
588 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
589 }
590
591 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
592 : VK_SUCCESS;
593 }
594
595 VkResult radv_EnumeratePhysicalDeviceGroups(
596 VkInstance _instance,
597 uint32_t* pPhysicalDeviceGroupCount,
598 VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
599 {
600 RADV_FROM_HANDLE(radv_instance, instance, _instance);
601 VkResult result;
602
603 if (instance->physicalDeviceCount < 0) {
604 result = radv_enumerate_devices(instance);
605 if (result != VK_SUCCESS &&
606 result != VK_ERROR_INCOMPATIBLE_DRIVER)
607 return result;
608 }
609
610 if (!pPhysicalDeviceGroupProperties) {
611 *pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
612 } else {
613 *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
614 for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
615 pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
616 pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
617 pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
618 }
619 }
620 return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
621 : VK_SUCCESS;
622 }
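/* Example only: a minimal client-side sketch of the two-call idiom against
 * the trivial (one physical device per group) implementation above. The
 * fixed-size array and the cap of 8 groups are assumptions for brevity.
 */
#if 0
static void
example_enumerate_device_groups(VkInstance instance)
{
	VkPhysicalDeviceGroupProperties groups[8];
	uint32_t count = 0;

	/* First call: query the number of groups. */
	vkEnumeratePhysicalDeviceGroups(instance, &count, NULL);
	count = MIN2(count, 8);

	for (uint32_t i = 0; i < count; i++) {
		groups[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GROUP_PROPERTIES;
		groups[i].pNext = NULL;
	}

	/* Second call: fill the group properties. */
	vkEnumeratePhysicalDeviceGroups(instance, &count, groups);
	/* With the implementation above, each group holds exactly one device. */
}
#endif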
623
624 void radv_GetPhysicalDeviceFeatures(
625 VkPhysicalDevice physicalDevice,
626 VkPhysicalDeviceFeatures* pFeatures)
627 {
628 memset(pFeatures, 0, sizeof(*pFeatures));
629
630 *pFeatures = (VkPhysicalDeviceFeatures) {
631 .robustBufferAccess = true,
632 .fullDrawIndexUint32 = true,
633 .imageCubeArray = true,
634 .independentBlend = true,
635 .geometryShader = true,
636 .tessellationShader = true,
637 .sampleRateShading = true,
638 .dualSrcBlend = true,
639 .logicOp = true,
640 .multiDrawIndirect = true,
641 .drawIndirectFirstInstance = true,
642 .depthClamp = true,
643 .depthBiasClamp = true,
644 .fillModeNonSolid = true,
645 .depthBounds = true,
646 .wideLines = true,
647 .largePoints = true,
648 .alphaToOne = true,
649 .multiViewport = true,
650 .samplerAnisotropy = true,
651 .textureCompressionETC2 = false,
652 .textureCompressionASTC_LDR = false,
653 .textureCompressionBC = true,
654 .occlusionQueryPrecise = true,
655 .pipelineStatisticsQuery = true,
656 .vertexPipelineStoresAndAtomics = true,
657 .fragmentStoresAndAtomics = true,
658 .shaderTessellationAndGeometryPointSize = true,
659 .shaderImageGatherExtended = true,
660 .shaderStorageImageExtendedFormats = true,
661 .shaderStorageImageMultisample = false,
662 .shaderUniformBufferArrayDynamicIndexing = true,
663 .shaderSampledImageArrayDynamicIndexing = true,
664 .shaderStorageBufferArrayDynamicIndexing = true,
665 .shaderStorageImageArrayDynamicIndexing = true,
666 .shaderStorageImageReadWithoutFormat = true,
667 .shaderStorageImageWriteWithoutFormat = true,
668 .shaderClipDistance = true,
669 .shaderCullDistance = true,
670 .shaderFloat64 = true,
671 .shaderInt64 = true,
672 .shaderInt16 = false,
673 .sparseBinding = true,
674 .variableMultisampleRate = true,
675 .inheritedQueries = true,
676 };
677 }
678
679 void radv_GetPhysicalDeviceFeatures2(
680 VkPhysicalDevice physicalDevice,
681 VkPhysicalDeviceFeatures2KHR *pFeatures)
682 {
683 vk_foreach_struct(ext, pFeatures->pNext) {
684 switch (ext->sType) {
685 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
686 VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
687 features->variablePointersStorageBuffer = true;
688 features->variablePointers = false;
689 break;
690 }
691 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
692 VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
693 features->multiview = true;
694 features->multiviewGeometryShader = true;
695 features->multiviewTessellationShader = true;
696 break;
697 }
698 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
699 VkPhysicalDeviceShaderDrawParameterFeatures *features =
700 (VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
701 features->shaderDrawParameters = true;
702 break;
703 }
704 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
705 VkPhysicalDeviceProtectedMemoryFeatures *features =
706 (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
707 features->protectedMemory = false;
708 break;
709 }
710 default:
711 break;
712 }
713 }
714 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
715 }
716
717 void radv_GetPhysicalDeviceProperties(
718 VkPhysicalDevice physicalDevice,
719 VkPhysicalDeviceProperties* pProperties)
720 {
721 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
722 VkSampleCountFlags sample_counts = 0xf;
723
724 /* Make sure that the entire descriptor set is addressable with a signed
725 * 32-bit int. So the sum of all limits scaled by descriptor size has to
726 * be at most 2 GiB. A combined image & sampler object counts as one of
727 * each. This limit is for the pipeline layout, not for the set layout, but
728 * there is no set limit, so we just set a pipeline limit. I don't think
729 * any app is going to hit this soon. */
730 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
731 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
732 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
733 32 /* sampler, largest when combined with image */ +
734 64 /* sampled image */ +
735 64 /* storage image */);
736
737 VkPhysicalDeviceLimits limits = {
738 .maxImageDimension1D = (1 << 14),
739 .maxImageDimension2D = (1 << 14),
740 .maxImageDimension3D = (1 << 11),
741 .maxImageDimensionCube = (1 << 14),
742 .maxImageArrayLayers = (1 << 11),
743 .maxTexelBufferElements = 128 * 1024 * 1024,
744 .maxUniformBufferRange = UINT32_MAX,
745 .maxStorageBufferRange = UINT32_MAX,
746 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
747 .maxMemoryAllocationCount = UINT32_MAX,
748 .maxSamplerAllocationCount = 64 * 1024,
749 .bufferImageGranularity = 64, /* A cache line */
750 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
751 .maxBoundDescriptorSets = MAX_SETS,
752 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
753 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
754 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
755 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
756 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
757 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
758 .maxPerStageResources = max_descriptor_set_size,
759 .maxDescriptorSetSamplers = max_descriptor_set_size,
760 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
761 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
762 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
763 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
764 .maxDescriptorSetSampledImages = max_descriptor_set_size,
765 .maxDescriptorSetStorageImages = max_descriptor_set_size,
766 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
767 .maxVertexInputAttributes = 32,
768 .maxVertexInputBindings = 32,
769 .maxVertexInputAttributeOffset = 2047,
770 .maxVertexInputBindingStride = 2048,
771 .maxVertexOutputComponents = 128,
772 .maxTessellationGenerationLevel = 64,
773 .maxTessellationPatchSize = 32,
774 .maxTessellationControlPerVertexInputComponents = 128,
775 .maxTessellationControlPerVertexOutputComponents = 128,
776 .maxTessellationControlPerPatchOutputComponents = 120,
777 .maxTessellationControlTotalOutputComponents = 4096,
778 .maxTessellationEvaluationInputComponents = 128,
779 .maxTessellationEvaluationOutputComponents = 128,
780 .maxGeometryShaderInvocations = 127,
781 .maxGeometryInputComponents = 64,
782 .maxGeometryOutputComponents = 128,
783 .maxGeometryOutputVertices = 256,
784 .maxGeometryTotalOutputComponents = 1024,
785 .maxFragmentInputComponents = 128,
786 .maxFragmentOutputAttachments = 8,
787 .maxFragmentDualSrcAttachments = 1,
788 .maxFragmentCombinedOutputResources = 8,
789 .maxComputeSharedMemorySize = 32768,
790 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
791 .maxComputeWorkGroupInvocations = 2048,
792 .maxComputeWorkGroupSize = {
793 2048,
794 2048,
795 2048
796 },
797 .subPixelPrecisionBits = 4 /* FIXME */,
798 .subTexelPrecisionBits = 4 /* FIXME */,
799 .mipmapPrecisionBits = 4 /* FIXME */,
800 .maxDrawIndexedIndexValue = UINT32_MAX,
801 .maxDrawIndirectCount = UINT32_MAX,
802 .maxSamplerLodBias = 16,
803 .maxSamplerAnisotropy = 16,
804 .maxViewports = MAX_VIEWPORTS,
805 .maxViewportDimensions = { (1 << 14), (1 << 14) },
806 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
807 .viewportSubPixelBits = 13, /* We take a float? */
808 .minMemoryMapAlignment = 4096, /* A page */
809 .minTexelBufferOffsetAlignment = 1,
810 .minUniformBufferOffsetAlignment = 4,
811 .minStorageBufferOffsetAlignment = 4,
812 .minTexelOffset = -32,
813 .maxTexelOffset = 31,
814 .minTexelGatherOffset = -32,
815 .maxTexelGatherOffset = 31,
816 .minInterpolationOffset = -2,
817 .maxInterpolationOffset = 2,
818 .subPixelInterpolationOffsetBits = 8,
819 .maxFramebufferWidth = (1 << 14),
820 .maxFramebufferHeight = (1 << 14),
821 .maxFramebufferLayers = (1 << 10),
822 .framebufferColorSampleCounts = sample_counts,
823 .framebufferDepthSampleCounts = sample_counts,
824 .framebufferStencilSampleCounts = sample_counts,
825 .framebufferNoAttachmentsSampleCounts = sample_counts,
826 .maxColorAttachments = MAX_RTS,
827 .sampledImageColorSampleCounts = sample_counts,
828 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
829 .sampledImageDepthSampleCounts = sample_counts,
830 .sampledImageStencilSampleCounts = sample_counts,
831 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
832 .maxSampleMaskWords = 1,
833 .timestampComputeAndGraphics = true,
834 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
835 .maxClipDistances = 8,
836 .maxCullDistances = 8,
837 .maxCombinedClipAndCullDistances = 8,
838 .discreteQueuePriorities = 1,
839 .pointSizeRange = { 0.125, 255.875 },
840 .lineWidthRange = { 0.0, 7.9921875 },
841 .pointSizeGranularity = (1.0 / 8.0),
842 .lineWidthGranularity = (1.0 / 128.0),
843 .strictLines = false, /* FINISHME */
844 .standardSampleLocations = true,
845 .optimalBufferCopyOffsetAlignment = 128,
846 .optimalBufferCopyRowPitchAlignment = 128,
847 .nonCoherentAtomSize = 64,
848 };
849
850 *pProperties = (VkPhysicalDeviceProperties) {
851 .apiVersion = radv_physical_device_api_version(pdevice),
852 .driverVersion = vk_get_driver_version(),
853 .vendorID = ATI_VENDOR_ID,
854 .deviceID = pdevice->rad_info.pci_id,
855 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
856 .limits = limits,
857 .sparseProperties = {0},
858 };
859
860 strcpy(pProperties->deviceName, pdevice->name);
861 memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
862 }
863
864 void radv_GetPhysicalDeviceProperties2(
865 VkPhysicalDevice physicalDevice,
866 VkPhysicalDeviceProperties2KHR *pProperties)
867 {
868 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
869 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
870
871 vk_foreach_struct(ext, pProperties->pNext) {
872 switch (ext->sType) {
873 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
874 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
875 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
876 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
877 break;
878 }
879 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
880 VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
881 memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
882 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
883 properties->deviceLUIDValid = false;
884 break;
885 }
886 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
887 VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
888 properties->maxMultiviewViewCount = MAX_VIEWS;
889 properties->maxMultiviewInstanceIndex = INT_MAX;
890 break;
891 }
892 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
893 VkPhysicalDevicePointClippingPropertiesKHR *properties =
894 (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
895 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
896 break;
897 }
898 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
899 VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
900 (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
901 properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
902 break;
903 }
904 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
905 VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
906 (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
907 properties->minImportedHostPointerAlignment = 4096;
908 break;
909 }
910 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
911 VkPhysicalDeviceSubgroupProperties *properties =
912 (VkPhysicalDeviceSubgroupProperties*)ext;
913 properties->subgroupSize = 64;
914 properties->supportedStages = VK_SHADER_STAGE_ALL;
915 properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
916 properties->quadOperationsInAllStages = false;
917 break;
918 }
919 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
920 VkPhysicalDeviceMaintenance3Properties *properties =
921 (VkPhysicalDeviceMaintenance3Properties*)ext;
922 /* Make sure everything is addressable by a signed 32-bit int, and
923 * our largest descriptors are 96 bytes. */
924 properties->maxPerSetDescriptors = (1ull << 31) / 96;
925 /* Our buffer size fields allow only this much */
926 properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
927 break;
928 }
929 default:
930 break;
931 }
932 }
933 }
934
935 static void radv_get_physical_device_queue_family_properties(
936 struct radv_physical_device* pdevice,
937 uint32_t* pCount,
938 VkQueueFamilyProperties** pQueueFamilyProperties)
939 {
940 int num_queue_families = 1;
941 int idx;
942 if (pdevice->rad_info.num_compute_rings > 0 &&
943 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
944 num_queue_families++;
945
946 if (pQueueFamilyProperties == NULL) {
947 *pCount = num_queue_families;
948 return;
949 }
950
951 if (!*pCount)
952 return;
953
954 idx = 0;
955 if (*pCount >= 1) {
956 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
957 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
958 VK_QUEUE_COMPUTE_BIT |
959 VK_QUEUE_TRANSFER_BIT |
960 VK_QUEUE_SPARSE_BINDING_BIT,
961 .queueCount = 1,
962 .timestampValidBits = 64,
963 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
964 };
965 idx++;
966 }
967
968 if (pdevice->rad_info.num_compute_rings > 0 &&
969 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
970 if (*pCount > idx) {
971 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
972 .queueFlags = VK_QUEUE_COMPUTE_BIT |
973 VK_QUEUE_TRANSFER_BIT |
974 VK_QUEUE_SPARSE_BINDING_BIT,
975 .queueCount = pdevice->rad_info.num_compute_rings,
976 .timestampValidBits = 64,
977 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
978 };
979 idx++;
980 }
981 }
982 *pCount = idx;
983 }
984
985 void radv_GetPhysicalDeviceQueueFamilyProperties(
986 VkPhysicalDevice physicalDevice,
987 uint32_t* pCount,
988 VkQueueFamilyProperties* pQueueFamilyProperties)
989 {
990 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
991 if (!pQueueFamilyProperties) {
992 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
993 return;
994 }
995 VkQueueFamilyProperties *properties[] = {
996 pQueueFamilyProperties + 0,
997 pQueueFamilyProperties + 1,
998 pQueueFamilyProperties + 2,
999 };
1000 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1001 assert(*pCount <= 3);
1002 }
1003
1004 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1005 VkPhysicalDevice physicalDevice,
1006 uint32_t* pCount,
1007 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
1008 {
1009 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1010 if (!pQueueFamilyProperties) {
1011 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1012 return;
1013 }
1014 VkQueueFamilyProperties *properties[] = {
1015 &pQueueFamilyProperties[0].queueFamilyProperties,
1016 &pQueueFamilyProperties[1].queueFamilyProperties,
1017 &pQueueFamilyProperties[2].queueFamilyProperties,
1018 };
1019 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1020 assert(*pCount <= 3);
1021 }
1022
1023 void radv_GetPhysicalDeviceMemoryProperties(
1024 VkPhysicalDevice physicalDevice,
1025 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1026 {
1027 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1028
1029 *pMemoryProperties = physical_device->memory_properties;
1030 }
1031
1032 void radv_GetPhysicalDeviceMemoryProperties2(
1033 VkPhysicalDevice physicalDevice,
1034 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
1035 {
1036 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1037 &pMemoryProperties->memoryProperties);
1038 }
1039
1040 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1041 VkDevice _device,
1042 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
1043 const void *pHostPointer,
1044 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
1045 {
1046 RADV_FROM_HANDLE(radv_device, device, _device);
1047
1048 switch (handleType)
1049 {
1050 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1051 const struct radv_physical_device *physical_device = device->physical_device;
1052 uint32_t memoryTypeBits = 0;
1053 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1054 if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1055 memoryTypeBits = (1 << i);
1056 break;
1057 }
1058 }
1059 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1060 return VK_SUCCESS;
1061 }
1062 default:
1063 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1064 }
1065 }
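/* Example only: a sketch of importing a page-aligned host allocation with
 * VK_EXT_external_memory_host, which lands in the GTT_CACHED memory type
 * reported above. host_ptr and size are assumed to satisfy
 * minImportedHostPointerAlignment (4096 here); the helper name is
 * hypothetical.
 */
#if 0
static VkResult
example_import_host_memory(VkDevice dev, void *host_ptr, VkDeviceSize size,
                           uint32_t memory_type_index, VkDeviceMemory *mem)
{
	const VkImportMemoryHostPointerInfoEXT import = {
		.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
		.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
		.pHostPointer = host_ptr,
	};
	const VkMemoryAllocateInfo alloc = {
		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
		.pNext = &import,
		.allocationSize = size, /* multiple of the 4096 alignment above */
		.memoryTypeIndex = memory_type_index,
	};
	return vkAllocateMemory(dev, &alloc, NULL, mem);
}
#endif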
1066
1067 static enum radeon_ctx_priority
1068 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1069 {
1070 /* Default to MEDIUM when a specific global priority isn't requested */
1071 if (!pObj)
1072 return RADEON_CTX_PRIORITY_MEDIUM;
1073
1074 switch(pObj->globalPriority) {
1075 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1076 return RADEON_CTX_PRIORITY_REALTIME;
1077 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1078 return RADEON_CTX_PRIORITY_HIGH;
1079 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1080 return RADEON_CTX_PRIORITY_MEDIUM;
1081 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1082 return RADEON_CTX_PRIORITY_LOW;
1083 default:
1084 unreachable("Illegal global priority value");
1085 return RADEON_CTX_PRIORITY_INVALID;
1086 }
1087 }
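/* Example only: how an application would request the HIGH priority mapped
 * above, by chaining the EXT struct into a queue create info (requires
 * VK_EXT_global_priority; the names here are illustrative).
 */
#if 0
static const VkDeviceQueueGlobalPriorityCreateInfoEXT example_priority = {
	.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
	.globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
};

static const float example_queue_priority = 1.0f;

static const VkDeviceQueueCreateInfo example_queue_info = {
	.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
	.pNext = &example_priority, /* consumed by the switch above */
	.queueFamilyIndex = 0,
	.queueCount = 1,
	.pQueuePriorities = &example_queue_priority,
};
#endif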
1088
1089 static int
1090 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1091 uint32_t queue_family_index, int idx,
1092 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1093 {
1094 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1095 queue->device = device;
1096 queue->queue_family_index = queue_family_index;
1097 queue->queue_idx = idx;
1098 queue->priority = radv_get_queue_global_priority(global_priority);
1099
1100 queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1101 if (!queue->hw_ctx)
1102 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1103
1104 return VK_SUCCESS;
1105 }
1106
1107 static void
1108 radv_queue_finish(struct radv_queue *queue)
1109 {
1110 if (queue->hw_ctx)
1111 queue->device->ws->ctx_destroy(queue->hw_ctx);
1112
1113 if (queue->initial_full_flush_preamble_cs)
1114 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1115 if (queue->initial_preamble_cs)
1116 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1117 if (queue->continue_preamble_cs)
1118 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1119 if (queue->descriptor_bo)
1120 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1121 if (queue->scratch_bo)
1122 queue->device->ws->buffer_destroy(queue->scratch_bo);
1123 if (queue->esgs_ring_bo)
1124 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1125 if (queue->gsvs_ring_bo)
1126 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1127 if (queue->tess_rings_bo)
1128 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1129 if (queue->compute_scratch_bo)
1130 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1131 }
1132
1133 static void
1134 radv_device_init_gs_info(struct radv_device *device)
1135 {
1136 switch (device->physical_device->rad_info.family) {
1137 case CHIP_OLAND:
1138 case CHIP_HAINAN:
1139 case CHIP_KAVERI:
1140 case CHIP_KABINI:
1141 case CHIP_MULLINS:
1142 case CHIP_ICELAND:
1143 case CHIP_CARRIZO:
1144 case CHIP_STONEY:
1145 device->gs_table_depth = 16;
1146 return;
1147 case CHIP_TAHITI:
1148 case CHIP_PITCAIRN:
1149 case CHIP_VERDE:
1150 case CHIP_BONAIRE:
1151 case CHIP_HAWAII:
1152 case CHIP_TONGA:
1153 case CHIP_FIJI:
1154 case CHIP_POLARIS10:
1155 case CHIP_POLARIS11:
1156 case CHIP_POLARIS12:
1157 case CHIP_VEGA10:
1158 case CHIP_RAVEN:
1159 device->gs_table_depth = 32;
1160 return;
1161 default:
1162 unreachable("unknown GPU");
1163 }
1164 }
1165
1166 static int radv_get_device_extension_index(const char *name)
1167 {
1168 for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1169 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1170 return i;
1171 }
1172 return -1;
1173 }
1174
1175 VkResult radv_CreateDevice(
1176 VkPhysicalDevice physicalDevice,
1177 const VkDeviceCreateInfo* pCreateInfo,
1178 const VkAllocationCallbacks* pAllocator,
1179 VkDevice* pDevice)
1180 {
1181 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1182 VkResult result;
1183 struct radv_device *device;
1184
1185 bool keep_shader_info = false;
1186
1187 /* Check enabled features */
1188 if (pCreateInfo->pEnabledFeatures) {
1189 VkPhysicalDeviceFeatures supported_features;
1190 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1191 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1192 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1193 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1194 for (uint32_t i = 0; i < num_features; i++) {
1195 if (enabled_feature[i] && !supported_feature[i])
1196 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1197 }
1198 }
1199
1200 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1201 sizeof(*device), 8,
1202 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1203 if (!device)
1204 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1205
1206 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1207 device->instance = physical_device->instance;
1208 device->physical_device = physical_device;
1209
1210 device->ws = physical_device->ws;
1211 if (pAllocator)
1212 device->alloc = *pAllocator;
1213 else
1214 device->alloc = physical_device->instance->alloc;
1215
1216 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1217 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1218 int index = radv_get_device_extension_index(ext_name);
1219 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1220 vk_free(&device->alloc, device);
1221 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1222 }
1223
1224 device->enabled_extensions.extensions[index] = true;
1225 }
1226
1227 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1228
1229 mtx_init(&device->shader_slab_mutex, mtx_plain);
1230 list_inithead(&device->shader_slabs);
1231
1232 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1233 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1234 uint32_t qfi = queue_create->queueFamilyIndex;
1235 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1236 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1237
1238 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1239
1240 device->queues[qfi] = vk_alloc(&device->alloc,
1241 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1242 if (!device->queues[qfi]) {
1243 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1244 goto fail;
1245 }
1246
1247 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1248
1249 device->queue_count[qfi] = queue_create->queueCount;
1250
1251 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1252 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
1253 if (result != VK_SUCCESS)
1254 goto fail;
1255 }
1256 }
1257
1258 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1259 (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
1260
1261 /* Disabled and not implemented for now. */
1262 device->dfsm_allowed = device->pbb_allowed && false;
1263
1264 #ifdef ANDROID
1265 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1266 #endif
1267
1268 device->llvm_supports_spill = true;
1269
1270 /* The maximum number of scratch waves. Scratch space isn't divided
1271 * evenly between CUs. The number is only a function of the number of CUs.
1272 * We can decrease the constant to decrease the scratch buffer size.
1273 *
1274 * device->scratch_waves must be >= the maximum possible size of
1275 * 1 threadgroup, so that the hw doesn't hang from being unable
1276 * to start any.
1277 *
1278 * The recommended value is 4 per CU at most. Higher numbers don't
1279 * bring much benefit, but they still occupy chip resources (think
1280 * async compute). I've seen ~2% performance difference between 4 and 32.
1281 */
1282 uint32_t max_threads_per_block = 2048;
1283 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1284 max_threads_per_block / 64);
1285
1286 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1287
1288 if (device->physical_device->rad_info.chip_class >= CIK) {
1289 /* If the KMD allows it (there is a KMD hw register for it),
1290 * allow launching waves out-of-order.
1291 */
1292 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1293 }
1294
1295 radv_device_init_gs_info(device);
1296
1297 device->tess_offchip_block_dw_size =
1298 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1299 device->has_distributed_tess =
1300 device->physical_device->rad_info.chip_class >= VI &&
1301 device->physical_device->rad_info.max_se >= 2;
1302
1303 if (getenv("RADV_TRACE_FILE")) {
1304 keep_shader_info = true;
1305
1306 if (!radv_init_trace(device))
1307 goto fail;
1308 }
1309
1310 device->keep_shader_info = keep_shader_info;
1311
1312 result = radv_device_init_meta(device);
1313 if (result != VK_SUCCESS)
1314 goto fail;
1315
1316 radv_device_init_msaa(device);
1317
1318 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1319 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1320 switch (family) {
1321 case RADV_QUEUE_GENERAL:
1322 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1323 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1324 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1325 break;
1326 case RADV_QUEUE_COMPUTE:
1327 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1328 radeon_emit(device->empty_cs[family], 0);
1329 break;
1330 }
1331 device->ws->cs_finalize(device->empty_cs[family]);
1332 }
1333
1334 if (device->physical_device->rad_info.chip_class >= CIK)
1335 cik_create_gfx_config(device);
1336
1337 VkPipelineCacheCreateInfo ci;
1338 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1339 ci.pNext = NULL;
1340 ci.flags = 0;
1341 ci.pInitialData = NULL;
1342 ci.initialDataSize = 0;
1343 VkPipelineCache pc;
1344 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1345 &ci, NULL, &pc);
1346 if (result != VK_SUCCESS)
1347 goto fail_meta;
1348
1349 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1350
1351 *pDevice = radv_device_to_handle(device);
1352 return VK_SUCCESS;
1353
1354 fail_meta:
1355 radv_device_finish_meta(device);
1356 fail:
1357 if (device->trace_bo)
1358 device->ws->buffer_destroy(device->trace_bo);
1359
1360 if (device->gfx_init)
1361 device->ws->buffer_destroy(device->gfx_init);
1362
1363 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1364 for (unsigned q = 0; q < device->queue_count[i]; q++)
1365 radv_queue_finish(&device->queues[i][q]);
1366 if (device->queue_count[i])
1367 vk_free(&device->alloc, device->queues[i]);
1368 }
1369
1370 vk_free(&device->alloc, device);
1371 return result;
1372 }
1373
1374 void radv_DestroyDevice(
1375 VkDevice _device,
1376 const VkAllocationCallbacks* pAllocator)
1377 {
1378 RADV_FROM_HANDLE(radv_device, device, _device);
1379
1380 if (!device)
1381 return;
1382
1383 if (device->trace_bo)
1384 device->ws->buffer_destroy(device->trace_bo);
1385
1386 if (device->gfx_init)
1387 device->ws->buffer_destroy(device->gfx_init);
1388
1389 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1390 for (unsigned q = 0; q < device->queue_count[i]; q++)
1391 radv_queue_finish(&device->queues[i][q]);
1392 if (device->queue_count[i])
1393 vk_free(&device->alloc, device->queues[i]);
1394 if (device->empty_cs[i])
1395 device->ws->cs_destroy(device->empty_cs[i]);
1396 }
1397 radv_device_finish_meta(device);
1398
1399 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1400 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1401
1402 radv_destroy_shader_slabs(device);
1403
1404 vk_free(&device->alloc, device);
1405 }
1406
1407 VkResult radv_EnumerateInstanceLayerProperties(
1408 uint32_t* pPropertyCount,
1409 VkLayerProperties* pProperties)
1410 {
1411 if (pProperties == NULL) {
1412 *pPropertyCount = 0;
1413 return VK_SUCCESS;
1414 }
1415
1416 /* None supported at this time */
1417 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1418 }
1419
1420 VkResult radv_EnumerateDeviceLayerProperties(
1421 VkPhysicalDevice physicalDevice,
1422 uint32_t* pPropertyCount,
1423 VkLayerProperties* pProperties)
1424 {
1425 if (pProperties == NULL) {
1426 *pPropertyCount = 0;
1427 return VK_SUCCESS;
1428 }
1429
1430 /* None supported at this time */
1431 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1432 }
1433
1434 void radv_GetDeviceQueue2(
1435 VkDevice _device,
1436 const VkDeviceQueueInfo2* pQueueInfo,
1437 VkQueue* pQueue)
1438 {
1439 RADV_FROM_HANDLE(radv_device, device, _device);
1440
1441 *pQueue = radv_queue_to_handle(&device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex]);
1442 }
1443
1444 void radv_GetDeviceQueue(
1445 VkDevice _device,
1446 uint32_t queueFamilyIndex,
1447 uint32_t queueIndex,
1448 VkQueue* pQueue)
1449 {
1450 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1451 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1452 .queueFamilyIndex = queueFamilyIndex,
1453 .queueIndex = queueIndex
1454 };
1455
1456 radv_GetDeviceQueue2(_device, &info, pQueue);
1457 }
1458
1459 static void
1460 fill_geom_tess_rings(struct radv_queue *queue,
1461 uint32_t *map,
1462 bool add_sample_positions,
1463 uint32_t esgs_ring_size,
1464 struct radeon_winsys_bo *esgs_ring_bo,
1465 uint32_t gsvs_ring_size,
1466 struct radeon_winsys_bo *gsvs_ring_bo,
1467 uint32_t tess_factor_ring_size,
1468 uint32_t tess_offchip_ring_offset,
1469 uint32_t tess_offchip_ring_size,
1470 struct radeon_winsys_bo *tess_rings_bo)
1471 {
1472 uint64_t esgs_va = 0, gsvs_va = 0;
1473 uint64_t tess_va = 0, tess_offchip_va = 0;
1474 uint32_t *desc = &map[4];
1475
1476 if (esgs_ring_bo)
1477 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1478 if (gsvs_ring_bo)
1479 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1480 if (tess_rings_bo) {
1481 tess_va = radv_buffer_get_va(tess_rings_bo);
1482 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1483 }
1484
1485 /* stride 0, num records - size, add tid, swizzle, elsize4,
1486 index stride 64 */
1487 desc[0] = esgs_va;
1488 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1489 S_008F04_STRIDE(0) |
1490 S_008F04_SWIZZLE_ENABLE(true);
1491 desc[2] = esgs_ring_size;
1492 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1493 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1494 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1495 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1496 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1497 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1498 S_008F0C_ELEMENT_SIZE(1) |
1499 S_008F0C_INDEX_STRIDE(3) |
1500 S_008F0C_ADD_TID_ENABLE(true);
1501
1502 desc += 4;
1503 /* GS entry for ES->GS ring */
1504 /* stride 0, num records - size, elsize0,
1505 index stride 0 */
1506 desc[0] = esgs_va;
1507 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1508 S_008F04_STRIDE(0) |
1509 S_008F04_SWIZZLE_ENABLE(false);
1510 desc[2] = esgs_ring_size;
1511 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1512 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1513 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1514 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1515 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1516 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1517 S_008F0C_ELEMENT_SIZE(0) |
1518 S_008F0C_INDEX_STRIDE(0) |
1519 S_008F0C_ADD_TID_ENABLE(false);
1520
1521 desc += 4;
1522 /* VS entry for GS->VS ring */
1523 /* stride 0, num records - size, elsize0,
1524 index stride 0 */
1525 desc[0] = gsvs_va;
1526 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1527 S_008F04_STRIDE(0) |
1528 S_008F04_SWIZZLE_ENABLE(false);
1529 desc[2] = gsvs_ring_size;
1530 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1531 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1532 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1533 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1534 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1535 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1536 S_008F0C_ELEMENT_SIZE(0) |
1537 S_008F0C_INDEX_STRIDE(0) |
1538 S_008F0C_ADD_TID_ENABLE(false);
1539 desc += 4;
1540
1541 /* stride gsvs_itemsize, num records 64
1542 elsize 4, index stride 16 */
1543 /* shader will patch stride and desc[2] */
1544 desc[0] = gsvs_va;
1545 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1546 S_008F04_STRIDE(0) |
1547 S_008F04_SWIZZLE_ENABLE(true);
1548 desc[2] = 0;
1549 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1550 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1551 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1552 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1553 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1554 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1555 S_008F0C_ELEMENT_SIZE(1) |
1556 S_008F0C_INDEX_STRIDE(1) |
1557 S_008F0C_ADD_TID_ENABLE(true);
1558 desc += 4;
1559
1560 desc[0] = tess_va;
1561 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1562 S_008F04_STRIDE(0) |
1563 S_008F04_SWIZZLE_ENABLE(false);
1564 desc[2] = tess_factor_ring_size;
1565 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1566 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1567 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1568 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1569 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1570 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1571 S_008F0C_ELEMENT_SIZE(0) |
1572 S_008F0C_INDEX_STRIDE(0) |
1573 S_008F0C_ADD_TID_ENABLE(false);
1574 desc += 4;
1575
1576 desc[0] = tess_offchip_va;
1577 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1578 S_008F04_STRIDE(0) |
1579 S_008F04_SWIZZLE_ENABLE(false);
1580 desc[2] = tess_offchip_ring_size;
1581 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1582 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1583 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1584 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1585 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1586 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1587 S_008F0C_ELEMENT_SIZE(0) |
1588 S_008F0C_INDEX_STRIDE(0) |
1589 S_008F0C_ADD_TID_ENABLE(false);
1590 desc += 4;
1591
1592 /* add sample positions after all rings */
1593 memcpy(desc, queue->device->sample_locations_1x, 8);
1594 desc += 2;
1595 memcpy(desc, queue->device->sample_locations_2x, 16);
1596 desc += 4;
1597 memcpy(desc, queue->device->sample_locations_4x, 32);
1598 desc += 8;
1599 memcpy(desc, queue->device->sample_locations_8x, 64);
1600 desc += 16;
1601 memcpy(desc, queue->device->sample_locations_16x, 128);
1602 }
1603
1604 static unsigned
1605 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1606 {
1607 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1608 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1609 device->physical_device->rad_info.family != CHIP_STONEY;
1610 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1611 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1612 device->physical_device->rad_info.max_se;
1613 unsigned offchip_granularity;
1614 unsigned hs_offchip_param;
1615 switch (device->tess_offchip_block_dw_size) {
1616 default:
1617 assert(0);
1618 /* fall through */
1619 case 8192:
1620 offchip_granularity = V_03093C_X_8K_DWORDS;
1621 break;
1622 case 4096:
1623 offchip_granularity = V_03093C_X_4K_DWORDS;
1624 break;
1625 }
1626
1627 switch (device->physical_device->rad_info.chip_class) {
1628 case SI:
1629 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1630 break;
1631 case CIK:
1632 case VI:
1633 case GFX9:
1634 default:
1635 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1636 break;
1637 }
1638
1639 *max_offchip_buffers_p = max_offchip_buffers;
1640 if (device->physical_device->rad_info.chip_class >= CIK) {
1641 if (device->physical_device->rad_info.chip_class >= VI)
1642 --max_offchip_buffers;
1643 hs_offchip_param =
1644 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1645 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1646 } else {
1647 hs_offchip_param =
1648 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1649 }
1650 return hs_offchip_param;
1651 }
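
/* Worked example, illustrative only (the helper is not called anywhere):
 * a 4-SE VI part with double off-chip buffers gets 128 * 4 = 512 buffers,
 * clamped to 508 above; VI and newer then program OFFCHIP_BUFFERING with
 * N - 1, so the field holds 507 alongside the 8K-dword granularity. */
static inline unsigned
radv_hs_offchip_param_example(void)
{
	unsigned max_offchip_buffers = MIN2(128 * 4, 508); /* = 508 */
	return S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers - 1) |
	       S_03093C_OFFCHIP_GRANULARITY(V_03093C_X_8K_DWORDS);
}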
1652
1653 static VkResult
1654 radv_get_preamble_cs(struct radv_queue *queue,
1655 uint32_t scratch_size,
1656 uint32_t compute_scratch_size,
1657 uint32_t esgs_ring_size,
1658 uint32_t gsvs_ring_size,
1659 bool needs_tess_rings,
1660 bool needs_sample_positions,
1661 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1662 struct radeon_winsys_cs **initial_preamble_cs,
1663 struct radeon_winsys_cs **continue_preamble_cs)
1664 {
1665 struct radeon_winsys_bo *scratch_bo = NULL;
1666 struct radeon_winsys_bo *descriptor_bo = NULL;
1667 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1668 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1669 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1670 struct radeon_winsys_bo *tess_rings_bo = NULL;
1671 struct radeon_winsys_cs *dest_cs[3] = {0};
1672 bool add_tess_rings = false, add_sample_positions = false;
1673 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1674 unsigned max_offchip_buffers;
1675 unsigned hs_offchip_param = 0;
1676 unsigned tess_offchip_ring_offset;
1677 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1678 if (!queue->has_tess_rings) {
1679 if (needs_tess_rings)
1680 add_tess_rings = true;
1681 }
1682 if (!queue->has_sample_positions) {
1683 if (needs_sample_positions)
1684 add_sample_positions = true;
1685 }
1686 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1687 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1688 &max_offchip_buffers);
1689 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
1690 tess_offchip_ring_size = max_offchip_buffers *
1691 queue->device->tess_offchip_block_dw_size * 4;
1692
1693 if (scratch_size <= queue->scratch_size &&
1694 compute_scratch_size <= queue->compute_scratch_size &&
1695 esgs_ring_size <= queue->esgs_ring_size &&
1696 gsvs_ring_size <= queue->gsvs_ring_size &&
1697 !add_tess_rings && !add_sample_positions &&
1698 queue->initial_preamble_cs) {
1699 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1700 *initial_preamble_cs = queue->initial_preamble_cs;
1701 *continue_preamble_cs = queue->continue_preamble_cs;
1702 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1703 *continue_preamble_cs = NULL;
1704 return VK_SUCCESS;
1705 }
1706
1707 if (scratch_size > queue->scratch_size) {
1708 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1709 scratch_size,
1710 4096,
1711 RADEON_DOMAIN_VRAM,
1712 ring_bo_flags);
1713 if (!scratch_bo)
1714 goto fail;
1715 } else
1716 scratch_bo = queue->scratch_bo;
1717
1718 if (compute_scratch_size > queue->compute_scratch_size) {
1719 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1720 compute_scratch_size,
1721 4096,
1722 RADEON_DOMAIN_VRAM,
1723 ring_bo_flags);
1724 if (!compute_scratch_bo)
1725 goto fail;
1726
1727 } else
1728 compute_scratch_bo = queue->compute_scratch_bo;
1729
1730 if (esgs_ring_size > queue->esgs_ring_size) {
1731 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1732 esgs_ring_size,
1733 4096,
1734 RADEON_DOMAIN_VRAM,
1735 ring_bo_flags);
1736 if (!esgs_ring_bo)
1737 goto fail;
1738 } else {
1739 esgs_ring_bo = queue->esgs_ring_bo;
1740 esgs_ring_size = queue->esgs_ring_size;
1741 }
1742
1743 if (gsvs_ring_size > queue->gsvs_ring_size) {
1744 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1745 gsvs_ring_size,
1746 4096,
1747 RADEON_DOMAIN_VRAM,
1748 ring_bo_flags);
1749 if (!gsvs_ring_bo)
1750 goto fail;
1751 } else {
1752 gsvs_ring_bo = queue->gsvs_ring_bo;
1753 gsvs_ring_size = queue->gsvs_ring_size;
1754 }
1755
1756 if (add_tess_rings) {
1757 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
1758 tess_offchip_ring_offset + tess_offchip_ring_size,
1759 256,
1760 RADEON_DOMAIN_VRAM,
1761 ring_bo_flags);
1762 if (!tess_rings_bo)
1763 goto fail;
1764 } else {
1765 tess_rings_bo = queue->tess_rings_bo;
1766 }
1767
1768 if (scratch_bo != queue->scratch_bo ||
1769 esgs_ring_bo != queue->esgs_ring_bo ||
1770 gsvs_ring_bo != queue->gsvs_ring_bo ||
1771 tess_rings_bo != queue->tess_rings_bo ||
1772 add_sample_positions) {
1773 uint32_t size = 0;
1774 if (gsvs_ring_bo || esgs_ring_bo ||
1775 tess_rings_bo || add_sample_positions) {
1776 size = 112; /* scratch: 2 dwords + 2 dwords padding, then 6 ring descriptors * 4 dwords = 112 bytes */
1777 if (add_sample_positions)
1778 size += 256; /* 1+2+4+8+16 sample positions * 8 bytes = 248 bytes, padded to 256. */
1779 }
1780 else if (scratch_bo)
1781 size = 8; /* 2 dword */
1782
1783 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1784 size,
1785 4096,
1786 RADEON_DOMAIN_VRAM,
1787 RADEON_FLAG_CPU_ACCESS |
1788 RADEON_FLAG_NO_INTERPROCESS_SHARING |
1789 RADEON_FLAG_READ_ONLY);
1790 if (!descriptor_bo)
1791 goto fail;
1792 } else
1793 descriptor_bo = queue->descriptor_bo;
1794
1795 for(int i = 0; i < 3; ++i) {
1796 struct radeon_winsys_cs *cs = NULL;
1797 cs = queue->device->ws->cs_create(queue->device->ws,
1798 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1799 if (!cs)
1800 goto fail;
1801
1802 dest_cs[i] = cs;
1803
1804 if (scratch_bo)
1805 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1806
1807 if (esgs_ring_bo)
1808 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1809
1810 if (gsvs_ring_bo)
1811 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1812
1813 if (tess_rings_bo)
1814 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8);
1815
1816 if (descriptor_bo)
1817 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1818
1819 if (descriptor_bo != queue->descriptor_bo) {
1820 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1821
1822 if (scratch_bo) {
1823 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1824 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1825 S_008F04_SWIZZLE_ENABLE(1);
1826 map[0] = scratch_va;
1827 map[1] = rsrc1;
1828 }
1829
1830 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
1831 add_sample_positions)
1832 fill_geom_tess_rings(queue, map, add_sample_positions,
1833 esgs_ring_size, esgs_ring_bo,
1834 gsvs_ring_size, gsvs_ring_bo,
1835 tess_factor_ring_size,
1836 tess_offchip_ring_offset,
1837 tess_offchip_ring_size,
1838 tess_rings_bo);
1839
1840 queue->device->ws->buffer_unmap(descriptor_bo);
1841 }
1842
1843 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
1844 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1845 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1846 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1847 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1848 }
1849
1850 if (esgs_ring_bo || gsvs_ring_bo) {
1851 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1852 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1853 radeon_emit(cs, esgs_ring_size >> 8);
1854 radeon_emit(cs, gsvs_ring_size >> 8);
1855 } else {
1856 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1857 radeon_emit(cs, esgs_ring_size >> 8);
1858 radeon_emit(cs, gsvs_ring_size >> 8);
1859 }
1860 }
1861
1862 if (tess_rings_bo) {
1863 uint64_t tf_va = radv_buffer_get_va(tess_rings_bo);
1864 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1865 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1866 S_030938_SIZE(tess_factor_ring_size / 4));
1867 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1868 tf_va >> 8);
1869 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1870 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1871 tf_va >> 40);
1872 }
1873 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1874 } else {
1875 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1876 S_008988_SIZE(tess_factor_ring_size / 4));
1877 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1878 tf_va >> 8);
1879 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1880 hs_offchip_param);
1881 }
1882 }
1883
1884 if (descriptor_bo) {
1885 uint64_t va = radv_buffer_get_va(descriptor_bo);
1886 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1887 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1888 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1889 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1890 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1891
1892 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1893 radeon_set_sh_reg_seq(cs, regs[i], 2);
1894 radeon_emit(cs, va);
1895 radeon_emit(cs, va >> 32);
1896 }
1897 } else {
1898 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1899 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1900 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1901 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1902 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1903 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1904
1905 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1906 radeon_set_sh_reg_seq(cs, regs[i], 2);
1907 radeon_emit(cs, va);
1908 radeon_emit(cs, va >> 32);
1909 }
1910 }
1911 }
1912
1913 if (compute_scratch_bo) {
1914 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1915 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1916 S_008F04_SWIZZLE_ENABLE(1);
1917
1918 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1919
1920 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1921 radeon_emit(cs, scratch_va);
1922 radeon_emit(cs, rsrc1);
1923 }
1924
1925 if (i == 0) {
1926 si_cs_emit_cache_flush(cs,
1927 queue->device->physical_device->rad_info.chip_class,
1928 NULL, 0,
1929 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1930 queue->device->physical_device->rad_info.chip_class >= CIK,
1931 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1932 RADV_CMD_FLAG_INV_ICACHE |
1933 RADV_CMD_FLAG_INV_SMEM_L1 |
1934 RADV_CMD_FLAG_INV_VMEM_L1 |
1935 RADV_CMD_FLAG_INV_GLOBAL_L2);
1936 } else if (i == 1) {
1937 si_cs_emit_cache_flush(cs,
1938 queue->device->physical_device->rad_info.chip_class,
1939 NULL, 0,
1940 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1941 queue->device->physical_device->rad_info.chip_class >= CIK,
1942 RADV_CMD_FLAG_INV_ICACHE |
1943 RADV_CMD_FLAG_INV_SMEM_L1 |
1944 RADV_CMD_FLAG_INV_VMEM_L1 |
1945 RADV_CMD_FLAG_INV_GLOBAL_L2);
1946 }
1947
1948 if (!queue->device->ws->cs_finalize(cs))
1949 goto fail;
1950 }
1951
1952 if (queue->initial_full_flush_preamble_cs)
1953 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1954
1955 if (queue->initial_preamble_cs)
1956 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1957
1958 if (queue->continue_preamble_cs)
1959 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1960
1961 queue->initial_full_flush_preamble_cs = dest_cs[0];
1962 queue->initial_preamble_cs = dest_cs[1];
1963 queue->continue_preamble_cs = dest_cs[2];
1964
1965 if (scratch_bo != queue->scratch_bo) {
1966 if (queue->scratch_bo)
1967 queue->device->ws->buffer_destroy(queue->scratch_bo);
1968 queue->scratch_bo = scratch_bo;
1969 queue->scratch_size = scratch_size;
1970 }
1971
1972 if (compute_scratch_bo != queue->compute_scratch_bo) {
1973 if (queue->compute_scratch_bo)
1974 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1975 queue->compute_scratch_bo = compute_scratch_bo;
1976 queue->compute_scratch_size = compute_scratch_size;
1977 }
1978
1979 if (esgs_ring_bo != queue->esgs_ring_bo) {
1980 if (queue->esgs_ring_bo)
1981 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1982 queue->esgs_ring_bo = esgs_ring_bo;
1983 queue->esgs_ring_size = esgs_ring_size;
1984 }
1985
1986 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1987 if (queue->gsvs_ring_bo)
1988 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1989 queue->gsvs_ring_bo = gsvs_ring_bo;
1990 queue->gsvs_ring_size = gsvs_ring_size;
1991 }
1992
1993 if (tess_rings_bo != queue->tess_rings_bo) {
1994 queue->tess_rings_bo = tess_rings_bo;
1995 queue->has_tess_rings = true;
1996 }
1997
1998 if (descriptor_bo != queue->descriptor_bo) {
1999 if (queue->descriptor_bo)
2000 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2001
2002 queue->descriptor_bo = descriptor_bo;
2003 }
2004
2005 if (add_sample_positions)
2006 queue->has_sample_positions = true;
2007
2008 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2009 *initial_preamble_cs = queue->initial_preamble_cs;
2010 *continue_preamble_cs = queue->continue_preamble_cs;
2011 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2012 *continue_preamble_cs = NULL;
2013 return VK_SUCCESS;
2014 fail:
2015 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2016 if (dest_cs[i])
2017 queue->device->ws->cs_destroy(dest_cs[i]);
2018 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2019 queue->device->ws->buffer_destroy(descriptor_bo);
2020 if (scratch_bo && scratch_bo != queue->scratch_bo)
2021 queue->device->ws->buffer_destroy(scratch_bo);
2022 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2023 queue->device->ws->buffer_destroy(compute_scratch_bo);
2024 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2025 queue->device->ws->buffer_destroy(esgs_ring_bo);
2026 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2027 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2028 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2029 queue->device->ws->buffer_destroy(tess_rings_bo);
2030 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2031 }
2032
2033 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
2034 int num_sems,
2035 const VkSemaphore *sems,
2036 VkFence _fence,
2037 bool reset_temp)
2038 {
2039 int syncobj_idx = 0, sem_idx = 0;
2040
2041 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2042 return VK_SUCCESS;
2043
2044 for (uint32_t i = 0; i < num_sems; i++) {
2045 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2046
2047 if (sem->temp_syncobj || sem->syncobj)
2048 counts->syncobj_count++;
2049 else
2050 counts->sem_count++;
2051 }
2052
2053 if (_fence != VK_NULL_HANDLE) {
2054 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2055 if (fence->temp_syncobj || fence->syncobj)
2056 counts->syncobj_count++;
2057 }
2058
2059 if (counts->syncobj_count) {
2060 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2061 if (!counts->syncobj)
2062 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2063 }
2064
2065 if (counts->sem_count) {
2066 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2067 if (!counts->sem) {
2068 free(counts->syncobj);
2069 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2070 }
2071 }
2072
2073 for (uint32_t i = 0; i < num_sems; i++) {
2074 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2075
2076 if (sem->temp_syncobj) {
2077 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2078 }
2079 else if (sem->syncobj)
2080 counts->syncobj[syncobj_idx++] = sem->syncobj;
2081 else {
2082 assert(sem->sem);
2083 counts->sem[sem_idx++] = sem->sem;
2084 }
2085 }
2086
2087 if (_fence != VK_NULL_HANDLE) {
2088 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2089 if (fence->temp_syncobj)
2090 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2091 else if (fence->syncobj)
2092 counts->syncobj[syncobj_idx++] = fence->syncobj;
2093 }
2094
2095 return VK_SUCCESS;
2096 }
2097
2098 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2099 {
2100 free(sem_info->wait.syncobj);
2101 free(sem_info->wait.sem);
2102 free(sem_info->signal.syncobj);
2103 free(sem_info->signal.sem);
2104 }
2105
2106
2107 static void radv_free_temp_syncobjs(struct radv_device *device,
2108 int num_sems,
2109 const VkSemaphore *sems)
2110 {
2111 for (uint32_t i = 0; i < num_sems; i++) {
2112 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2113
2114 if (sem->temp_syncobj) {
2115 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2116 sem->temp_syncobj = 0;
2117 }
2118 }
2119 }
2120
2121 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2122 int num_wait_sems,
2123 const VkSemaphore *wait_sems,
2124 int num_signal_sems,
2125 const VkSemaphore *signal_sems,
2126 VkFence fence)
2127 {
2128 VkResult ret;
2129 memset(sem_info, 0, sizeof(*sem_info));
2130
2131 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2132 if (ret)
2133 return ret;
2134 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
2135 if (ret)
2136 radv_free_sem_info(sem_info);
2137
2138 /* Callers can override these; see the sketch after this function. */
2139 sem_info->cs_emit_wait = true;
2140 sem_info->cs_emit_signal = true;
2141 return ret;
2142 }
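
/* Sketch of the override mentioned above; not part of the driver and the
 * helper name is made up. radv_QueueSubmit does the equivalent inline when
 * it splits a submission into chunks: only the first chunk waits on the
 * semaphores and only the last chunk signals them. */
static inline void
radv_sem_info_select_chunk_sketch(struct radv_winsys_sem_info *sem_info,
				  uint32_t first_cs, uint32_t cs_count,
				  uint32_t total_cs)
{
	sem_info->cs_emit_wait = first_cs == 0;
	sem_info->cs_emit_signal = first_cs + cs_count == total_cs;
}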
2143
2144 /* Signals the fence as soon as all work currently submitted to the queue is done. */
2145 static VkResult radv_signal_fence(struct radv_queue *queue,
2146 struct radv_fence *fence)
2147 {
2148 int ret;
2149 VkResult result;
2150 struct radv_winsys_sem_info sem_info;
2151
2152 result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2153 radv_fence_to_handle(fence));
2154 if (result != VK_SUCCESS)
2155 return result;
2156
2157 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2158 &queue->device->empty_cs[queue->queue_family_index],
2159 1, NULL, NULL, &sem_info,
2160 false, fence->fence);
2161 radv_free_sem_info(&sem_info);
2162
2163 /* TODO: find a better error */
2164 if (ret)
2165 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2166
2167 return VK_SUCCESS;
2168 }
2169
2170 VkResult radv_QueueSubmit(
2171 VkQueue _queue,
2172 uint32_t submitCount,
2173 const VkSubmitInfo* pSubmits,
2174 VkFence _fence)
2175 {
2176 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2177 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2178 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2179 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2180 int ret;
2181 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2182 uint32_t scratch_size = 0;
2183 uint32_t compute_scratch_size = 0;
2184 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2185 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2186 VkResult result;
2187 bool fence_emitted = false;
2188 bool tess_rings_needed = false;
2189 bool sample_positions_needed = false;
2190
2191 /* Do this first so failing to allocate scratch buffers can't result in
2192 * partially executed submissions. */
2193 for (uint32_t i = 0; i < submitCount; i++) {
2194 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2195 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2196 pSubmits[i].pCommandBuffers[j]);
2197
2198 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2199 compute_scratch_size = MAX2(compute_scratch_size,
2200 cmd_buffer->compute_scratch_size_needed);
2201 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2202 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2203 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2204 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2205 }
2206 }
2207
2208 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2209 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2210 sample_positions_needed, &initial_flush_preamble_cs,
2211 &initial_preamble_cs, &continue_preamble_cs);
2212 if (result != VK_SUCCESS)
2213 return result;
2214
2215 for (uint32_t i = 0; i < submitCount; i++) {
2216 struct radeon_winsys_cs **cs_array;
2217 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2218 bool can_patch = true;
2219 uint32_t advance;
2220 struct radv_winsys_sem_info sem_info;
2221
2222 result = radv_alloc_sem_info(&sem_info,
2223 pSubmits[i].waitSemaphoreCount,
2224 pSubmits[i].pWaitSemaphores,
2225 pSubmits[i].signalSemaphoreCount,
2226 pSubmits[i].pSignalSemaphores,
2227 _fence);
2228 if (result != VK_SUCCESS)
2229 return result;
2230
2231 if (!pSubmits[i].commandBufferCount) {
2232 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2233 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2234 &queue->device->empty_cs[queue->queue_family_index],
2235 1, NULL, NULL,
2236 &sem_info,
2237 false, base_fence);
2238 if (ret) {
2239 radv_loge("failed to submit CS %d\n", i);
2240 abort();
2241 }
2242 fence_emitted = true;
2243 }
2244 radv_free_sem_info(&sem_info);
2245 continue;
2246 }
2247
2248 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2249 		(pSubmits[i].commandBufferCount));
 if (!cs_array) {
 	radv_free_sem_info(&sem_info);
 	return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 }
2250
2251 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2252 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2253 pSubmits[i].pCommandBuffers[j]);
2254 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2255
2256 cs_array[j] = cmd_buffer->cs;
2257 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2258 can_patch = false;
2259
2260 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2261 }
2262
2263 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2264 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2265 advance = MIN2(max_cs_submission,
2266 pSubmits[i].commandBufferCount - j);
2267
2268 if (queue->device->trace_bo)
2269 *queue->device->trace_id_ptr = 0;
2270
2271 sem_info.cs_emit_wait = j == 0;
2272 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2273
2274 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2275 advance, initial_preamble, continue_preamble_cs,
2276 &sem_info,
2277 can_patch, base_fence);
2278
2279 if (ret) {
2280 radv_loge("failed to submit CS %d\n", i);
2281 abort();
2282 }
2283 fence_emitted = true;
2284 if (queue->device->trace_bo) {
2285 radv_check_gpu_hangs(queue, cs_array[j]);
2286 }
2287 }
2288
2289 radv_free_temp_syncobjs(queue->device,
2290 pSubmits[i].waitSemaphoreCount,
2291 pSubmits[i].pWaitSemaphores);
2292 radv_free_sem_info(&sem_info);
2293 free(cs_array);
2294 }
2295
2296 if (fence) {
2297 if (!fence_emitted) {
2298 radv_signal_fence(queue, fence);
2299 }
2300 fence->submitted = true;
2301 }
2302
2303 return VK_SUCCESS;
2304 }
2305
2306 VkResult radv_QueueWaitIdle(
2307 VkQueue _queue)
2308 {
2309 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2310
2311 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2312 radv_queue_family_to_ring(queue->queue_family_index),
2313 queue->queue_idx);
2314 return VK_SUCCESS;
2315 }
2316
2317 VkResult radv_DeviceWaitIdle(
2318 VkDevice _device)
2319 {
2320 RADV_FROM_HANDLE(radv_device, device, _device);
2321
2322 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2323 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2324 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2325 }
2326 }
2327 return VK_SUCCESS;
2328 }
2329
2330 VkResult radv_EnumerateInstanceExtensionProperties(
2331 const char* pLayerName,
2332 uint32_t* pPropertyCount,
2333 VkExtensionProperties* pProperties)
2334 {
2335 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2336
2337 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2338 if (radv_supported_instance_extensions.extensions[i]) {
2339 vk_outarray_append(&out, prop) {
2340 *prop = radv_instance_extensions[i];
2341 }
2342 }
2343 }
2344
2345 return vk_outarray_status(&out);
2346 }
2347
2348 VkResult radv_EnumerateDeviceExtensionProperties(
2349 VkPhysicalDevice physicalDevice,
2350 const char* pLayerName,
2351 uint32_t* pPropertyCount,
2352 VkExtensionProperties* pProperties)
2353 {
2354 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2355 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2356
2357 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2358 if (device->supported_extensions.extensions[i]) {
2359 vk_outarray_append(&out, prop) {
2360 *prop = radv_device_extensions[i];
2361 }
2362 }
2363 }
2364
2365 return vk_outarray_status(&out);
2366 }
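
/*
 * The two enumeration entry points above follow the standard Vulkan
 * two-call idiom, which VK_OUTARRAY_MAKE/vk_outarray_append implement:
 * with a NULL pProperties they only count, otherwise they write up to
 * *pPropertyCount entries and return VK_INCOMPLETE when the array is too
 * small. Application-side sketch (illustrative, not driver code):
 *
 *    uint32_t count = 0;
 *    vkEnumerateDeviceExtensionProperties(pdev, NULL, &count, NULL);
 *    VkExtensionProperties *props = calloc(count, sizeof(*props));
 *    vkEnumerateDeviceExtensionProperties(pdev, NULL, &count, props);
 */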
2367
2368 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2369 VkInstance _instance,
2370 const char* pName)
2371 {
2372 RADV_FROM_HANDLE(radv_instance, instance, _instance);
2373
2374 return radv_lookup_entrypoint_checked(pName,
2375 instance ? instance->apiVersion : 0,
2376 instance ? &instance->enabled_extensions : NULL,
2377 NULL);
2378 }
2379
2380 /* The loader wants us to expose a second GetInstanceProcAddr function
2381 * to work around certain LD_PRELOAD issues seen in apps.
2382 */
2383 PUBLIC
2384 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2385 VkInstance instance,
2386 const char* pName);
2387
2388 PUBLIC
2389 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2390 VkInstance instance,
2391 const char* pName)
2392 {
2393 return radv_GetInstanceProcAddr(instance, pName);
2394 }
2395
2396 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2397 VkDevice _device,
2398 const char* pName)
2399 {
2400 RADV_FROM_HANDLE(radv_device, device, _device);
2401
2402 return radv_lookup_entrypoint_checked(pName,
2403 device->instance->apiVersion,
2404 &device->instance->enabled_extensions,
2405 &device->enabled_extensions);
2406 }
2407
2408 bool radv_get_memory_fd(struct radv_device *device,
2409 struct radv_device_memory *memory,
2410 int *pFD)
2411 {
2412 struct radeon_bo_metadata metadata;
2413
2414 if (memory->image) {
2415 radv_init_metadata(device, memory->image, &metadata);
2416 device->ws->buffer_set_metadata(memory->bo, &metadata);
2417 }
2418
2419 return device->ws->buffer_get_fd(device->ws, memory->bo,
2420 pFD);
2421 }
2422
2423 static VkResult radv_alloc_memory(struct radv_device *device,
2424 const VkMemoryAllocateInfo* pAllocateInfo,
2425 const VkAllocationCallbacks* pAllocator,
2426 VkDeviceMemory* pMem)
2427 {
2428 struct radv_device_memory *mem;
2429 VkResult result;
2430 enum radeon_bo_domain domain;
2431 uint32_t flags = 0;
2432 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2433
2434 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2435
2436 if (pAllocateInfo->allocationSize == 0) {
2437 /* Apparently, this is allowed */
2438 *pMem = VK_NULL_HANDLE;
2439 return VK_SUCCESS;
2440 }
2441
2442 const VkImportMemoryFdInfoKHR *import_info =
2443 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2444 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2445 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2446 const VkExportMemoryAllocateInfoKHR *export_info =
2447 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2448 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2449 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2450
2451 const struct wsi_memory_allocate_info *wsi_info =
2452 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2453
2454 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2455 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2456 if (mem == NULL)
2457 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2458
2459 if (wsi_info && wsi_info->implicit_sync)
2460 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2461
2462 if (dedicate_info) {
2463 mem->image = radv_image_from_handle(dedicate_info->image);
2464 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2465 } else {
2466 mem->image = NULL;
2467 mem->buffer = NULL;
2468 }
2469
2470 mem->user_ptr = NULL;
2471
2472 if (import_info) {
2473 assert(import_info->handleType ==
2474 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2475 import_info->handleType ==
2476 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2477 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2478 NULL, NULL);
2479 if (!mem->bo) {
2480 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2481 goto fail;
2482 } else {
2483 close(import_info->fd);
2484 goto out_success;
2485 }
2486 }
2487
2488 if (host_ptr_info) {
2489 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2490 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2491 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2492 pAllocateInfo->allocationSize);
2493 if (!mem->bo) {
2494 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2495 goto fail;
2496 } else {
2497 mem->user_ptr = host_ptr_info->pHostPointer;
2498 goto out_success;
2499 }
2500 }
2501
2502 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2503 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2504 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2505 domain = RADEON_DOMAIN_GTT;
2506 else
2507 domain = RADEON_DOMAIN_VRAM;
2508
2509 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2510 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2511 else
2512 flags |= RADEON_FLAG_CPU_ACCESS;
2513
2514 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2515 flags |= RADEON_FLAG_GTT_WC;
2516
2517 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2518 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2519
2520 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2521 domain, flags);
2522
2523 if (!mem->bo) {
2524 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2525 goto fail;
2526 }
2527 mem->type_index = mem_type_index;
2528 out_success:
2529 *pMem = radv_device_memory_to_handle(mem);
2530
2531 return VK_SUCCESS;
2532
2533 fail:
2534 vk_free2(&device->alloc, pAllocator, mem);
2535
2536 return result;
2537 }
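
/* Application-side sketch (illustrative, not driver code) of the dma-buf
 * import path handled above: chaining a VkImportMemoryFdInfoKHR makes
 * radv_alloc_memory adopt the fd via buffer_from_fd instead of creating a
 * new BO; on success the driver closes the fd.
 *
 *    VkImportMemoryFdInfoKHR import = {
 *        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *        .fd = fd,
 *    };
 *    VkMemoryAllocateInfo alloc = {
 *        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *        .pNext = &import,
 *        .allocationSize = size,
 *        .memoryTypeIndex = type_index,
 *    };
 *    vkAllocateMemory(device, &alloc, NULL, &mem);
 */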
2538
2539 VkResult radv_AllocateMemory(
2540 VkDevice _device,
2541 const VkMemoryAllocateInfo* pAllocateInfo,
2542 const VkAllocationCallbacks* pAllocator,
2543 VkDeviceMemory* pMem)
2544 {
2545 RADV_FROM_HANDLE(radv_device, device, _device);
2546 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2547 }
2548
2549 void radv_FreeMemory(
2550 VkDevice _device,
2551 VkDeviceMemory _mem,
2552 const VkAllocationCallbacks* pAllocator)
2553 {
2554 RADV_FROM_HANDLE(radv_device, device, _device);
2555 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2556
2557 if (mem == NULL)
2558 return;
2559
2560 device->ws->buffer_destroy(mem->bo);
2561 mem->bo = NULL;
2562
2563 vk_free2(&device->alloc, pAllocator, mem);
2564 }
2565
2566 VkResult radv_MapMemory(
2567 VkDevice _device,
2568 VkDeviceMemory _memory,
2569 VkDeviceSize offset,
2570 VkDeviceSize size,
2571 VkMemoryMapFlags flags,
2572 void** ppData)
2573 {
2574 RADV_FROM_HANDLE(radv_device, device, _device);
2575 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2576
2577 if (mem == NULL) {
2578 *ppData = NULL;
2579 return VK_SUCCESS;
2580 }
2581
2582 if (mem->user_ptr)
2583 *ppData = mem->user_ptr;
2584 else
2585 *ppData = device->ws->buffer_map(mem->bo);
2586
2587 if (*ppData) {
2588 *ppData += offset;
2589 return VK_SUCCESS;
2590 }
2591
2592 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2593 }
2594
2595 void radv_UnmapMemory(
2596 VkDevice _device,
2597 VkDeviceMemory _memory)
2598 {
2599 RADV_FROM_HANDLE(radv_device, device, _device);
2600 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2601
2602 if (mem == NULL)
2603 return;
2604
2605 if (mem->user_ptr == NULL)
2606 device->ws->buffer_unmap(mem->bo);
2607 }
2608
2609 VkResult radv_FlushMappedMemoryRanges(
2610 VkDevice _device,
2611 uint32_t memoryRangeCount,
2612 const VkMappedMemoryRange* pMemoryRanges)
2613 {
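	/* All host-visible memory types radv exposes are also HOST_COHERENT,
	 * so flushing is a no-op; the same holds for
	 * radv_InvalidateMappedMemoryRanges below. */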
2614 return VK_SUCCESS;
2615 }
2616
2617 VkResult radv_InvalidateMappedMemoryRanges(
2618 VkDevice _device,
2619 uint32_t memoryRangeCount,
2620 const VkMappedMemoryRange* pMemoryRanges)
2621 {
2622 return VK_SUCCESS;
2623 }
2624
2625 void radv_GetBufferMemoryRequirements(
2626 VkDevice _device,
2627 VkBuffer _buffer,
2628 VkMemoryRequirements* pMemoryRequirements)
2629 {
2630 RADV_FROM_HANDLE(radv_device, device, _device);
2631 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2632
2633 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2634
2635 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2636 pMemoryRequirements->alignment = 4096;
2637 else
2638 pMemoryRequirements->alignment = 16;
2639
2640 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2641 }
2642
2643 void radv_GetBufferMemoryRequirements2(
2644 VkDevice device,
2645 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2646 VkMemoryRequirements2KHR* pMemoryRequirements)
2647 {
2648 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2649 &pMemoryRequirements->memoryRequirements);
2650 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2651 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2652 switch (ext->sType) {
2653 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2654 VkMemoryDedicatedRequirementsKHR *req =
2655 (VkMemoryDedicatedRequirementsKHR *) ext;
2656 req->requiresDedicatedAllocation = buffer->shareable;
2657 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2658 break;
2659 }
2660 default:
2661 break;
2662 }
2663 }
2664 }
2665
2666 void radv_GetImageMemoryRequirements(
2667 VkDevice _device,
2668 VkImage _image,
2669 VkMemoryRequirements* pMemoryRequirements)
2670 {
2671 RADV_FROM_HANDLE(radv_device, device, _device);
2672 RADV_FROM_HANDLE(radv_image, image, _image);
2673
2674 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2675
2676 pMemoryRequirements->size = image->size;
2677 pMemoryRequirements->alignment = image->alignment;
2678 }
2679
2680 void radv_GetImageMemoryRequirements2(
2681 VkDevice device,
2682 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2683 VkMemoryRequirements2KHR* pMemoryRequirements)
2684 {
2685 radv_GetImageMemoryRequirements(device, pInfo->image,
2686 &pMemoryRequirements->memoryRequirements);
2687
2688 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2689
2690 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2691 switch (ext->sType) {
2692 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2693 VkMemoryDedicatedRequirementsKHR *req =
2694 (VkMemoryDedicatedRequirementsKHR *) ext;
2695 req->requiresDedicatedAllocation = image->shareable;
2696 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2697 break;
2698 }
2699 default:
2700 break;
2701 }
2702 }
2703 }
2704
2705 void radv_GetImageSparseMemoryRequirements(
2706 VkDevice device,
2707 VkImage image,
2708 uint32_t* pSparseMemoryRequirementCount,
2709 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2710 {
2711 stub();
2712 }
2713
2714 void radv_GetImageSparseMemoryRequirements2(
2715 VkDevice device,
2716 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2717 uint32_t* pSparseMemoryRequirementCount,
2718 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2719 {
2720 stub();
2721 }
2722
2723 void radv_GetDeviceMemoryCommitment(
2724 VkDevice device,
2725 VkDeviceMemory memory,
2726 VkDeviceSize* pCommittedMemoryInBytes)
2727 {
2728 *pCommittedMemoryInBytes = 0;
2729 }
2730
2731 VkResult radv_BindBufferMemory2(VkDevice device,
2732 uint32_t bindInfoCount,
2733 const VkBindBufferMemoryInfoKHR *pBindInfos)
2734 {
2735 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2736 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2737 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2738
2739 if (mem) {
2740 buffer->bo = mem->bo;
2741 buffer->offset = pBindInfos[i].memoryOffset;
2742 } else {
2743 buffer->bo = NULL;
2744 }
2745 }
2746 return VK_SUCCESS;
2747 }
2748
2749 VkResult radv_BindBufferMemory(
2750 VkDevice device,
2751 VkBuffer buffer,
2752 VkDeviceMemory memory,
2753 VkDeviceSize memoryOffset)
2754 {
2755 const VkBindBufferMemoryInfoKHR info = {
2756 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2757 .buffer = buffer,
2758 .memory = memory,
2759 .memoryOffset = memoryOffset
2760 };
2761
2762 return radv_BindBufferMemory2(device, 1, &info);
2763 }
2764
2765 VkResult radv_BindImageMemory2(VkDevice device,
2766 uint32_t bindInfoCount,
2767 const VkBindImageMemoryInfoKHR *pBindInfos)
2768 {
2769 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2770 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2771 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2772
2773 if (mem) {
2774 image->bo = mem->bo;
2775 image->offset = pBindInfos[i].memoryOffset;
2776 } else {
2777 image->bo = NULL;
2778 image->offset = 0;
2779 }
2780 }
2781 return VK_SUCCESS;
2782 }
2783
2784
2785 VkResult radv_BindImageMemory(
2786 VkDevice device,
2787 VkImage image,
2788 VkDeviceMemory memory,
2789 VkDeviceSize memoryOffset)
2790 {
2791 const VkBindImageMemoryInfoKHR info = {
2792 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2793 .image = image,
2794 .memory = memory,
2795 .memoryOffset = memoryOffset
2796 };
2797
2798 return radv_BindImageMemory2(device, 1, &info);
2799 }
2800
2801
2802 static void
2803 radv_sparse_buffer_bind_memory(struct radv_device *device,
2804 const VkSparseBufferMemoryBindInfo *bind)
2805 {
2806 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2807
2808 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2809 struct radv_device_memory *mem = NULL;
2810
2811 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2812 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2813
2814 device->ws->buffer_virtual_bind(buffer->bo,
2815 bind->pBinds[i].resourceOffset,
2816 bind->pBinds[i].size,
2817 mem ? mem->bo : NULL,
2818 bind->pBinds[i].memoryOffset);
2819 }
2820 }
2821
2822 static void
2823 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2824 const VkSparseImageOpaqueMemoryBindInfo *bind)
2825 {
2826 RADV_FROM_HANDLE(radv_image, image, bind->image);
2827
2828 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2829 struct radv_device_memory *mem = NULL;
2830
2831 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2832 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2833
2834 device->ws->buffer_virtual_bind(image->bo,
2835 bind->pBinds[i].resourceOffset,
2836 bind->pBinds[i].size,
2837 mem ? mem->bo : NULL,
2838 bind->pBinds[i].memoryOffset);
2839 }
2840 }
2841
2842 VkResult radv_QueueBindSparse(
2843 VkQueue _queue,
2844 uint32_t bindInfoCount,
2845 const VkBindSparseInfo* pBindInfo,
2846 VkFence _fence)
2847 {
2848 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2849 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2850 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2851 bool fence_emitted = false;
2852
2853 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2854 struct radv_winsys_sem_info sem_info;
2855 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2856 radv_sparse_buffer_bind_memory(queue->device,
2857 pBindInfo[i].pBufferBinds + j);
2858 }
2859
2860 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2861 radv_sparse_image_opaque_bind_memory(queue->device,
2862 pBindInfo[i].pImageOpaqueBinds + j);
2863 }
2864
2865 VkResult result;
2866 result = radv_alloc_sem_info(&sem_info,
2867 pBindInfo[i].waitSemaphoreCount,
2868 pBindInfo[i].pWaitSemaphores,
2869 pBindInfo[i].signalSemaphoreCount,
2870 pBindInfo[i].pSignalSemaphores,
2871 _fence);
2872 if (result != VK_SUCCESS)
2873 return result;
2874
2875 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2876 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2877 &queue->device->empty_cs[queue->queue_family_index],
2878 1, NULL, NULL,
2879 &sem_info,
2880 false, base_fence);
2881 fence_emitted = true;
2882 if (fence)
2883 fence->submitted = true;
2884 }
2885
2886 radv_free_sem_info(&sem_info);
2887
2888 }
2889
2890 if (fence) {
2891 if (!fence_emitted) {
2892 radv_signal_fence(queue, fence);
2893 }
2894 fence->submitted = true;
2895 }
2896
2897 return VK_SUCCESS;
2898 }
2899
2900 VkResult radv_CreateFence(
2901 VkDevice _device,
2902 const VkFenceCreateInfo* pCreateInfo,
2903 const VkAllocationCallbacks* pAllocator,
2904 VkFence* pFence)
2905 {
2906 RADV_FROM_HANDLE(radv_device, device, _device);
2907 const VkExportFenceCreateInfoKHR *export =
2908 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
2909 VkExternalFenceHandleTypeFlagsKHR handleTypes =
2910 export ? export->handleTypes : 0;
2911
2912 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2913 sizeof(*fence), 8,
2914 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2915
2916 if (!fence)
2917 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2918
2919 fence->submitted = false;
2920 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2921 fence->temp_syncobj = 0;
2922 if (device->always_use_syncobj || handleTypes) {
2923 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
2924 if (ret) {
2925 vk_free2(&device->alloc, pAllocator, fence);
2926 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2927 }
2928 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
2929 device->ws->signal_syncobj(device->ws, fence->syncobj);
2930 }
2931 fence->fence = NULL;
2932 } else {
2933 fence->fence = device->ws->create_fence();
2934 if (!fence->fence) {
2935 vk_free2(&device->alloc, pAllocator, fence);
2936 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2937 }
2938 fence->syncobj = 0;
2939 }
2940
2941 *pFence = radv_fence_to_handle(fence);
2942
2943 return VK_SUCCESS;
2944 }
2945
2946 void radv_DestroyFence(
2947 VkDevice _device,
2948 VkFence _fence,
2949 const VkAllocationCallbacks* pAllocator)
2950 {
2951 RADV_FROM_HANDLE(radv_device, device, _device);
2952 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2953
2954 if (!fence)
2955 return;
2956
2957 if (fence->temp_syncobj)
2958 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
2959 if (fence->syncobj)
2960 device->ws->destroy_syncobj(device->ws, fence->syncobj);
2961 if (fence->fence)
2962 device->ws->destroy_fence(fence->fence);
2963 vk_free2(&device->alloc, pAllocator, fence);
2964 }
2965
2966
2967 static uint64_t radv_get_current_time(void)
2968 {
2969 struct timespec tv;
2970 clock_gettime(CLOCK_MONOTONIC, &tv);
2971 return tv.tv_nsec + tv.tv_sec*1000000000ull;
2972 }
2973
2974 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2975 {
2976 uint64_t current_time = radv_get_current_time();
2977
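	/* Clamp so that current_time + timeout cannot wrap past UINT64_MAX
	 * (applications pass UINT64_MAX for an infinite wait). */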
2978 timeout = MIN2(UINT64_MAX - current_time, timeout);
2979
2980 return current_time + timeout;
2981 }
2982
2983
2984 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
2985 {
2986 for (uint32_t i = 0; i < fenceCount; ++i) {
2987 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2988 if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
2989 return false;
2990 }
2991 return true;
2992 }
2993
2994 VkResult radv_WaitForFences(
2995 VkDevice _device,
2996 uint32_t fenceCount,
2997 const VkFence* pFences,
2998 VkBool32 waitAll,
2999 uint64_t timeout)
3000 {
3001 RADV_FROM_HANDLE(radv_device, device, _device);
3002 timeout = radv_get_absolute_timeout(timeout);
3003
3004 if (device->always_use_syncobj) {
3005 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3006 if (!handles)
3007 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3008
3009 for (uint32_t i = 0; i < fenceCount; ++i) {
3010 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3011 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3012 }
3013
3014 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3015
3016 free(handles);
3017 return success ? VK_SUCCESS : VK_TIMEOUT;
3018 }
3019
3020 if (!waitAll && fenceCount > 1) {
3021 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3022 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3023 uint32_t wait_count = 0;
3024 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3025 if (!fences)
3026 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3027
3028 for (uint32_t i = 0; i < fenceCount; ++i) {
3029 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3030
3031 if (fence->signalled) {
3032 free(fences);
3033 return VK_SUCCESS;
3034 }
3035
3036 fences[wait_count++] = fence->fence;
3037 }
3038
3039 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3040 waitAll, timeout - radv_get_current_time());
3041
3042 free(fences);
3043 return success ? VK_SUCCESS : VK_TIMEOUT;
3044 }
3045
3046 while(radv_get_current_time() <= timeout) {
3047 for (uint32_t i = 0; i < fenceCount; ++i) {
3048 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3049 return VK_SUCCESS;
3050 }
3051 }
3052 return VK_TIMEOUT;
3053 }
3054
3055 for (uint32_t i = 0; i < fenceCount; ++i) {
3056 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3057 bool expired = false;
3058
3059 if (fence->temp_syncobj) {
3060 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3061 return VK_TIMEOUT;
3062 continue;
3063 }
3064
3065 if (fence->syncobj) {
3066 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3067 return VK_TIMEOUT;
3068 continue;
3069 }
3070
3071 if (fence->signalled)
3072 continue;
3073
3074 if (!fence->submitted) {
3075 while(radv_get_current_time() <= timeout && !fence->submitted)
3076 /* Do nothing */;
3077
3078 if (!fence->submitted)
3079 return VK_TIMEOUT;
3080
3081 /* Recheck as it may have been set by submitting operations. */
3082 if (fence->signalled)
3083 continue;
3084 }
3085
3086 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
3087 if (!expired)
3088 return VK_TIMEOUT;
3089
3090 fence->signalled = true;
3091 }
3092
3093 return VK_SUCCESS;
3094 }
3095
3096 VkResult radv_ResetFences(VkDevice _device,
3097 uint32_t fenceCount,
3098 const VkFence *pFences)
3099 {
3100 RADV_FROM_HANDLE(radv_device, device, _device);
3101
3102 for (unsigned i = 0; i < fenceCount; ++i) {
3103 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3104 fence->submitted = fence->signalled = false;
3105
3106 /* Per spec, we first restore the permanent payload, and then reset, so
3107 * having a temp syncobj should not skip resetting the permanent syncobj. */
3108 if (fence->temp_syncobj) {
3109 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3110 fence->temp_syncobj = 0;
3111 }
3112
3113 if (fence->syncobj) {
3114 device->ws->reset_syncobj(device->ws, fence->syncobj);
3115 }
3116 }
3117
3118 return VK_SUCCESS;
3119 }
3120
3121 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3122 {
3123 RADV_FROM_HANDLE(radv_device, device, _device);
3124 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3125
3126 if (fence->temp_syncobj) {
3127 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3128 return success ? VK_SUCCESS : VK_NOT_READY;
3129 }
3130
3131 if (fence->syncobj) {
3132 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3133 return success ? VK_SUCCESS : VK_NOT_READY;
3134 }
3135
3136 if (fence->signalled)
3137 return VK_SUCCESS;
3138 if (!fence->submitted)
3139 return VK_NOT_READY;
3140 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3141 return VK_NOT_READY;
3142
3143 return VK_SUCCESS;
3144 }
3145
3146
3147 /* Queue semaphore functions */
3148
3149 VkResult radv_CreateSemaphore(
3150 VkDevice _device,
3151 const VkSemaphoreCreateInfo* pCreateInfo,
3152 const VkAllocationCallbacks* pAllocator,
3153 VkSemaphore* pSemaphore)
3154 {
3155 RADV_FROM_HANDLE(radv_device, device, _device);
3156 const VkExportSemaphoreCreateInfoKHR *export =
3157 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3158 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3159 export ? export->handleTypes : 0;
3160
3161 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3162 sizeof(*sem), 8,
3163 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3164 if (!sem)
3165 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3166
3167 sem->temp_syncobj = 0;
3168 /* Create a syncobj if the device always uses syncobjs or if we are going to export this semaphore. */
3169 if (device->always_use_syncobj || handleTypes) {
3170 assert (device->physical_device->rad_info.has_syncobj);
3171 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3172 if (ret) {
3173 vk_free2(&device->alloc, pAllocator, sem);
3174 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3175 }
3176 sem->sem = NULL;
3177 } else {
3178 sem->sem = device->ws->create_sem(device->ws);
3179 if (!sem->sem) {
3180 vk_free2(&device->alloc, pAllocator, sem);
3181 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3182 }
3183 sem->syncobj = 0;
3184 }
3185
3186 *pSemaphore = radv_semaphore_to_handle(sem);
3187 return VK_SUCCESS;
3188 }
3189
3190 void radv_DestroySemaphore(
3191 VkDevice _device,
3192 VkSemaphore _semaphore,
3193 const VkAllocationCallbacks* pAllocator)
3194 {
3195 RADV_FROM_HANDLE(radv_device, device, _device);
3196 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3197 if (!_semaphore)
3198 return;
3199
3200 if (sem->syncobj)
3201 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3202 else
3203 device->ws->destroy_sem(sem->sem);
3204 vk_free2(&device->alloc, pAllocator, sem);
3205 }
3206
3207 VkResult radv_CreateEvent(
3208 VkDevice _device,
3209 const VkEventCreateInfo* pCreateInfo,
3210 const VkAllocationCallbacks* pAllocator,
3211 VkEvent* pEvent)
3212 {
3213 RADV_FROM_HANDLE(radv_device, device, _device);
3214 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3215 sizeof(*event), 8,
3216 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3217
3218 if (!event)
3219 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3220
3221 event->bo = device->ws->buffer_create(device->ws, 8, 8,
3222 RADEON_DOMAIN_GTT,
3223 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3224 if (!event->bo) {
3225 vk_free2(&device->alloc, pAllocator, event);
3226 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3227 }
3228
3229 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3230
3231 *pEvent = radv_event_to_handle(event);
3232
3233 return VK_SUCCESS;
3234 }
3235
3236 void radv_DestroyEvent(
3237 VkDevice _device,
3238 VkEvent _event,
3239 const VkAllocationCallbacks* pAllocator)
3240 {
3241 RADV_FROM_HANDLE(radv_device, device, _device);
3242 RADV_FROM_HANDLE(radv_event, event, _event);
3243
3244 if (!event)
3245 return;
3246 device->ws->buffer_destroy(event->bo);
3247 vk_free2(&device->alloc, pAllocator, event);
3248 }
3249
3250 VkResult radv_GetEventStatus(
3251 VkDevice _device,
3252 VkEvent _event)
3253 {
3254 RADV_FROM_HANDLE(radv_event, event, _event);
3255
3256 if (*event->map == 1)
3257 return VK_EVENT_SET;
3258 return VK_EVENT_RESET;
3259 }
3260
3261 VkResult radv_SetEvent(
3262 VkDevice _device,
3263 VkEvent _event)
3264 {
3265 RADV_FROM_HANDLE(radv_event, event, _event);
3266 *event->map = 1;
3267
3268 return VK_SUCCESS;
3269 }
3270
3271 VkResult radv_ResetEvent(
3272 VkDevice _device,
3273 VkEvent _event)
3274 {
3275 RADV_FROM_HANDLE(radv_event, event, _event);
3276 *event->map = 0;
3277
3278 return VK_SUCCESS;
3279 }
3280
3281 VkResult radv_CreateBuffer(
3282 VkDevice _device,
3283 const VkBufferCreateInfo* pCreateInfo,
3284 const VkAllocationCallbacks* pAllocator,
3285 VkBuffer* pBuffer)
3286 {
3287 RADV_FROM_HANDLE(radv_device, device, _device);
3288 struct radv_buffer *buffer;
3289
3290 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3291
3292 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3293 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3294 if (buffer == NULL)
3295 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3296
3297 buffer->size = pCreateInfo->size;
3298 buffer->usage = pCreateInfo->usage;
3299 buffer->bo = NULL;
3300 buffer->offset = 0;
3301 buffer->flags = pCreateInfo->flags;
3302
3303 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3304 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3305
3306 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3307 buffer->bo = device->ws->buffer_create(device->ws,
3308 align64(buffer->size, 4096),
3309 4096, 0, RADEON_FLAG_VIRTUAL);
3310 if (!buffer->bo) {
3311 vk_free2(&device->alloc, pAllocator, buffer);
3312 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3313 }
3314 }
3315
3316 *pBuffer = radv_buffer_to_handle(buffer);
3317
3318 return VK_SUCCESS;
3319 }
3320
3321 void radv_DestroyBuffer(
3322 VkDevice _device,
3323 VkBuffer _buffer,
3324 const VkAllocationCallbacks* pAllocator)
3325 {
3326 RADV_FROM_HANDLE(radv_device, device, _device);
3327 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3328
3329 if (!buffer)
3330 return;
3331
3332 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3333 device->ws->buffer_destroy(buffer->bo);
3334
3335 vk_free2(&device->alloc, pAllocator, buffer);
3336 }
3337
3338 static inline unsigned
3339 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3340 {
3341 if (stencil)
3342 return image->surface.u.legacy.stencil_tiling_index[level];
3343 else
3344 return image->surface.u.legacy.tiling_index[level];
3345 }
3346
3347 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3348 {
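	/* 3D views address slices by depth, so the last layer comes from the
	 * view extent instead of base_layer + layer_count. */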
3349 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3350 }
3351
3352 static void
3353 radv_initialise_color_surface(struct radv_device *device,
3354 struct radv_color_buffer_info *cb,
3355 struct radv_image_view *iview)
3356 {
3357 const struct vk_format_description *desc;
3358 unsigned ntype, format, swap, endian;
3359 unsigned blend_clamp = 0, blend_bypass = 0;
3360 uint64_t va;
3361 const struct radeon_surf *surf = &iview->image->surface;
3362
3363 desc = vk_format_description(iview->vk_format);
3364
3365 memset(cb, 0, sizeof(*cb));
3366
3367 /* Intensity is implemented as Red, so treat it that way. */
3368 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3369
3370 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3371
3372 cb->cb_color_base = va >> 8;
3373
3374 if (device->physical_device->rad_info.chip_class >= GFX9) {
3375 struct gfx9_surf_meta_flags meta;
3376 if (iview->image->dcc_offset)
3377 meta = iview->image->surface.u.gfx9.dcc;
3378 else
3379 meta = iview->image->surface.u.gfx9.cmask;
3380
3381 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3382 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3383 S_028C74_RB_ALIGNED(meta.rb_aligned) |
3384 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3385
3386 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3387 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3388 } else {
3389 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3390 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3391
3392 cb->cb_color_base += level_info->offset >> 8;
3393 if (level_info->mode == RADEON_SURF_MODE_2D)
3394 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3395
3396 pitch_tile_max = level_info->nblk_x / 8 - 1;
3397 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3398 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3399
3400 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3401 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3402 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3403
3404 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3405
3406 if (iview->image->fmask.size) {
3407 if (device->physical_device->rad_info.chip_class >= CIK)
3408 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3409 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3410 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3411 } else {
3412 /* This must be set for fast clear to work without FMASK. */
3413 if (device->physical_device->rad_info.chip_class >= CIK)
3414 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3415 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3416 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3417 }
3418 }
3419
3420 /* CMASK variables */
3421 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3422 va += iview->image->cmask.offset;
3423 cb->cb_color_cmask = va >> 8;
3424
3425 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3426 va += iview->image->dcc_offset;
3427 cb->cb_dcc_base = va >> 8;
3428 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3429
3430 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3431 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3432 S_028C6C_SLICE_MAX(max_slice);
3433
3434 if (iview->image->info.samples > 1) {
3435 unsigned log_samples = util_logbase2(iview->image->info.samples);
3436
3437 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3438 S_028C74_NUM_FRAGMENTS(log_samples);
3439 }
3440
3441 if (iview->image->fmask.size) {
3442 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3443 cb->cb_color_fmask = va >> 8;
3444 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3445 } else {
3446 cb->cb_color_fmask = cb->cb_color_base;
3447 }
3448
3449 ntype = radv_translate_color_numformat(iview->vk_format,
3450 desc,
3451 vk_format_get_first_non_void_channel(iview->vk_format));
3452 format = radv_translate_colorformat(iview->vk_format);
3453 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3454 		radv_finishme("Illegal color");
3455 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3456 endian = radv_colorformat_endian_swap(format);
3457
3458 /* blend clamp should be set for all NORM/SRGB types */
3459 if (ntype == V_028C70_NUMBER_UNORM ||
3460 ntype == V_028C70_NUMBER_SNORM ||
3461 ntype == V_028C70_NUMBER_SRGB)
3462 blend_clamp = 1;
3463
3464 	/* Set blend bypass according to the docs if the format is SINT/UINT
3465 	 * or one of the 8/24 COLOR variants. */
3466 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3467 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3468 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3469 blend_clamp = 0;
3470 blend_bypass = 1;
3471 }
3472 #if 0
3473 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3474 (format == V_028C70_COLOR_8 ||
3475 format == V_028C70_COLOR_8_8 ||
3476 format == V_028C70_COLOR_8_8_8_8))
3477 ->color_is_int8 = true;
3478 #endif
3479 cb->cb_color_info = S_028C70_FORMAT(format) |
3480 S_028C70_COMP_SWAP(swap) |
3481 S_028C70_BLEND_CLAMP(blend_clamp) |
3482 S_028C70_BLEND_BYPASS(blend_bypass) |
3483 S_028C70_SIMPLE_FLOAT(1) |
3484 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3485 ntype != V_028C70_NUMBER_SNORM &&
3486 ntype != V_028C70_NUMBER_SRGB &&
3487 format != V_028C70_COLOR_8_24 &&
3488 format != V_028C70_COLOR_24_8) |
3489 S_028C70_NUMBER_TYPE(ntype) |
3490 S_028C70_ENDIAN(endian);
3491 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3492 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3493 if (device->physical_device->rad_info.chip_class == SI) {
3494 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3495 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3496 }
3497 }
3498
3499 if (iview->image->cmask.size &&
3500 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3501 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3502
3503 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3504 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3505
3506 if (device->physical_device->rad_info.chip_class >= VI) {
3507 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3508 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3509 unsigned independent_64b_blocks = 0;
3510 unsigned max_compressed_block_size;
3511
3512 		/* amdvlk: [min-compressed-block-size] should be set to 32 for
3513 		 * dGPU and 64 for APU because all of our APUs to date use DIMMs
3514 		 * which have a request granularity size of 64B while all other
3515 		 * chips have a 32B request size. */
3516 if (!device->physical_device->rad_info.has_dedicated_vram)
3517 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3518
3519 if (iview->image->info.samples > 1) {
3520 if (iview->image->surface.bpe == 1)
3521 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3522 else if (iview->image->surface.bpe == 2)
3523 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3524 }
3525
3526 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3527 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3528 independent_64b_blocks = 1;
3529 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3530 } else
3531 max_compressed_block_size = max_uncompressed_block_size;
3532
3533 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3534 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3535 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3536 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3537 }
3538
3539 /* This must be set for fast clear to work without FMASK. */
3540 if (!iview->image->fmask.size &&
3541 device->physical_device->rad_info.chip_class == SI) {
3542 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3543 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3544 }
3545
3546 if (device->physical_device->rad_info.chip_class >= GFX9) {
3547 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3548 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3549
3550 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3551 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3552 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3553 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3554 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3555 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3556 }
3557 }
3558
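/* Fill the DB_* register values (radv_ds_buffer_info) for a depth/stencil
 * attachment view: Z and stencil base addresses, tiling and, when enabled,
 * HTILE metadata. */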
3559 static void
3560 radv_initialise_ds_surface(struct radv_device *device,
3561 struct radv_ds_buffer_info *ds,
3562 struct radv_image_view *iview)
3563 {
3564 unsigned level = iview->base_mip;
3565 unsigned format, stencil_format;
3566 uint64_t va, s_offs, z_offs;
3567 bool stencil_only = false;
3568 memset(ds, 0, sizeof(*ds));
3569 switch (iview->image->vk_format) {
3570 case VK_FORMAT_D24_UNORM_S8_UINT:
3571 case VK_FORMAT_X8_D24_UNORM_PACK32:
3572 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3573 ds->offset_scale = 2.0f;
3574 break;
3575 case VK_FORMAT_D16_UNORM:
3576 case VK_FORMAT_D16_UNORM_S8_UINT:
3577 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3578 ds->offset_scale = 4.0f;
3579 break;
3580 case VK_FORMAT_D32_SFLOAT:
3581 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3582 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3583 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3584 ds->offset_scale = 1.0f;
3585 break;
3586 case VK_FORMAT_S8_UINT:
3587 stencil_only = true;
3588 break;
3589 default:
3590 break;
3591 }
3592
3593 format = radv_translate_dbformat(iview->image->vk_format);
3594 stencil_format = iview->image->surface.has_stencil ?
3595 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3596
3597 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3598 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3599 S_028008_SLICE_MAX(max_slice);
3600
3601 ds->db_htile_data_base = 0;
3602 ds->db_htile_surface = 0;
3603
3604 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3605 s_offs = z_offs = va;
3606
3607 if (device->physical_device->rad_info.chip_class >= GFX9) {
3608 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3609 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3610
3611 ds->db_z_info = S_028038_FORMAT(format) |
3612 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3613 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3614 S_028038_MAXMIP(iview->image->info.levels - 1);
3615 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3616 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3617
3618 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3619 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3620 ds->db_depth_view |= S_028008_MIPID(level);
3621
3622 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3623 S_02801C_Y_MAX(iview->image->info.height - 1);
3624
3625 if (radv_htile_enabled(iview->image, level)) {
3626 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3627
3628 if (iview->image->tc_compatible_htile) {
3629 unsigned max_zplanes = 4;
3630
3631 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3632 iview->image->info.samples > 1)
3633 max_zplanes = 2;
3634
3635 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3636 S_028038_ITERATE_FLUSH(1);
3637 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3638 }
3639
3640 if (!iview->image->surface.has_stencil)
3641 /* Use all of the htile_buffer for depth if there's no stencil. */
3642 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3643 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3644 iview->image->htile_offset;
3645 ds->db_htile_data_base = va >> 8;
3646 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3647 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3648 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3649 }
3650 } else {
3651 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3652
3653 if (stencil_only)
3654 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3655
3656 z_offs += iview->image->surface.u.legacy.level[level].offset;
3657 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3658
3659 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3660 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3661 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3662
3663 if (iview->image->info.samples > 1)
3664 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3665
3666 if (device->physical_device->rad_info.chip_class >= CIK) {
3667 struct radeon_info *info = &device->physical_device->rad_info;
3668 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3669 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3670 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3671 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3672 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3673 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3674
3675 if (stencil_only)
3676 tile_mode = stencil_tile_mode;
3677
3678 ds->db_depth_info |=
3679 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3680 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3681 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3682 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3683 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3684 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3685 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3686 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3687 } else {
3688 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3689 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3690 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3691 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3692 if (stencil_only)
3693 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3694 }
3695
3696 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3697 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3698 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3699
3700 if (radv_htile_enabled(iview->image, level)) {
3701 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3702
3703 if (!iview->image->surface.has_stencil &&
3704 !iview->image->tc_compatible_htile)
3705 /* Use all of the htile_buffer for depth if there's no stencil. */
3706 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3707
3708 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3709 iview->image->htile_offset;
3710 ds->db_htile_data_base = va >> 8;
3711 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3712
3713 if (iview->image->tc_compatible_htile) {
3714 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3715
3716 if (iview->image->info.samples <= 1)
3717 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3718 else if (iview->image->info.samples <= 4)
3719 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3720 else
3721 				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3722 }
3723 }
3724 }
3725
3726 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3727 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3728 }
3729
3730 VkResult radv_CreateFramebuffer(
3731 VkDevice _device,
3732 const VkFramebufferCreateInfo* pCreateInfo,
3733 const VkAllocationCallbacks* pAllocator,
3734 VkFramebuffer* pFramebuffer)
3735 {
3736 RADV_FROM_HANDLE(radv_device, device, _device);
3737 struct radv_framebuffer *framebuffer;
3738
3739 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3740
3741 size_t size = sizeof(*framebuffer) +
3742 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3743 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3744 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3745 if (framebuffer == NULL)
3746 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3747
3748 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3749 framebuffer->width = pCreateInfo->width;
3750 framebuffer->height = pCreateInfo->height;
3751 framebuffer->layers = pCreateInfo->layers;
3752 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3753 VkImageView _iview = pCreateInfo->pAttachments[i];
3754 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3755 framebuffer->attachments[i].attachment = iview;
3756 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3757 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3758 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3759 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3760 }
3761 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3762 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3763 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
3764 }
3765
3766 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3767 return VK_SUCCESS;
3768 }
3769
3770 void radv_DestroyFramebuffer(
3771 VkDevice _device,
3772 VkFramebuffer _fb,
3773 const VkAllocationCallbacks* pAllocator)
3774 {
3775 RADV_FROM_HANDLE(radv_device, device, _device);
3776 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3777
3778 if (!fb)
3779 return;
3780 vk_free2(&device->alloc, pAllocator, fb);
3781 }
3782
3783 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3784 {
3785 switch (address_mode) {
3786 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3787 return V_008F30_SQ_TEX_WRAP;
3788 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3789 return V_008F30_SQ_TEX_MIRROR;
3790 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3791 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3792 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3793 return V_008F30_SQ_TEX_CLAMP_BORDER;
3794 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3795 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3796 default:
3797 unreachable("illegal tex wrap mode");
3798 break;
3799 }
3800 }
3801
3802 static unsigned
3803 radv_tex_compare(VkCompareOp op)
3804 {
3805 switch (op) {
3806 case VK_COMPARE_OP_NEVER:
3807 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3808 case VK_COMPARE_OP_LESS:
3809 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3810 case VK_COMPARE_OP_EQUAL:
3811 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3812 case VK_COMPARE_OP_LESS_OR_EQUAL:
3813 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3814 case VK_COMPARE_OP_GREATER:
3815 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3816 case VK_COMPARE_OP_NOT_EQUAL:
3817 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3818 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3819 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3820 case VK_COMPARE_OP_ALWAYS:
3821 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3822 default:
3823 unreachable("illegal compare mode");
3824 break;
3825 }
3826 }
3827
3828 static unsigned
3829 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3830 {
3831 	switch (filter) {
3832 	case VK_FILTER_NEAREST:
3833 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3834 			V_008F38_SQ_TEX_XY_FILTER_POINT);
3835 	case VK_FILTER_LINEAR:
3836 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3837 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3838 	case VK_FILTER_CUBIC_IMG:
3839 	default:
3840 		fprintf(stderr, "illegal texture filter\n");
3841 return 0;
3842 }
3843 }
3844
3845 static unsigned
3846 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3847 {
3848 switch (mode) {
3849 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3850 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3851 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3852 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3853 default:
3854 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3855 }
3856 }
3857
3858 static unsigned
3859 radv_tex_bordercolor(VkBorderColor bcolor)
3860 {
3861 switch (bcolor) {
3862 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3863 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3864 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3865 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3866 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3867 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3868 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3869 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3870 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3871 default:
3872 break;
3873 }
3874 return 0;
3875 }
3876
3877 static unsigned
3878 radv_tex_aniso_filter(unsigned filter)
3879 {
3880 if (filter < 2)
3881 return 0;
3882 if (filter < 4)
3883 return 1;
3884 if (filter < 8)
3885 return 2;
3886 if (filter < 16)
3887 return 3;
3888 return 4;
3889 }
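/* A quick sketch of the mapping above; the return value is the AMD aniso
 * ratio code (log2 of the ratio, clamped to 16x):
 *
 *   maxAnisotropy:  1    2-3   4-7   8-15   16+
 *   ratio code:     0    1     2     3      4
 */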
3890
3891 static void
3892 radv_init_sampler(struct radv_device *device,
3893 struct radv_sampler *sampler,
3894 const VkSamplerCreateInfo *pCreateInfo)
3895 {
3896 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3897 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3898 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3899 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3900
3901 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3902 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3903 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3904 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3905 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3906 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3907 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3908 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3909 S_008F30_DISABLE_CUBE_WRAP(0) |
3910 S_008F30_COMPAT_MODE(is_vi));
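	/* The LOD fields below are fixed point with 8 fractional bits
	 * (S_FIXED(x, 8) scales by 256); the CLAMP ranges keep the values
	 * within what the hardware register fields can encode. */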
3911 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3912 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3913 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3914 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3915 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3916 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3917 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3918 S_008F38_MIP_POINT_PRECLAMP(0) |
3919 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
3920 S_008F38_FILTER_PREC_FIX(1) |
3921 S_008F38_ANISO_OVERRIDE(is_vi));
3922 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3923 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3924 }
3925
3926 VkResult radv_CreateSampler(
3927 VkDevice _device,
3928 const VkSamplerCreateInfo* pCreateInfo,
3929 const VkAllocationCallbacks* pAllocator,
3930 VkSampler* pSampler)
3931 {
3932 RADV_FROM_HANDLE(radv_device, device, _device);
3933 struct radv_sampler *sampler;
3934
3935 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3936
3937 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3938 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3939 if (!sampler)
3940 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3941
3942 radv_init_sampler(device, sampler, pCreateInfo);
3943 *pSampler = radv_sampler_to_handle(sampler);
3944
3945 return VK_SUCCESS;
3946 }
3947
3948 void radv_DestroySampler(
3949 VkDevice _device,
3950 VkSampler _sampler,
3951 const VkAllocationCallbacks* pAllocator)
3952 {
3953 RADV_FROM_HANDLE(radv_device, device, _device);
3954 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3955
3956 if (!sampler)
3957 return;
3958 vk_free2(&device->alloc, pAllocator, sampler);
3959 }
3960
3961 /* vk_icd.h does not declare this function, so we declare it here to
3962  * suppress -Wmissing-prototypes.
3963 */
3964 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3965 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3966
3967 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3968 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3969 {
3970 /* For the full details on loader interface versioning, see
3971 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3972 * What follows is a condensed summary, to help you navigate the large and
3973 * confusing official doc.
3974 *
3975 * - Loader interface v0 is incompatible with later versions. We don't
3976 * support it.
3977 *
3978 * - In loader interface v1:
3979 * - The first ICD entrypoint called by the loader is
3980 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3981 * entrypoint.
3982 * - The ICD must statically expose no other Vulkan symbol unless it is
3983 * linked with -Bsymbolic.
3984 * - Each dispatchable Vulkan handle created by the ICD must be
3985 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3986 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3987 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3988 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3989 * such loader-managed surfaces.
3990 *
3991 * - Loader interface v2 differs from v1 in:
3992 * - The first ICD entrypoint called by the loader is
3993 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3994 * statically expose this entrypoint.
3995 *
3996 * - Loader interface v3 differs from v2 in:
3997 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3998  *      vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
3999 * because the loader no longer does so.
4000 */
4001 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4002 return VK_SUCCESS;
4003 }
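/* An illustrative, loader-side sketch of the negotiation (not driver code;
 * `negotiate` is a hypothetical function pointer the loader would resolve
 * from the ICD binary):
 *
 *    uint32_t version = 3;                // highest version the loader supports
 *    if (negotiate(&version) == VK_SUCCESS) {
 *        // version is now min(loader max, ICD max); both sides use it.
 *    }
 */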
4004
4005 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4006 const VkMemoryGetFdInfoKHR *pGetFdInfo,
4007 int *pFD)
4008 {
4009 RADV_FROM_HANDLE(radv_device, device, _device);
4010 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4011
4012 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4013
4014 /* At the moment, we support only the below handle types. */
4015 assert(pGetFdInfo->handleType ==
4016 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4017 pGetFdInfo->handleType ==
4018 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4019
4020 bool ret = radv_get_memory_fd(device, memory, pFD);
4021 if (ret == false)
4022 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
4023 return VK_SUCCESS;
4024 }
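/* A minimal application-side sketch of exporting memory through this
 * entrypoint (assumes `device` and `memory` are valid handles, the
 * VK_KHR_external_memory_fd extension is enabled, and the function pointer
 * has been fetched with vkGetDeviceProcAddr as usual):
 *
 *    VkMemoryGetFdInfoKHR info = {
 *        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *        .memory = memory,
 *        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
 *    };
 *    int fd = -1;
 *    VkResult r = vkGetMemoryFdKHR(device, &info, &fd);
 *    // on VK_SUCCESS the caller owns fd and must close() it
 */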
4025
4026 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4027 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4028 int fd,
4029 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4030 {
4031 switch (handleType) {
4032 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4033 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4034 return VK_SUCCESS;
4035
4036 default:
4037 /* The valid usage section for this function says:
4038 *
4039 * "handleType must not be one of the handle types defined as
4040 * opaque."
4041 *
4042 * So opaque handle types fall into the default "unsupported" case.
4043 */
4044 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4045 }
4046 }
4047
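/* Import an opaque FD as a syncobj. On success this consumes the fd
 * (closing it) and replaces any syncobj previously stored in *syncobj. */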
4048 static VkResult radv_import_opaque_fd(struct radv_device *device,
4049 int fd,
4050 uint32_t *syncobj)
4051 {
4052 uint32_t syncobj_handle = 0;
4053 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4054 if (ret != 0)
4055 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4056
4057 if (*syncobj)
4058 device->ws->destroy_syncobj(device->ws, *syncobj);
4059
4060 *syncobj = syncobj_handle;
4061 close(fd);
4062
4063 return VK_SUCCESS;
4064 }
4065
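/* Import a sync file FD into a (possibly newly created) syncobj. Per the
 * external fence/semaphore handle specs, an fd of -1 denotes an
 * already-signaled payload, which is why it maps to signal_syncobj here. */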
4066 static VkResult radv_import_sync_fd(struct radv_device *device,
4067 int fd,
4068 uint32_t *syncobj)
4069 {
4070 	/* If we need to create a syncobj, do it locally so that on error we
4071 	 * don't leave the fence or semaphore holding one in an undetermined state. */
4072 uint32_t syncobj_handle = *syncobj;
4073 if (!syncobj_handle) {
4074 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4075 if (ret) {
4076 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4077 }
4078 }
4079
4080 if (fd == -1) {
4081 device->ws->signal_syncobj(device->ws, syncobj_handle);
4082 } else {
4083 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4084 if (ret != 0)
4085 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4086 }
4087
4088 *syncobj = syncobj_handle;
4089 if (fd != -1)
4090 close(fd);
4091
4092 return VK_SUCCESS;
4093 }
4094
4095 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4096 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4097 {
4098 RADV_FROM_HANDLE(radv_device, device, _device);
4099 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4100 uint32_t *syncobj_dst = NULL;
4101
4102 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4103 syncobj_dst = &sem->temp_syncobj;
4104 } else {
4105 syncobj_dst = &sem->syncobj;
4106 }
4107
4108 switch(pImportSemaphoreFdInfo->handleType) {
4109 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4110 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4111 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4112 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4113 default:
4114 unreachable("Unhandled semaphore handle type");
4115 }
4116 }
4117
4118 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4119 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4120 int *pFd)
4121 {
4122 RADV_FROM_HANDLE(radv_device, device, _device);
4123 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4124 int ret;
4125 uint32_t syncobj_handle;
4126
4127 if (sem->temp_syncobj)
4128 syncobj_handle = sem->temp_syncobj;
4129 else
4130 syncobj_handle = sem->syncobj;
4131
4132 switch(pGetFdInfo->handleType) {
4133 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4134 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4135 break;
4136 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
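		/* Per the spec, exporting a SYNC_FD payload also resets the
		 * semaphore: drop a temporary syncobj or reset the permanent
		 * one below. */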
4137 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4138 if (!ret) {
4139 if (sem->temp_syncobj) {
4140 			device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
4141 sem->temp_syncobj = 0;
4142 } else {
4143 device->ws->reset_syncobj(device->ws, syncobj_handle);
4144 }
4145 }
4146 break;
4147 default:
4148 unreachable("Unhandled semaphore handle type");
4149 }
4150
4151 if (ret)
4152 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4153 return VK_SUCCESS;
4154 }
4155
4156 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4157 VkPhysicalDevice physicalDevice,
4158 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4159 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
4160 {
4161 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4162
4163 	/* Require has_syncobj_wait_for_submit as a proxy for the syncobj signal ioctl, which was introduced at virtually the same time. */
4164 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4165 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4166 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4167 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4168 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4169 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4170 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4171 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4172 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4173 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4174 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4175 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4176 } else {
4177 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4178 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4179 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4180 }
4181 }
4182
4183 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4184 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4185 {
4186 RADV_FROM_HANDLE(radv_device, device, _device);
4187 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4188 uint32_t *syncobj_dst = NULL;
4189
4191 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4192 syncobj_dst = &fence->temp_syncobj;
4193 } else {
4194 syncobj_dst = &fence->syncobj;
4195 }
4196
4197 switch(pImportFenceFdInfo->handleType) {
4198 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4199 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4200 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4201 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4202 default:
4203 unreachable("Unhandled fence handle type");
4204 }
4205 }
4206
4207 VkResult radv_GetFenceFdKHR(VkDevice _device,
4208 const VkFenceGetFdInfoKHR *pGetFdInfo,
4209 int *pFd)
4210 {
4211 RADV_FROM_HANDLE(radv_device, device, _device);
4212 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4213 int ret;
4214 uint32_t syncobj_handle;
4215
4216 if (fence->temp_syncobj)
4217 syncobj_handle = fence->temp_syncobj;
4218 else
4219 syncobj_handle = fence->syncobj;
4220
4221 switch(pGetFdInfo->handleType) {
4222 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4223 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4224 break;
4225 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
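		/* As with semaphores above, exporting a SYNC_FD payload resets
		 * the fence once the export succeeds. */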
4226 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4227 if (!ret) {
4228 if (fence->temp_syncobj) {
4229 			device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
4230 fence->temp_syncobj = 0;
4231 } else {
4232 device->ws->reset_syncobj(device->ws, syncobj_handle);
4233 }
4234 }
4235 break;
4236 default:
4237 unreachable("Unhandled fence handle type");
4238 }
4239
4240 if (ret)
4241 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4242 return VK_SUCCESS;
4243 }
4244
4245 void radv_GetPhysicalDeviceExternalFenceProperties(
4246 VkPhysicalDevice physicalDevice,
4247 const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4248 VkExternalFencePropertiesKHR* pExternalFenceProperties)
4249 {
4250 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4251
4252 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4253 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4254 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4255 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4256 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4257 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4258 			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
4259 } else {
4260 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4261 pExternalFenceProperties->compatibleHandleTypes = 0;
4262 pExternalFenceProperties->externalFenceFeatures = 0;
4263 }
4264 }
4265
4266 VkResult
4267 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4268 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4269 const VkAllocationCallbacks* pAllocator,
4270 VkDebugReportCallbackEXT* pCallback)
4271 {
4272 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4273 return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4274 pCreateInfo, pAllocator, &instance->alloc,
4275 pCallback);
4276 }
4277
4278 void
4279 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4280 VkDebugReportCallbackEXT _callback,
4281 const VkAllocationCallbacks* pAllocator)
4282 {
4283 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4284 vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4285 _callback, pAllocator, &instance->alloc);
4286 }
4287
4288 void
4289 radv_DebugReportMessageEXT(VkInstance _instance,
4290 VkDebugReportFlagsEXT flags,
4291 VkDebugReportObjectTypeEXT objectType,
4292 uint64_t object,
4293 size_t location,
4294 int32_t messageCode,
4295 const char* pLayerPrefix,
4296 const char* pMessage)
4297 {
4298 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4299 vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4300 object, location, messageCode, pLayerPrefix, pMessage);
4301 }
4302
4303 void
4304 radv_GetDeviceGroupPeerMemoryFeatures(
4305 VkDevice device,
4306 uint32_t heapIndex,
4307 uint32_t localDeviceIndex,
4308 uint32_t remoteDeviceIndex,
4309 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
4310 {
4311 assert(localDeviceIndex == remoteDeviceIndex);
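	/* radv only advertises single-GPU device groups, so the local and
	 * remote device are always the same and "peer" access is just local
	 * access; report full feature support unconditionally. */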
4312
4313 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
4314 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
4315 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
4316 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
4317 }