radv: add support for Vega12
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"

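/* Cache-UUID layout as written below: bytes 0-3 hold the Mesa build
 * timestamp, bytes 4-7 the LLVM build timestamp, bytes 8-9 the radeon
 * family id, and the remaining bytes the string "radv".
 */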
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
        uint32_t mesa_timestamp, llvm_timestamp;
        uint16_t f = family;
        memset(uuid, 0, VK_UUID_SIZE);
        if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
            !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
                return -1;

        memcpy(uuid, &mesa_timestamp, 4);
        memcpy((char*)uuid + 4, &llvm_timestamp, 4);
        memcpy((char*)uuid + 8, &f, 2);
        snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
        return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
        ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
        ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

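/* Builds a name such as "AMD RADV VEGA12 (LLVM 6.0.0)" (version shown as
 * an example); HAVE_LLVM packs the LLVM major version in bits 15:8 and
 * the minor version in bits 7:0.
 */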
static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
        const char *chip_string;
        char llvm_string[32] = {};

        switch (family) {
        case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
        case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
        case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
        case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
        case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
        case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
        case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
        case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
        case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
        case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
        case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
        case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
        case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
        case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
        case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
        case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
        case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
        case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
        case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
        case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
        case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
        default: chip_string = "AMD RADV unknown"; break;
        }

        if (HAVE_LLVM > 0) {
                snprintf(llvm_string, sizeof(llvm_string),
                         " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
                         HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
        }

        snprintf(name, name_len, "%s%s", chip_string, llvm_string);
}

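/* On a typical dGPU this advertises up to three heaps (CPU-invisible
 * VRAM, CPU-visible VRAM, GTT) and up to four memory types referencing
 * them, in this order: VRAM, write-combined GTT, CPU-visible VRAM and
 * cached GTT.
 */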
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
        STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
        uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
                                          device->rad_info.vram_vis_size);

        int vram_index = -1, visible_vram_index = -1, gart_index = -1;
        device->memory_properties.memoryHeapCount = 0;
        if (device->rad_info.vram_size - visible_vram_size > 0) {
                vram_index = device->memory_properties.memoryHeapCount++;
                device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
                        .size = device->rad_info.vram_size - visible_vram_size,
                        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
                };
        }
        if (visible_vram_size) {
                visible_vram_index = device->memory_properties.memoryHeapCount++;
                device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
                        .size = visible_vram_size,
                        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
                };
        }
        if (device->rad_info.gart_size > 0) {
                gart_index = device->memory_properties.memoryHeapCount++;
                device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
                        .size = device->rad_info.gart_size,
                        .flags = 0,
                };
        }

        STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
        unsigned type_count = 0;
        if (vram_index >= 0) {
                device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
                device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
                        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                        .heapIndex = vram_index,
                };
        }
        if (gart_index >= 0) {
                device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
                device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
                        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                        .heapIndex = gart_index,
                };
        }
        if (visible_vram_index >= 0) {
                device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
                device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
                        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                        .heapIndex = visible_vram_index,
                };
        }
        if (gart_index >= 0) {
                device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
                device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
                        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                        .heapIndex = gart_index,
                };
        }
        device->memory_properties.memoryTypeCount = type_count;
}

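/* RADV_FORCE_FAMILY takes an LLVM processor name as produced by
 * ac_get_llvm_processor_name(), e.g. "polaris10" or "gfx900" (assumed
 * examples), and overrides the family and chip class reported by the
 * winsys; intended for driver debugging.
 */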
static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
        const char *family = getenv("RADV_FORCE_FAMILY");
        unsigned i;

        if (!family)
                return;

        for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
                if (!strcmp(family, ac_get_llvm_processor_name(i))) {
                        /* Override family and chip_class. */
                        device->rad_info.family = i;

                        if (i >= CHIP_VEGA10)
                                device->rad_info.chip_class = GFX9;
                        else if (i >= CHIP_TONGA)
                                device->rad_info.chip_class = VI;
                        else if (i >= CHIP_BONAIRE)
                                device->rad_info.chip_class = CIK;
                        else
                                device->rad_info.chip_class = SI;

                        return;
                }
        }

        fprintf(stderr, "radv: Unknown family: %s\n", family);
        exit(1);
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          drmDevicePtr drm_device)
{
        const char *path = drm_device->nodes[DRM_NODE_RENDER];
        VkResult result;
        drmVersionPtr version;
        int fd;

        fd = open(path, O_RDWR | O_CLOEXEC);
        if (fd < 0)
                return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

        version = drmGetVersion(fd);
        if (!version) {
                close(fd);
                return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "failed to get version %s: %m", path);
        }

        if (strcmp(version->name, "amdgpu")) {
                drmFreeVersion(version);
                close(fd);
                return VK_ERROR_INCOMPATIBLE_DRIVER;
        }
        drmFreeVersion(version);

        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = instance;
        assert(strlen(path) < ARRAY_SIZE(device->path));
        strncpy(device->path, path, ARRAY_SIZE(device->path));

        device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
                                               instance->perftest_flags);
        if (!device->ws) {
                result = VK_ERROR_INCOMPATIBLE_DRIVER;
                goto fail;
        }

        device->local_fd = fd;
        device->ws->query_info(device->ws, &device->rad_info);

        radv_handle_env_var_force_family(device);

        radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

        if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
                device->ws->destroy(device->ws);
                result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                                   "cannot generate UUID");
                goto fail;
        }

        /* These flags affect shader compilation. */
        uint64_t shader_env_flags =
                (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
                (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

        /* The gpu id is already embedded in the uuid so we just pass "radv"
         * when creating the cache.
         */
        char buf[VK_UUID_SIZE * 2 + 1];
        disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
        device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

        if (device->rad_info.chip_class < VI ||
            device->rad_info.chip_class > GFX9)
                fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

        radv_get_driver_uuid(&device->driver_uuid);
        radv_get_device_uuid(&device->rad_info, &device->device_uuid);

        if (device->rad_info.family == CHIP_STONEY ||
            device->rad_info.chip_class >= GFX9) {
                device->has_rbplus = true;
                device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
                                         device->rad_info.family == CHIP_VEGA12;
        }

        /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
         * on SI.
         */
        device->has_clear_state = device->rad_info.chip_class >= CIK;

        device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;

        /* Vega10/Raven need a special workaround for a hardware bug. */
        device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
                                  device->rad_info.family == CHIP_RAVEN;

        radv_physical_device_init_mem_types(device);
        radv_fill_device_extension_table(device, &device->supported_extensions);

        result = radv_init_wsi(device);
        if (result != VK_SUCCESS) {
                device->ws->destroy(device->ws);
                goto fail;
        }

        return VK_SUCCESS;

fail:
        close(fd);
        return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
        radv_finish_wsi(device);
        device->ws->destroy(device->ws);
        disk_cache_destroy(device->disk_cache);
        close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
        return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
        return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
        free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
        .pUserData = NULL,
        .pfnAllocation = default_alloc_func,
        .pfnReallocation = default_realloc_func,
        .pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
        {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
        {"nodcc", RADV_DEBUG_NO_DCC},
        {"shaders", RADV_DEBUG_DUMP_SHADERS},
        {"nocache", RADV_DEBUG_NO_CACHE},
        {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
        {"nohiz", RADV_DEBUG_NO_HIZ},
        {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
        {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
        {"allbos", RADV_DEBUG_ALL_BOS},
        {"noibs", RADV_DEBUG_NO_IBS},
        {"spirv", RADV_DEBUG_DUMP_SPIRV},
        {"vmfaults", RADV_DEBUG_VM_FAULTS},
        {"zerovram", RADV_DEBUG_ZERO_VRAM},
        {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
        {"nosisched", RADV_DEBUG_NO_SISCHED},
        {"preoptir", RADV_DEBUG_PREOPTIR},
        {NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
        assert(id < ARRAY_SIZE(radv_debug_options) - 1);
        return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
        {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
        {"sisched", RADV_PERFTEST_SISCHED},
        {"localbos", RADV_PERFTEST_LOCAL_BOS},
        {"binning", RADV_PERFTEST_BINNING},
        {NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
        assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
        return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
                            const VkApplicationInfo *info)
{
        const char *name = info ? info->pApplicationName : NULL;

        if (!name)
                return;

        if (!strcmp(name, "Talos - Linux - 32bit") ||
            !strcmp(name, "Talos - Linux - 64bit")) {
                /* Force-enable LLVM sisched for Talos because it looks safe
                 * and it gives a few more FPS.
                 */
                instance->perftest_flags |= RADV_PERFTEST_SISCHED;
        }
}

static int radv_get_instance_extension_index(const char *name)
{
        for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
                if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
                        return i;
        }
        return -1;
}


VkResult radv_CreateInstance(
        const VkInstanceCreateInfo*                 pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkInstance*                                 pInstance)
{
        struct radv_instance *instance;
        VkResult result;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

        uint32_t client_version;
        if (pCreateInfo->pApplicationInfo &&
            pCreateInfo->pApplicationInfo->apiVersion != 0) {
                client_version = pCreateInfo->pApplicationInfo->apiVersion;
        } else {
                client_version = VK_MAKE_VERSION(1, 0, 0);
        }

        if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
            client_version > VK_MAKE_VERSION(1, 1, 0xfff)) {
                return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "Client requested version %d.%d.%d",
                                 VK_VERSION_MAJOR(client_version),
                                 VK_VERSION_MINOR(client_version),
                                 VK_VERSION_PATCH(client_version));
        }

        instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                              VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
        if (!instance)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

        if (pAllocator)
                instance->alloc = *pAllocator;
        else
                instance->alloc = default_alloc;

        instance->apiVersion = client_version;
        instance->physicalDeviceCount = -1;

        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
                int index = radv_get_instance_extension_index(ext_name);

                if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
                        vk_free2(&default_alloc, pAllocator, instance);
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
                }

                instance->enabled_extensions.extensions[index] = true;
        }

        result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
        if (result != VK_SUCCESS) {
                vk_free2(&default_alloc, pAllocator, instance);
                return vk_error(result);
        }

        _mesa_locale_init();

        VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

        instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                                   radv_debug_options);

        instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
                                                      radv_perftest_options);

        radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

        if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
                /* Disable sisched when the user requests it. This is mostly
                 * useful when the driver force-enables sisched for the given
                 * application.
                 */
                instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
        }

        *pInstance = radv_instance_to_handle(instance);

        return VK_SUCCESS;
}

void radv_DestroyInstance(
        VkInstance                                  _instance,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);

        if (!instance)
                return;

        for (int i = 0; i < instance->physicalDeviceCount; ++i) {
                radv_physical_device_finish(instance->physicalDevices + i);
        }

        VG(VALGRIND_DESTROY_MEMPOOL(instance));

        _mesa_locale_fini();

        vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

        vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
        /* TODO: Check for more devices? */
        drmDevicePtr devices[8];
        VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
        int max_devices;

        instance->physicalDeviceCount = 0;

        max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
        if (max_devices < 1)
                return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

        for (unsigned i = 0; i < (unsigned)max_devices; i++) {
                if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
                    devices[i]->bustype == DRM_BUS_PCI &&
                    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

                        result = radv_physical_device_init(instance->physicalDevices +
                                                           instance->physicalDeviceCount,
                                                           instance,
                                                           devices[i]);
                        if (result == VK_SUCCESS)
                                ++instance->physicalDeviceCount;
                        else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
                                break;
                }
        }
        drmFreeDevices(devices, max_devices);

        return result;
}

VkResult radv_EnumeratePhysicalDevices(
        VkInstance                                  _instance,
        uint32_t*                                   pPhysicalDeviceCount,
        VkPhysicalDevice*                           pPhysicalDevices)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        VkResult result;

        if (instance->physicalDeviceCount < 0) {
                result = radv_enumerate_devices(instance);
                if (result != VK_SUCCESS &&
                    result != VK_ERROR_INCOMPATIBLE_DRIVER)
                        return result;
        }

        if (!pPhysicalDevices) {
                *pPhysicalDeviceCount = instance->physicalDeviceCount;
        } else {
                *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
                for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
                        pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
        }

        return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
                                                                     : VK_SUCCESS;
}

VkResult radv_EnumeratePhysicalDeviceGroups(
        VkInstance                                  _instance,
        uint32_t*                                   pPhysicalDeviceGroupCount,
        VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        VkResult result;

        if (instance->physicalDeviceCount < 0) {
                result = radv_enumerate_devices(instance);
                if (result != VK_SUCCESS &&
                    result != VK_ERROR_INCOMPATIBLE_DRIVER)
                        return result;
        }

        if (!pPhysicalDeviceGroupProperties) {
                *pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
        } else {
                *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
                for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
                        pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
                        pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
                        pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
                }
        }
        return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
                                                                          : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceFeatures*                   pFeatures)
{
        memset(pFeatures, 0, sizeof(*pFeatures));

        *pFeatures = (VkPhysicalDeviceFeatures) {
                .robustBufferAccess = true,
                .fullDrawIndexUint32 = true,
                .imageCubeArray = true,
                .independentBlend = true,
                .geometryShader = true,
                .tessellationShader = true,
                .sampleRateShading = true,
                .dualSrcBlend = true,
                .logicOp = true,
                .multiDrawIndirect = true,
                .drawIndirectFirstInstance = true,
                .depthClamp = true,
                .depthBiasClamp = true,
                .fillModeNonSolid = true,
                .depthBounds = true,
                .wideLines = true,
                .largePoints = true,
                .alphaToOne = true,
                .multiViewport = true,
                .samplerAnisotropy = true,
                .textureCompressionETC2 = false,
                .textureCompressionASTC_LDR = false,
                .textureCompressionBC = true,
                .occlusionQueryPrecise = true,
                .pipelineStatisticsQuery = true,
                .vertexPipelineStoresAndAtomics = true,
                .fragmentStoresAndAtomics = true,
                .shaderTessellationAndGeometryPointSize = true,
                .shaderImageGatherExtended = true,
                .shaderStorageImageExtendedFormats = true,
                .shaderStorageImageMultisample = false,
                .shaderUniformBufferArrayDynamicIndexing = true,
                .shaderSampledImageArrayDynamicIndexing = true,
                .shaderStorageBufferArrayDynamicIndexing = true,
                .shaderStorageImageArrayDynamicIndexing = true,
                .shaderStorageImageReadWithoutFormat = true,
                .shaderStorageImageWriteWithoutFormat = true,
                .shaderClipDistance = true,
                .shaderCullDistance = true,
                .shaderFloat64 = true,
                .shaderInt64 = true,
                .shaderInt16 = false,
                .sparseBinding = true,
                .variableMultisampleRate = true,
                .inheritedQueries = true,
        };
}

void radv_GetPhysicalDeviceFeatures2(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
        vk_foreach_struct(ext, pFeatures->pNext) {
                switch (ext->sType) {
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
                        VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
                        features->variablePointersStorageBuffer = true;
                        features->variablePointers = false;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
                        VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
                        features->multiview = true;
                        features->multiviewGeometryShader = true;
                        features->multiviewTessellationShader = true;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
                        VkPhysicalDeviceShaderDrawParameterFeatures *features =
                                (VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
                        features->shaderDrawParameters = true;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
                        VkPhysicalDeviceProtectedMemoryFeatures *features =
                                (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
                        features->protectedMemory = false;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
                        VkPhysicalDevice16BitStorageFeatures *features =
                                (VkPhysicalDevice16BitStorageFeatures*)ext;
                        features->storageBuffer16BitAccess = false;
                        features->uniformAndStorageBuffer16BitAccess = false;
                        features->storagePushConstant16 = false;
                        features->storageInputOutput16 = false;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
                        VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
                                (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
                        features->samplerYcbcrConversion = false;
                        break;
                }
                default:
                        break;
                }
        }
        radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceProperties*                 pProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        VkSampleCountFlags sample_counts = 0xf;

        /* Make sure that the entire descriptor set is addressable with a signed
         * 32-bit int. So the sum of all limits scaled by descriptor size has to
         * be at most 2 GiB. A combined image sampler counts against both the
         * sampler and the sampled image limits. This limit is for the pipeline
         * layout, not for the set layout, but there is no set limit, so we just
         * set a pipeline limit. I don't think any app is going to hit this soon. */
        size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
                  (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
                   32 /* storage buffer, 32 due to potential space wasted on alignment */ +
                   32 /* sampler, largest when combined with image */ +
                   64 /* sampled image */ +
                   64 /* storage image */);
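        /* With the sizes above the divisor is 32 + 32 + 32 + 64 + 64 = 224
         * bytes, so this works out to roughly 9.5 million descriptors per
         * stage (illustrative arithmetic, ignoring the dynamic-buffer term).
         */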

        VkPhysicalDeviceLimits limits = {
                .maxImageDimension1D = (1 << 14),
                .maxImageDimension2D = (1 << 14),
                .maxImageDimension3D = (1 << 11),
                .maxImageDimensionCube = (1 << 14),
                .maxImageArrayLayers = (1 << 11),
                .maxTexelBufferElements = 128 * 1024 * 1024,
                .maxUniformBufferRange = UINT32_MAX,
                .maxStorageBufferRange = UINT32_MAX,
                .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
                .maxMemoryAllocationCount = UINT32_MAX,
                .maxSamplerAllocationCount = 64 * 1024,
                .bufferImageGranularity = 64, /* A cache line */
                .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
                .maxBoundDescriptorSets = MAX_SETS,
                .maxPerStageDescriptorSamplers = max_descriptor_set_size,
                .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
                .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
                .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
                .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
                .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
                .maxPerStageResources = max_descriptor_set_size,
                .maxDescriptorSetSamplers = max_descriptor_set_size,
                .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
                .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
                .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
                .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
                .maxDescriptorSetSampledImages = max_descriptor_set_size,
                .maxDescriptorSetStorageImages = max_descriptor_set_size,
                .maxDescriptorSetInputAttachments = max_descriptor_set_size,
                .maxVertexInputAttributes = 32,
                .maxVertexInputBindings = 32,
                .maxVertexInputAttributeOffset = 2047,
                .maxVertexInputBindingStride = 2048,
                .maxVertexOutputComponents = 128,
                .maxTessellationGenerationLevel = 64,
                .maxTessellationPatchSize = 32,
                .maxTessellationControlPerVertexInputComponents = 128,
                .maxTessellationControlPerVertexOutputComponents = 128,
                .maxTessellationControlPerPatchOutputComponents = 120,
                .maxTessellationControlTotalOutputComponents = 4096,
                .maxTessellationEvaluationInputComponents = 128,
                .maxTessellationEvaluationOutputComponents = 128,
                .maxGeometryShaderInvocations = 127,
                .maxGeometryInputComponents = 64,
                .maxGeometryOutputComponents = 128,
                .maxGeometryOutputVertices = 256,
                .maxGeometryTotalOutputComponents = 1024,
                .maxFragmentInputComponents = 128,
                .maxFragmentOutputAttachments = 8,
                .maxFragmentDualSrcAttachments = 1,
                .maxFragmentCombinedOutputResources = 8,
                .maxComputeSharedMemorySize = 32768,
                .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
                .maxComputeWorkGroupInvocations = 2048,
                .maxComputeWorkGroupSize = {
                        2048,
                        2048,
                        2048
                },
                .subPixelPrecisionBits = 4 /* FIXME */,
                .subTexelPrecisionBits = 4 /* FIXME */,
                .mipmapPrecisionBits = 4 /* FIXME */,
                .maxDrawIndexedIndexValue = UINT32_MAX,
                .maxDrawIndirectCount = UINT32_MAX,
                .maxSamplerLodBias = 16,
                .maxSamplerAnisotropy = 16,
                .maxViewports = MAX_VIEWPORTS,
                .maxViewportDimensions = { (1 << 14), (1 << 14) },
                .viewportBoundsRange = { INT16_MIN, INT16_MAX },
                .viewportSubPixelBits = 13, /* We take a float? */
                .minMemoryMapAlignment = 4096, /* A page */
                .minTexelBufferOffsetAlignment = 1,
                .minUniformBufferOffsetAlignment = 4,
                .minStorageBufferOffsetAlignment = 4,
                .minTexelOffset = -32,
                .maxTexelOffset = 31,
                .minTexelGatherOffset = -32,
                .maxTexelGatherOffset = 31,
                .minInterpolationOffset = -2,
                .maxInterpolationOffset = 2,
                .subPixelInterpolationOffsetBits = 8,
                .maxFramebufferWidth = (1 << 14),
                .maxFramebufferHeight = (1 << 14),
                .maxFramebufferLayers = (1 << 10),
                .framebufferColorSampleCounts = sample_counts,
                .framebufferDepthSampleCounts = sample_counts,
                .framebufferStencilSampleCounts = sample_counts,
                .framebufferNoAttachmentsSampleCounts = sample_counts,
                .maxColorAttachments = MAX_RTS,
                .sampledImageColorSampleCounts = sample_counts,
                .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
                .sampledImageDepthSampleCounts = sample_counts,
                .sampledImageStencilSampleCounts = sample_counts,
                .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
                .maxSampleMaskWords = 1,
                .timestampComputeAndGraphics = true,
                .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
                .maxClipDistances = 8,
                .maxCullDistances = 8,
                .maxCombinedClipAndCullDistances = 8,
                .discreteQueuePriorities = 1,
                .pointSizeRange = { 0.125, 255.875 },
                .lineWidthRange = { 0.0, 7.9921875 },
                .pointSizeGranularity = (1.0 / 8.0),
                .lineWidthGranularity = (1.0 / 128.0),
                .strictLines = false, /* FINISHME */
                .standardSampleLocations = true,
                .optimalBufferCopyOffsetAlignment = 128,
                .optimalBufferCopyRowPitchAlignment = 128,
                .nonCoherentAtomSize = 64,
        };

        *pProperties = (VkPhysicalDeviceProperties) {
                .apiVersion = radv_physical_device_api_version(pdevice),
                .driverVersion = vk_get_driver_version(),
                .vendorID = ATI_VENDOR_ID,
                .deviceID = pdevice->rad_info.pci_id,
                .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
                .limits = limits,
                .sparseProperties = {0},
        };

        strcpy(pProperties->deviceName, pdevice->name);
        memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceProperties2KHR             *pProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

        vk_foreach_struct(ext, pProperties->pNext) {
                switch (ext->sType) {
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
                        VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
                                (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
                        properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
                        VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
                        memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
                        memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
                        properties->deviceLUIDValid = false;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
                        VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
                        properties->maxMultiviewViewCount = MAX_VIEWS;
                        properties->maxMultiviewInstanceIndex = INT_MAX;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
                        VkPhysicalDevicePointClippingPropertiesKHR *properties =
                                (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
                        properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
                        VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
                                (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
                        properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
                        VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
                                (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
                        properties->minImportedHostPointerAlignment = 4096;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
                        VkPhysicalDeviceSubgroupProperties *properties =
                                (VkPhysicalDeviceSubgroupProperties*)ext;
                        properties->subgroupSize = 64;
                        properties->supportedStages = VK_SHADER_STAGE_ALL;
                        properties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
                        properties->quadOperationsInAllStages = false;
                        break;
                }
                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
                        VkPhysicalDeviceMaintenance3Properties *properties =
                                (VkPhysicalDeviceMaintenance3Properties*)ext;
                        /* Make sure everything is addressable by a signed 32-bit int, and
                         * our largest descriptors are 96 bytes. */
                        properties->maxPerSetDescriptors = (1ull << 31) / 96;
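                        /* (1ull << 31) / 96 = 22369621 descriptors per set. */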
                        /* Our buffer size fields allow only this much */
                        properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
                        break;
                }
                default:
                        break;
                }
        }
}

static void radv_get_physical_device_queue_family_properties(
        struct radv_physical_device*                pdevice,
        uint32_t*                                   pCount,
        VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
        int num_queue_families = 1;
        int idx;
        if (pdevice->rad_info.num_compute_rings > 0 &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
                num_queue_families++;

        if (pQueueFamilyProperties == NULL) {
                *pCount = num_queue_families;
                return;
        }

        if (!*pCount)
                return;

        idx = 0;
        if (*pCount >= 1) {
                *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                        .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                                      VK_QUEUE_COMPUTE_BIT |
                                      VK_QUEUE_TRANSFER_BIT |
                                      VK_QUEUE_SPARSE_BINDING_BIT,
                        .queueCount = 1,
                        .timestampValidBits = 64,
                        .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
                };
                idx++;
        }

        if (pdevice->rad_info.num_compute_rings > 0 &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
                if (*pCount > idx) {
                        *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                                .queueFlags = VK_QUEUE_COMPUTE_BIT |
                                              VK_QUEUE_TRANSFER_BIT |
                                              VK_QUEUE_SPARSE_BINDING_BIT,
                                .queueCount = pdevice->rad_info.num_compute_rings,
                                .timestampValidBits = 64,
                                .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
                        };
                        idx++;
                }
        }
        *pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pCount,
        VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        if (!pQueueFamilyProperties) {
                radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
                return;
        }
        VkQueueFamilyProperties *properties[] = {
                pQueueFamilyProperties + 0,
                pQueueFamilyProperties + 1,
                pQueueFamilyProperties + 2,
        };
        radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
        assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pCount,
        VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        if (!pQueueFamilyProperties) {
                radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
                return;
        }
        VkQueueFamilyProperties *properties[] = {
                &pQueueFamilyProperties[0].queueFamilyProperties,
                &pQueueFamilyProperties[1].queueFamilyProperties,
                &pQueueFamilyProperties[2].queueFamilyProperties,
        };
        radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
        assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

        *pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
        radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                               &pMemoryProperties->memoryProperties);
}

VkResult radv_GetMemoryHostPointerPropertiesEXT(
        VkDevice                                    _device,
        VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
        const void                                 *pHostPointer,
        VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        switch (handleType)
        {
        case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
                const struct radv_physical_device *physical_device = device->physical_device;
                uint32_t memoryTypeBits = 0;
                for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
                        if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
                                memoryTypeBits = (1 << i);
                                break;
                        }
                }
                pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
                return VK_SUCCESS;
        }
        default:
                return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
        }
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
        /* Default to MEDIUM when a specific global priority isn't requested */
        if (!pObj)
                return RADEON_CTX_PRIORITY_MEDIUM;

        switch(pObj->globalPriority) {
        case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
                return RADEON_CTX_PRIORITY_REALTIME;
        case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
                return RADEON_CTX_PRIORITY_HIGH;
        case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
                return RADEON_CTX_PRIORITY_MEDIUM;
        case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
                return RADEON_CTX_PRIORITY_LOW;
        default:
                unreachable("Illegal global priority value");
                return RADEON_CTX_PRIORITY_INVALID;
        }
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                uint32_t queue_family_index, int idx,
                VkDeviceQueueCreateFlags flags,
                const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
        queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        queue->device = device;
        queue->queue_family_index = queue_family_index;
        queue->queue_idx = idx;
        queue->priority = radv_get_queue_global_priority(global_priority);
        queue->flags = flags;

        queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
        if (!queue->hw_ctx)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
        if (queue->hw_ctx)
                queue->device->ws->ctx_destroy(queue->hw_ctx);

        if (queue->initial_full_flush_preamble_cs)
                queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
        if (queue->initial_preamble_cs)
                queue->device->ws->cs_destroy(queue->initial_preamble_cs);
        if (queue->continue_preamble_cs)
                queue->device->ws->cs_destroy(queue->continue_preamble_cs);
        if (queue->descriptor_bo)
                queue->device->ws->buffer_destroy(queue->descriptor_bo);
        if (queue->scratch_bo)
                queue->device->ws->buffer_destroy(queue->scratch_bo);
        if (queue->esgs_ring_bo)
                queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
        if (queue->gsvs_ring_bo)
                queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
        if (queue->tess_rings_bo)
                queue->device->ws->buffer_destroy(queue->tess_rings_bo);
        if (queue->compute_scratch_bo)
                queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
        switch (device->physical_device->rad_info.family) {
        case CHIP_OLAND:
        case CHIP_HAINAN:
        case CHIP_KAVERI:
        case CHIP_KABINI:
        case CHIP_MULLINS:
        case CHIP_ICELAND:
        case CHIP_CARRIZO:
        case CHIP_STONEY:
                device->gs_table_depth = 16;
                return;
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
        case CHIP_VERDE:
        case CHIP_BONAIRE:
        case CHIP_HAWAII:
        case CHIP_TONGA:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_RAVEN:
                device->gs_table_depth = 32;
                return;
        default:
                unreachable("unknown GPU");
        }
}

static int radv_get_device_extension_index(const char *name)
{
        for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
                if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
                        return i;
        }
        return -1;
}

VkResult radv_CreateDevice(
        VkPhysicalDevice                            physicalDevice,
        const VkDeviceCreateInfo*                   pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkDevice*                                   pDevice)
{
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
        VkResult result;
        struct radv_device *device;

        bool keep_shader_info = false;

        /* Check enabled features */
        if (pCreateInfo->pEnabledFeatures) {
                VkPhysicalDeviceFeatures supported_features;
                radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
                VkBool32 *supported_feature = (VkBool32 *)&supported_features;
                VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
                unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
                for (uint32_t i = 0; i < num_features; i++) {
                        if (enabled_feature[i] && !supported_feature[i])
                                return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
                }
        }

        device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
                            sizeof(*device), 8,
                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
        if (!device)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = physical_device->instance;
        device->physical_device = physical_device;

        device->ws = physical_device->ws;
        if (pAllocator)
                device->alloc = *pAllocator;
        else
                device->alloc = physical_device->instance->alloc;

        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
                int index = radv_get_device_extension_index(ext_name);
                if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
                        vk_free(&device->alloc, device);
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
                }

                device->enabled_extensions.extensions[index] = true;
        }

        keep_shader_info = device->enabled_extensions.AMD_shader_info;

        mtx_init(&device->shader_slab_mutex, mtx_plain);
        list_inithead(&device->shader_slabs);

        for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
                const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
                uint32_t qfi = queue_create->queueFamilyIndex;
                const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
                        vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

                assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

                device->queues[qfi] = vk_alloc(&device->alloc,
                                               queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
                if (!device->queues[qfi]) {
                        result = VK_ERROR_OUT_OF_HOST_MEMORY;
                        goto fail;
                }

                memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

                device->queue_count[qfi] = queue_create->queueCount;

                for (unsigned q = 0; q < queue_create->queueCount; q++) {
                        result = radv_queue_init(device, &device->queues[qfi][q],
                                                 qfi, q, queue_create->flags,
                                                 global_priority);
                        if (result != VK_SUCCESS)
                                goto fail;
                }
        }

        device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
                              (device->instance->perftest_flags & RADV_PERFTEST_BINNING);

        /* Disabled and not implemented for now. */
        device->dfsm_allowed = device->pbb_allowed && false;

#ifdef ANDROID
        device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
#endif

        device->llvm_supports_spill = true;

        /* The maximum number of scratch waves. Scratch space isn't divided
         * evenly between CUs. The number is only a function of the number of CUs.
         * We can decrease the constant to decrease the scratch buffer size.
         *
         * scratch_waves must be >= the maximum possible size of
         * 1 threadgroup, so that the hw doesn't hang from being unable
         * to start any.
         *
         * The recommended value is 4 per CU at most. Higher numbers don't
         * bring much benefit, but they still occupy chip resources (think
         * async compute). I've seen ~2% performance difference between 4 and 32.
         */
        uint32_t max_threads_per_block = 2048;
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
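        /* e.g. a 64-CU part gets 32 * 64 = 2048 scratch waves, and the MAX2
         * with 2048 / 64 = 32 waves keeps room for one maximally sized
         * threadgroup (illustrative numbers, not a specific product).
         */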

        device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);

        if (device->physical_device->rad_info.chip_class >= CIK) {
                /* If the KMD allows it (there is a KMD hw register for it),
                 * allow launching waves out-of-order.
                 */
                device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
        }

        radv_device_init_gs_info(device);

        device->tess_offchip_block_dw_size =
                device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
        device->has_distributed_tess =
                device->physical_device->rad_info.chip_class >= VI &&
                device->physical_device->rad_info.max_se >= 2;

        if (getenv("RADV_TRACE_FILE")) {
                const char *filename = getenv("RADV_TRACE_FILE");

                keep_shader_info = true;

                if (!radv_init_trace(device)) {
                        result = VK_ERROR_INITIALIZATION_FAILED;
                        goto fail;
                }

                fprintf(stderr, "Trace file will be dumped to %s\n", filename);
                radv_dump_enabled_options(device, stderr);
        }

        device->keep_shader_info = keep_shader_info;

        result = radv_device_init_meta(device);
        if (result != VK_SUCCESS)
                goto fail;

        radv_device_init_msaa(device);

        for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
                device->empty_cs[family] = device->ws->cs_create(device->ws, family);
                switch (family) {
                case RADV_QUEUE_GENERAL:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
                        radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
                        radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
                        break;
                case RADV_QUEUE_COMPUTE:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
                        radeon_emit(device->empty_cs[family], 0);
                        break;
                }
                device->ws->cs_finalize(device->empty_cs[family]);
        }

        if (device->physical_device->rad_info.chip_class >= CIK)
                cik_create_gfx_config(device);

        VkPipelineCacheCreateInfo ci;
        ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
        ci.pNext = NULL;
        ci.flags = 0;
        ci.pInitialData = NULL;
        ci.initialDataSize = 0;
        VkPipelineCache pc;
        result = radv_CreatePipelineCache(radv_device_to_handle(device),
                                          &ci, NULL, &pc);
        if (result != VK_SUCCESS)
                goto fail_meta;

        device->mem_cache = radv_pipeline_cache_from_handle(pc);

        *pDevice = radv_device_to_handle(device);
        return VK_SUCCESS;

fail_meta:
        radv_device_finish_meta(device);
fail:
        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);

        if (device->gfx_init)
                device->ws->buffer_destroy(device->gfx_init);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
        }

        vk_free(&device->alloc, device);
        return result;
}

void radv_DestroyDevice(
        VkDevice                                    _device,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        if (!device)
                return;

        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);

        if (device->gfx_init)
                device->ws->buffer_destroy(device->gfx_init);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
                if (device->empty_cs[i])
                        device->ws->cs_destroy(device->empty_cs[i]);
        }
        radv_device_finish_meta(device);

        VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
        radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

        radv_destroy_shader_slabs(device);

        vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceLayerProperties(
        uint32_t*                                   pPropertyCount,
        VkLayerProperties*                          pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = 0;
                return VK_SUCCESS;
        }

        /* None supported at this time */
        return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pPropertyCount,
        VkLayerProperties*                          pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = 0;
                return VK_SUCCESS;
        }

        /* None supported at this time */
        return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue2(
        VkDevice                                    _device,
        const VkDeviceQueueInfo2*                   pQueueInfo,
        VkQueue*                                    pQueue)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_queue *queue;

        queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
        if (pQueueInfo->flags != queue->flags) {
                /* From the Vulkan 1.1.70 spec:
                 *
                 * "The queue returned by vkGetDeviceQueue2 must have the same
                 * flags value from this structure as that used at device
                 * creation time in a VkDeviceQueueCreateInfo instance. If no
                 * matching flags were specified at device creation time then
                 * pQueue will return VK_NULL_HANDLE."
                 */
                *pQueue = VK_NULL_HANDLE;
                return;
        }

        *pQueue = radv_queue_to_handle(queue);
}

void radv_GetDeviceQueue(
        VkDevice                                    _device,
        uint32_t                                    queueFamilyIndex,
        uint32_t                                    queueIndex,
        VkQueue*                                    pQueue)
{
        const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
                .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
                .queueFamilyIndex = queueFamilyIndex,
                .queueIndex = queueIndex
        };

        radv_GetDeviceQueue2(_device, &info, pQueue);
}

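/* Writes the ring descriptors used by the geometry and tessellation
 * stages into the queue's descriptor BO, starting at map[4]: the ES->GS
 * ring (write and read entries), the GS->VS ring (read and write
 * entries), the tess factor ring and the tess offchip ring, each as a
 * 4-dword buffer descriptor, followed by the 1x/2x/4x/8x/16x sample
 * position tables.
 */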
1503 static void
1504 fill_geom_tess_rings(struct radv_queue *queue,
1505 uint32_t *map,
1506 bool add_sample_positions,
1507 uint32_t esgs_ring_size,
1508 struct radeon_winsys_bo *esgs_ring_bo,
1509 uint32_t gsvs_ring_size,
1510 struct radeon_winsys_bo *gsvs_ring_bo,
1511 uint32_t tess_factor_ring_size,
1512 uint32_t tess_offchip_ring_offset,
1513 uint32_t tess_offchip_ring_size,
1514 struct radeon_winsys_bo *tess_rings_bo)
1515 {
1516 uint64_t esgs_va = 0, gsvs_va = 0;
1517 uint64_t tess_va = 0, tess_offchip_va = 0;
1518 uint32_t *desc = &map[4];
1519
1520 if (esgs_ring_bo)
1521 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1522 if (gsvs_ring_bo)
1523 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1524 if (tess_rings_bo) {
1525 tess_va = radv_buffer_get_va(tess_rings_bo);
1526 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1527 }
1528
1529 /* stride 0, num records - size, add tid, swizzle, elsize4,
1530 index stride 64 */
1531 desc[0] = esgs_va;
1532 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1533 S_008F04_STRIDE(0) |
1534 S_008F04_SWIZZLE_ENABLE(true);
1535 desc[2] = esgs_ring_size;
1536 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1537 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1538 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1539 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1540 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1541 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1542 S_008F0C_ELEMENT_SIZE(1) |
1543 S_008F0C_INDEX_STRIDE(3) |
1544 S_008F0C_ADD_TID_ENABLE(true);
1545
1546 desc += 4;
1547 /* GS entry for ES->GS ring */
1548 /* stride 0, num records - size, elsize0,
1549 index stride 0 */
1550 desc[0] = esgs_va;
1551 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1552 S_008F04_STRIDE(0) |
1553 S_008F04_SWIZZLE_ENABLE(false);
1554 desc[2] = esgs_ring_size;
1555 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1556 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1557 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1558 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1559 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1560 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1561 S_008F0C_ELEMENT_SIZE(0) |
1562 S_008F0C_INDEX_STRIDE(0) |
1563 S_008F0C_ADD_TID_ENABLE(false);
1564
1565 desc += 4;
1566 /* VS entry for GS->VS ring */
1567 /* stride 0, num records - size, elsize0,
1568 index stride 0 */
1569 desc[0] = gsvs_va;
1570 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1571 S_008F04_STRIDE(0) |
1572 S_008F04_SWIZZLE_ENABLE(false);
1573 desc[2] = gsvs_ring_size;
1574 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1575 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1576 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1577 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1578 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1579 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1580 S_008F0C_ELEMENT_SIZE(0) |
1581 S_008F0C_INDEX_STRIDE(0) |
1582 S_008F0C_ADD_TID_ENABLE(false);
1583 desc += 4;
1584
1585 /* stride gsvs_itemsize, num records 64
1586 elsize 4, index stride 16 */
1587 /* shader will patch stride and desc[2] */
1588 desc[0] = gsvs_va;
1589 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1590 S_008F04_STRIDE(0) |
1591 S_008F04_SWIZZLE_ENABLE(true);
1592 desc[2] = 0;
1593 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1594 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1595 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1596 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1597 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1598 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1599 S_008F0C_ELEMENT_SIZE(1) |
1600 S_008F0C_INDEX_STRIDE(1) |
1601 S_008F0C_ADD_TID_ENABLE(true);
1602 desc += 4;
1603
1604 desc[0] = tess_va;
1605 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1606 S_008F04_STRIDE(0) |
1607 S_008F04_SWIZZLE_ENABLE(false);
1608 desc[2] = tess_factor_ring_size;
1609 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1610 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1611 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1612 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1613 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1614 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1615 S_008F0C_ELEMENT_SIZE(0) |
1616 S_008F0C_INDEX_STRIDE(0) |
1617 S_008F0C_ADD_TID_ENABLE(false);
1618 desc += 4;
1619
1620 desc[0] = tess_offchip_va;
1621 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1622 S_008F04_STRIDE(0) |
1623 S_008F04_SWIZZLE_ENABLE(false);
1624 desc[2] = tess_offchip_ring_size;
1625 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1626 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1627 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1628 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1629 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1630 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1631 S_008F0C_ELEMENT_SIZE(0) |
1632 S_008F0C_INDEX_STRIDE(0) |
1633 S_008F0C_ADD_TID_ENABLE(false);
1634 desc += 4;
1635
1636 /* add sample positions after all rings */
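/* Each position is a pair of 32-bit floats, so the 1x/2x/4x/8x/16x
 * sets occupy 8/16/32/64/128 bytes respectively (248 bytes total). */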
1637 memcpy(desc, queue->device->sample_locations_1x, 8);
1638 desc += 2;
1639 memcpy(desc, queue->device->sample_locations_2x, 16);
1640 desc += 4;
1641 memcpy(desc, queue->device->sample_locations_4x, 32);
1642 desc += 8;
1643 memcpy(desc, queue->device->sample_locations_8x, 64);
1644 desc += 16;
1645 memcpy(desc, queue->device->sample_locations_16x, 128);
1646 }
1647
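/* Compute VGT_HS_OFFCHIP_PARAM for off-chip tessellation. Each shader
 * engine gets 64 off-chip buffers (128 on CIK+, except Carrizo/Stoney),
 * clamped per generation (126 on SI, 508 on CIK and later). On VI+ the
 * OFFCHIP_BUFFERING field encodes "buffers - 1": e.g. a 4-SE VI part
 * yields 128 * 4 = 512, clamped to 508, programmed as 507, with the
 * granularity picked from tess_offchip_block_dw_size. */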
1648 static unsigned
1649 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1650 {
1651 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1652 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1653 device->physical_device->rad_info.family != CHIP_STONEY;
1654 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1655 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1656 device->physical_device->rad_info.max_se;
1657 unsigned offchip_granularity;
1658 unsigned hs_offchip_param;
1659 switch (device->tess_offchip_block_dw_size) {
1660 default:
1661 assert(0);
1662 /* fall through */
1663 case 8192:
1664 offchip_granularity = V_03093C_X_8K_DWORDS;
1665 break;
1666 case 4096:
1667 offchip_granularity = V_03093C_X_4K_DWORDS;
1668 break;
1669 }
1670
1671 switch (device->physical_device->rad_info.chip_class) {
1672 case SI:
1673 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1674 break;
1675 case CIK:
1676 case VI:
1677 case GFX9:
1678 default:
1679 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1680 break;
1681 }
1682
1683 *max_offchip_buffers_p = max_offchip_buffers;
1684 if (device->physical_device->rad_info.chip_class >= CIK) {
1685 if (device->physical_device->rad_info.chip_class >= VI)
1686 --max_offchip_buffers;
1687 hs_offchip_param =
1688 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1689 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1690 } else {
1691 hs_offchip_param =
1692 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1693 }
1694 return hs_offchip_param;
1695 }
1696
1697 static VkResult
1698 radv_get_preamble_cs(struct radv_queue *queue,
1699 uint32_t scratch_size,
1700 uint32_t compute_scratch_size,
1701 uint32_t esgs_ring_size,
1702 uint32_t gsvs_ring_size,
1703 bool needs_tess_rings,
1704 bool needs_sample_positions,
1705 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1706 struct radeon_winsys_cs **initial_preamble_cs,
1707 struct radeon_winsys_cs **continue_preamble_cs)
1708 {
1709 struct radeon_winsys_bo *scratch_bo = NULL;
1710 struct radeon_winsys_bo *descriptor_bo = NULL;
1711 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1712 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1713 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1714 struct radeon_winsys_bo *tess_rings_bo = NULL;
1715 struct radeon_winsys_cs *dest_cs[3] = {0};
1716 bool add_tess_rings = false, add_sample_positions = false;
1717 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1718 unsigned max_offchip_buffers;
1719 unsigned hs_offchip_param = 0;
1720 unsigned tess_offchip_ring_offset;
1721 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1722 if (!queue->has_tess_rings) {
1723 if (needs_tess_rings)
1724 add_tess_rings = true;
1725 }
1726 if (!queue->has_sample_positions) {
1727 if (needs_sample_positions)
1728 add_sample_positions = true;
1729 }
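/* The tess factor ring is 32K bytes per shader engine; the off-chip ring
 * follows at the next 64K-aligned offset and holds max_offchip_buffers
 * blocks of tess_offchip_block_dw_size dwords each. */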
1730 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1731 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1732 &max_offchip_buffers);
1733 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
1734 tess_offchip_ring_size = max_offchip_buffers *
1735 queue->device->tess_offchip_block_dw_size * 4;
1736
1737 if (scratch_size <= queue->scratch_size &&
1738 compute_scratch_size <= queue->compute_scratch_size &&
1739 esgs_ring_size <= queue->esgs_ring_size &&
1740 gsvs_ring_size <= queue->gsvs_ring_size &&
1741 !add_tess_rings && !add_sample_positions &&
1742 queue->initial_preamble_cs) {
1743 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1744 *initial_preamble_cs = queue->initial_preamble_cs;
1745 *continue_preamble_cs = queue->continue_preamble_cs;
1746 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1747 *continue_preamble_cs = NULL;
1748 return VK_SUCCESS;
1749 }
1750
1751 if (scratch_size > queue->scratch_size) {
1752 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1753 scratch_size,
1754 4096,
1755 RADEON_DOMAIN_VRAM,
1756 ring_bo_flags);
1757 if (!scratch_bo)
1758 goto fail;
1759 } else
1760 scratch_bo = queue->scratch_bo;
1761
1762 if (compute_scratch_size > queue->compute_scratch_size) {
1763 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1764 compute_scratch_size,
1765 4096,
1766 RADEON_DOMAIN_VRAM,
1767 ring_bo_flags);
1768 if (!compute_scratch_bo)
1769 goto fail;
1770
1771 } else
1772 compute_scratch_bo = queue->compute_scratch_bo;
1773
1774 if (esgs_ring_size > queue->esgs_ring_size) {
1775 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1776 esgs_ring_size,
1777 4096,
1778 RADEON_DOMAIN_VRAM,
1779 ring_bo_flags);
1780 if (!esgs_ring_bo)
1781 goto fail;
1782 } else {
1783 esgs_ring_bo = queue->esgs_ring_bo;
1784 esgs_ring_size = queue->esgs_ring_size;
1785 }
1786
1787 if (gsvs_ring_size > queue->gsvs_ring_size) {
1788 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1789 gsvs_ring_size,
1790 4096,
1791 RADEON_DOMAIN_VRAM,
1792 ring_bo_flags);
1793 if (!gsvs_ring_bo)
1794 goto fail;
1795 } else {
1796 gsvs_ring_bo = queue->gsvs_ring_bo;
1797 gsvs_ring_size = queue->gsvs_ring_size;
1798 }
1799
1800 if (add_tess_rings) {
1801 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
1802 tess_offchip_ring_offset + tess_offchip_ring_size,
1803 256,
1804 RADEON_DOMAIN_VRAM,
1805 ring_bo_flags);
1806 if (!tess_rings_bo)
1807 goto fail;
1808 } else {
1809 tess_rings_bo = queue->tess_rings_bo;
1810 }
1811
1812 if (scratch_bo != queue->scratch_bo ||
1813 esgs_ring_bo != queue->esgs_ring_bo ||
1814 gsvs_ring_bo != queue->gsvs_ring_bo ||
1815 tess_rings_bo != queue->tess_rings_bo ||
1816 add_sample_positions) {
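/* The ring descriptors embed GPU virtual addresses, so any change to
 * the ring set (or newly added sample positions) needs a fresh,
 * re-filled descriptor BO; a scratch-only change just needs the
 * 2-dword scratch rsrc below. */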
1817 uint32_t size = 0;
1818 if (gsvs_ring_bo || esgs_ring_bo ||
1819 tess_rings_bo || add_sample_positions) {
1820 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1821 if (add_sample_positions)
1822 size += 256; /* 1+2+4+8+16 samples * 2 floats * 4 bytes = 248 bytes, aligned to 256. */
1823 }
1824 else if (scratch_bo)
1825 size = 8; /* 2 dword */
1826
1827 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1828 size,
1829 4096,
1830 RADEON_DOMAIN_VRAM,
1831 RADEON_FLAG_CPU_ACCESS |
1832 RADEON_FLAG_NO_INTERPROCESS_SHARING |
1833 RADEON_FLAG_READ_ONLY);
1834 if (!descriptor_bo)
1835 goto fail;
1836 } else
1837 descriptor_bo = queue->descriptor_bo;
1838
1839 for(int i = 0; i < 3; ++i) {
1840 struct radeon_winsys_cs *cs = NULL;
1841 cs = queue->device->ws->cs_create(queue->device->ws,
1842 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1843 if (!cs)
1844 goto fail;
1845
1846 dest_cs[i] = cs;
1847
1848 if (scratch_bo)
1849 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1850
1851 if (esgs_ring_bo)
1852 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1853
1854 if (gsvs_ring_bo)
1855 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1856
1857 if (tess_rings_bo)
1858 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8);
1859
1860 if (descriptor_bo)
1861 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1862
1863 if (descriptor_bo != queue->descriptor_bo) {
1864 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1865
1866 if (scratch_bo) {
1867 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1868 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1869 S_008F04_SWIZZLE_ENABLE(1);
1870 map[0] = scratch_va;
1871 map[1] = rsrc1;
1872 }
1873
1874 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
1875 add_sample_positions)
1876 fill_geom_tess_rings(queue, map, add_sample_positions,
1877 esgs_ring_size, esgs_ring_bo,
1878 gsvs_ring_size, gsvs_ring_bo,
1879 tess_factor_ring_size,
1880 tess_offchip_ring_offset,
1881 tess_offchip_ring_size,
1882 tess_rings_bo);
1883
1884 queue->device->ws->buffer_unmap(descriptor_bo);
1885 }
1886
1887 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
1888 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1889 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1890 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1891 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1892 }
1893
1894 if (esgs_ring_bo || gsvs_ring_bo) {
1895 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1896 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1897 radeon_emit(cs, esgs_ring_size >> 8);
1898 radeon_emit(cs, gsvs_ring_size >> 8);
1899 } else {
1900 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1901 radeon_emit(cs, esgs_ring_size >> 8);
1902 radeon_emit(cs, gsvs_ring_size >> 8);
1903 }
1904 }
1905
1906 if (tess_rings_bo) {
1907 uint64_t tf_va = radv_buffer_get_va(tess_rings_bo);
1908 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1909 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1910 S_030938_SIZE(tess_factor_ring_size / 4));
1911 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1912 tf_va >> 8);
1913 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1914 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1915 tf_va >> 40);
1916 }
1917 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1918 } else {
1919 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1920 S_008988_SIZE(tess_factor_ring_size / 4));
1921 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1922 tf_va >> 8);
1923 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1924 hs_offchip_param);
1925 }
1926 }
1927
1928 if (descriptor_bo) {
1929 uint64_t va = radv_buffer_get_va(descriptor_bo);
1930 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1931 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1932 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1933 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1934 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1935
1936 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1937 radeon_set_sh_reg_seq(cs, regs[i], 2);
1938 radeon_emit(cs, va);
1939 radeon_emit(cs, va >> 32);
1940 }
1941 } else {
1942 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1943 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1944 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1945 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1946 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1947 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1948
1949 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1950 radeon_set_sh_reg_seq(cs, regs[i], 2);
1951 radeon_emit(cs, va);
1952 radeon_emit(cs, va >> 32);
1953 }
1954 }
1955 }
1956
1957 if (compute_scratch_bo) {
1958 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1959 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1960 S_008F04_SWIZZLE_ENABLE(1);
1961
1962 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1963
1964 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1965 radeon_emit(cs, scratch_va);
1966 radeon_emit(cs, rsrc1);
1967 }
1968
1969 if (i == 0) {
1970 si_cs_emit_cache_flush(cs,
1971 queue->device->physical_device->rad_info.chip_class,
1972 NULL, 0,
1973 queue->queue_family_index == RING_COMPUTE &&
1974 queue->device->physical_device->rad_info.chip_class >= CIK,
1975 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1976 RADV_CMD_FLAG_INV_ICACHE |
1977 RADV_CMD_FLAG_INV_SMEM_L1 |
1978 RADV_CMD_FLAG_INV_VMEM_L1 |
1979 RADV_CMD_FLAG_INV_GLOBAL_L2);
1980 } else if (i == 1) {
1981 si_cs_emit_cache_flush(cs,
1982 queue->device->physical_device->rad_info.chip_class,
1983 NULL, 0,
1984 queue->queue_family_index == RING_COMPUTE &&
1985 queue->device->physical_device->rad_info.chip_class >= CIK,
1986 RADV_CMD_FLAG_INV_ICACHE |
1987 RADV_CMD_FLAG_INV_SMEM_L1 |
1988 RADV_CMD_FLAG_INV_VMEM_L1 |
1989 RADV_CMD_FLAG_INV_GLOBAL_L2);
1990 }
1991
1992 if (!queue->device->ws->cs_finalize(cs))
1993 goto fail;
1994 }
1995
1996 if (queue->initial_full_flush_preamble_cs)
1997 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1998
1999 if (queue->initial_preamble_cs)
2000 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2001
2002 if (queue->continue_preamble_cs)
2003 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2004
2005 queue->initial_full_flush_preamble_cs = dest_cs[0];
2006 queue->initial_preamble_cs = dest_cs[1];
2007 queue->continue_preamble_cs = dest_cs[2];
2008
2009 if (scratch_bo != queue->scratch_bo) {
2010 if (queue->scratch_bo)
2011 queue->device->ws->buffer_destroy(queue->scratch_bo);
2012 queue->scratch_bo = scratch_bo;
2013 queue->scratch_size = scratch_size;
2014 }
2015
2016 if (compute_scratch_bo != queue->compute_scratch_bo) {
2017 if (queue->compute_scratch_bo)
2018 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2019 queue->compute_scratch_bo = compute_scratch_bo;
2020 queue->compute_scratch_size = compute_scratch_size;
2021 }
2022
2023 if (esgs_ring_bo != queue->esgs_ring_bo) {
2024 if (queue->esgs_ring_bo)
2025 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2026 queue->esgs_ring_bo = esgs_ring_bo;
2027 queue->esgs_ring_size = esgs_ring_size;
2028 }
2029
2030 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2031 if (queue->gsvs_ring_bo)
2032 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2033 queue->gsvs_ring_bo = gsvs_ring_bo;
2034 queue->gsvs_ring_size = gsvs_ring_size;
2035 }
2036
2037 if (tess_rings_bo != queue->tess_rings_bo) {
2038 queue->tess_rings_bo = tess_rings_bo;
2039 queue->has_tess_rings = true;
2040 }
2041
2042 if (descriptor_bo != queue->descriptor_bo) {
2043 if (queue->descriptor_bo)
2044 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2045
2046 queue->descriptor_bo = descriptor_bo;
2047 }
2048
2049 if (add_sample_positions)
2050 queue->has_sample_positions = true;
2051
2052 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2053 *initial_preamble_cs = queue->initial_preamble_cs;
2054 *continue_preamble_cs = queue->continue_preamble_cs;
2055 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2056 *continue_preamble_cs = NULL;
2057 return VK_SUCCESS;
2058 fail:
2059 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2060 if (dest_cs[i])
2061 queue->device->ws->cs_destroy(dest_cs[i]);
2062 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2063 queue->device->ws->buffer_destroy(descriptor_bo);
2064 if (scratch_bo && scratch_bo != queue->scratch_bo)
2065 queue->device->ws->buffer_destroy(scratch_bo);
2066 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2067 queue->device->ws->buffer_destroy(compute_scratch_bo);
2068 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2069 queue->device->ws->buffer_destroy(esgs_ring_bo);
2070 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2071 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2072 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2073 queue->device->ws->buffer_destroy(tess_rings_bo);
2074 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2075 }
2076
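/* Gather semaphore backing objects in two passes: first count kernel
 * syncobjs vs. legacy winsys semaphores, then allocate both arrays and
 * fill them. A temporary syncobj takes priority over the permanent
 * payload of a semaphore or fence. */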
2077 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
2078 int num_sems,
2079 const VkSemaphore *sems,
2080 VkFence _fence,
2081 bool reset_temp)
2082 {
2083 int syncobj_idx = 0, sem_idx = 0;
2084
2085 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2086 return VK_SUCCESS;
2087
2088 for (uint32_t i = 0; i < num_sems; i++) {
2089 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2090
2091 if (sem->temp_syncobj || sem->syncobj)
2092 counts->syncobj_count++;
2093 else
2094 counts->sem_count++;
2095 }
2096
2097 if (_fence != VK_NULL_HANDLE) {
2098 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2099 if (fence->temp_syncobj || fence->syncobj)
2100 counts->syncobj_count++;
2101 }
2102
2103 if (counts->syncobj_count) {
2104 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2105 if (!counts->syncobj)
2106 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2107 }
2108
2109 if (counts->sem_count) {
2110 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2111 if (!counts->sem) {
2112 free(counts->syncobj);
2113 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2114 }
2115 }
2116
2117 for (uint32_t i = 0; i < num_sems; i++) {
2118 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2119
2120 if (sem->temp_syncobj) {
2121 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2122 }
2123 else if (sem->syncobj)
2124 counts->syncobj[syncobj_idx++] = sem->syncobj;
2125 else {
2126 assert(sem->sem);
2127 counts->sem[sem_idx++] = sem->sem;
2128 }
2129 }
2130
2131 if (_fence != VK_NULL_HANDLE) {
2132 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2133 if (fence->temp_syncobj)
2134 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2135 else if (fence->syncobj)
2136 counts->syncobj[syncobj_idx++] = fence->syncobj;
2137 }
2138
2139 return VK_SUCCESS;
2140 }
2141
2142 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2143 {
2144 free(sem_info->wait.syncobj);
2145 free(sem_info->wait.sem);
2146 free(sem_info->signal.syncobj);
2147 free(sem_info->signal.sem);
2148 }
2149
2150
2151 static void radv_free_temp_syncobjs(struct radv_device *device,
2152 int num_sems,
2153 const VkSemaphore *sems)
2154 {
2155 for (uint32_t i = 0; i < num_sems; i++) {
2156 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2157
2158 if (sem->temp_syncobj) {
2159 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2160 sem->temp_syncobj = 0;
2161 }
2162 }
2163 }
2164
2165 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2166 int num_wait_sems,
2167 const VkSemaphore *wait_sems,
2168 int num_signal_sems,
2169 const VkSemaphore *signal_sems,
2170 VkFence fence)
2171 {
2172 VkResult ret;
2173 memset(sem_info, 0, sizeof(*sem_info));
2174
2175 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2176 if (ret)
2177 return ret;
2178 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
2179 if (ret)
2180 radv_free_sem_info(sem_info);
2181
2182 /* caller can override these */
2183 sem_info->cs_emit_wait = true;
2184 sem_info->cs_emit_signal = true;
2185 return ret;
2186 }
2187
2188 /* Signals fence as soon as all the work currently put on queue is done. */
2189 static VkResult radv_signal_fence(struct radv_queue *queue,
2190 struct radv_fence *fence)
2191 {
2192 int ret;
2193 VkResult result;
2194 struct radv_winsys_sem_info sem_info;
2195
2196 result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2197 radv_fence_to_handle(fence));
2198 if (result != VK_SUCCESS)
2199 return result;
2200
2201 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2202 &queue->device->empty_cs[queue->queue_family_index],
2203 1, NULL, NULL, &sem_info,
2204 false, fence->fence);
2205 radv_free_sem_info(&sem_info);
2206
2207 /* TODO: find a better error */
2208 if (ret)
2209 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2210
2211 return VK_SUCCESS;
2212 }
2213
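/* Queue submission: scan all command buffers up front so the scratch and
 * ring sizes are the maximum over the whole call, (re)build the preambles
 * once, then submit the CS list in chunks. Semaphore waits are emitted
 * only with the first chunk and signals only with the last. */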
2214 VkResult radv_QueueSubmit(
2215 VkQueue _queue,
2216 uint32_t submitCount,
2217 const VkSubmitInfo* pSubmits,
2218 VkFence _fence)
2219 {
2220 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2221 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2222 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2223 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2224 int ret;
2225 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2226 uint32_t scratch_size = 0;
2227 uint32_t compute_scratch_size = 0;
2228 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2229 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2230 VkResult result;
2231 bool fence_emitted = false;
2232 bool tess_rings_needed = false;
2233 bool sample_positions_needed = false;
2234
2235 /* Do this first so failing to allocate scratch buffers can't result in
2236 * partially executed submissions. */
2237 for (uint32_t i = 0; i < submitCount; i++) {
2238 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2239 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2240 pSubmits[i].pCommandBuffers[j]);
2241
2242 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2243 compute_scratch_size = MAX2(compute_scratch_size,
2244 cmd_buffer->compute_scratch_size_needed);
2245 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2246 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2247 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2248 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2249 }
2250 }
2251
2252 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2253 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2254 sample_positions_needed, &initial_flush_preamble_cs,
2255 &initial_preamble_cs, &continue_preamble_cs);
2256 if (result != VK_SUCCESS)
2257 return result;
2258
2259 for (uint32_t i = 0; i < submitCount; i++) {
2260 struct radeon_winsys_cs **cs_array;
2261 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2262 bool can_patch = true;
2263 uint32_t advance;
2264 struct radv_winsys_sem_info sem_info;
2265
2266 result = radv_alloc_sem_info(&sem_info,
2267 pSubmits[i].waitSemaphoreCount,
2268 pSubmits[i].pWaitSemaphores,
2269 pSubmits[i].signalSemaphoreCount,
2270 pSubmits[i].pSignalSemaphores,
2271 _fence);
2272 if (result != VK_SUCCESS)
2273 return result;
2274
2275 if (!pSubmits[i].commandBufferCount) {
2276 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2277 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2278 &queue->device->empty_cs[queue->queue_family_index],
2279 1, NULL, NULL,
2280 &sem_info,
2281 false, base_fence);
2282 if (ret) {
2283 radv_loge("failed to submit CS %d\n", i);
2284 abort();
2285 }
2286 fence_emitted = true;
2287 }
2288 radv_free_sem_info(&sem_info);
2289 continue;
2290 }
2291
2292 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2293 (pSubmits[i].commandBufferCount));
if (!cs_array) {
radv_free_sem_info(&sem_info);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
2294
2295 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2296 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2297 pSubmits[i].pCommandBuffers[j]);
2298 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2299
2300 cs_array[j] = cmd_buffer->cs;
2301 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2302 can_patch = false;
2303
2304 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2305 }
2306
2307 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2308 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2309 advance = MIN2(max_cs_submission,
2310 pSubmits[i].commandBufferCount - j);
2311
2312 if (queue->device->trace_bo)
2313 *queue->device->trace_id_ptr = 0;
2314
2315 sem_info.cs_emit_wait = j == 0;
2316 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2317
2318 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2319 advance, initial_preamble, continue_preamble_cs,
2320 &sem_info,
2321 can_patch, base_fence);
2322
2323 if (ret) {
2324 radv_loge("failed to submit CS %d\n", i);
2325 abort();
2326 }
2327 fence_emitted = true;
2328 if (queue->device->trace_bo) {
2329 radv_check_gpu_hangs(queue, cs_array[j]);
2330 }
2331 }
2332
2333 radv_free_temp_syncobjs(queue->device,
2334 pSubmits[i].waitSemaphoreCount,
2335 pSubmits[i].pWaitSemaphores);
2336 radv_free_sem_info(&sem_info);
2337 free(cs_array);
2338 }
2339
2340 if (fence) {
2341 if (!fence_emitted) {
2342 radv_signal_fence(queue, fence);
2343 }
2344 fence->submitted = true;
2345 }
2346
2347 return VK_SUCCESS;
2348 }
2349
2350 VkResult radv_QueueWaitIdle(
2351 VkQueue _queue)
2352 {
2353 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2354
2355 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2356 radv_queue_family_to_ring(queue->queue_family_index),
2357 queue->queue_idx);
2358 return VK_SUCCESS;
2359 }
2360
2361 VkResult radv_DeviceWaitIdle(
2362 VkDevice _device)
2363 {
2364 RADV_FROM_HANDLE(radv_device, device, _device);
2365
2366 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2367 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2368 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2369 }
2370 }
2371 return VK_SUCCESS;
2372 }
2373
2374 VkResult radv_EnumerateInstanceExtensionProperties(
2375 const char* pLayerName,
2376 uint32_t* pPropertyCount,
2377 VkExtensionProperties* pProperties)
2378 {
2379 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2380
2381 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2382 if (radv_supported_instance_extensions.extensions[i]) {
2383 vk_outarray_append(&out, prop) {
2384 *prop = radv_instance_extensions[i];
2385 }
2386 }
2387 }
2388
2389 return vk_outarray_status(&out);
2390 }
2391
2392 VkResult radv_EnumerateDeviceExtensionProperties(
2393 VkPhysicalDevice physicalDevice,
2394 const char* pLayerName,
2395 uint32_t* pPropertyCount,
2396 VkExtensionProperties* pProperties)
2397 {
2398 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2399 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2400
2401 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2402 if (device->supported_extensions.extensions[i]) {
2403 vk_outarray_append(&out, prop) {
2404 *prop = radv_device_extensions[i];
2405 }
2406 }
2407 }
2408
2409 return vk_outarray_status(&out);
2410 }
2411
2412 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2413 VkInstance _instance,
2414 const char* pName)
2415 {
2416 RADV_FROM_HANDLE(radv_instance, instance, _instance);
2417
2418 return radv_lookup_entrypoint_checked(pName,
2419 instance ? instance->apiVersion : 0,
2420 instance ? &instance->enabled_extensions : NULL,
2421 NULL);
2422 }
2423
2424 /* The loader wants us to expose a second GetInstanceProcAddr function
2425 * to work around certain LD_PRELOAD issues seen in apps.
2426 */
2427 PUBLIC
2428 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2429 VkInstance instance,
2430 const char* pName);
2431
2432 PUBLIC
2433 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2434 VkInstance instance,
2435 const char* pName)
2436 {
2437 return radv_GetInstanceProcAddr(instance, pName);
2438 }
2439
2440 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2441 VkDevice _device,
2442 const char* pName)
2443 {
2444 RADV_FROM_HANDLE(radv_device, device, _device);
2445
2446 return radv_lookup_entrypoint_checked(pName,
2447 device->instance->apiVersion,
2448 &device->instance->enabled_extensions,
2449 &device->enabled_extensions);
2450 }
2451
2452 bool radv_get_memory_fd(struct radv_device *device,
2453 struct radv_device_memory *memory,
2454 int *pFD)
2455 {
2456 struct radeon_bo_metadata metadata;
2457
2458 if (memory->image) {
2459 radv_init_metadata(device, memory->image, &metadata);
2460 device->ws->buffer_set_metadata(memory->bo, &metadata);
2461 }
2462
2463 return device->ws->buffer_get_fd(device->ws, memory->bo,
2464 pFD);
2465 }
2466
2467 static VkResult radv_alloc_memory(struct radv_device *device,
2468 const VkMemoryAllocateInfo* pAllocateInfo,
2469 const VkAllocationCallbacks* pAllocator,
2470 VkDeviceMemory* pMem)
2471 {
2472 struct radv_device_memory *mem;
2473 VkResult result;
2474 enum radeon_bo_domain domain;
2475 uint32_t flags = 0;
2476 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2477
2478 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2479
2480 if (pAllocateInfo->allocationSize == 0) {
2481 /* Apparently, this is allowed */
2482 *pMem = VK_NULL_HANDLE;
2483 return VK_SUCCESS;
2484 }
2485
2486 const VkImportMemoryFdInfoKHR *import_info =
2487 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2488 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2489 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2490 const VkExportMemoryAllocateInfoKHR *export_info =
2491 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2492 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2493 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2494
2495 const struct wsi_memory_allocate_info *wsi_info =
2496 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2497
2498 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2499 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2500 if (mem == NULL)
2501 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2502
2503 if (wsi_info && wsi_info->implicit_sync)
2504 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2505
2506 if (dedicate_info) {
2507 mem->image = radv_image_from_handle(dedicate_info->image);
2508 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2509 } else {
2510 mem->image = NULL;
2511 mem->buffer = NULL;
2512 }
2513
2514 mem->user_ptr = NULL;
2515
2516 if (import_info) {
2517 assert(import_info->handleType ==
2518 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2519 import_info->handleType ==
2520 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2521 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2522 NULL, NULL);
2523 if (!mem->bo) {
2524 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2525 goto fail;
2526 } else {
2527 close(import_info->fd);
2528 goto out_success;
2529 }
2530 }
2531
2532 if (host_ptr_info) {
2533 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2534 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2535 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2536 pAllocateInfo->allocationSize);
2537 if (!mem->bo) {
2538 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2539 goto fail;
2540 } else {
2541 mem->user_ptr = host_ptr_info->pHostPointer;
2542 goto out_success;
2543 }
2544 }
2545
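/* Plain allocation path: derive the heap from the radv memory type
 * (GTT write-combine/cached go to GTT, everything else to VRAM) and the
 * BO flags; pure VRAM is CPU-inaccessible, and allocations that can
 * never be exported opt out of inter-process sharing. */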
2546 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2547 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2548 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2549 domain = RADEON_DOMAIN_GTT;
2550 else
2551 domain = RADEON_DOMAIN_VRAM;
2552
2553 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2554 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2555 else
2556 flags |= RADEON_FLAG_CPU_ACCESS;
2557
2558 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2559 flags |= RADEON_FLAG_GTT_WC;
2560
2561 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2562 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2563
2564 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2565 domain, flags);
2566
2567 if (!mem->bo) {
2568 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2569 goto fail;
2570 }
2571 mem->type_index = mem_type_index;
2572 out_success:
2573 *pMem = radv_device_memory_to_handle(mem);
2574
2575 return VK_SUCCESS;
2576
2577 fail:
2578 vk_free2(&device->alloc, pAllocator, mem);
2579
2580 return result;
2581 }
2582
2583 VkResult radv_AllocateMemory(
2584 VkDevice _device,
2585 const VkMemoryAllocateInfo* pAllocateInfo,
2586 const VkAllocationCallbacks* pAllocator,
2587 VkDeviceMemory* pMem)
2588 {
2589 RADV_FROM_HANDLE(radv_device, device, _device);
2590 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2591 }
2592
2593 void radv_FreeMemory(
2594 VkDevice _device,
2595 VkDeviceMemory _mem,
2596 const VkAllocationCallbacks* pAllocator)
2597 {
2598 RADV_FROM_HANDLE(radv_device, device, _device);
2599 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2600
2601 if (mem == NULL)
2602 return;
2603
2604 device->ws->buffer_destroy(mem->bo);
2605 mem->bo = NULL;
2606
2607 vk_free2(&device->alloc, pAllocator, mem);
2608 }
2609
2610 VkResult radv_MapMemory(
2611 VkDevice _device,
2612 VkDeviceMemory _memory,
2613 VkDeviceSize offset,
2614 VkDeviceSize size,
2615 VkMemoryMapFlags flags,
2616 void** ppData)
2617 {
2618 RADV_FROM_HANDLE(radv_device, device, _device);
2619 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2620
2621 if (mem == NULL) {
2622 *ppData = NULL;
2623 return VK_SUCCESS;
2624 }
2625
2626 if (mem->user_ptr)
2627 *ppData = mem->user_ptr;
2628 else
2629 *ppData = device->ws->buffer_map(mem->bo);
2630
2631 if (*ppData) {
2632 *ppData += offset;
2633 return VK_SUCCESS;
2634 }
2635
2636 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2637 }
2638
2639 void radv_UnmapMemory(
2640 VkDevice _device,
2641 VkDeviceMemory _memory)
2642 {
2643 RADV_FROM_HANDLE(radv_device, device, _device);
2644 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2645
2646 if (mem == NULL)
2647 return;
2648
2649 if (mem->user_ptr == NULL)
2650 device->ws->buffer_unmap(mem->bo);
2651 }
2652
2653 VkResult radv_FlushMappedMemoryRanges(
2654 VkDevice _device,
2655 uint32_t memoryRangeCount,
2656 const VkMappedMemoryRange* pMemoryRanges)
2657 {
2658 return VK_SUCCESS;
2659 }
2660
2661 VkResult radv_InvalidateMappedMemoryRanges(
2662 VkDevice _device,
2663 uint32_t memoryRangeCount,
2664 const VkMappedMemoryRange* pMemoryRanges)
2665 {
2666 return VK_SUCCESS;
2667 }
2668
2669 void radv_GetBufferMemoryRequirements(
2670 VkDevice _device,
2671 VkBuffer _buffer,
2672 VkMemoryRequirements* pMemoryRequirements)
2673 {
2674 RADV_FROM_HANDLE(radv_device, device, _device);
2675 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2676
2677 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2678
2679 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2680 pMemoryRequirements->alignment = 4096;
2681 else
2682 pMemoryRequirements->alignment = 16;
2683
2684 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2685 }
2686
2687 void radv_GetBufferMemoryRequirements2(
2688 VkDevice device,
2689 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2690 VkMemoryRequirements2KHR* pMemoryRequirements)
2691 {
2692 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2693 &pMemoryRequirements->memoryRequirements);
2694 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2695 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2696 switch (ext->sType) {
2697 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2698 VkMemoryDedicatedRequirementsKHR *req =
2699 (VkMemoryDedicatedRequirementsKHR *) ext;
2700 req->requiresDedicatedAllocation = buffer->shareable;
2701 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2702 break;
2703 }
2704 default:
2705 break;
2706 }
2707 }
2708 }
2709
2710 void radv_GetImageMemoryRequirements(
2711 VkDevice _device,
2712 VkImage _image,
2713 VkMemoryRequirements* pMemoryRequirements)
2714 {
2715 RADV_FROM_HANDLE(radv_device, device, _device);
2716 RADV_FROM_HANDLE(radv_image, image, _image);
2717
2718 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2719
2720 pMemoryRequirements->size = image->size;
2721 pMemoryRequirements->alignment = image->alignment;
2722 }
2723
2724 void radv_GetImageMemoryRequirements2(
2725 VkDevice device,
2726 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2727 VkMemoryRequirements2KHR* pMemoryRequirements)
2728 {
2729 radv_GetImageMemoryRequirements(device, pInfo->image,
2730 &pMemoryRequirements->memoryRequirements);
2731
2732 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2733
2734 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2735 switch (ext->sType) {
2736 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2737 VkMemoryDedicatedRequirementsKHR *req =
2738 (VkMemoryDedicatedRequirementsKHR *) ext;
2739 req->requiresDedicatedAllocation = image->shareable;
2740 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2741 break;
2742 }
2743 default:
2744 break;
2745 }
2746 }
2747 }
2748
2749 void radv_GetImageSparseMemoryRequirements(
2750 VkDevice device,
2751 VkImage image,
2752 uint32_t* pSparseMemoryRequirementCount,
2753 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2754 {
2755 stub();
2756 }
2757
2758 void radv_GetImageSparseMemoryRequirements2(
2759 VkDevice device,
2760 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2761 uint32_t* pSparseMemoryRequirementCount,
2762 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2763 {
2764 stub();
2765 }
2766
2767 void radv_GetDeviceMemoryCommitment(
2768 VkDevice device,
2769 VkDeviceMemory memory,
2770 VkDeviceSize* pCommittedMemoryInBytes)
2771 {
2772 *pCommittedMemoryInBytes = 0;
2773 }
2774
2775 VkResult radv_BindBufferMemory2(VkDevice device,
2776 uint32_t bindInfoCount,
2777 const VkBindBufferMemoryInfoKHR *pBindInfos)
2778 {
2779 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2780 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2781 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2782
2783 if (mem) {
2784 buffer->bo = mem->bo;
2785 buffer->offset = pBindInfos[i].memoryOffset;
2786 } else {
2787 buffer->bo = NULL;
2788 }
2789 }
2790 return VK_SUCCESS;
2791 }
2792
2793 VkResult radv_BindBufferMemory(
2794 VkDevice device,
2795 VkBuffer buffer,
2796 VkDeviceMemory memory,
2797 VkDeviceSize memoryOffset)
2798 {
2799 const VkBindBufferMemoryInfoKHR info = {
2800 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2801 .buffer = buffer,
2802 .memory = memory,
2803 .memoryOffset = memoryOffset
2804 };
2805
2806 return radv_BindBufferMemory2(device, 1, &info);
2807 }
2808
2809 VkResult radv_BindImageMemory2(VkDevice device,
2810 uint32_t bindInfoCount,
2811 const VkBindImageMemoryInfoKHR *pBindInfos)
2812 {
2813 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2814 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2815 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2816
2817 if (mem) {
2818 image->bo = mem->bo;
2819 image->offset = pBindInfos[i].memoryOffset;
2820 } else {
2821 image->bo = NULL;
2822 image->offset = 0;
2823 }
2824 }
2825 return VK_SUCCESS;
2826 }
2827
2828
2829 VkResult radv_BindImageMemory(
2830 VkDevice device,
2831 VkImage image,
2832 VkDeviceMemory memory,
2833 VkDeviceSize memoryOffset)
2834 {
2835 const VkBindImageMemoryInfoKHR info = {
2836 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2837 .image = image,
2838 .memory = memory,
2839 .memoryOffset = memoryOffset
2840 };
2841
2842 return radv_BindImageMemory2(device, 1, &info);
2843 }
2844
2845
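/* Sparse binding: the destination BO is virtual (see the
 * RADEON_FLAG_VIRTUAL path in radv_CreateBuffer), so each bind remaps a
 * range of its address space onto the backing BO, or unmaps it when no
 * memory is given. */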
2846 static void
2847 radv_sparse_buffer_bind_memory(struct radv_device *device,
2848 const VkSparseBufferMemoryBindInfo *bind)
2849 {
2850 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2851
2852 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2853 struct radv_device_memory *mem = NULL;
2854
2855 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2856 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2857
2858 device->ws->buffer_virtual_bind(buffer->bo,
2859 bind->pBinds[i].resourceOffset,
2860 bind->pBinds[i].size,
2861 mem ? mem->bo : NULL,
2862 bind->pBinds[i].memoryOffset);
2863 }
2864 }
2865
2866 static void
2867 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2868 const VkSparseImageOpaqueMemoryBindInfo *bind)
2869 {
2870 RADV_FROM_HANDLE(radv_image, image, bind->image);
2871
2872 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2873 struct radv_device_memory *mem = NULL;
2874
2875 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2876 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2877
2878 device->ws->buffer_virtual_bind(image->bo,
2879 bind->pBinds[i].resourceOffset,
2880 bind->pBinds[i].size,
2881 mem ? mem->bo : NULL,
2882 bind->pBinds[i].memoryOffset);
2883 }
2884 }
2885
2886 VkResult radv_QueueBindSparse(
2887 VkQueue _queue,
2888 uint32_t bindInfoCount,
2889 const VkBindSparseInfo* pBindInfo,
2890 VkFence _fence)
2891 {
2892 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2893 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2894 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2895 bool fence_emitted = false;
2896
2897 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2898 struct radv_winsys_sem_info sem_info;
2899 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2900 radv_sparse_buffer_bind_memory(queue->device,
2901 pBindInfo[i].pBufferBinds + j);
2902 }
2903
2904 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2905 radv_sparse_image_opaque_bind_memory(queue->device,
2906 pBindInfo[i].pImageOpaqueBinds + j);
2907 }
2908
2909 VkResult result;
2910 result = radv_alloc_sem_info(&sem_info,
2911 pBindInfo[i].waitSemaphoreCount,
2912 pBindInfo[i].pWaitSemaphores,
2913 pBindInfo[i].signalSemaphoreCount,
2914 pBindInfo[i].pSignalSemaphores,
2915 _fence);
2916 if (result != VK_SUCCESS)
2917 return result;
2918
2919 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2920 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2921 &queue->device->empty_cs[queue->queue_family_index],
2922 1, NULL, NULL,
2923 &sem_info,
2924 false, base_fence);
2925 fence_emitted = true;
2926 if (fence)
2927 fence->submitted = true;
2928 }
2929
2930 radv_free_sem_info(&sem_info);
2931
2932 }
2933
2934 if (fence) {
2935 if (!fence_emitted) {
2936 radv_signal_fence(queue, fence);
2937 }
2938 fence->submitted = true;
2939 }
2940
2941 return VK_SUCCESS;
2942 }
2943
2944 VkResult radv_CreateFence(
2945 VkDevice _device,
2946 const VkFenceCreateInfo* pCreateInfo,
2947 const VkAllocationCallbacks* pAllocator,
2948 VkFence* pFence)
2949 {
2950 RADV_FROM_HANDLE(radv_device, device, _device);
2951 const VkExportFenceCreateInfoKHR *export =
2952 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
2953 VkExternalFenceHandleTypeFlagsKHR handleTypes =
2954 export ? export->handleTypes : 0;
2955
2956 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2957 sizeof(*fence), 8,
2958 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2959
2960 if (!fence)
2961 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2962
2963 fence->submitted = false;
2964 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2965 fence->temp_syncobj = 0;
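/* Back the fence with a kernel syncobj when it may be exported, or when
 * the device always uses syncobjs; otherwise use a plain winsys fence. */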
2966 if (device->always_use_syncobj || handleTypes) {
2967 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
2968 if (ret) {
2969 vk_free2(&device->alloc, pAllocator, fence);
2970 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2971 }
2972 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
2973 device->ws->signal_syncobj(device->ws, fence->syncobj);
2974 }
2975 fence->fence = NULL;
2976 } else {
2977 fence->fence = device->ws->create_fence();
2978 if (!fence->fence) {
2979 vk_free2(&device->alloc, pAllocator, fence);
2980 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2981 }
2982 fence->syncobj = 0;
2983 }
2984
2985 *pFence = radv_fence_to_handle(fence);
2986
2987 return VK_SUCCESS;
2988 }
2989
2990 void radv_DestroyFence(
2991 VkDevice _device,
2992 VkFence _fence,
2993 const VkAllocationCallbacks* pAllocator)
2994 {
2995 RADV_FROM_HANDLE(radv_device, device, _device);
2996 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2997
2998 if (!fence)
2999 return;
3000
3001 if (fence->temp_syncobj)
3002 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3003 if (fence->syncobj)
3004 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3005 if (fence->fence)
3006 device->ws->destroy_fence(fence->fence);
3007 vk_free2(&device->alloc, pAllocator, fence);
3008 }
3009
3010
3011 static uint64_t radv_get_current_time(void)
3012 {
3013 struct timespec tv;
3014 clock_gettime(CLOCK_MONOTONIC, &tv);
3015 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3016 }
3017
3018 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3019 {
3020 uint64_t current_time = radv_get_current_time();
3021
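/* Clamp so that current_time + timeout cannot wrap around UINT64_MAX. */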
3022 timeout = MIN2(UINT64_MAX - current_time, timeout);
3023
3024 return current_time + timeout;
3025 }
3026
3027
3028 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3029 {
3030 for (uint32_t i = 0; i < fenceCount; ++i) {
3031 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3032 if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
3033 return false;
3034 }
3035 return true;
3036 }
3037
3038 VkResult radv_WaitForFences(
3039 VkDevice _device,
3040 uint32_t fenceCount,
3041 const VkFence* pFences,
3042 VkBool32 waitAll,
3043 uint64_t timeout)
3044 {
3045 RADV_FROM_HANDLE(radv_device, device, _device);
3046 timeout = radv_get_absolute_timeout(timeout);
3047
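/* Syncobj fast path: the kernel can wait on the whole handle array in
 * one call, honoring both waitAll and the absolute timeout. */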
3048 if (device->always_use_syncobj) {
3049 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3050 if (!handles)
3051 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3052
3053 for (uint32_t i = 0; i < fenceCount; ++i) {
3054 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3055 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3056 }
3057
3058 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3059
3060 free(handles);
3061 return success ? VK_SUCCESS : VK_TIMEOUT;
3062 }
3063
3064 if (!waitAll && fenceCount > 1) {
3065 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3066 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3067 uint32_t wait_count = 0;
3068 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3069 if (!fences)
3070 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3071
3072 for (uint32_t i = 0; i < fenceCount; ++i) {
3073 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3074
3075 if (fence->signalled) {
3076 free(fences);
3077 return VK_SUCCESS;
3078 }
3079
3080 fences[wait_count++] = fence->fence;
3081 }
3082
3083 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3084 waitAll, timeout - radv_get_current_time());
3085
3086 free(fences);
3087 return success ? VK_SUCCESS : VK_TIMEOUT;
3088 }
3089
3090 while(radv_get_current_time() <= timeout) {
3091 for (uint32_t i = 0; i < fenceCount; ++i) {
3092 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3093 return VK_SUCCESS;
3094 }
3095 }
3096 return VK_TIMEOUT;
3097 }
3098
3099 for (uint32_t i = 0; i < fenceCount; ++i) {
3100 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3101 bool expired = false;
3102
3103 if (fence->temp_syncobj) {
3104 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3105 return VK_TIMEOUT;
3106 continue;
3107 }
3108
3109 if (fence->syncobj) {
3110 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3111 return VK_TIMEOUT;
3112 continue;
3113 }
3114
3115 if (fence->signalled)
3116 continue;
3117
3118 if (!fence->submitted) {
3119 while(radv_get_current_time() <= timeout && !fence->submitted)
3120 /* Do nothing */;
3121
3122 if (!fence->submitted)
3123 return VK_TIMEOUT;
3124
3125 /* Recheck as it may have been set by submitting operations. */
3126 if (fence->signalled)
3127 continue;
3128 }
3129
3130 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
3131 if (!expired)
3132 return VK_TIMEOUT;
3133
3134 fence->signalled = true;
3135 }
3136
3137 return VK_SUCCESS;
3138 }
3139
3140 VkResult radv_ResetFences(VkDevice _device,
3141 uint32_t fenceCount,
3142 const VkFence *pFences)
3143 {
3144 RADV_FROM_HANDLE(radv_device, device, _device);
3145
3146 for (unsigned i = 0; i < fenceCount; ++i) {
3147 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3148 fence->submitted = fence->signalled = false;
3149
3150 /* Per spec, we first restore the permanent payload, and then reset, so
3151 * having a temp syncobj should not skip resetting the permanent syncobj. */
3152 if (fence->temp_syncobj) {
3153 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3154 fence->temp_syncobj = 0;
3155 }
3156
3157 if (fence->syncobj) {
3158 device->ws->reset_syncobj(device->ws, fence->syncobj);
3159 }
3160 }
3161
3162 return VK_SUCCESS;
3163 }
3164
3165 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3166 {
3167 RADV_FROM_HANDLE(radv_device, device, _device);
3168 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3169
3170 if (fence->temp_syncobj) {
3171 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3172 return success ? VK_SUCCESS : VK_NOT_READY;
3173 }
3174
3175 if (fence->syncobj) {
3176 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3177 return success ? VK_SUCCESS : VK_NOT_READY;
3178 }
3179
3180 if (fence->signalled)
3181 return VK_SUCCESS;
3182 if (!fence->submitted)
3183 return VK_NOT_READY;
3184 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3185 return VK_NOT_READY;
3186
3187 return VK_SUCCESS;
3188 }
3189
3190
3191 // Queue semaphore functions
3192
3193 VkResult radv_CreateSemaphore(
3194 VkDevice _device,
3195 const VkSemaphoreCreateInfo* pCreateInfo,
3196 const VkAllocationCallbacks* pAllocator,
3197 VkSemaphore* pSemaphore)
3198 {
3199 RADV_FROM_HANDLE(radv_device, device, _device);
3200 const VkExportSemaphoreCreateInfoKHR *export =
3201 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3202 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3203 export ? export->handleTypes : 0;
3204
3205 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3206 sizeof(*sem), 8,
3207 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3208 if (!sem)
3209 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3210
3211 sem->temp_syncobj = 0;
3212 /* Create a syncobj when this semaphore may be exported, or when the device always uses syncobjs. */
3213 if (device->always_use_syncobj || handleTypes) {
3214 assert (device->physical_device->rad_info.has_syncobj);
3215 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3216 if (ret) {
3217 vk_free2(&device->alloc, pAllocator, sem);
3218 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3219 }
3220 sem->sem = NULL;
3221 } else {
3222 sem->sem = device->ws->create_sem(device->ws);
3223 if (!sem->sem) {
3224 vk_free2(&device->alloc, pAllocator, sem);
3225 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3226 }
3227 sem->syncobj = 0;
3228 }
3229
3230 *pSemaphore = radv_semaphore_to_handle(sem);
3231 return VK_SUCCESS;
3232 }
3233
3234 void radv_DestroySemaphore(
3235 VkDevice _device,
3236 VkSemaphore _semaphore,
3237 const VkAllocationCallbacks* pAllocator)
3238 {
3239 RADV_FROM_HANDLE(radv_device, device, _device);
3240 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3241 if (!_semaphore)
3242 return;
3243
3244 if (sem->syncobj)
3245 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3246 else
3247 device->ws->destroy_sem(sem->sem);
3248 vk_free2(&device->alloc, pAllocator, sem);
3249 }
3250
3251 VkResult radv_CreateEvent(
3252 VkDevice _device,
3253 const VkEventCreateInfo* pCreateInfo,
3254 const VkAllocationCallbacks* pAllocator,
3255 VkEvent* pEvent)
3256 {
3257 RADV_FROM_HANDLE(radv_device, device, _device);
3258 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3259 sizeof(*event), 8,
3260 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3261
3262 if (!event)
3263 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3264
3265 event->bo = device->ws->buffer_create(device->ws, 8, 8,
3266 RADEON_DOMAIN_GTT,
3267 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3268 if (!event->bo) {
3269 vk_free2(&device->alloc, pAllocator, event);
3270 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3271 }
3272
3273 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3274
3275 *pEvent = radv_event_to_handle(event);
3276
3277 return VK_SUCCESS;
3278 }
3279
3280 void radv_DestroyEvent(
3281 VkDevice _device,
3282 VkEvent _event,
3283 const VkAllocationCallbacks* pAllocator)
3284 {
3285 RADV_FROM_HANDLE(radv_device, device, _device);
3286 RADV_FROM_HANDLE(radv_event, event, _event);
3287
3288 if (!event)
3289 return;
3290 device->ws->buffer_destroy(event->bo);
3291 vk_free2(&device->alloc, pAllocator, event);
3292 }
3293
3294 VkResult radv_GetEventStatus(
3295 VkDevice _device,
3296 VkEvent _event)
3297 {
3298 RADV_FROM_HANDLE(radv_event, event, _event);
3299
3300 if (*event->map == 1)
3301 return VK_EVENT_SET;
3302 return VK_EVENT_RESET;
3303 }
3304
3305 VkResult radv_SetEvent(
3306 VkDevice _device,
3307 VkEvent _event)
3308 {
3309 RADV_FROM_HANDLE(radv_event, event, _event);
3310 *event->map = 1;
3311
3312 return VK_SUCCESS;
3313 }
3314
3315 VkResult radv_ResetEvent(
3316 VkDevice _device,
3317 VkEvent _event)
3318 {
3319 RADV_FROM_HANDLE(radv_event, event, _event);
3320 *event->map = 0;
3321
3322 return VK_SUCCESS;
3323 }
3324
3325 VkResult radv_CreateBuffer(
3326 VkDevice _device,
3327 const VkBufferCreateInfo* pCreateInfo,
3328 const VkAllocationCallbacks* pAllocator,
3329 VkBuffer* pBuffer)
3330 {
3331 RADV_FROM_HANDLE(radv_device, device, _device);
3332 struct radv_buffer *buffer;
3333
3334 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3335
3336 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3337 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3338 if (buffer == NULL)
3339 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3340
3341 buffer->size = pCreateInfo->size;
3342 buffer->usage = pCreateInfo->usage;
3343 buffer->bo = NULL;
3344 buffer->offset = 0;
3345 buffer->flags = pCreateInfo->flags;
3346
3347 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3348 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3349
3350 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3351 buffer->bo = device->ws->buffer_create(device->ws,
3352 align64(buffer->size, 4096),
3353 4096, 0, RADEON_FLAG_VIRTUAL);
3354 if (!buffer->bo) {
3355 vk_free2(&device->alloc, pAllocator, buffer);
3356 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3357 }
3358 }
3359
3360 *pBuffer = radv_buffer_to_handle(buffer);
3361
3362 return VK_SUCCESS;
3363 }
3364
3365 void radv_DestroyBuffer(
3366 VkDevice _device,
3367 VkBuffer _buffer,
3368 const VkAllocationCallbacks* pAllocator)
3369 {
3370 RADV_FROM_HANDLE(radv_device, device, _device);
3371 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3372
3373 if (!buffer)
3374 return;
3375
3376 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3377 device->ws->buffer_destroy(buffer->bo);
3378
3379 vk_free2(&device->alloc, pAllocator, buffer);
3380 }
3381
3382 static inline unsigned
3383 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3384 {
3385 if (stencil)
3386 return image->surface.u.legacy.stencil_tiling_index[level];
3387 else
3388 return image->surface.u.legacy.tiling_index[level];
3389 }
3390
3391 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3392 {
3393 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3394 }
3395
3396 static void
3397 radv_initialise_color_surface(struct radv_device *device,
3398 struct radv_color_buffer_info *cb,
3399 struct radv_image_view *iview)
3400 {
3401 const struct vk_format_description *desc;
3402 unsigned ntype, format, swap, endian;
3403 unsigned blend_clamp = 0, blend_bypass = 0;
3404 uint64_t va;
3405 const struct radeon_surf *surf = &iview->image->surface;
3406
3407 desc = vk_format_description(iview->vk_format);
3408
3409 memset(cb, 0, sizeof(*cb));
3410
3411 /* Intensity is implemented as Red, so treat it that way. */
3412 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3413
3414 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3415
3416 cb->cb_color_base = va >> 8;
3417
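/* GFX9 addresses the surface by swizzle mode and carries the DCC/CMASK
 * alignment bits in CB_COLOR_ATTRIB; older chips instead program
 * per-level tile mode indices plus explicit pitch/slice sizes. */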
3418 if (device->physical_device->rad_info.chip_class >= GFX9) {
3419 struct gfx9_surf_meta_flags meta;
3420 if (iview->image->dcc_offset)
3421 meta = iview->image->surface.u.gfx9.dcc;
3422 else
3423 meta = iview->image->surface.u.gfx9.cmask;
3424
3425 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3426 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3427 S_028C74_RB_ALIGNED(meta.rb_aligned) |
3428 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3429
3430 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3431 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3432 } else {
3433 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3434 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3435
3436 cb->cb_color_base += level_info->offset >> 8;
3437 if (level_info->mode == RADEON_SURF_MODE_2D)
3438 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3439
3440 pitch_tile_max = level_info->nblk_x / 8 - 1;
3441 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3442 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3443
3444 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3445 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3446 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3447
3448 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3449
3450 if (iview->image->fmask.size) {
3451 if (device->physical_device->rad_info.chip_class >= CIK)
3452 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3453 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3454 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3455 } else {
3456 /* This must be set for fast clear to work without FMASK. */
3457 if (device->physical_device->rad_info.chip_class >= CIK)
3458 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3459 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3460 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3461 }
3462 }
3463
3464 /* CMASK variables */
3465 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3466 va += iview->image->cmask.offset;
3467 cb->cb_color_cmask = va >> 8;
3468
3469 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3470 va += iview->image->dcc_offset;
3471 cb->cb_dcc_base = va >> 8;
3472 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3473
3474 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3475 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3476 S_028C6C_SLICE_MAX(max_slice);
3477
3478 if (iview->image->info.samples > 1) {
3479 unsigned log_samples = util_logbase2(iview->image->info.samples);
3480
3481 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3482 S_028C74_NUM_FRAGMENTS(log_samples);
3483 }
3484
3485 if (iview->image->fmask.size) {
3486 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3487 cb->cb_color_fmask = va >> 8;
3488 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3489 } else {
3490 cb->cb_color_fmask = cb->cb_color_base;
3491 }
3492
3493 ntype = radv_translate_color_numformat(iview->vk_format,
3494 desc,
3495 vk_format_get_first_non_void_channel(iview->vk_format));
3496 format = radv_translate_colorformat(iview->vk_format);
3497 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3498 radv_finishme("Illegal color\n");
3499 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3500 endian = radv_colorformat_endian_swap(format);
3501
3502 /* blend clamp should be set for all NORM/SRGB types */
3503 if (ntype == V_028C70_NUMBER_UNORM ||
3504 ntype == V_028C70_NUMBER_SNORM ||
3505 ntype == V_028C70_NUMBER_SRGB)
3506 blend_clamp = 1;
3507
3508 /* set blend bypass according to docs if SINT/UINT or
3509 8/24 COLOR variants */
3510 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3511 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3512 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3513 blend_clamp = 0;
3514 blend_bypass = 1;
3515 }
3516 #if 0
3517 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3518 (format == V_028C70_COLOR_8 ||
3519 format == V_028C70_COLOR_8_8 ||
3520 format == V_028C70_COLOR_8_8_8_8))
3521 ->color_is_int8 = true;
3522 #endif
3523 cb->cb_color_info = S_028C70_FORMAT(format) |
3524 S_028C70_COMP_SWAP(swap) |
3525 S_028C70_BLEND_CLAMP(blend_clamp) |
3526 S_028C70_BLEND_BYPASS(blend_bypass) |
3527 S_028C70_SIMPLE_FLOAT(1) |
3528 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3529 ntype != V_028C70_NUMBER_SNORM &&
3530 ntype != V_028C70_NUMBER_SRGB &&
3531 format != V_028C70_COLOR_8_24 &&
3532 format != V_028C70_COLOR_24_8) |
3533 S_028C70_NUMBER_TYPE(ntype) |
3534 S_028C70_ENDIAN(endian);
3535 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3536 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3537 if (device->physical_device->rad_info.chip_class == SI) {
3538 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3539 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3540 }
3541 }
3542
3543 if (iview->image->cmask.size &&
3544 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3545 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3546
3547 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3548 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3549
3550 if (device->physical_device->rad_info.chip_class >= VI) {
3551 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3552 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3553 unsigned independent_64b_blocks = 0;
3554 unsigned max_compressed_block_size;
3555
3556 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
3557 64 for APU because all of our APUs to date use DIMMs which have
3558 a request granularity size of 64B while all other chips have a
3559 32B request size */
3560 if (!device->physical_device->rad_info.has_dedicated_vram)
3561 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3562
3563 if (iview->image->info.samples > 1) {
3564 if (iview->image->surface.bpe == 1)
3565 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3566 else if (iview->image->surface.bpe == 2)
3567 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3568 }
3569
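/* If the image can be read through the texture units (sampled, copy
 * source or input attachment), restrict DCC to independent 64B blocks;
 * presumably this keeps the compressed data decodable on the read path.
 */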
3570 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3571 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3572 independent_64b_blocks = 1;
3573 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3574 } else
3575 max_compressed_block_size = max_uncompressed_block_size;
3576
3577 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3578 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3579 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3580 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3581 }
3582
3583 /* This must be set for fast clear to work without FMASK. */
3584 if (!iview->image->fmask.size &&
3585 device->physical_device->rad_info.chip_class == SI) {
3586 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3587 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3588 }
3589
3590 if (device->physical_device->rad_info.chip_class >= GFX9) {
3591 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3592 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3593
3594 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3595 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3596 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3597 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3598 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3599 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3600 }
3601 }
3602
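/* Compute the DECOMPRESS_ON_N_ZPLANES value for TC-compatible HTILE:
 * the number of Z planes per tile above which the hardware falls back
 * to decompressing. The thresholds depend on the depth format and
 * sample count, and differ between GFX9+ and older chips.
 */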
3603 static unsigned
3604 radv_calc_decompress_on_z_planes(struct radv_device *device,
3605 struct radv_image_view *iview)
3606 {
3607 unsigned max_zplanes = 0;
3608
3609 assert(iview->image->tc_compatible_htile);
3610
3611 if (device->physical_device->rad_info.chip_class >= GFX9) {
3612 /* Default value for 32-bit depth surfaces. */
3613 max_zplanes = 4;
3614
3615 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3616 iview->image->info.samples > 1)
3617 max_zplanes = 2;
3618
3619 max_zplanes = max_zplanes + 1;
3620 } else {
3621 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
3622 /* Do not enable Z plane compression for 16-bit depth
3623 * surfaces because it isn't supported on GFX8. Only
3624 * 32-bit depth surfaces are supported by the hardware.
3625 * This allows us to maintain shader compatibility and to
3626 * reduce the number of depth decompressions.
3627 */
3628 max_zplanes = 1;
3629 } else {
3630 if (iview->image->info.samples <= 1)
3631 max_zplanes = 5;
3632 else if (iview->image->info.samples <= 4)
3633 max_zplanes = 3;
3634 else
3635 max_zplanes = 2;
3636 }
3637 }
3638
3639 return max_zplanes;
3640 }
3641
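/* Fill the DB_* register values (depth/stencil base addresses, format,
 * tiling and HTILE state) for a depth/stencil attachment image view.
 */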
3642 static void
3643 radv_initialise_ds_surface(struct radv_device *device,
3644 struct radv_ds_buffer_info *ds,
3645 struct radv_image_view *iview)
3646 {
3647 unsigned level = iview->base_mip;
3648 unsigned format, stencil_format;
3649 uint64_t va, s_offs, z_offs;
3650 bool stencil_only = false;
3651 memset(ds, 0, sizeof(*ds));
3652 switch (iview->image->vk_format) {
3653 case VK_FORMAT_D24_UNORM_S8_UINT:
3654 case VK_FORMAT_X8_D24_UNORM_PACK32:
3655 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3656 ds->offset_scale = 2.0f;
3657 break;
3658 case VK_FORMAT_D16_UNORM:
3659 case VK_FORMAT_D16_UNORM_S8_UINT:
3660 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3661 ds->offset_scale = 4.0f;
3662 break;
3663 case VK_FORMAT_D32_SFLOAT:
3664 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3665 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3666 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3667 ds->offset_scale = 1.0f;
3668 break;
3669 case VK_FORMAT_S8_UINT:
3670 stencil_only = true;
3671 break;
3672 default:
3673 break;
3674 }
3675
3676 format = radv_translate_dbformat(iview->image->vk_format);
3677 stencil_format = iview->image->surface.has_stencil ?
3678 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3679
3680 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3681 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3682 S_028008_SLICE_MAX(max_slice);
3683
3684 ds->db_htile_data_base = 0;
3685 ds->db_htile_surface = 0;
3686
3687 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3688 s_offs = z_offs = va;
3689
3690 if (device->physical_device->rad_info.chip_class >= GFX9) {
3691 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3692 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3693
3694 ds->db_z_info = S_028038_FORMAT(format) |
3695 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3696 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3697 S_028038_MAXMIP(iview->image->info.levels - 1);
3698 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3699 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3700
3701 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3702 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3703 ds->db_depth_view |= S_028008_MIPID(level);
3704
3705 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3706 S_02801C_Y_MAX(iview->image->info.height - 1);
3707
3708 if (radv_htile_enabled(iview->image, level)) {
3709 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3710
3711 if (iview->image->tc_compatible_htile) {
3712 unsigned max_zplanes =
3713 radv_calc_decompress_on_z_planes(device, iview);
3714
3715 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
3716 S_028038_ITERATE_FLUSH(1);
3717 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3718 }
3719
3720 if (!iview->image->surface.has_stencil)
3721 /* Use all of the htile_buffer for depth if there's no stencil. */
3722 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3723 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3724 iview->image->htile_offset;
3725 ds->db_htile_data_base = va >> 8;
3726 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3727 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3728 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3729 }
3730 } else {
3731 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3732
3733 if (stencil_only)
3734 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3735
3736 z_offs += iview->image->surface.u.legacy.level[level].offset;
3737 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3738
3739 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3740 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3741 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3742
3743 if (iview->image->info.samples > 1)
3744 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3745
3746 if (device->physical_device->rad_info.chip_class >= CIK) {
3747 struct radeon_info *info = &device->physical_device->rad_info;
3748 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3749 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3750 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3751 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3752 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3753 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3754
3755 if (stencil_only)
3756 tile_mode = stencil_tile_mode;
3757
3758 ds->db_depth_info |=
3759 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3760 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3761 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3762 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3763 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3764 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3765 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3766 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3767 } else {
3768 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3769 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3770 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3771 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3772 if (stencil_only)
3773 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3774 }
3775
3776 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3777 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3778 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3779
3780 if (radv_htile_enabled(iview->image, level)) {
3781 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3782
3783 if (!iview->image->surface.has_stencil &&
3784 !iview->image->tc_compatible_htile)
3785 /* Use all of the htile_buffer for depth if there's no stencil. */
3786 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3787
3788 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3789 iview->image->htile_offset;
3790 ds->db_htile_data_base = va >> 8;
3791 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3792
3793 if (iview->image->tc_compatible_htile) {
3794 unsigned max_zplanes =
3795 radv_calc_decompress_on_z_planes(device, iview);
3796
3797 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3798 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
3799 }
3800 }
3801 }
3802
3803 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3804 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3805 }
3806
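/* Framebuffer dimensions are clamped below to the smallest attachment,
 * so rendering never addresses outside any attached image view.
 */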
3807 VkResult radv_CreateFramebuffer(
3808 VkDevice _device,
3809 const VkFramebufferCreateInfo* pCreateInfo,
3810 const VkAllocationCallbacks* pAllocator,
3811 VkFramebuffer* pFramebuffer)
3812 {
3813 RADV_FROM_HANDLE(radv_device, device, _device);
3814 struct radv_framebuffer *framebuffer;
3815
3816 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3817
3818 size_t size = sizeof(*framebuffer) +
3819 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3820 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3821 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3822 if (framebuffer == NULL)
3823 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3824
3825 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3826 framebuffer->width = pCreateInfo->width;
3827 framebuffer->height = pCreateInfo->height;
3828 framebuffer->layers = pCreateInfo->layers;
3829 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3830 VkImageView _iview = pCreateInfo->pAttachments[i];
3831 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3832 framebuffer->attachments[i].attachment = iview;
3833 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3834 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3835 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3836 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3837 }
3838 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3839 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3840 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
3841 }
3842
3843 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3844 return VK_SUCCESS;
3845 }
3846
3847 void radv_DestroyFramebuffer(
3848 VkDevice _device,
3849 VkFramebuffer _fb,
3850 const VkAllocationCallbacks* pAllocator)
3851 {
3852 RADV_FROM_HANDLE(radv_device, device, _device);
3853 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3854
3855 if (!fb)
3856 return;
3857 vk_free2(&device->alloc, pAllocator, fb);
3858 }
3859
3860 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3861 {
3862 switch (address_mode) {
3863 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3864 return V_008F30_SQ_TEX_WRAP;
3865 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3866 return V_008F30_SQ_TEX_MIRROR;
3867 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3868 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3869 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3870 return V_008F30_SQ_TEX_CLAMP_BORDER;
3871 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3872 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3873 default:
3874 unreachable("illegal tex wrap mode");
3875 break;
3876 }
3877 }
3878
3879 static unsigned
3880 radv_tex_compare(VkCompareOp op)
3881 {
3882 switch (op) {
3883 case VK_COMPARE_OP_NEVER:
3884 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3885 case VK_COMPARE_OP_LESS:
3886 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3887 case VK_COMPARE_OP_EQUAL:
3888 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3889 case VK_COMPARE_OP_LESS_OR_EQUAL:
3890 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3891 case VK_COMPARE_OP_GREATER:
3892 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3893 case VK_COMPARE_OP_NOT_EQUAL:
3894 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3895 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3896 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3897 case VK_COMPARE_OP_ALWAYS:
3898 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3899 default:
3900 unreachable("illegal compare mode");
3901 break;
3902 }
3903 }
3904
3905 static unsigned
3906 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3907 {
3908 switch (filter) {
3909 case VK_FILTER_NEAREST:
3910 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3911 V_008F38_SQ_TEX_XY_FILTER_POINT);
3912 case VK_FILTER_LINEAR:
3913 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3914 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3915 case VK_FILTER_CUBIC_IMG:
3916 default:
3917 fprintf(stderr, "illegal texture filter\n");
3918 return 0;
3919 }
3920 }
3921
3922 static unsigned
3923 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3924 {
3925 switch (mode) {
3926 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3927 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3928 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3929 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3930 default:
3931 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3932 }
3933 }
3934
3935 static unsigned
3936 radv_tex_bordercolor(VkBorderColor bcolor)
3937 {
3938 switch (bcolor) {
3939 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3940 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3941 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3942 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3943 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3944 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3945 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3946 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3947 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3948 default:
3949 break;
3950 }
3951 return 0;
3952 }
3953
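/* Map a maxAnisotropy value to the log2-encoded hardware ratio:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x (and above) -> 4.
 */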
3954 static unsigned
3955 radv_tex_aniso_filter(unsigned filter)
3956 {
3957 if (filter < 2)
3958 return 0;
3959 if (filter < 4)
3960 return 1;
3961 if (filter < 8)
3962 return 2;
3963 if (filter < 16)
3964 return 3;
3965 return 4;
3966 }
3967
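/* Pack the VkSamplerCreateInfo state into the four 32-bit sampler words
 * consumed by the hardware (sampler->state[0..3]).
 */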
3968 static void
3969 radv_init_sampler(struct radv_device *device,
3970 struct radv_sampler *sampler,
3971 const VkSamplerCreateInfo *pCreateInfo)
3972 {
3973 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3974 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3975 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3976 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3977
3978 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3979 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3980 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3981 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3982 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3983 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3984 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3985 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3986 S_008F30_DISABLE_CUBE_WRAP(0) |
3987 S_008F30_COMPAT_MODE(is_vi));
3988 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3989 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3990 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3991 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3992 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3993 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3994 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3995 S_008F38_MIP_POINT_PRECLAMP(0) |
3996 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
3997 S_008F38_FILTER_PREC_FIX(1) |
3998 S_008F38_ANISO_OVERRIDE(is_vi));
3999 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4000 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4001 }
4002
4003 VkResult radv_CreateSampler(
4004 VkDevice _device,
4005 const VkSamplerCreateInfo* pCreateInfo,
4006 const VkAllocationCallbacks* pAllocator,
4007 VkSampler* pSampler)
4008 {
4009 RADV_FROM_HANDLE(radv_device, device, _device);
4010 struct radv_sampler *sampler;
4011
4012 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4013
4014 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4015 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4016 if (!sampler)
4017 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
4018
4019 radv_init_sampler(device, sampler, pCreateInfo);
4020 *pSampler = radv_sampler_to_handle(sampler);
4021
4022 return VK_SUCCESS;
4023 }
4024
4025 void radv_DestroySampler(
4026 VkDevice _device,
4027 VkSampler _sampler,
4028 const VkAllocationCallbacks* pAllocator)
4029 {
4030 RADV_FROM_HANDLE(radv_device, device, _device);
4031 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4032
4033 if (!sampler)
4034 return;
4035 vk_free2(&device->alloc, pAllocator, sampler);
4036 }
4037
4038 /* vk_icd.h does not declare this function, so we declare it here to
4039 * suppress -Wmissing-prototypes.
4040 */
4041 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4042 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4043
4044 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4045 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4046 {
4047 /* For the full details on loader interface versioning, see
4048 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4049 * What follows is a condensed summary, to help you navigate the large and
4050 * confusing official doc.
4051 *
4052 * - Loader interface v0 is incompatible with later versions. We don't
4053 * support it.
4054 *
4055 * - In loader interface v1:
4056 * - The first ICD entrypoint called by the loader is
4057 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4058 * entrypoint.
4059 * - The ICD must statically expose no other Vulkan symbol unless it is
4060 * linked with -Bsymbolic.
4061 * - Each dispatchable Vulkan handle created by the ICD must be
4062 * a pointer to a struct whose first member is VK_LOADER_DATA. The
4063 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4064 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4065 * vkDestroySurfaceKHR(). The ICD must be capable of working with
4066 * such loader-managed surfaces.
4067 *
4068 * - Loader interface v2 differs from v1 in:
4069 * - The first ICD entrypoint called by the loader is
4070 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4071 * statically expose this entrypoint.
4072 *
4073 * - Loader interface v3 differs from v2 in:
4074 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4075 * vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
4076 * because the loader no longer does so.
4077 */
4078 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4079 return VK_SUCCESS;
4080 }
4081
4082 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4083 const VkMemoryGetFdInfoKHR *pGetFdInfo,
4084 int *pFD)
4085 {
4086 RADV_FROM_HANDLE(radv_device, device, _device);
4087 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4088
4089 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4090
4091 /* At the moment, we support only the below handle types. */
4092 assert(pGetFdInfo->handleType ==
4093 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4094 pGetFdInfo->handleType ==
4095 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4096
4097 bool ret = radv_get_memory_fd(device, memory, pFD);
4098 if (!ret)
4099 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
4100 return VK_SUCCESS;
4101 }
4102
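/* dma-buf imports are allowed into any of our memory types, so the
 * property query below exposes all of them.
 */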
4103 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4104 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4105 int fd,
4106 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4107 {
4108 switch (handleType) {
4109 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4110 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4111 return VK_SUCCESS;
4112
4113 default:
4114 /* The valid usage section for this function says:
4115 *
4116 * "handleType must not be one of the handle types defined as
4117 * opaque."
4118 *
4119 * So opaque handle types fall into the default "unsupported" case.
4120 */
4121 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4122 }
4123 }
4124
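/* Import a DRM syncobj from an opaque fd. On success this takes
 * ownership of the fd (closing it) and replaces, after destroying, any
 * syncobj previously stored in *syncobj.
 */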
4125 static VkResult radv_import_opaque_fd(struct radv_device *device,
4126 int fd,
4127 uint32_t *syncobj)
4128 {
4129 uint32_t syncobj_handle = 0;
4130 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4131 if (ret != 0)
4132 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4133
4134 if (*syncobj)
4135 device->ws->destroy_syncobj(device->ws, *syncobj);
4136
4137 *syncobj = syncobj_handle;
4138 close(fd);
4139
4140 return VK_SUCCESS;
4141 }
4142
4143 static VkResult radv_import_sync_fd(struct radv_device *device,
4144 int fd,
4145 uint32_t *syncobj)
4146 {
4147 /* If we need to create a syncobj, create it locally first, so that on
4148 * error we don't leave the fence/semaphore's syncobj in an undetermined state. */
4149 uint32_t syncobj_handle = *syncobj;
4150 if (!syncobj_handle) {
4151 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4152 if (ret) {
4153 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4154 }
4155 }
4156
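/* Per the external semaphore/fence sync-fd semantics, an fd of -1 means
 * "already signaled", so just signal the syncobj instead of importing.
 */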
4157 if (fd == -1) {
4158 device->ws->signal_syncobj(device->ws, syncobj_handle);
4159 } else {
4160 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4161 if (ret != 0) {
/* If we created the syncobj locally above, destroy it so it doesn't leak. */
if (!*syncobj)
device->ws->destroy_syncobj(device->ws, syncobj_handle);
4162 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
}
4163 }
4164
4165 *syncobj = syncobj_handle;
4166 if (fd != -1)
4167 close(fd);
4168
4169 return VK_SUCCESS;
4170 }
4171
4172 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4173 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4174 {
4175 RADV_FROM_HANDLE(radv_device, device, _device);
4176 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4177 uint32_t *syncobj_dst = NULL;
4178
4179 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4180 syncobj_dst = &sem->temp_syncobj;
4181 } else {
4182 syncobj_dst = &sem->syncobj;
4183 }
4184
4185 switch(pImportSemaphoreFdInfo->handleType) {
4186 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4187 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4188 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4189 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4190 default:
4191 unreachable("Unhandled semaphore handle type");
4192 }
4193 }
4194
4195 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4196 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4197 int *pFd)
4198 {
4199 RADV_FROM_HANDLE(radv_device, device, _device);
4200 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4201 int ret;
4202 uint32_t syncobj_handle;
4203
4204 if (sem->temp_syncobj)
4205 syncobj_handle = sem->temp_syncobj;
4206 else
4207 syncobj_handle = sem->syncobj;
4208
4209 switch(pGetFdInfo->handleType) {
4210 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4211 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4212 break;
4213 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4214 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4215 if (!ret) {
4216 if (sem->temp_syncobj) {
4217 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj); /* a syncobj handle is not an fd, so don't close() it */
4218 sem->temp_syncobj = 0;
4219 } else {
4220 device->ws->reset_syncobj(device->ws, syncobj_handle);
4221 }
4222 }
4223 break;
4224 default:
4225 unreachable("Unhandled semaphore handle type");
4226 }
4227
4228 if (ret)
4229 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4230 return VK_SUCCESS;
4231 }
4232
4233 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4234 VkPhysicalDevice physicalDevice,
4235 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4236 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
4237 {
4238 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4239
4240 /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
4241 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4242 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4243 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4244 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4245 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4246 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4247 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4248 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4249 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4250 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4251 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4252 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4253 } else {
4254 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4255 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4256 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4257 }
4258 }
4259
4260 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4261 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4262 {
4263 RADV_FROM_HANDLE(radv_device, device, _device);
4264 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4265 uint32_t *syncobj_dst = NULL;
4266
4268 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4269 syncobj_dst = &fence->temp_syncobj;
4270 } else {
4271 syncobj_dst = &fence->syncobj;
4272 }
4273
4274 switch(pImportFenceFdInfo->handleType) {
4275 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4276 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4277 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4278 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4279 default:
4280 unreachable("Unhandled fence handle type");
4281 }
4282 }
4283
4284 VkResult radv_GetFenceFdKHR(VkDevice _device,
4285 const VkFenceGetFdInfoKHR *pGetFdInfo,
4286 int *pFd)
4287 {
4288 RADV_FROM_HANDLE(radv_device, device, _device);
4289 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4290 int ret;
4291 uint32_t syncobj_handle;
4292
4293 if (fence->temp_syncobj)
4294 syncobj_handle = fence->temp_syncobj;
4295 else
4296 syncobj_handle = fence->syncobj;
4297
4298 switch(pGetFdInfo->handleType) {
4299 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4300 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4301 break;
4302 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4303 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4304 if (!ret) {
4305 if (fence->temp_syncobj) {
4306 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj); /* a syncobj handle is not an fd, so don't close() it */
4307 fence->temp_syncobj = 0;
4308 } else {
4309 device->ws->reset_syncobj(device->ws, syncobj_handle);
4310 }
4311 }
4312 break;
4313 default:
4314 unreachable("Unhandled fence handle type");
4315 }
4316
4317 if (ret)
4318 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4319 return VK_SUCCESS;
4320 }
4321
4322 void radv_GetPhysicalDeviceExternalFenceProperties(
4323 VkPhysicalDevice physicalDevice,
4324 const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4325 VkExternalFencePropertiesKHR* pExternalFenceProperties)
4326 {
4327 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4328
4329 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4330 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4331 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4332 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4333 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4334 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4335 VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
4336 } else {
4337 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4338 pExternalFenceProperties->compatibleHandleTypes = 0;
4339 pExternalFenceProperties->externalFenceFeatures = 0;
4340 }
4341 }
4342
4343 VkResult
4344 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4345 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4346 const VkAllocationCallbacks* pAllocator,
4347 VkDebugReportCallbackEXT* pCallback)
4348 {
4349 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4350 return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4351 pCreateInfo, pAllocator, &instance->alloc,
4352 pCallback);
4353 }
4354
4355 void
4356 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4357 VkDebugReportCallbackEXT _callback,
4358 const VkAllocationCallbacks* pAllocator)
4359 {
4360 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4361 vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4362 _callback, pAllocator, &instance->alloc);
4363 }
4364
4365 void
4366 radv_DebugReportMessageEXT(VkInstance _instance,
4367 VkDebugReportFlagsEXT flags,
4368 VkDebugReportObjectTypeEXT objectType,
4369 uint64_t object,
4370 size_t location,
4371 int32_t messageCode,
4372 const char* pLayerPrefix,
4373 const char* pMessage)
4374 {
4375 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4376 vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4377 object, location, messageCode, pLayerPrefix, pMessage);
4378 }
4379
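/* radv only exposes single-GPU device groups, hence the assertion that
 * the local and remote device indices match; in that case every peer
 * memory feature is trivially supported.
 */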
4380 void
4381 radv_GetDeviceGroupPeerMemoryFeatures(
4382 VkDevice device,
4383 uint32_t heapIndex,
4384 uint32_t localDeviceIndex,
4385 uint32_t remoteDeviceIndex,
4386 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
4387 {
4388 assert(localDeviceIndex == remoteDeviceIndex);
4389
4390 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
4391 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
4392 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
4393 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
4394 }