radv: Implement VK_EXT_debug_report.
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"

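/* The cache UUID packs the Mesa build timestamp, the LLVM build timestamp
 * and the chip family, so the on-disk shader cache is invalidated whenever
 * the compiler stack or the target GPU changes. */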
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;
	char llvm_string[32] = {};

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	if (HAVE_LLVM > 0) {
		snprintf(llvm_string, sizeof(llvm_string),
			 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
			 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
	}

	snprintf(name, name_len, "%s%s", chip_string, llvm_string);
}

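/* Expose up to three heaps: CPU-invisible VRAM, CPU-visible VRAM and GART,
 * with one memory type per supported combination of property flags. */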
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
	                                  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

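/* Open the render node, check that it is an amdgpu device, and set up the
 * winsys, device name, UUIDs, disk cache and per-family workaround flags. */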
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}

	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The GPU id is already embedded in the UUID so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	radv_physical_device_init_mem_types(device);

	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"binning", RADV_PERFTEST_BINNING},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		/* Force enable LLVM sisched for Talos because it looks safe
		 * and it gives a few more FPS.
		 */
		instance->perftest_flags |= RADV_PERFTEST_SISCHED;
	}
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_instance_extension_supported(ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(result);
	}

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
		/* Disable sisched when the user requests it, this is mostly
		 * useful when the driver force-enables sisched for the given
		 * application.
		 */
		instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
	}

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

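/* Walk the DRM devices and create a physical device for every AMD GPU that
 * exposes a PCI render node. */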
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & (1 << DRM_NODE_RENDER) &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = false,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
			VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		default:
			break;
		}
	}
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a
	 * signed 32-bit int, so the sum of all limits scaled by descriptor
	 * size has to be at most 2 GiB. A combined image+sampler descriptor
	 * counts against both the sampler and the sampled image limits. This
	 * limit is for the pipeline layout, not for the set layout, but there
	 * is no set limit, so we just set a pipeline limit. I don't think any
	 * app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
		(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
		 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
		 32 /* sampler, largest when combined with image */ +
		 64 /* sampled image */ +
		 64 /* storage image */);

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2KHR             *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
			VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
				(VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
				(VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		default:
			break;
		}
	}
}

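/* One graphics queue family is always reported; a separate compute-only
 * family is added on CIK+ when the kernel exposes compute rings and the
 * nocompute debug flag is not set. */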
static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
			              VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
				              VK_QUEUE_TRANSFER_BIT |
				              VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch (pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

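/* Create the winsys context for a new queue at the requested global
 * priority. */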
static VkResult
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
	if (queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

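/* The GS table depth is 16 on the smaller parts and 32 everywhere else. */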
static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice                            physicalDevice,
	const VkDeviceCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDevice*                                   pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	bool keep_shader_info = false;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_physical_device_extension_supported(physical_device, ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);

		if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
			keep_shader_info = true;
	}

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++) {
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
			    sizeof(*device), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	mtx_init(&device->shader_slab_mutex, mtx_plain);
	list_inithead(&device->shader_slabs);

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
			      (device->instance->perftest_flags & RADV_PERFTEST_BINNING);

	/* Disabled and not implemented for now. */
	device->dfsm_allowed = device->pbb_allowed && false;

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * sctx->scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);

	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
				     S_00B800_FORCE_START_AT_000(1);

	if (device->physical_device->rad_info.chip_class >= CIK) {
		/* If the KMD allows it (there is a KMD hw register for it),
		 * allow launching waves out-of-order.
		 */
		device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
	}

	radv_device_init_gs_info(device);

	device->tess_offchip_block_dw_size =
		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
	device->has_distributed_tess =
		device->physical_device->rad_info.chip_class >= VI &&
		device->physical_device->rad_info.max_se >= 2;

	if (getenv("RADV_TRACE_FILE")) {
		keep_shader_info = true;

		if (!radv_init_trace(device))
			goto fail;
	}

	device->keep_shader_info = keep_shader_info;

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (device->physical_device->rad_info.chip_class >= CIK)
		cik_create_gfx_config(device);

	VkPipelineCacheCreateInfo ci;
	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
	ci.pNext = NULL;
	ci.flags = 0;
	ci.pInitialData = NULL;
	ci.initialDataSize = 0;
	VkPipelineCache pc;
	result = radv_CreatePipelineCache(radv_device_to_handle(device),
					  &ci, NULL, &pc);
	if (result != VK_SUCCESS)
		goto fail;

	device->mem_cache = radv_pipeline_cache_from_handle(pc);

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t*                                   pPropertyCount,
	VkLayerProperties*                          pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pPropertyCount,
	VkLayerProperties*                          pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice                                    _device,
	uint32_t                                    queueFamilyIndex,
	uint32_t                                    queueIndex,
	VkQueue*                                    pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

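/* Fill the buffer descriptors for the ES->GS and GS->VS rings and the
 * tessellation factor/offchip rings, then append the standard sample
 * positions used by the shaders. */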
static void
fill_geom_tess_rings(struct radv_queue *queue,
		     uint32_t *map,
		     bool add_sample_positions,
		     uint32_t esgs_ring_size,
		     struct radeon_winsys_bo *esgs_ring_bo,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_bo *gsvs_ring_bo,
		     uint32_t tess_factor_ring_size,
		     struct radeon_winsys_bo *tess_factor_ring_bo,
		     uint32_t tess_offchip_ring_size,
		     struct radeon_winsys_bo *tess_offchip_ring_bo)
{
	uint64_t esgs_va = 0, gsvs_va = 0;
	uint64_t tess_factor_va = 0, tess_offchip_va = 0;
	uint32_t *desc = &map[4];

	if (esgs_ring_bo)
		esgs_va = radv_buffer_get_va(esgs_ring_bo);
	if (gsvs_ring_bo)
		gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
	if (tess_factor_ring_bo)
		tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
	if (tess_offchip_ring_bo)
		tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	   index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(3) |
		  S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	   elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(1) |
		  S_008F0C_ADD_TID_ENABLE(true);
	desc += 4;

	desc[0] = tess_factor_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_factor_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	desc[0] = tess_offchip_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_offchip_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* add sample positions after all rings */
	memcpy(desc, queue->device->sample_locations_1x, 8);
	desc += 2;
	memcpy(desc, queue->device->sample_locations_2x, 16);
	desc += 4;
	memcpy(desc, queue->device->sample_locations_4x, 32);
	desc += 8;
	memcpy(desc, queue->device->sample_locations_8x, 64);
	desc += 16;
	memcpy(desc, queue->device->sample_locations_16x, 128);
}

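/* Compute VGT_HS_OFFCHIP_PARAM and the maximum number of offchip
 * tessellation buffers for this chip. */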
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
		device->physical_device->rad_info.family != CHIP_CARRIZO &&
		device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
		device->physical_device->rad_info.max_se;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;
	switch (device->tess_offchip_block_dw_size) {
	default:
		assert(0);
		/* fall through */
	case 8192:
		offchip_granularity = V_03093C_X_8K_DWORDS;
		break;
	case 4096:
		offchip_granularity = V_03093C_X_4K_DWORDS;
		break;
	}

	switch (device->physical_device->rad_info.chip_class) {
	case SI:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case CIK:
	case VI:
	case GFX9:
	default:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		if (device->physical_device->rad_info.chip_class >= VI)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}

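/* Build (or reuse) the preamble command streams for a queue, growing the
 * scratch buffers and rings when a submission needs more space than what
 * was allocated before. */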
1486 static VkResult
1487 radv_get_preamble_cs(struct radv_queue *queue,
1488 uint32_t scratch_size,
1489 uint32_t compute_scratch_size,
1490 uint32_t esgs_ring_size,
1491 uint32_t gsvs_ring_size,
1492 bool needs_tess_rings,
1493 bool needs_sample_positions,
1494 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1495 struct radeon_winsys_cs **initial_preamble_cs,
1496 struct radeon_winsys_cs **continue_preamble_cs)
1497 {
1498 struct radeon_winsys_bo *scratch_bo = NULL;
1499 struct radeon_winsys_bo *descriptor_bo = NULL;
1500 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1501 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1502 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1503 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1504 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1505 struct radeon_winsys_cs *dest_cs[3] = {0};
1506 bool add_tess_rings = false, add_sample_positions = false;
1507 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1508 unsigned max_offchip_buffers;
1509 unsigned hs_offchip_param = 0;
1510 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1511 if (!queue->has_tess_rings) {
1512 if (needs_tess_rings)
1513 add_tess_rings = true;
1514 }
1515 if (!queue->has_sample_positions) {
1516 if (needs_sample_positions)
1517 add_sample_positions = true;
1518 }
1519 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1520 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1521 &max_offchip_buffers);
1522 tess_offchip_ring_size = max_offchip_buffers *
1523 queue->device->tess_offchip_block_dw_size * 4;
1524
1525 if (scratch_size <= queue->scratch_size &&
1526 compute_scratch_size <= queue->compute_scratch_size &&
1527 esgs_ring_size <= queue->esgs_ring_size &&
1528 gsvs_ring_size <= queue->gsvs_ring_size &&
1529 !add_tess_rings && !add_sample_positions &&
1530 queue->initial_preamble_cs) {
1531 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1532 *initial_preamble_cs = queue->initial_preamble_cs;
1533 *continue_preamble_cs = queue->continue_preamble_cs;
1534 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1535 *continue_preamble_cs = NULL;
1536 return VK_SUCCESS;
1537 }
1538
1539 if (scratch_size > queue->scratch_size) {
1540 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1541 scratch_size,
1542 4096,
1543 RADEON_DOMAIN_VRAM,
1544 ring_bo_flags);
1545 if (!scratch_bo)
1546 goto fail;
1547 } else
1548 scratch_bo = queue->scratch_bo;
1549
1550 if (compute_scratch_size > queue->compute_scratch_size) {
1551 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1552 compute_scratch_size,
1553 4096,
1554 RADEON_DOMAIN_VRAM,
1555 ring_bo_flags);
1556 if (!compute_scratch_bo)
1557 goto fail;
1558
1559 } else
1560 compute_scratch_bo = queue->compute_scratch_bo;
1561
1562 if (esgs_ring_size > queue->esgs_ring_size) {
1563 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1564 esgs_ring_size,
1565 4096,
1566 RADEON_DOMAIN_VRAM,
1567 ring_bo_flags);
1568 if (!esgs_ring_bo)
1569 goto fail;
1570 } else {
1571 esgs_ring_bo = queue->esgs_ring_bo;
1572 esgs_ring_size = queue->esgs_ring_size;
1573 }
1574
1575 if (gsvs_ring_size > queue->gsvs_ring_size) {
1576 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1577 gsvs_ring_size,
1578 4096,
1579 RADEON_DOMAIN_VRAM,
1580 ring_bo_flags);
1581 if (!gsvs_ring_bo)
1582 goto fail;
1583 } else {
1584 gsvs_ring_bo = queue->gsvs_ring_bo;
1585 gsvs_ring_size = queue->gsvs_ring_size;
1586 }
1587
1588 if (add_tess_rings) {
1589 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1590 tess_factor_ring_size,
1591 256,
1592 RADEON_DOMAIN_VRAM,
1593 ring_bo_flags);
1594 if (!tess_factor_ring_bo)
1595 goto fail;
1596 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1597 tess_offchip_ring_size,
1598 256,
1599 RADEON_DOMAIN_VRAM,
1600 ring_bo_flags);
1601 if (!tess_offchip_ring_bo)
1602 goto fail;
1603 } else {
1604 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1605 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1606 }
1607
1608 if (scratch_bo != queue->scratch_bo ||
1609 esgs_ring_bo != queue->esgs_ring_bo ||
1610 gsvs_ring_bo != queue->gsvs_ring_bo ||
1611 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1612 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1613 uint32_t size = 0;
1614 if (gsvs_ring_bo || esgs_ring_bo ||
1615 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1616 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1617 if (add_sample_positions)
1618 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1619 }
1620 else if (scratch_bo)
1621 size = 8; /* 2 dwords */
1622
1623 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1624 size,
1625 4096,
1626 RADEON_DOMAIN_VRAM,
1627 RADEON_FLAG_CPU_ACCESS |
1628 RADEON_FLAG_NO_INTERPROCESS_SHARING |
1629 RADEON_FLAG_READ_ONLY);
1630 if (!descriptor_bo)
1631 goto fail;
1632 } else
1633 descriptor_bo = queue->descriptor_bo;
1634
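/* Build the three preamble variants: dest_cs[0] becomes the initial
 * preamble that also performs a full cache flush, dest_cs[1] the initial
 * preamble with cache invalidation only, and dest_cs[2] the continue
 * preamble that does no flushing at all. */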
1635 for (int i = 0; i < 3; ++i) {
1636 struct radeon_winsys_cs *cs = NULL;
1637 cs = queue->device->ws->cs_create(queue->device->ws,
1638 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1639 if (!cs)
1640 goto fail;
1641
1642 dest_cs[i] = cs;
1643
1644 if (scratch_bo)
1645 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1646
1647 if (esgs_ring_bo)
1648 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1649
1650 if (gsvs_ring_bo)
1651 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1652
1653 if (tess_factor_ring_bo)
1654 radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
1655
1656 if (tess_offchip_ring_bo)
1657 radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
1658
1659 if (descriptor_bo)
1660 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1661
1662 if (descriptor_bo != queue->descriptor_bo) {
1663 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
if (!map)
	goto fail;
1664
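/* Scratch is described by two dwords: the low 32 bits of the VA,
 * then rsrc1 carrying the high address bits and the swizzle enable. */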
1665 if (scratch_bo) {
1666 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1667 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1668 S_008F04_SWIZZLE_ENABLE(1);
1669 map[0] = scratch_va;
1670 map[1] = rsrc1;
1671 }
1672
1673 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1674 add_sample_positions)
1675 fill_geom_tess_rings(queue, map, add_sample_positions,
1676 esgs_ring_size, esgs_ring_bo,
1677 gsvs_ring_size, gsvs_ring_bo,
1678 tess_factor_ring_size, tess_factor_ring_bo,
1679 tess_offchip_ring_size, tess_offchip_ring_bo);
1680
1681 queue->device->ws->buffer_unmap(descriptor_bo);
1682 }
1683
1684 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1685 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1686 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1687 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1688 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1689 }
1690
1691 if (esgs_ring_bo || gsvs_ring_bo) {
1692 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1693 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1694 radeon_emit(cs, esgs_ring_size >> 8);
1695 radeon_emit(cs, gsvs_ring_size >> 8);
1696 } else {
1697 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1698 radeon_emit(cs, esgs_ring_size >> 8);
1699 radeon_emit(cs, gsvs_ring_size >> 8);
1700 }
1701 }
1702
1703 if (tess_factor_ring_bo) {
1704 uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1705 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1706 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1707 S_030938_SIZE(tess_factor_ring_size / 4));
1708 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1709 tf_va >> 8);
1710 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1711 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1712 tf_va >> 40);
1713 }
1714 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1715 } else {
1716 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1717 S_008988_SIZE(tess_factor_ring_size / 4));
1718 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1719 tf_va >> 8);
1720 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1721 hs_offchip_param);
1722 }
1723 }
1724
1725 if (descriptor_bo) {
1726 uint64_t va = radv_buffer_get_va(descriptor_bo);
1727 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1728 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1729 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1730 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1731 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1732
1733 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1734 radeon_set_sh_reg_seq(cs, regs[i], 2);
1735 radeon_emit(cs, va);
1736 radeon_emit(cs, va >> 32);
1737 }
1738 } else {
1739 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1740 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1741 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1742 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1743 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1744 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1745
1746 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1747 radeon_set_sh_reg_seq(cs, regs[i], 2);
1748 radeon_emit(cs, va);
1749 radeon_emit(cs, va >> 32);
1750 }
1751 }
1752 }
1753
1754 if (compute_scratch_bo) {
1755 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1756 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1757 S_008F04_SWIZZLE_ENABLE(1);
1758
1759 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1760
1761 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1762 radeon_emit(cs, scratch_va);
1763 radeon_emit(cs, rsrc1);
1764 }
1765
1766 if (i == 0) {
1767 si_cs_emit_cache_flush(cs,
1768 false,
1769 queue->device->physical_device->rad_info.chip_class,
1770 NULL, 0,
1771 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1772 queue->device->physical_device->rad_info.chip_class >= CIK,
1773 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1774 RADV_CMD_FLAG_INV_ICACHE |
1775 RADV_CMD_FLAG_INV_SMEM_L1 |
1776 RADV_CMD_FLAG_INV_VMEM_L1 |
1777 RADV_CMD_FLAG_INV_GLOBAL_L2);
1778 } else if (i == 1) {
1779 si_cs_emit_cache_flush(cs,
1780 false,
1781 queue->device->physical_device->rad_info.chip_class,
1782 NULL, 0,
1783 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1784 queue->device->physical_device->rad_info.chip_class >= CIK,
1785 RADV_CMD_FLAG_INV_ICACHE |
1786 RADV_CMD_FLAG_INV_SMEM_L1 |
1787 RADV_CMD_FLAG_INV_VMEM_L1 |
1788 RADV_CMD_FLAG_INV_GLOBAL_L2);
1789 }
1790
1791 if (!queue->device->ws->cs_finalize(cs))
1792 goto fail;
1793 }
1794
1795 if (queue->initial_full_flush_preamble_cs)
1796 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1797
1798 if (queue->initial_preamble_cs)
1799 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1800
1801 if (queue->continue_preamble_cs)
1802 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1803
1804 queue->initial_full_flush_preamble_cs = dest_cs[0];
1805 queue->initial_preamble_cs = dest_cs[1];
1806 queue->continue_preamble_cs = dest_cs[2];
1807
1808 if (scratch_bo != queue->scratch_bo) {
1809 if (queue->scratch_bo)
1810 queue->device->ws->buffer_destroy(queue->scratch_bo);
1811 queue->scratch_bo = scratch_bo;
1812 queue->scratch_size = scratch_size;
1813 }
1814
1815 if (compute_scratch_bo != queue->compute_scratch_bo) {
1816 if (queue->compute_scratch_bo)
1817 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1818 queue->compute_scratch_bo = compute_scratch_bo;
1819 queue->compute_scratch_size = compute_scratch_size;
1820 }
1821
1822 if (esgs_ring_bo != queue->esgs_ring_bo) {
1823 if (queue->esgs_ring_bo)
1824 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1825 queue->esgs_ring_bo = esgs_ring_bo;
1826 queue->esgs_ring_size = esgs_ring_size;
1827 }
1828
1829 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1830 if (queue->gsvs_ring_bo)
1831 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1832 queue->gsvs_ring_bo = gsvs_ring_bo;
1833 queue->gsvs_ring_size = gsvs_ring_size;
1834 }
1835
1836 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1837 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1838 }
1839
1840 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1841 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1842 queue->has_tess_rings = true;
1843 }
1844
1845 if (descriptor_bo != queue->descriptor_bo) {
1846 if (queue->descriptor_bo)
1847 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1848
1849 queue->descriptor_bo = descriptor_bo;
1850 }
1851
1852 if (add_sample_positions)
1853 queue->has_sample_positions = true;
1854
1855 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1856 *initial_preamble_cs = queue->initial_preamble_cs;
1857 *continue_preamble_cs = queue->continue_preamble_cs;
1858 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1859 *continue_preamble_cs = NULL;
1860 return VK_SUCCESS;
1861 fail:
1862 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1863 if (dest_cs[i])
1864 queue->device->ws->cs_destroy(dest_cs[i]);
1865 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1866 queue->device->ws->buffer_destroy(descriptor_bo);
1867 if (scratch_bo && scratch_bo != queue->scratch_bo)
1868 queue->device->ws->buffer_destroy(scratch_bo);
1869 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1870 queue->device->ws->buffer_destroy(compute_scratch_bo);
1871 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1872 queue->device->ws->buffer_destroy(esgs_ring_bo);
1873 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1874 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1875 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1876 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1877 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1878 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1879 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1880 }
1881
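/* Collect the syncobj handles and legacy winsys semaphores referenced by
 * the given semaphores (and, for signal operations, the fence) into a
 * radv_winsys_sem_counts. Two passes: first count each kind so the arrays
 * can be sized, then fill them in. A temporary (imported) payload takes
 * precedence over the permanent one. */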
1882 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1883 int num_sems,
1884 const VkSemaphore *sems,
1885 VkFence _fence,
1886 bool reset_temp)
1887 {
1888 int syncobj_idx = 0, sem_idx = 0;
1889
1890 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
1891 return VK_SUCCESS;
1892
1893 for (uint32_t i = 0; i < num_sems; i++) {
1894 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1895
1896 if (sem->temp_syncobj || sem->syncobj)
1897 counts->syncobj_count++;
1898 else
1899 counts->sem_count++;
1900 }
1901
1902 if (_fence != VK_NULL_HANDLE) {
1903 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1904 if (fence->temp_syncobj || fence->syncobj)
1905 counts->syncobj_count++;
1906 }
1907
1908 if (counts->syncobj_count) {
1909 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
1910 if (!counts->syncobj)
1911 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1912 }
1913
1914 if (counts->sem_count) {
1915 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
1916 if (!counts->sem) {
1917 free(counts->syncobj);
1918 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1919 }
1920 }
1921
1922 for (uint32_t i = 0; i < num_sems; i++) {
1923 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1924
1925 if (sem->temp_syncobj) {
1926 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
1927 }
1928 else if (sem->syncobj)
1929 counts->syncobj[syncobj_idx++] = sem->syncobj;
1930 else {
1931 assert(sem->sem);
1932 counts->sem[sem_idx++] = sem->sem;
1933 }
1934 }
1935
1936 if (_fence != VK_NULL_HANDLE) {
1937 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1938 if (fence->temp_syncobj)
1939 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
1940 else if (fence->syncobj)
1941 counts->syncobj[syncobj_idx++] = fence->syncobj;
1942 }
1943
1944 return VK_SUCCESS;
1945 }
1946
1947 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
1948 {
1949 free(sem_info->wait.syncobj);
1950 free(sem_info->wait.sem);
1951 free(sem_info->signal.syncobj);
1952 free(sem_info->signal.sem);
1953 }
1954
1955
1956 static void radv_free_temp_syncobjs(struct radv_device *device,
1957 int num_sems,
1958 const VkSemaphore *sems)
1959 {
1960 for (uint32_t i = 0; i < num_sems; i++) {
1961 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1962
1963 if (sem->temp_syncobj) {
1964 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
1965 sem->temp_syncobj = 0;
1966 }
1967 }
1968 }
1969
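/* Build the wait/signal semaphore state for one submission. The fence is
 * only attached on the signal side. cs_emit_wait/cs_emit_signal default to
 * true; callers that split a submission into chunks override them so waits
 * happen only on the first chunk and signals only on the last. */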
1970 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
1971 int num_wait_sems,
1972 const VkSemaphore *wait_sems,
1973 int num_signal_sems,
1974 const VkSemaphore *signal_sems,
1975 VkFence fence)
1976 {
1977 VkResult ret;
1978 memset(sem_info, 0, sizeof(*sem_info));
1979
1980 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
1981 if (ret)
1982 return ret;
1983 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
1984 if (ret)
1985 radv_free_sem_info(sem_info);
1986
1987 /* caller can override these */
1988 sem_info->cs_emit_wait = true;
1989 sem_info->cs_emit_signal = true;
1990 return ret;
1991 }
1992
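/* vkQueueSubmit: scan every command buffer up front for the scratch and
 * ring sizes it needs so the queue preambles can be (re)built before
 * anything is submitted, then hand the command streams to the winsys in
 * chunks with the appropriate preamble attached. */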
1993 VkResult radv_QueueSubmit(
1994 VkQueue _queue,
1995 uint32_t submitCount,
1996 const VkSubmitInfo* pSubmits,
1997 VkFence _fence)
1998 {
1999 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2000 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2001 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2002 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2003 int ret;
2004 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2005 uint32_t scratch_size = 0;
2006 uint32_t compute_scratch_size = 0;
2007 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2008 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2009 VkResult result;
2010 bool fence_emitted = false;
2011 bool tess_rings_needed = false;
2012 bool sample_positions_needed = false;
2013
2014 /* Do this first so failing to allocate scratch buffers can't result in
2015 * partially executed submissions. */
2016 for (uint32_t i = 0; i < submitCount; i++) {
2017 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2018 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2019 pSubmits[i].pCommandBuffers[j]);
2020
2021 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2022 compute_scratch_size = MAX2(compute_scratch_size,
2023 cmd_buffer->compute_scratch_size_needed);
2024 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2025 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2026 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2027 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2028 }
2029 }
2030
2031 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2032 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2033 sample_positions_needed, &initial_flush_preamble_cs,
2034 &initial_preamble_cs, &continue_preamble_cs);
2035 if (result != VK_SUCCESS)
2036 return result;
2037
2038 for (uint32_t i = 0; i < submitCount; i++) {
2039 struct radeon_winsys_cs **cs_array;
2040 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2041 bool can_patch = true;
2042 uint32_t advance;
2043 struct radv_winsys_sem_info sem_info;
2044
2045 result = radv_alloc_sem_info(&sem_info,
2046 pSubmits[i].waitSemaphoreCount,
2047 pSubmits[i].pWaitSemaphores,
2048 pSubmits[i].signalSemaphoreCount,
2049 pSubmits[i].pSignalSemaphores,
2050 _fence);
2051 if (result != VK_SUCCESS)
2052 return result;
2053
2054 if (!pSubmits[i].commandBufferCount) {
2055 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2056 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2057 &queue->device->empty_cs[queue->queue_family_index],
2058 1, NULL, NULL,
2059 &sem_info,
2060 false, base_fence);
2061 if (ret) {
2062 radv_loge("failed to submit CS %d\n", i);
2063 abort();
2064 }
2065 fence_emitted = true;
2066 }
2067 radv_free_sem_info(&sem_info);
2068 continue;
2069 }
2070
2071 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2072 (pSubmits[i].commandBufferCount));
if (!cs_array) {
	radv_free_sem_info(&sem_info);
	return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
2073
2074 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2075 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2076 pSubmits[i].pCommandBuffers[j]);
2077 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2078
2079 cs_array[j] = cmd_buffer->cs;
2080 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2081 can_patch = false;
2082
2083 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2084 }
2085
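/* Submit at most max_cs_submission command streams per winsys call; with
 * a trace BO this is 1, so a hang can be attributed to a single CS. */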
2086 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2087 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2088 advance = MIN2(max_cs_submission,
2089 pSubmits[i].commandBufferCount - j);
2090
2091 if (queue->device->trace_bo)
2092 *queue->device->trace_id_ptr = 0;
2093
2094 sem_info.cs_emit_wait = j == 0;
2095 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2096
2097 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2098 advance, initial_preamble, continue_preamble_cs,
2099 &sem_info,
2100 can_patch, base_fence);
2101
2102 if (ret) {
2103 radv_loge("failed to submit CS %d\n", i);
2104 abort();
2105 }
2106 fence_emitted = true;
2107 if (queue->device->trace_bo) {
2108 radv_check_gpu_hangs(queue, cs_array[j]);
2109 }
2110 }
2111
2112 radv_free_temp_syncobjs(queue->device,
2113 pSubmits[i].waitSemaphoreCount,
2114 pSubmits[i].pWaitSemaphores);
2115 radv_free_sem_info(&sem_info);
2116 free(cs_array);
2117 }
2118
2119 if (fence) {
2120 if (!fence_emitted) {
2121 struct radv_winsys_sem_info sem_info;
2122
2123 result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2124 _fence);
2125 if (result != VK_SUCCESS)
2126 return result;
2127
2128 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2129 &queue->device->empty_cs[queue->queue_family_index],
2130 1, NULL, NULL, &sem_info,
2131 false, base_fence);
2132 radv_free_sem_info(&sem_info);
2133 }
2134 fence->submitted = true;
2135 }
2136
2137 return VK_SUCCESS;
2138 }
2139
2140 VkResult radv_QueueWaitIdle(
2141 VkQueue _queue)
2142 {
2143 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2144
2145 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2146 radv_queue_family_to_ring(queue->queue_family_index),
2147 queue->queue_idx);
2148 return VK_SUCCESS;
2149 }
2150
2151 VkResult radv_DeviceWaitIdle(
2152 VkDevice _device)
2153 {
2154 RADV_FROM_HANDLE(radv_device, device, _device);
2155
2156 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2157 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2158 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2159 }
2160 }
2161 return VK_SUCCESS;
2162 }
2163
2164 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2165 VkInstance instance,
2166 const char* pName)
2167 {
2168 return radv_lookup_entrypoint(pName);
2169 }
2170
2171 /* The loader wants us to expose a second GetInstanceProcAddr function
2172 * to work around certain LD_PRELOAD issues seen in apps.
2173 */
2174 PUBLIC
2175 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2176 VkInstance instance,
2177 const char* pName);
2178
2179 PUBLIC
2180 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2181 VkInstance instance,
2182 const char* pName)
2183 {
2184 return radv_GetInstanceProcAddr(instance, pName);
2185 }
2186
2187 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2188 VkDevice device,
2189 const char* pName)
2190 {
2191 return radv_lookup_entrypoint(pName);
2192 }
2193
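/* Export a memory object as a file descriptor. For image-backed memory
 * the tiling metadata is written to the BO first so that an importer can
 * interpret the layout. */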
2194 bool radv_get_memory_fd(struct radv_device *device,
2195 struct radv_device_memory *memory,
2196 int *pFD)
2197 {
2198 struct radeon_bo_metadata metadata;
2199
2200 if (memory->image) {
2201 radv_init_metadata(device, memory->image, &metadata);
2202 device->ws->buffer_set_metadata(memory->bo, &metadata);
2203 }
2204
2205 return device->ws->buffer_get_fd(device->ws, memory->bo,
2206 pFD);
2207 }
2208
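/* Common path for vkAllocateMemory. Handles the zero-size special case,
 * fd import (which takes ownership of the fd and closes it on success),
 * dedicated allocations, and plain BO creation with the domain and flags
 * derived from the requested memory type. */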
2209 static VkResult radv_alloc_memory(struct radv_device *device,
2210 const VkMemoryAllocateInfo* pAllocateInfo,
2211 const VkAllocationCallbacks* pAllocator,
2212 VkDeviceMemory* pMem)
2213 {
2214 struct radv_device_memory *mem;
2215 VkResult result;
2216 enum radeon_bo_domain domain;
2217 uint32_t flags = 0;
2218 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2219
2220 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2221
2222 if (pAllocateInfo->allocationSize == 0) {
2223 /* Apparently, this is allowed; return a null handle and report success. */
2224 *pMem = VK_NULL_HANDLE;
2225 return VK_SUCCESS;
2226 }
2227
2228 const VkImportMemoryFdInfoKHR *import_info =
2229 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2230 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2231 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2232 const VkExportMemoryAllocateInfoKHR *export_info =
2233 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2234
2235 const struct wsi_memory_allocate_info *wsi_info =
2236 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2237
2238 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2239 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2240 if (mem == NULL)
2241 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2242
2243 if (wsi_info && wsi_info->implicit_sync)
2244 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2245
2246 if (dedicate_info) {
2247 mem->image = radv_image_from_handle(dedicate_info->image);
2248 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2249 } else {
2250 mem->image = NULL;
2251 mem->buffer = NULL;
2252 }
2253
2254 if (import_info) {
2255 assert(import_info->handleType ==
2256 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2257 import_info->handleType ==
2258 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2259 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2260 NULL, NULL);
2261 if (!mem->bo) {
2262 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2263 goto fail;
2264 } else {
2265 close(import_info->fd);
2266 goto out_success;
2267 }
2268 }
2269
2270 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2271 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2272 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2273 domain = RADEON_DOMAIN_GTT;
2274 else
2275 domain = RADEON_DOMAIN_VRAM;
2276
2277 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2278 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2279 else
2280 flags |= RADEON_FLAG_CPU_ACCESS;
2281
2282 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2283 flags |= RADEON_FLAG_GTT_WC;
2284
2285 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2286 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2287
2288 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2289 domain, flags);
2290
2291 if (!mem->bo) {
2292 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2293 goto fail;
2294 }
2295 mem->type_index = mem_type_index;
2296 out_success:
2297 *pMem = radv_device_memory_to_handle(mem);
2298
2299 return VK_SUCCESS;
2300
2301 fail:
2302 vk_free2(&device->alloc, pAllocator, mem);
2303
2304 return result;
2305 }
2306
2307 VkResult radv_AllocateMemory(
2308 VkDevice _device,
2309 const VkMemoryAllocateInfo* pAllocateInfo,
2310 const VkAllocationCallbacks* pAllocator,
2311 VkDeviceMemory* pMem)
2312 {
2313 RADV_FROM_HANDLE(radv_device, device, _device);
2314 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2315 }
2316
2317 void radv_FreeMemory(
2318 VkDevice _device,
2319 VkDeviceMemory _mem,
2320 const VkAllocationCallbacks* pAllocator)
2321 {
2322 RADV_FROM_HANDLE(radv_device, device, _device);
2323 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2324
2325 if (mem == NULL)
2326 return;
2327
2328 device->ws->buffer_destroy(mem->bo);
2329 mem->bo = NULL;
2330
2331 vk_free2(&device->alloc, pAllocator, mem);
2332 }
2333
2334 VkResult radv_MapMemory(
2335 VkDevice _device,
2336 VkDeviceMemory _memory,
2337 VkDeviceSize offset,
2338 VkDeviceSize size,
2339 VkMemoryMapFlags flags,
2340 void** ppData)
2341 {
2342 RADV_FROM_HANDLE(radv_device, device, _device);
2343 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2344
2345 if (mem == NULL) {
2346 *ppData = NULL;
2347 return VK_SUCCESS;
2348 }
2349
2350 *ppData = device->ws->buffer_map(mem->bo);
2351 if (*ppData) {
2352 *ppData += offset;
2353 return VK_SUCCESS;
2354 }
2355
2356 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2357 }
2358
2359 void radv_UnmapMemory(
2360 VkDevice _device,
2361 VkDeviceMemory _memory)
2362 {
2363 RADV_FROM_HANDLE(radv_device, device, _device);
2364 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2365
2366 if (mem == NULL)
2367 return;
2368
2369 device->ws->buffer_unmap(mem->bo);
2370 }
2371
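/* Every host-visible memory type we expose is also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops. */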
2372 VkResult radv_FlushMappedMemoryRanges(
2373 VkDevice _device,
2374 uint32_t memoryRangeCount,
2375 const VkMappedMemoryRange* pMemoryRanges)
2376 {
2377 return VK_SUCCESS;
2378 }
2379
2380 VkResult radv_InvalidateMappedMemoryRanges(
2381 VkDevice _device,
2382 uint32_t memoryRangeCount,
2383 const VkMappedMemoryRange* pMemoryRanges)
2384 {
2385 return VK_SUCCESS;
2386 }
2387
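/* Sparse buffers need page (4096 byte) alignment so they can be virtually
 * bound; everything else only needs 16 bytes. Any memory type may back a
 * buffer. */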
2388 void radv_GetBufferMemoryRequirements(
2389 VkDevice _device,
2390 VkBuffer _buffer,
2391 VkMemoryRequirements* pMemoryRequirements)
2392 {
2393 RADV_FROM_HANDLE(radv_device, device, _device);
2394 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2395
2396 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2397
2398 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2399 pMemoryRequirements->alignment = 4096;
2400 else
2401 pMemoryRequirements->alignment = 16;
2402
2403 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2404 }
2405
2406 void radv_GetBufferMemoryRequirements2KHR(
2407 VkDevice device,
2408 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2409 VkMemoryRequirements2KHR* pMemoryRequirements)
2410 {
2411 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2412 &pMemoryRequirements->memoryRequirements);
2413 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2414 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2415 switch (ext->sType) {
2416 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2417 VkMemoryDedicatedRequirementsKHR *req =
2418 (VkMemoryDedicatedRequirementsKHR *) ext;
2419 req->requiresDedicatedAllocation = buffer->shareable;
2420 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2421 break;
2422 }
2423 default:
2424 break;
2425 }
2426 }
2427 }
2428
2429 void radv_GetImageMemoryRequirements(
2430 VkDevice _device,
2431 VkImage _image,
2432 VkMemoryRequirements* pMemoryRequirements)
2433 {
2434 RADV_FROM_HANDLE(radv_device, device, _device);
2435 RADV_FROM_HANDLE(radv_image, image, _image);
2436
2437 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2438
2439 pMemoryRequirements->size = image->size;
2440 pMemoryRequirements->alignment = image->alignment;
2441 }
2442
2443 void radv_GetImageMemoryRequirements2KHR(
2444 VkDevice device,
2445 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2446 VkMemoryRequirements2KHR* pMemoryRequirements)
2447 {
2448 radv_GetImageMemoryRequirements(device, pInfo->image,
2449 &pMemoryRequirements->memoryRequirements);
2450
2451 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2452
2453 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2454 switch (ext->sType) {
2455 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2456 VkMemoryDedicatedRequirementsKHR *req =
2457 (VkMemoryDedicatedRequirementsKHR *) ext;
2458 req->requiresDedicatedAllocation = image->shareable;
2459 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2460 break;
2461 }
2462 default:
2463 break;
2464 }
2465 }
2466 }
2467
2468 void radv_GetImageSparseMemoryRequirements(
2469 VkDevice device,
2470 VkImage image,
2471 uint32_t* pSparseMemoryRequirementCount,
2472 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2473 {
2474 stub();
2475 }
2476
2477 void radv_GetImageSparseMemoryRequirements2KHR(
2478 VkDevice device,
2479 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2480 uint32_t* pSparseMemoryRequirementCount,
2481 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2482 {
2483 stub();
2484 }
2485
2486 void radv_GetDeviceMemoryCommitment(
2487 VkDevice device,
2488 VkDeviceMemory memory,
2489 VkDeviceSize* pCommittedMemoryInBytes)
2490 {
2491 *pCommittedMemoryInBytes = 0;
2492 }
2493
2494 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2495 uint32_t bindInfoCount,
2496 const VkBindBufferMemoryInfoKHR *pBindInfos)
2497 {
2498 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2499 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2500 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2501
2502 if (mem) {
2503 buffer->bo = mem->bo;
2504 buffer->offset = pBindInfos[i].memoryOffset;
2505 } else {
2506 buffer->bo = NULL;
2507 }
2508 }
2509 return VK_SUCCESS;
2510 }
2511
2512 VkResult radv_BindBufferMemory(
2513 VkDevice device,
2514 VkBuffer buffer,
2515 VkDeviceMemory memory,
2516 VkDeviceSize memoryOffset)
2517 {
2518 const VkBindBufferMemoryInfoKHR info = {
2519 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2520 .buffer = buffer,
2521 .memory = memory,
2522 .memoryOffset = memoryOffset
2523 };
2524
2525 return radv_BindBufferMemory2KHR(device, 1, &info);
2526 }
2527
2528 VkResult radv_BindImageMemory2KHR(VkDevice device,
2529 uint32_t bindInfoCount,
2530 const VkBindImageMemoryInfoKHR *pBindInfos)
2531 {
2532 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2533 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2534 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2535
2536 if (mem) {
2537 image->bo = mem->bo;
2538 image->offset = pBindInfos[i].memoryOffset;
2539 } else {
2540 image->bo = NULL;
2541 image->offset = 0;
2542 }
2543 }
2544 return VK_SUCCESS;
2545 }
2546
2547
2548 VkResult radv_BindImageMemory(
2549 VkDevice device,
2550 VkImage image,
2551 VkDeviceMemory memory,
2552 VkDeviceSize memoryOffset)
2553 {
2554 const VkBindImageMemoryInfoKHR info = {
2555 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2556 .image = image,
2557 .memory = memory,
2558 .memoryOffset = memoryOffset
2559 };
2560
2561 return radv_BindImageMemory2KHR(device, 1, &info);
2562 }
2563
2564
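/* Sparse binding remaps a range of the resource's virtual BO onto the
 * backing memory, or unmaps it again when no memory is given. */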
2565 static void
2566 radv_sparse_buffer_bind_memory(struct radv_device *device,
2567 const VkSparseBufferMemoryBindInfo *bind)
2568 {
2569 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2570
2571 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2572 struct radv_device_memory *mem = NULL;
2573
2574 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2575 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2576
2577 device->ws->buffer_virtual_bind(buffer->bo,
2578 bind->pBinds[i].resourceOffset,
2579 bind->pBinds[i].size,
2580 mem ? mem->bo : NULL,
2581 bind->pBinds[i].memoryOffset);
2582 }
2583 }
2584
2585 static void
2586 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2587 const VkSparseImageOpaqueMemoryBindInfo *bind)
2588 {
2589 RADV_FROM_HANDLE(radv_image, image, bind->image);
2590
2591 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2592 struct radv_device_memory *mem = NULL;
2593
2594 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2595 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2596
2597 device->ws->buffer_virtual_bind(image->bo,
2598 bind->pBinds[i].resourceOffset,
2599 bind->pBinds[i].size,
2600 mem ? mem->bo : NULL,
2601 bind->pBinds[i].memoryOffset);
2602 }
2603 }
2604
2605 VkResult radv_QueueBindSparse(
2606 VkQueue _queue,
2607 uint32_t bindInfoCount,
2608 const VkBindSparseInfo* pBindInfo,
2609 VkFence _fence)
2610 {
2611 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2612 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2613 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2614 bool fence_emitted = false;
2615
2616 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2617 struct radv_winsys_sem_info sem_info;
2618 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2619 radv_sparse_buffer_bind_memory(queue->device,
2620 pBindInfo[i].pBufferBinds + j);
2621 }
2622
2623 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2624 radv_sparse_image_opaque_bind_memory(queue->device,
2625 pBindInfo[i].pImageOpaqueBinds + j);
2626 }
2627
2628 VkResult result;
2629 result = radv_alloc_sem_info(&sem_info,
2630 pBindInfo[i].waitSemaphoreCount,
2631 pBindInfo[i].pWaitSemaphores,
2632 pBindInfo[i].signalSemaphoreCount,
2633 pBindInfo[i].pSignalSemaphores,
2634 _fence);
2635 if (result != VK_SUCCESS)
2636 return result;
2637
2638 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2639 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2640 &queue->device->empty_cs[queue->queue_family_index],
2641 1, NULL, NULL,
2642 &sem_info,
2643 false, base_fence);
2644 fence_emitted = true;
2645 if (fence)
2646 fence->submitted = true;
2647 }
2648
2649 radv_free_sem_info(&sem_info);
2650
2651 }
2652
2653 if (fence && !fence_emitted) {
2654 fence->signalled = true;
2655 }
2656
2657 return VK_SUCCESS;
2658 }
2659
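/* Fences are backed either by a DRM syncobj (when the fence is created
 * exportable) or by a plain winsys fence. An imported temporary syncobj
 * overrides the permanent payload until the fence is reset. */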
2660 VkResult radv_CreateFence(
2661 VkDevice _device,
2662 const VkFenceCreateInfo* pCreateInfo,
2663 const VkAllocationCallbacks* pAllocator,
2664 VkFence* pFence)
2665 {
2666 RADV_FROM_HANDLE(radv_device, device, _device);
2667 const VkExportFenceCreateInfoKHR *export =
2668 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
2669 VkExternalFenceHandleTypeFlagsKHR handleTypes =
2670 export ? export->handleTypes : 0;
2671
2672 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2673 sizeof(*fence), 8,
2674 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2675
2676 if (!fence)
2677 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2678
2679 fence->submitted = false;
2680 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2681 fence->temp_syncobj = 0;
2682 if (handleTypes) {
2683 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
2684 if (ret) {
2685 vk_free2(&device->alloc, pAllocator, fence);
2686 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2687 }
2688 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
2689 device->ws->signal_syncobj(device->ws, fence->syncobj);
2690 }
2691 fence->fence = NULL;
2692 } else {
2693 fence->fence = device->ws->create_fence();
2694 if (!fence->fence) {
2695 vk_free2(&device->alloc, pAllocator, fence);
2696 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2697 }
2698 fence->syncobj = 0;
2699 }
2700
2701 *pFence = radv_fence_to_handle(fence);
2702
2703 return VK_SUCCESS;
2704 }
2705
2706 void radv_DestroyFence(
2707 VkDevice _device,
2708 VkFence _fence,
2709 const VkAllocationCallbacks* pAllocator)
2710 {
2711 RADV_FROM_HANDLE(radv_device, device, _device);
2712 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2713
2714 if (!fence)
2715 return;
2716
2717 if (fence->temp_syncobj)
2718 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
2719 if (fence->syncobj)
2720 device->ws->destroy_syncobj(device->ws, fence->syncobj);
2721 if (fence->fence)
2722 device->ws->destroy_fence(fence->fence);
2723 vk_free2(&device->alloc, pAllocator, fence);
2724 }
2725
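/* Turn a relative timeout in nanoseconds into an absolute CLOCK_MONOTONIC
 * deadline, clamping so that current_time + timeout cannot overflow. */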
2726 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2727 {
2728 uint64_t current_time;
2729 struct timespec tv;
2730
2731 clock_gettime(CLOCK_MONOTONIC, &tv);
2732 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2733
2734 timeout = MIN2(UINT64_MAX - current_time, timeout);
2735
2736 return current_time + timeout;
2737 }
2738
2739 VkResult radv_WaitForFences(
2740 VkDevice _device,
2741 uint32_t fenceCount,
2742 const VkFence* pFences,
2743 VkBool32 waitAll,
2744 uint64_t timeout)
2745 {
2746 RADV_FROM_HANDLE(radv_device, device, _device);
2747 timeout = radv_get_absolute_timeout(timeout);
2748
2749 if (!waitAll && fenceCount > 1) {
2750 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2751 }
2752
2753 for (uint32_t i = 0; i < fenceCount; ++i) {
2754 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2755 bool expired = false;
2756
2757 if (fence->temp_syncobj) {
2758 if (!device->ws->wait_syncobj(device->ws, fence->temp_syncobj, timeout))
2759 return VK_TIMEOUT;
2760 continue;
2761 }
2762
2763 if (fence->syncobj) {
2764 if (!device->ws->wait_syncobj(device->ws, fence->syncobj, timeout))
2765 return VK_TIMEOUT;
2766 continue;
2767 }
2768
2769 if (fence->signalled)
2770 continue;
2771
2772 if (!fence->submitted)
2773 return VK_TIMEOUT;
2774
2775 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2776 if (!expired)
2777 return VK_TIMEOUT;
2778
2779 fence->signalled = true;
2780 }
2781
2782 return VK_SUCCESS;
2783 }
2784
2785 VkResult radv_ResetFences(VkDevice _device,
2786 uint32_t fenceCount,
2787 const VkFence *pFences)
2788 {
2789 RADV_FROM_HANDLE(radv_device, device, _device);
2790
2791 for (unsigned i = 0; i < fenceCount; ++i) {
2792 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2793 fence->submitted = fence->signalled = false;
2794
2795 /* Per spec, we first restore the permanent payload, and then reset, so
2796 * having a temp syncobj should not skip resetting the permanent syncobj. */
2797 if (fence->temp_syncobj) {
2798 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
2799 fence->temp_syncobj = 0;
2800 }
2801
2802 if (fence->syncobj) {
2803 device->ws->reset_syncobj(device->ws, fence->syncobj);
2804 }
2805 }
2806
2807 return VK_SUCCESS;
2808 }
2809
2810 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2811 {
2812 RADV_FROM_HANDLE(radv_device, device, _device);
2813 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2814
2815 if (fence->temp_syncobj) {
2816 bool success = device->ws->wait_syncobj(device->ws, fence->temp_syncobj, 0);
2817 return success ? VK_SUCCESS : VK_NOT_READY;
2818 }
2819
2820 if (fence->syncobj) {
2821 bool success = device->ws->wait_syncobj(device->ws, fence->syncobj, 0);
2822 return success ? VK_SUCCESS : VK_NOT_READY;
2823 }
2824
2825 if (fence->signalled)
2826 return VK_SUCCESS;
2827 if (!fence->submitted)
2828 return VK_NOT_READY;
2829 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2830 return VK_NOT_READY;
2831
2832 return VK_SUCCESS;
2833 }
2834
2835
2836 // Queue semaphore functions
2837
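/* Like fences, semaphores are backed either by a DRM syncobj when they may
 * be exported, or by a winsys semaphore otherwise. */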
2838 VkResult radv_CreateSemaphore(
2839 VkDevice _device,
2840 const VkSemaphoreCreateInfo* pCreateInfo,
2841 const VkAllocationCallbacks* pAllocator,
2842 VkSemaphore* pSemaphore)
2843 {
2844 RADV_FROM_HANDLE(radv_device, device, _device);
2845 const VkExportSemaphoreCreateInfoKHR *export =
2846 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
2847 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
2848 export ? export->handleTypes : 0;
2849
2850 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
2851 sizeof(*sem), 8,
2852 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2853 if (!sem)
2854 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2855
2856 sem->temp_syncobj = 0;
2857 /* create a syncobject if we are going to export this semaphore */
2858 if (handleTypes) {
2859 assert(device->physical_device->rad_info.has_syncobj);
2860 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
2861 if (ret) {
2862 vk_free2(&device->alloc, pAllocator, sem);
2863 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2864 }
2865 sem->sem = NULL;
2866 } else {
2867 sem->sem = device->ws->create_sem(device->ws);
2868 if (!sem->sem) {
2869 vk_free2(&device->alloc, pAllocator, sem);
2870 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2871 }
2872 sem->syncobj = 0;
2873 }
2874
2875 *pSemaphore = radv_semaphore_to_handle(sem);
2876 return VK_SUCCESS;
2877 }
2878
2879 void radv_DestroySemaphore(
2880 VkDevice _device,
2881 VkSemaphore _semaphore,
2882 const VkAllocationCallbacks* pAllocator)
2883 {
2884 RADV_FROM_HANDLE(radv_device, device, _device);
2885 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
2886 if (!_semaphore)
2887 return;
2888
2889 if (sem->syncobj)
2890 device->ws->destroy_syncobj(device->ws, sem->syncobj);
2891 else
2892 device->ws->destroy_sem(sem->sem);
2893 vk_free2(&device->alloc, pAllocator, sem);
2894 }
2895
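/* An event is an 8-byte GTT buffer that stays CPU-mapped for its whole
 * lifetime: vkSetEvent/vkResetEvent write 0 or 1 through the mapping and
 * vkGetEventStatus simply polls it. */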
2896 VkResult radv_CreateEvent(
2897 VkDevice _device,
2898 const VkEventCreateInfo* pCreateInfo,
2899 const VkAllocationCallbacks* pAllocator,
2900 VkEvent* pEvent)
2901 {
2902 RADV_FROM_HANDLE(radv_device, device, _device);
2903 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2904 sizeof(*event), 8,
2905 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2906
2907 if (!event)
2908 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2909
2910 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2911 RADEON_DOMAIN_GTT,
2912 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
2913 if (!event->bo) {
2914 vk_free2(&device->alloc, pAllocator, event);
2915 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2916 }
2917
2918 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
if (!event->map) {
	device->ws->buffer_destroy(event->bo);
	vk_free2(&device->alloc, pAllocator, event);
	return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
2919
2920 *pEvent = radv_event_to_handle(event);
2921
2922 return VK_SUCCESS;
2923 }
2924
2925 void radv_DestroyEvent(
2926 VkDevice _device,
2927 VkEvent _event,
2928 const VkAllocationCallbacks* pAllocator)
2929 {
2930 RADV_FROM_HANDLE(radv_device, device, _device);
2931 RADV_FROM_HANDLE(radv_event, event, _event);
2932
2933 if (!event)
2934 return;
2935 device->ws->buffer_destroy(event->bo);
2936 vk_free2(&device->alloc, pAllocator, event);
2937 }
2938
2939 VkResult radv_GetEventStatus(
2940 VkDevice _device,
2941 VkEvent _event)
2942 {
2943 RADV_FROM_HANDLE(radv_event, event, _event);
2944
2945 if (*event->map == 1)
2946 return VK_EVENT_SET;
2947 return VK_EVENT_RESET;
2948 }
2949
2950 VkResult radv_SetEvent(
2951 VkDevice _device,
2952 VkEvent _event)
2953 {
2954 RADV_FROM_HANDLE(radv_event, event, _event);
2955 *event->map = 1;
2956
2957 return VK_SUCCESS;
2958 }
2959
2960 VkResult radv_ResetEvent(
2961 VkDevice _device,
2962 VkEvent _event)
2963 {
2964 RADV_FROM_HANDLE(radv_event, event, _event);
2965 *event->map = 0;
2966
2967 return VK_SUCCESS;
2968 }
2969
2970 VkResult radv_CreateBuffer(
2971 VkDevice _device,
2972 const VkBufferCreateInfo* pCreateInfo,
2973 const VkAllocationCallbacks* pAllocator,
2974 VkBuffer* pBuffer)
2975 {
2976 RADV_FROM_HANDLE(radv_device, device, _device);
2977 struct radv_buffer *buffer;
2978
2979 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2980
2981 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2982 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2983 if (buffer == NULL)
2984 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2985
2986 buffer->size = pCreateInfo->size;
2987 buffer->usage = pCreateInfo->usage;
2988 buffer->bo = NULL;
2989 buffer->offset = 0;
2990 buffer->flags = pCreateInfo->flags;
2991
2992 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
2993 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
2994
2995 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2996 buffer->bo = device->ws->buffer_create(device->ws,
2997 align64(buffer->size, 4096),
2998 4096, 0, RADEON_FLAG_VIRTUAL);
2999 if (!buffer->bo) {
3000 vk_free2(&device->alloc, pAllocator, buffer);
3001 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3002 }
3003 }
3004
3005 *pBuffer = radv_buffer_to_handle(buffer);
3006
3007 return VK_SUCCESS;
3008 }
3009
3010 void radv_DestroyBuffer(
3011 VkDevice _device,
3012 VkBuffer _buffer,
3013 const VkAllocationCallbacks* pAllocator)
3014 {
3015 RADV_FROM_HANDLE(radv_device, device, _device);
3016 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3017
3018 if (!buffer)
3019 return;
3020
3021 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3022 device->ws->buffer_destroy(buffer->bo);
3023
3024 vk_free2(&device->alloc, pAllocator, buffer);
3025 }
3026
3027 static inline unsigned
3028 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3029 {
3030 if (stencil)
3031 return image->surface.u.legacy.stencil_tiling_index[level];
3032 else
3033 return image->surface.u.legacy.tiling_index[level];
3034 }
3035
3036 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3037 {
3038 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3039 }
3040
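/* Fill the CB_COLOR* register values describing a color attachment view:
 * base address and pitch/slice layout (GFX9 vs. older generations),
 * FMASK/CMASK/DCC state, and the format, swap and blend bits that make up
 * CB_COLOR_INFO. */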
3041 static void
3042 radv_initialise_color_surface(struct radv_device *device,
3043 struct radv_color_buffer_info *cb,
3044 struct radv_image_view *iview)
3045 {
3046 const struct vk_format_description *desc;
3047 unsigned ntype, format, swap, endian;
3048 unsigned blend_clamp = 0, blend_bypass = 0;
3049 uint64_t va;
3050 const struct radeon_surf *surf = &iview->image->surface;
3051
3052 desc = vk_format_description(iview->vk_format);
3053
3054 memset(cb, 0, sizeof(*cb));
3055
3056 /* Intensity is implemented as Red, so treat it that way. */
3057 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3058
3059 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3060
3061 cb->cb_color_base = va >> 8;
3062
3063 if (device->physical_device->rad_info.chip_class >= GFX9) {
3064 struct gfx9_surf_meta_flags meta;
3065 if (iview->image->dcc_offset)
3066 meta = iview->image->surface.u.gfx9.dcc;
3067 else
3068 meta = iview->image->surface.u.gfx9.cmask;
3069
3070 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3071 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3072 S_028C74_RB_ALIGNED(meta.rb_aligned) |
3073 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3074
3075 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3076 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3077 } else {
3078 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3079 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3080
3081 cb->cb_color_base += level_info->offset >> 8;
3082 if (level_info->mode == RADEON_SURF_MODE_2D)
3083 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3084
3085 pitch_tile_max = level_info->nblk_x / 8 - 1;
3086 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3087 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3088
3089 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3090 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3091 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3092
3093 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3094
3095 if (iview->image->fmask.size) {
3096 if (device->physical_device->rad_info.chip_class >= CIK)
3097 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3098 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3099 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3100 } else {
3101 /* This must be set for fast clear to work without FMASK. */
3102 if (device->physical_device->rad_info.chip_class >= CIK)
3103 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3104 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3105 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3106 }
3107 }
3108
3109 /* CMASK variables */
3110 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3111 va += iview->image->cmask.offset;
3112 cb->cb_color_cmask = va >> 8;
3113
3114 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3115 va += iview->image->dcc_offset;
3116 cb->cb_dcc_base = va >> 8;
3117 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3118
3119 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3120 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3121 S_028C6C_SLICE_MAX(max_slice);
3122
3123 if (iview->image->info.samples > 1) {
3124 unsigned log_samples = util_logbase2(iview->image->info.samples);
3125
3126 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3127 S_028C74_NUM_FRAGMENTS(log_samples);
3128 }
3129
3130 if (iview->image->fmask.size) {
3131 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3132 cb->cb_color_fmask = va >> 8;
3133 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3134 } else {
3135 cb->cb_color_fmask = cb->cb_color_base;
3136 }
3137
3138 ntype = radv_translate_color_numformat(iview->vk_format,
3139 desc,
3140 vk_format_get_first_non_void_channel(iview->vk_format));
3141 format = radv_translate_colorformat(iview->vk_format);
3142 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3143 radv_finishme("Illegal color\n");
3144 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3145 endian = radv_colorformat_endian_swap(format);
3146
3147 /* blend clamp should be set for all NORM/SRGB types */
3148 if (ntype == V_028C70_NUMBER_UNORM ||
3149 ntype == V_028C70_NUMBER_SNORM ||
3150 ntype == V_028C70_NUMBER_SRGB)
3151 blend_clamp = 1;
3152
3153 /* Set blend bypass according to the docs for SINT/UINT and the
3154 * 8/24 COLOR variants. */
3155 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3156 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3157 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3158 blend_clamp = 0;
3159 blend_bypass = 1;
3160 }
3161 #if 0
3162 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3163 (format == V_028C70_COLOR_8 ||
3164 format == V_028C70_COLOR_8_8 ||
3165 format == V_028C70_COLOR_8_8_8_8))
3166 ->color_is_int8 = true;
3167 #endif
3168 cb->cb_color_info = S_028C70_FORMAT(format) |
3169 S_028C70_COMP_SWAP(swap) |
3170 S_028C70_BLEND_CLAMP(blend_clamp) |
3171 S_028C70_BLEND_BYPASS(blend_bypass) |
3172 S_028C70_SIMPLE_FLOAT(1) |
3173 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3174 ntype != V_028C70_NUMBER_SNORM &&
3175 ntype != V_028C70_NUMBER_SRGB &&
3176 format != V_028C70_COLOR_8_24 &&
3177 format != V_028C70_COLOR_24_8) |
3178 S_028C70_NUMBER_TYPE(ntype) |
3179 S_028C70_ENDIAN(endian);
3180 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3181 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3182 if (device->physical_device->rad_info.chip_class == SI) {
3183 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3184 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3185 }
3186 }
3187
3188 if (iview->image->cmask.size &&
3189 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3190 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3191
3192 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3193 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3194
3195 if (device->physical_device->rad_info.chip_class >= VI) {
3196 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3197 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3198 unsigned independent_64b_blocks = 0;
3199 unsigned max_compressed_block_size;
3200
3201 /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
3202 64 for APU because all of our APUs to date use DIMMs which have
3203 a request granularity size of 64B while all other chips have a
3204 32B request size */
3205 if (!device->physical_device->rad_info.has_dedicated_vram)
3206 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3207
3208 if (iview->image->info.samples > 1) {
3209 if (iview->image->surface.bpe == 1)
3210 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3211 else if (iview->image->surface.bpe == 2)
3212 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3213 }
3214
3215 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3216 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3217 independent_64b_blocks = 1;
3218 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3219 } else
3220 max_compressed_block_size = max_uncompressed_block_size;
3221
3222 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3223 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3224 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3225 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3226 }
3227
3228 /* This must be set for fast clear to work without FMASK. */
3229 if (!iview->image->fmask.size &&
3230 device->physical_device->rad_info.chip_class == SI) {
3231 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3232 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3233 }
3234
3235 if (device->physical_device->rad_info.chip_class >= GFX9) {
3236 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3237 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3238
3239 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3240 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3241 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3242 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3243 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3244 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3245 }
3246 }
3247
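/* Fill the DB_* register values describing a depth/stencil view: the
 * polygon-offset scale implied by the format, per-generation tiling
 * state, and the HTILE metadata surface when it is enabled. */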
3248 static void
3249 radv_initialise_ds_surface(struct radv_device *device,
3250 struct radv_ds_buffer_info *ds,
3251 struct radv_image_view *iview)
3252 {
3253 unsigned level = iview->base_mip;
3254 unsigned format, stencil_format;
3255 uint64_t va, s_offs, z_offs;
3256 bool stencil_only = false;
3257 memset(ds, 0, sizeof(*ds));
3258 switch (iview->image->vk_format) {
3259 case VK_FORMAT_D24_UNORM_S8_UINT:
3260 case VK_FORMAT_X8_D24_UNORM_PACK32:
3261 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3262 ds->offset_scale = 2.0f;
3263 break;
3264 case VK_FORMAT_D16_UNORM:
3265 case VK_FORMAT_D16_UNORM_S8_UINT:
3266 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3267 ds->offset_scale = 4.0f;
3268 break;
3269 case VK_FORMAT_D32_SFLOAT:
3270 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3271 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3272 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3273 ds->offset_scale = 1.0f;
3274 break;
3275 case VK_FORMAT_S8_UINT:
3276 stencil_only = true;
3277 break;
3278 default:
3279 break;
3280 }
3281
3282 format = radv_translate_dbformat(iview->image->vk_format);
3283 stencil_format = iview->image->surface.has_stencil ?
3284 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3285
3286 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3287 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3288 S_028008_SLICE_MAX(max_slice);
3289
3290 ds->db_htile_data_base = 0;
3291 ds->db_htile_surface = 0;
3292
3293 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3294 s_offs = z_offs = va;
3295
3296 if (device->physical_device->rad_info.chip_class >= GFX9) {
3297 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3298 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3299
3300 ds->db_z_info = S_028038_FORMAT(format) |
3301 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3302 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3303 S_028038_MAXMIP(iview->image->info.levels - 1);
3304 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3305 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3306
3307 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3308 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3309 ds->db_depth_view |= S_028008_MIPID(level);
3310
3311 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3312 S_02801C_Y_MAX(iview->image->info.height - 1);
3313
3314 if (radv_htile_enabled(iview->image, level)) {
3315 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3316
3317 if (iview->image->tc_compatible_htile) {
3318 unsigned max_zplanes = 4;
3319
3320 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3321 iview->image->info.samples > 1)
3322 max_zplanes = 2;
3323
3324 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3325 S_028038_ITERATE_FLUSH(1);
3326 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3327 }
3328
3329 if (!iview->image->surface.has_stencil)
3330 /* Use all of the htile_buffer for depth if there's no stencil. */
3331 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3332 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3333 iview->image->htile_offset;
3334 ds->db_htile_data_base = va >> 8;
3335 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3336 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3337 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3338 }
3339 } else {
3340 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3341
3342 if (stencil_only)
3343 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3344
3345 z_offs += iview->image->surface.u.legacy.level[level].offset;
3346 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3347
3348 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3349 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3350 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3351
3352 if (iview->image->info.samples > 1)
3353 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3354
3355 if (device->physical_device->rad_info.chip_class >= CIK) {
3356 struct radeon_info *info = &device->physical_device->rad_info;
3357 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3358 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3359 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3360 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3361 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3362 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3363
3364 if (stencil_only)
3365 tile_mode = stencil_tile_mode;
3366
3367 ds->db_depth_info |=
3368 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3369 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3370 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3371 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3372 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3373 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3374 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3375 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3376 } else {
3377 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3378 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3379 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3380 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3381 if (stencil_only)
3382 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3383 }
3384
3385 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3386 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3387 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
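		/* Worked example (a sketch, assuming a level padded to
		 * nblk_x = nblk_y = 512): PITCH_TILE_MAX = 512 / 8 - 1 = 63 and
		 * SLICE_TILE_MAX = 512 * 512 / 64 - 1 = 4095, i.e. both fields
		 * count 8x8-pixel tiles, minus one. */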
3388
3389 if (radv_htile_enabled(iview->image, level)) {
3390 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3391
3392 if (!iview->image->surface.has_stencil &&
3393 !iview->image->tc_compatible_htile)
3394 /* Use all of the htile_buffer for depth if there's no stencil. */
3395 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3396
3397 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3398 iview->image->htile_offset;
3399 ds->db_htile_data_base = va >> 8;
3400 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3401
3402 if (iview->image->tc_compatible_htile) {
3403 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3404
3405 if (iview->image->info.samples <= 1)
3406 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3407 else if (iview->image->info.samples <= 4)
3408 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3409 else
3410 				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3411 }
3412 }
3413 }
3414
3415 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3416 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3417 }
3418
3419 VkResult radv_CreateFramebuffer(
3420 VkDevice _device,
3421 const VkFramebufferCreateInfo* pCreateInfo,
3422 const VkAllocationCallbacks* pAllocator,
3423 VkFramebuffer* pFramebuffer)
3424 {
3425 RADV_FROM_HANDLE(radv_device, device, _device);
3426 struct radv_framebuffer *framebuffer;
3427
3428 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3429
3430 size_t size = sizeof(*framebuffer) +
3431 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3432 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3433 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3434 if (framebuffer == NULL)
3435 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3436
3437 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3438 framebuffer->width = pCreateInfo->width;
3439 framebuffer->height = pCreateInfo->height;
3440 framebuffer->layers = pCreateInfo->layers;
3441 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3442 VkImageView _iview = pCreateInfo->pAttachments[i];
3443 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3444 framebuffer->attachments[i].attachment = iview;
3445 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3446 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3447 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3448 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3449 }
3450 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3451 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3452 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
3453 }
3454
3455 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3456 return VK_SUCCESS;
3457 }
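
/* A minimal application-side sketch of reaching this entry point (the
 * "render_pass" and "attachments" identifiers are hypothetical):
 *
 *    VkFramebufferCreateInfo fb_info = {
 *       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
 *       .renderPass = render_pass,
 *       .attachmentCount = 2,
 *       .pAttachments = attachments,
 *       .width = 1920,
 *       .height = 1080,
 *       .layers = 1,
 *    };
 *    VkFramebuffer fb;
 *    VkResult result = vkCreateFramebuffer(device, &fb_info, NULL, &fb);
 *
 * Note that the loop above clamps width/height/layers to the smallest
 * attachment extent, so mismatched image views shrink the framebuffer.
 */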
3458
3459 void radv_DestroyFramebuffer(
3460 VkDevice _device,
3461 VkFramebuffer _fb,
3462 const VkAllocationCallbacks* pAllocator)
3463 {
3464 RADV_FROM_HANDLE(radv_device, device, _device);
3465 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3466
3467 if (!fb)
3468 return;
3469 vk_free2(&device->alloc, pAllocator, fb);
3470 }
3471
3472 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3473 {
3474 switch (address_mode) {
3475 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3476 return V_008F30_SQ_TEX_WRAP;
3477 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3478 return V_008F30_SQ_TEX_MIRROR;
3479 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3480 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3481 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3482 return V_008F30_SQ_TEX_CLAMP_BORDER;
3483 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3484 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3485 default:
3486 unreachable("illegal tex wrap mode");
3487 break;
3488 }
3489 }
3490
3491 static unsigned
3492 radv_tex_compare(VkCompareOp op)
3493 {
3494 switch (op) {
3495 case VK_COMPARE_OP_NEVER:
3496 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3497 case VK_COMPARE_OP_LESS:
3498 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3499 case VK_COMPARE_OP_EQUAL:
3500 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3501 case VK_COMPARE_OP_LESS_OR_EQUAL:
3502 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3503 case VK_COMPARE_OP_GREATER:
3504 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3505 case VK_COMPARE_OP_NOT_EQUAL:
3506 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3507 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3508 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3509 case VK_COMPARE_OP_ALWAYS:
3510 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3511 default:
3512 unreachable("illegal compare mode");
3513 break;
3514 }
3515 }
3516
3517 static unsigned
3518 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3519 {
3520 	switch (filter) {
3521 	case VK_FILTER_NEAREST:
3522 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3523 			V_008F38_SQ_TEX_XY_FILTER_POINT);
3524 	case VK_FILTER_LINEAR:
3525 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3526 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3527 case VK_FILTER_CUBIC_IMG:
3528 default:
3529 		fprintf(stderr, "illegal texture filter\n");
3530 return 0;
3531 }
3532 }
3533
3534 static unsigned
3535 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3536 {
3537 switch (mode) {
3538 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3539 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3540 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3541 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3542 default:
3543 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3544 }
3545 }
3546
3547 static unsigned
3548 radv_tex_bordercolor(VkBorderColor bcolor)
3549 {
3550 switch (bcolor) {
3551 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3552 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3553 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3554 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3555 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3556 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3557 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3558 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3559 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3560 default:
3561 break;
3562 }
3563 return 0;
3564 }
3565
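/* Map the API's [1, 16] max-anisotropy value onto the hardware's log2-style
 * ratio field: 1 -> 0, 2..3 -> 1, 4..7 -> 2, 8..15 -> 3, 16 (and up) -> 4. */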
3566 static unsigned
3567 radv_tex_aniso_filter(unsigned filter)
3568 {
3569 if (filter < 2)
3570 return 0;
3571 if (filter < 4)
3572 return 1;
3573 if (filter < 8)
3574 return 2;
3575 if (filter < 16)
3576 return 3;
3577 return 4;
3578 }
3579
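/* Pack the sampler state into the four SQ_IMG_SAMP dwords. The LOD fields
 * are fixed point with 8 fractional bits, via S_FIXED(x, 8) == x * 256;
 * e.g. maxLod = 7.5 encodes as 0x780. */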
3580 static void
3581 radv_init_sampler(struct radv_device *device,
3582 struct radv_sampler *sampler,
3583 const VkSamplerCreateInfo *pCreateInfo)
3584 {
3585 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3586 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3587 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3588 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3589
3590 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3591 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3592 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3593 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3594 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3595 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3596 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3597 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3598 S_008F30_DISABLE_CUBE_WRAP(0) |
3599 S_008F30_COMPAT_MODE(is_vi));
3600 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3601 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3602 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3603 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3604 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3605 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3606 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3607 S_008F38_MIP_POINT_PRECLAMP(0) |
3608 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
3609 S_008F38_FILTER_PREC_FIX(1) |
3610 S_008F38_ANISO_OVERRIDE(is_vi));
3611 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3612 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3613 }
3614
3615 VkResult radv_CreateSampler(
3616 VkDevice _device,
3617 const VkSamplerCreateInfo* pCreateInfo,
3618 const VkAllocationCallbacks* pAllocator,
3619 VkSampler* pSampler)
3620 {
3621 RADV_FROM_HANDLE(radv_device, device, _device);
3622 struct radv_sampler *sampler;
3623
3624 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3625
3626 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3627 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3628 if (!sampler)
3629 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3630
3631 radv_init_sampler(device, sampler, pCreateInfo);
3632 *pSampler = radv_sampler_to_handle(sampler);
3633
3634 return VK_SUCCESS;
3635 }
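
/* Sketch of the corresponding API usage (all values illustrative):
 *
 *    VkSamplerCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *       .magFilter = VK_FILTER_LINEAR,
 *       .minFilter = VK_FILTER_LINEAR,
 *       .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
 *       .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .anisotropyEnable = VK_TRUE,
 *       .maxAnisotropy = 16.0f,
 *       .maxLod = VK_LOD_CLAMP_NONE,
 *    };
 *    VkSampler sampler;
 *    VkResult result = vkCreateSampler(device, &info, NULL, &sampler);
 *
 * With maxAnisotropy = 16, radv_tex_aniso_filter() above yields a ratio of 4
 * and the ANISO_* filter variants are selected.
 */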
3636
3637 void radv_DestroySampler(
3638 VkDevice _device,
3639 VkSampler _sampler,
3640 const VkAllocationCallbacks* pAllocator)
3641 {
3642 RADV_FROM_HANDLE(radv_device, device, _device);
3643 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3644
3645 if (!sampler)
3646 return;
3647 vk_free2(&device->alloc, pAllocator, sampler);
3648 }
3649
3650 /* vk_icd.h does not declare this function, so we declare it here to
3651  * suppress -Wmissing-prototypes.
3652 */
3653 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3654 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3655
3656 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3657 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3658 {
3659 /* For the full details on loader interface versioning, see
3660 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3661 * What follows is a condensed summary, to help you navigate the large and
3662 * confusing official doc.
3663 *
3664 * - Loader interface v0 is incompatible with later versions. We don't
3665 * support it.
3666 *
3667 * - In loader interface v1:
3668 * - The first ICD entrypoint called by the loader is
3669 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3670 * entrypoint.
3671 * - The ICD must statically expose no other Vulkan symbol unless it is
3672 * linked with -Bsymbolic.
3673 * - Each dispatchable Vulkan handle created by the ICD must be
3674 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3675 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3676 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3677 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3678 * such loader-managed surfaces.
3679 *
3680 * - Loader interface v2 differs from v1 in:
3681 * - The first ICD entrypoint called by the loader is
3682 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3683 * statically expose this entrypoint.
3684 *
3685 * - Loader interface v3 differs from v2 in:
3686 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3687  *      vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
3688 * because the loader no longer does so.
3689 */
3690 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3691 return VK_SUCCESS;
3692 }
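
/* The negotiation itself is just a min(): a loader-side sketch, assuming a
 * hypothetical loader that supports up to interface version 4:
 *
 *    uint32_t version = 4;
 *    vk_icdNegotiateLoaderICDInterfaceVersion(&version);
 *    assert(version == 3);   (this driver caps the version at 3)
 */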
3693
3694 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3695 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3696 int *pFD)
3697 {
3698 RADV_FROM_HANDLE(radv_device, device, _device);
3699 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3700
3701 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3702
3703 /* At the moment, we support only the below handle types. */
3704 assert(pGetFdInfo->handleType ==
3705 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3706 pGetFdInfo->handleType ==
3707 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3708
3709 bool ret = radv_get_memory_fd(device, memory, pFD);
3710 	if (!ret)
3711 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3712 return VK_SUCCESS;
3713 }
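
/* A hedged usage sketch for the export path ("mem" is a hypothetical
 * VkDeviceMemory allocated with an export-capable handle type):
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = mem,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
 *    };
 *    int fd = -1;
 *    VkResult result = vkGetMemoryFdKHR(device, &get_fd, &fd);
 *
 * On success the application owns the returned fd and is responsible for
 * closing it.
 */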
3714
3715 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3716 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3717 int fd,
3718 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3719 {
3720 switch (handleType) {
3721 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
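		/* (1 << RADV_MEM_TYPE_COUNT) - 1 sets a bit for every memory
		 * type, i.e. an imported dma-buf may be bound to memory of any
		 * type the driver exposes. */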
3722 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
3723 return VK_SUCCESS;
3724
3725 default:
3726 /* The valid usage section for this function says:
3727 *
3728 * "handleType must not be one of the handle types defined as
3729 * opaque."
3730 *
3731 * So opaque handle types fall into the default "unsupported" case.
3732 */
3733 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3734 }
3735 }
3736
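/* Import an opaque syncobj fd. On success the fd is consumed (hence the
 * close() below) and any syncobj the destination previously held is
 * destroyed and replaced by the imported one. */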
3737 static VkResult radv_import_opaque_fd(struct radv_device *device,
3738 int fd,
3739 uint32_t *syncobj)
3740 {
3741 uint32_t syncobj_handle = 0;
3742 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
3743 if (ret != 0)
3744 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3745
3746 if (*syncobj)
3747 device->ws->destroy_syncobj(device->ws, *syncobj);
3748
3749 *syncobj = syncobj_handle;
3750 close(fd);
3751
3752 return VK_SUCCESS;
3753 }
3754
3755 static VkResult radv_import_sync_fd(struct radv_device *device,
3756 int fd,
3757 uint32_t *syncobj)
3758 {
3759 	/* If we have to create a syncobj, do it locally so that an error below
3760 	 * cannot leave the fence/semaphore with a syncobj in an undetermined state. */
3761 uint32_t syncobj_handle = *syncobj;
3762 if (!syncobj_handle) {
3763 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
3764 if (ret) {
3765 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3766 }
3767 }
3768
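	/* The external semaphore/fence fd extensions define an fd of -1 as a
	 * payload that is already signaled, so there is nothing to import in
	 * that case; just signal the syncobj directly. */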
3769 if (fd == -1) {
3770 device->ws->signal_syncobj(device->ws, syncobj_handle);
3771 } else {
3772 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
3773 if (ret != 0)
3774 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3775 }
3776
3777 *syncobj = syncobj_handle;
3778 if (fd != -1)
3779 close(fd);
3780
3781 return VK_SUCCESS;
3782 }
3783
3784 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
3785 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
3786 {
3787 RADV_FROM_HANDLE(radv_device, device, _device);
3788 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
3789 uint32_t *syncobj_dst = NULL;
3790
3791 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
3792 syncobj_dst = &sem->temp_syncobj;
3793 } else {
3794 syncobj_dst = &sem->syncobj;
3795 }
3796
3797 switch(pImportSemaphoreFdInfo->handleType) {
3798 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3799 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
3800 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
3801 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
3802 default:
3803 unreachable("Unhandled semaphore handle type");
3804 }
3805 }
3806
3807 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
3808 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
3809 int *pFd)
3810 {
3811 RADV_FROM_HANDLE(radv_device, device, _device);
3812 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
3813 int ret;
3814 uint32_t syncobj_handle;
3815
3816 if (sem->temp_syncobj)
3817 syncobj_handle = sem->temp_syncobj;
3818 else
3819 syncobj_handle = sem->syncobj;
3820
3821 switch(pGetFdInfo->handleType) {
3822 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3823 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3824 break;
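	/* Per the external semaphore fd extension, exporting a SYNC_FD payload
	 * has the side effects of a wait: on success a temporary payload is
	 * discarded and a permanent one is reset to unsignaled, which is what
	 * the cleanup below implements. */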
3825 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
3826 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
3827 if (!ret) {
3828 if (sem->temp_syncobj) {
3829 				device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
3830 sem->temp_syncobj = 0;
3831 } else {
3832 device->ws->reset_syncobj(device->ws, syncobj_handle);
3833 }
3834 }
3835 break;
3836 default:
3837 unreachable("Unhandled semaphore handle type");
3838 }
3839
3840 if (ret)
3841 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3842 return VK_SUCCESS;
3843 }
3844
3845 void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
3846 VkPhysicalDevice physicalDevice,
3847 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
3848 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
3849 {
3850 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
3851
3852 	/* Require has_syncobj_wait_for_submit because the syncobj signal ioctl was introduced at virtually the same time. */
3853 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
3854 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3855 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
3856 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
3857 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
3858 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
3859 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
3860 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
3861 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3862 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3863 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
3864 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
3865 } else {
3866 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
3867 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
3868 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
3869 }
3870 }
3871
3872 VkResult radv_ImportFenceFdKHR(VkDevice _device,
3873 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
3874 {
3875 RADV_FROM_HANDLE(radv_device, device, _device);
3876 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
3877 uint32_t *syncobj_dst = NULL;
3878 
3880 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
3881 syncobj_dst = &fence->temp_syncobj;
3882 } else {
3883 syncobj_dst = &fence->syncobj;
3884 }
3885
3886 switch(pImportFenceFdInfo->handleType) {
3887 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3888 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
3889 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
3890 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
3891 default:
3892 unreachable("Unhandled fence handle type");
3893 }
3894 }
3895
3896 VkResult radv_GetFenceFdKHR(VkDevice _device,
3897 const VkFenceGetFdInfoKHR *pGetFdInfo,
3898 int *pFd)
3899 {
3900 RADV_FROM_HANDLE(radv_device, device, _device);
3901 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
3902 int ret;
3903 uint32_t syncobj_handle;
3904
3905 if (fence->temp_syncobj)
3906 syncobj_handle = fence->temp_syncobj;
3907 else
3908 syncobj_handle = fence->syncobj;
3909
3910 switch(pGetFdInfo->handleType) {
3911 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3912 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3913 break;
3914 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
3915 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
3916 if (!ret) {
3917 if (fence->temp_syncobj) {
3918 				device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3919 fence->temp_syncobj = 0;
3920 } else {
3921 device->ws->reset_syncobj(device->ws, syncobj_handle);
3922 }
3923 }
3924 break;
3925 default:
3926 unreachable("Unhandled fence handle type");
3927 }
3928
3929 if (ret)
3930 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3931 return VK_SUCCESS;
3932 }
3933
3934 void radv_GetPhysicalDeviceExternalFencePropertiesKHR(
3935 VkPhysicalDevice physicalDevice,
3936 const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
3937 VkExternalFencePropertiesKHR* pExternalFenceProperties)
3938 {
3939 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
3940
3941 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
3942 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3943 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
3944 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
3945 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
3946 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
3947 			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
3948 } else {
3949 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
3950 pExternalFenceProperties->compatibleHandleTypes = 0;
3951 pExternalFenceProperties->externalFenceFeatures = 0;
3952 }
3953 }
3954
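/* A minimal application-side sketch of using VK_EXT_debug_report (the
 * "my_callback" function is hypothetical, and the entry points must be
 * resolved through vkGetInstanceProcAddr() in a real application):
 *
 *    static VkBool32 VKAPI_PTR
 *    my_callback(VkDebugReportFlagsEXT flags,
 *                VkDebugReportObjectTypeEXT objectType, uint64_t object,
 *                size_t location, int32_t messageCode,
 *                const char *pLayerPrefix, const char *pMessage,
 *                void *pUserData)
 *    {
 *       fprintf(stderr, "[%s] %s\n", pLayerPrefix, pMessage);
 *       return VK_FALSE;
 *    }
 *
 *    VkDebugReportCallbackCreateInfoEXT ci = {
 *       .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
 *       .flags = VK_DEBUG_REPORT_ERROR_BIT_EXT |
 *                VK_DEBUG_REPORT_WARNING_BIT_EXT,
 *       .pfnCallback = my_callback,
 *    };
 *    VkDebugReportCallbackEXT callback;
 *    vkCreateDebugReportCallbackEXT(instance, &ci, NULL, &callback);
 *
 * Returning VK_FALSE from the callback tells the implementation not to
 * abort the call that triggered the report.
 */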
3955 VkResult
3956 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
3957 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
3958 const VkAllocationCallbacks* pAllocator,
3959 VkDebugReportCallbackEXT* pCallback)
3960 {
3961 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3962 return vk_create_debug_report_callback(&instance->debug_report_callbacks,
3963 pCreateInfo, pAllocator, &instance->alloc,
3964 pCallback);
3965 }
3966
3967 void
3968 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
3969 VkDebugReportCallbackEXT _callback,
3970 const VkAllocationCallbacks* pAllocator)
3971 {
3972 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3973 vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
3974 _callback, pAllocator, &instance->alloc);
3975 }
3976
3977 void
3978 radv_DebugReportMessageEXT(VkInstance _instance,
3979 VkDebugReportFlagsEXT flags,
3980 VkDebugReportObjectTypeEXT objectType,
3981 uint64_t object,
3982 size_t location,
3983 int32_t messageCode,
3984 const char* pLayerPrefix,
3985 const char* pMessage)
3986 {
3987 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3988 vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
3989 object, location, messageCode, pLayerPrefix, pMessage);
3990 }