radv: Implement sparse buffer creation.
[mesa.git] / src / amd / vulkan / radv_device.c
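For context, the API-level usage this change targets looks roughly as follows. This is an illustrative client-side sketch of the standard Vulkan sparse-binding calls, not code from this file; device, queue and memory are placeholder handles assumed to exist.

VkBufferCreateInfo info = {
	.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
	.flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT,
	.size = 64ull * 1024 * 1024,
	.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
	.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VkBuffer buffer;
vkCreateBuffer(device, &info, NULL, &buffer); /* no backing memory yet */

/* Pages are attached later through a sparse-binding queue. */
VkSparseMemoryBind bind = {
	.resourceOffset = 0,
	.size = 1024 * 1024,
	.memory = memory, /* a VkDeviceMemory allocated elsewhere */
	.memoryOffset = 0,
};
VkSparseBufferMemoryBindInfo buffer_bind = {
	.buffer = buffer,
	.bindCount = 1,
	.pBinds = &bind,
};
VkBindSparseInfo bind_info = {
	.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,
	.bufferBindCount = 1,
	.pBufferBinds = &buffer_bind,
};
vkQueueBindSparse(queue, 1, &bind_info, VK_NULL_HANDLE);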
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46
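/* The pipeline cache UUID below mixes the Mesa build timestamp, the LLVM build
 * timestamp and the GPU family, so stale on-disk shader caches are rejected
 * whenever the compiler stack or the target GPU changes.
 */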
47 static int
48 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
49 {
50 uint32_t mesa_timestamp, llvm_timestamp;
51 uint16_t f = family;
52 memset(uuid, 0, VK_UUID_SIZE);
53 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
54 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
55 return -1;
56
57 memcpy(uuid, &mesa_timestamp, 4);
58 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
59 memcpy((char*)uuid + 8, &f, 2);
60 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
61 return 0;
62 }
63
64 static const VkExtensionProperties instance_extensions[] = {
65 {
66 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
67 .specVersion = 25,
68 },
69 #ifdef VK_USE_PLATFORM_XCB_KHR
70 {
71 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
72 .specVersion = 6,
73 },
74 #endif
75 #ifdef VK_USE_PLATFORM_XLIB_KHR
76 {
77 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
78 .specVersion = 6,
79 },
80 #endif
81 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
82 {
83 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
84 .specVersion = 5,
85 },
86 #endif
87 {
88 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
89 .specVersion = 1,
90 },
91 };
92
93 static const VkExtensionProperties common_device_extensions[] = {
94 {
95 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
96 .specVersion = 1,
97 },
98 {
99 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
100 .specVersion = 1,
101 },
102 {
103 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
104 .specVersion = 68,
105 },
106 {
107 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
112 .specVersion = 1,
113 },
114 {
115 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
116 .specVersion = 1,
117 },
118 };
119
120 static VkResult
121 radv_extensions_register(struct radv_instance *instance,
122 struct radv_extensions *extensions,
123 const VkExtensionProperties *new_ext,
124 uint32_t num_ext)
125 {
126 size_t new_size;
127 VkExtensionProperties *new_ptr;
128
129 assert(new_ext && num_ext > 0);
130
131 if (!new_ext)
132 return VK_ERROR_INITIALIZATION_FAILED;
133
134 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
135 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
136 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
137
138 /* Old array continues to be valid, update nothing */
139 if (!new_ptr)
140 return VK_ERROR_OUT_OF_HOST_MEMORY;
141
142 memcpy(&new_ptr[extensions->num_ext], new_ext,
143 num_ext * sizeof(VkExtensionProperties));
144 extensions->ext_array = new_ptr;
145 extensions->num_ext += num_ext;
146
147 return VK_SUCCESS;
148 }
149
150 static void
151 radv_extensions_finish(struct radv_instance *instance,
152 struct radv_extensions *extensions)
153 {
154 assert(extensions);
155
156 if (!extensions)
157 radv_loge("Attemted to free invalid extension struct\n");
158
159 if (extensions->ext_array)
160 vk_free(&instance->alloc, extensions->ext_array);
161 }
162
163 static bool
164 is_extension_enabled(const VkExtensionProperties *extensions,
165 size_t num_ext,
166 const char *name)
167 {
168 assert(extensions && name);
169
170 for (uint32_t i = 0; i < num_ext; i++) {
171 if (strcmp(name, extensions[i].extensionName) == 0)
172 return true;
173 }
174
175 return false;
176 }
177
178 static VkResult
179 radv_physical_device_init(struct radv_physical_device *device,
180 struct radv_instance *instance,
181 const char *path)
182 {
183 VkResult result;
184 drmVersionPtr version;
185 int fd;
186
187 fd = open(path, O_RDWR | O_CLOEXEC);
188 if (fd < 0)
189 return VK_ERROR_INCOMPATIBLE_DRIVER;
190
191 version = drmGetVersion(fd);
192 if (!version) {
193 close(fd);
194 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
195 "failed to get version %s: %m", path);
196 }
197
198 if (strcmp(version->name, "amdgpu")) {
199 drmFreeVersion(version);
200 close(fd);
201 return VK_ERROR_INCOMPATIBLE_DRIVER;
202 }
203 drmFreeVersion(version);
204
205 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
206 device->instance = instance;
207 assert(strlen(path) < ARRAY_SIZE(device->path));
208 strncpy(device->path, path, ARRAY_SIZE(device->path));
209
210 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
211 if (!device->ws) {
212 result = VK_ERROR_INCOMPATIBLE_DRIVER;
213 goto fail;
214 }
215
216 device->local_fd = fd;
217 device->ws->query_info(device->ws, &device->rad_info);
218 result = radv_init_wsi(device);
219 if (result != VK_SUCCESS) {
220 device->ws->destroy(device->ws);
221 goto fail;
222 }
223
224 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
225 radv_finish_wsi(device);
226 device->ws->destroy(device->ws);
227 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
228 "cannot generate UUID");
229 goto fail;
230 }
231
232 result = radv_extensions_register(instance,
233 &device->extensions,
234 common_device_extensions,
235 ARRAY_SIZE(common_device_extensions));
236 if (result != VK_SUCCESS)
237 goto fail;
238
239 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
240 device->name = device->rad_info.name;
241
242 return VK_SUCCESS;
243
244 fail:
245 close(fd);
246 return result;
247 }
248
249 static void
250 radv_physical_device_finish(struct radv_physical_device *device)
251 {
252 radv_extensions_finish(device->instance, &device->extensions);
253 radv_finish_wsi(device);
254 device->ws->destroy(device->ws);
255 close(device->local_fd);
256 }
257
258
259 static void *
260 default_alloc_func(void *pUserData, size_t size, size_t align,
261 VkSystemAllocationScope allocationScope)
262 {
263 return malloc(size);
264 }
265
266 static void *
267 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
268 size_t align, VkSystemAllocationScope allocationScope)
269 {
270 return realloc(pOriginal, size);
271 }
272
273 static void
274 default_free_func(void *pUserData, void *pMemory)
275 {
276 free(pMemory);
277 }
278
279 static const VkAllocationCallbacks default_alloc = {
280 .pUserData = NULL,
281 .pfnAllocation = default_alloc_func,
282 .pfnReallocation = default_realloc_func,
283 .pfnFree = default_free_func,
284 };
285
286 static const struct debug_control radv_debug_options[] = {
287 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
288 {"nodcc", RADV_DEBUG_NO_DCC},
289 {"shaders", RADV_DEBUG_DUMP_SHADERS},
290 {"nocache", RADV_DEBUG_NO_CACHE},
291 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
292 {"nohiz", RADV_DEBUG_NO_HIZ},
293 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
294 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
295 {"allbos", RADV_DEBUG_ALL_BOS},
296 {"noibs", RADV_DEBUG_NO_IBS},
297 {NULL, 0}
298 };
299
300 VkResult radv_CreateInstance(
301 const VkInstanceCreateInfo* pCreateInfo,
302 const VkAllocationCallbacks* pAllocator,
303 VkInstance* pInstance)
304 {
305 struct radv_instance *instance;
306
307 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
308
309 uint32_t client_version;
310 if (pCreateInfo->pApplicationInfo &&
311 pCreateInfo->pApplicationInfo->apiVersion != 0) {
312 client_version = pCreateInfo->pApplicationInfo->apiVersion;
313 } else {
314 client_version = VK_MAKE_VERSION(1, 0, 0);
315 }
316
317 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
318 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
319 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
320 "Client requested version %d.%d.%d",
321 VK_VERSION_MAJOR(client_version),
322 VK_VERSION_MINOR(client_version),
323 VK_VERSION_PATCH(client_version));
324 }
325
326 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
327 if (!is_extension_enabled(instance_extensions,
328 ARRAY_SIZE(instance_extensions),
329 pCreateInfo->ppEnabledExtensionNames[i]))
330 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
331 }
332
333 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
334 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
335 if (!instance)
336 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
337
338 memset(instance, 0, sizeof(*instance));
339
340 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
341
342 if (pAllocator)
343 instance->alloc = *pAllocator;
344 else
345 instance->alloc = default_alloc;
346
347 instance->apiVersion = client_version;
348 instance->physicalDeviceCount = -1;
349
350 _mesa_locale_init();
351
352 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
353
354 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
355 radv_debug_options);
356
357 *pInstance = radv_instance_to_handle(instance);
358
359 return VK_SUCCESS;
360 }
361
362 void radv_DestroyInstance(
363 VkInstance _instance,
364 const VkAllocationCallbacks* pAllocator)
365 {
366 RADV_FROM_HANDLE(radv_instance, instance, _instance);
367
368 if (!instance)
369 return;
370
371 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
372 radv_physical_device_finish(instance->physicalDevices + i);
373 }
374
375 VG(VALGRIND_DESTROY_MEMPOOL(instance));
376
377 _mesa_locale_fini();
378
379 vk_free(&instance->alloc, instance);
380 }
381
382 static VkResult
383 radv_enumerate_devices(struct radv_instance *instance)
384 {
385 /* TODO: Check for more devices ? */
386 drmDevicePtr devices[8];
387 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
388 int max_devices;
389
390 instance->physicalDeviceCount = 0;
391
392 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
393 if (max_devices < 1)
394 return VK_ERROR_INCOMPATIBLE_DRIVER;
395
396 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
397 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
398 devices[i]->bustype == DRM_BUS_PCI &&
399 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
400
401 result = radv_physical_device_init(instance->physicalDevices +
402 instance->physicalDeviceCount,
403 instance,
404 devices[i]->nodes[DRM_NODE_RENDER]);
405 if (result == VK_SUCCESS)
406 ++instance->physicalDeviceCount;
407 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
408 return result;
409 }
410 }
411 return result;
412 }
413
414 VkResult radv_EnumeratePhysicalDevices(
415 VkInstance _instance,
416 uint32_t* pPhysicalDeviceCount,
417 VkPhysicalDevice* pPhysicalDevices)
418 {
419 RADV_FROM_HANDLE(radv_instance, instance, _instance);
420 VkResult result;
421
422 if (instance->physicalDeviceCount < 0) {
423 result = radv_enumerate_devices(instance);
424 if (result != VK_SUCCESS &&
425 result != VK_ERROR_INCOMPATIBLE_DRIVER)
426 return result;
427 }
428
429 if (!pPhysicalDevices) {
430 *pPhysicalDeviceCount = instance->physicalDeviceCount;
431 } else {
432 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
433 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
434 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
435 }
436
437 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
438 : VK_SUCCESS;
439 }
440
441 void radv_GetPhysicalDeviceFeatures(
442 VkPhysicalDevice physicalDevice,
443 VkPhysicalDeviceFeatures* pFeatures)
444 {
445 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
446
447 memset(pFeatures, 0, sizeof(*pFeatures));
448
449 *pFeatures = (VkPhysicalDeviceFeatures) {
450 .robustBufferAccess = true,
451 .fullDrawIndexUint32 = true,
452 .imageCubeArray = true,
453 .independentBlend = true,
454 .geometryShader = true,
455 .tessellationShader = false,
456 .sampleRateShading = false,
457 .dualSrcBlend = true,
458 .logicOp = true,
459 .multiDrawIndirect = true,
460 .drawIndirectFirstInstance = true,
461 .depthClamp = true,
462 .depthBiasClamp = true,
463 .fillModeNonSolid = true,
464 .depthBounds = true,
465 .wideLines = true,
466 .largePoints = true,
467 .alphaToOne = true,
468 .multiViewport = true,
469 .samplerAnisotropy = true,
470 .textureCompressionETC2 = false,
471 .textureCompressionASTC_LDR = false,
472 .textureCompressionBC = true,
473 .occlusionQueryPrecise = true,
474 .pipelineStatisticsQuery = false,
475 .vertexPipelineStoresAndAtomics = true,
476 .fragmentStoresAndAtomics = true,
477 .shaderTessellationAndGeometryPointSize = true,
478 .shaderImageGatherExtended = true,
479 .shaderStorageImageExtendedFormats = true,
480 .shaderStorageImageMultisample = false,
481 .shaderUniformBufferArrayDynamicIndexing = true,
482 .shaderSampledImageArrayDynamicIndexing = true,
483 .shaderStorageBufferArrayDynamicIndexing = true,
484 .shaderStorageImageArrayDynamicIndexing = true,
485 .shaderStorageImageReadWithoutFormat = true,
486 .shaderStorageImageWriteWithoutFormat = true,
487 .shaderClipDistance = true,
488 .shaderCullDistance = true,
489 .shaderFloat64 = true,
490 .shaderInt64 = false,
491 .shaderInt16 = false,
492 .variableMultisampleRate = false,
493 .inheritedQueries = false,
494 };
495 }
496
497 void radv_GetPhysicalDeviceFeatures2KHR(
498 VkPhysicalDevice physicalDevice,
499 VkPhysicalDeviceFeatures2KHR *pFeatures)
500 {
501 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
502 }
503
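/* Derive the VK driverVersion from Mesa's VERSION string ("major.minor.patch").
 * For "-devel" builds the reported version is decremented one step, so a
 * development build never claims a higher version than the release it precedes.
 */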
504 static uint32_t radv_get_driver_version(void)
505 {
506 const char *minor_string = strchr(VERSION, '.');
507 const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
508 int major = atoi(VERSION);
509 int minor = minor_string ? atoi(minor_string + 1) : 0;
510 int patch = patch_string ? atoi(patch_string + 1) : 0;
511 if (strstr(VERSION, "devel")) {
512 if (patch == 0) {
513 patch = 99;
514 if (minor == 0) {
515 minor = 99;
516 --major;
517 } else
518 --minor;
519 } else
520 --patch;
521 }
522 uint32_t version = VK_MAKE_VERSION(major, minor, patch);
523 return version;
524 }
525
526 void radv_GetPhysicalDeviceProperties(
527 VkPhysicalDevice physicalDevice,
528 VkPhysicalDeviceProperties* pProperties)
529 {
530 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
531 VkSampleCountFlags sample_counts = 0xf;
532 VkPhysicalDeviceLimits limits = {
533 .maxImageDimension1D = (1 << 14),
534 .maxImageDimension2D = (1 << 14),
535 .maxImageDimension3D = (1 << 11),
536 .maxImageDimensionCube = (1 << 14),
537 .maxImageArrayLayers = (1 << 11),
538 .maxTexelBufferElements = 128 * 1024 * 1024,
539 .maxUniformBufferRange = UINT32_MAX,
540 .maxStorageBufferRange = UINT32_MAX,
541 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
542 .maxMemoryAllocationCount = UINT32_MAX,
543 .maxSamplerAllocationCount = 64 * 1024,
544 .bufferImageGranularity = 64, /* A cache line */
545 .sparseAddressSpaceSize = 0,
546 .maxBoundDescriptorSets = MAX_SETS,
547 .maxPerStageDescriptorSamplers = 64,
548 .maxPerStageDescriptorUniformBuffers = 64,
549 .maxPerStageDescriptorStorageBuffers = 64,
550 .maxPerStageDescriptorSampledImages = 64,
551 .maxPerStageDescriptorStorageImages = 64,
552 .maxPerStageDescriptorInputAttachments = 64,
553 .maxPerStageResources = 128,
554 .maxDescriptorSetSamplers = 256,
555 .maxDescriptorSetUniformBuffers = 256,
556 .maxDescriptorSetUniformBuffersDynamic = 256,
557 .maxDescriptorSetStorageBuffers = 256,
558 .maxDescriptorSetStorageBuffersDynamic = 256,
559 .maxDescriptorSetSampledImages = 256,
560 .maxDescriptorSetStorageImages = 256,
561 .maxDescriptorSetInputAttachments = 256,
562 .maxVertexInputAttributes = 32,
563 .maxVertexInputBindings = 32,
564 .maxVertexInputAttributeOffset = 2047,
565 .maxVertexInputBindingStride = 2048,
566 .maxVertexOutputComponents = 128,
567 .maxTessellationGenerationLevel = 0,
568 .maxTessellationPatchSize = 0,
569 .maxTessellationControlPerVertexInputComponents = 0,
570 .maxTessellationControlPerVertexOutputComponents = 0,
571 .maxTessellationControlPerPatchOutputComponents = 0,
572 .maxTessellationControlTotalOutputComponents = 0,
573 .maxTessellationEvaluationInputComponents = 0,
574 .maxTessellationEvaluationOutputComponents = 0,
575 .maxGeometryShaderInvocations = 32,
576 .maxGeometryInputComponents = 64,
577 .maxGeometryOutputComponents = 128,
578 .maxGeometryOutputVertices = 256,
579 .maxGeometryTotalOutputComponents = 1024,
580 .maxFragmentInputComponents = 128,
581 .maxFragmentOutputAttachments = 8,
582 .maxFragmentDualSrcAttachments = 1,
583 .maxFragmentCombinedOutputResources = 8,
584 .maxComputeSharedMemorySize = 32768,
585 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
586 .maxComputeWorkGroupInvocations = 2048,
587 .maxComputeWorkGroupSize = {
588 2048,
589 2048,
590 2048
591 },
592 .subPixelPrecisionBits = 4 /* FIXME */,
593 .subTexelPrecisionBits = 4 /* FIXME */,
594 .mipmapPrecisionBits = 4 /* FIXME */,
595 .maxDrawIndexedIndexValue = UINT32_MAX,
596 .maxDrawIndirectCount = UINT32_MAX,
597 .maxSamplerLodBias = 16,
598 .maxSamplerAnisotropy = 16,
599 .maxViewports = MAX_VIEWPORTS,
600 .maxViewportDimensions = { (1 << 14), (1 << 14) },
601 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
602 .viewportSubPixelBits = 13, /* We take a float? */
603 .minMemoryMapAlignment = 4096, /* A page */
604 .minTexelBufferOffsetAlignment = 1,
605 .minUniformBufferOffsetAlignment = 4,
606 .minStorageBufferOffsetAlignment = 4,
607 .minTexelOffset = -32,
608 .maxTexelOffset = 31,
609 .minTexelGatherOffset = -32,
610 .maxTexelGatherOffset = 31,
611 .minInterpolationOffset = -2,
612 .maxInterpolationOffset = 2,
613 .subPixelInterpolationOffsetBits = 8,
614 .maxFramebufferWidth = (1 << 14),
615 .maxFramebufferHeight = (1 << 14),
616 .maxFramebufferLayers = (1 << 10),
617 .framebufferColorSampleCounts = sample_counts,
618 .framebufferDepthSampleCounts = sample_counts,
619 .framebufferStencilSampleCounts = sample_counts,
620 .framebufferNoAttachmentsSampleCounts = sample_counts,
621 .maxColorAttachments = MAX_RTS,
622 .sampledImageColorSampleCounts = sample_counts,
623 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
624 .sampledImageDepthSampleCounts = sample_counts,
625 .sampledImageStencilSampleCounts = sample_counts,
626 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
627 .maxSampleMaskWords = 1,
628 .timestampComputeAndGraphics = false,
629 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
630 .maxClipDistances = 8,
631 .maxCullDistances = 8,
632 .maxCombinedClipAndCullDistances = 8,
633 .discreteQueuePriorities = 1,
634 .pointSizeRange = { 0.125, 255.875 },
635 .lineWidthRange = { 0.0, 7.9921875 },
636 .pointSizeGranularity = (1.0 / 8.0),
637 .lineWidthGranularity = (1.0 / 128.0),
638 .strictLines = false, /* FINISHME */
639 .standardSampleLocations = true,
640 .optimalBufferCopyOffsetAlignment = 128,
641 .optimalBufferCopyRowPitchAlignment = 128,
642 .nonCoherentAtomSize = 64,
643 };
644
645 *pProperties = (VkPhysicalDeviceProperties) {
646 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
647 .driverVersion = radv_get_driver_version(),
648 .vendorID = 0x1002,
649 .deviceID = pdevice->rad_info.pci_id,
650 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
651 .limits = limits,
652 .sparseProperties = {0}, /* No sparse residency properties are reported yet. */
653 };
654
655 strcpy(pProperties->deviceName, pdevice->name);
656 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
657 }
658
659 void radv_GetPhysicalDeviceProperties2KHR(
660 VkPhysicalDevice physicalDevice,
661 VkPhysicalDeviceProperties2KHR *pProperties)
662 {
663 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
664 }
665
666 static void radv_get_physical_device_queue_family_properties(
667 struct radv_physical_device* pdevice,
668 uint32_t* pCount,
669 VkQueueFamilyProperties** pQueueFamilyProperties)
670 {
671 int num_queue_families = 1;
672 int idx;
673 if (pdevice->rad_info.compute_rings > 0 &&
674 pdevice->rad_info.chip_class >= CIK &&
675 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
676 num_queue_families++;
677
678 if (pQueueFamilyProperties == NULL) {
679 *pCount = num_queue_families;
680 return;
681 }
682
683 if (!*pCount)
684 return;
685
686 idx = 0;
687 if (*pCount >= 1) {
688 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
689 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
690 VK_QUEUE_COMPUTE_BIT |
691 VK_QUEUE_TRANSFER_BIT,
692 .queueCount = 1,
693 .timestampValidBits = 64,
694 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
695 };
696 idx++;
697 }
698
699 if (pdevice->rad_info.compute_rings > 0 &&
700 pdevice->rad_info.chip_class >= CIK &&
701 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
702 if (*pCount > idx) {
703 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
704 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
705 .queueCount = pdevice->rad_info.compute_rings,
706 .timestampValidBits = 64,
707 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
708 };
709 idx++;
710 }
711 }
712 *pCount = idx;
713 }
714
715 void radv_GetPhysicalDeviceQueueFamilyProperties(
716 VkPhysicalDevice physicalDevice,
717 uint32_t* pCount,
718 VkQueueFamilyProperties* pQueueFamilyProperties)
719 {
720 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
721 if (!pQueueFamilyProperties) {
722 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
723 return;
724 }
725 VkQueueFamilyProperties *properties[] = {
726 pQueueFamilyProperties + 0,
727 pQueueFamilyProperties + 1,
728 pQueueFamilyProperties + 2,
729 };
730 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
731 assert(*pCount <= 3);
732 }
733
734 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
735 VkPhysicalDevice physicalDevice,
736 uint32_t* pCount,
737 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
738 {
739 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
740 if (!pQueueFamilyProperties) {
741 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
742 return;
743 }
744 VkQueueFamilyProperties *properties[] = {
745 &pQueueFamilyProperties[0].queueFamilyProperties,
746 &pQueueFamilyProperties[1].queueFamilyProperties,
747 &pQueueFamilyProperties[2].queueFamilyProperties,
748 };
749 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
750 assert(*pCount <= 3);
751 }
752
753 void radv_GetPhysicalDeviceMemoryProperties(
754 VkPhysicalDevice physicalDevice,
755 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
756 {
757 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
758
759 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
760
761 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
762 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
763 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
764 .heapIndex = RADV_MEM_HEAP_VRAM,
765 };
766 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
767 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
768 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
769 .heapIndex = RADV_MEM_HEAP_GTT,
770 };
771 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
772 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
773 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
774 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
775 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
776 };
777 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
778 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
779 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
780 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
781 .heapIndex = RADV_MEM_HEAP_GTT,
782 };
783
784 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
785
786 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
787 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
788 .size = physical_device->rad_info.vram_size -
789 physical_device->rad_info.visible_vram_size,
790 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
791 };
792 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
793 .size = physical_device->rad_info.visible_vram_size,
794 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
795 };
796 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
797 .size = physical_device->rad_info.gart_size,
798 .flags = 0,
799 };
800 }
801
802 void radv_GetPhysicalDeviceMemoryProperties2KHR(
803 VkPhysicalDevice physicalDevice,
804 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
805 {
806 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
807 &pMemoryProperties->memoryProperties);
808 }
809
810 static int
811 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
812 int queue_family_index, int idx)
813 {
814 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
815 queue->device = device;
816 queue->queue_family_index = queue_family_index;
817 queue->queue_idx = idx;
818
819 queue->hw_ctx = device->ws->ctx_create(device->ws);
820 if (!queue->hw_ctx)
821 return VK_ERROR_OUT_OF_HOST_MEMORY;
822
823 return VK_SUCCESS;
824 }
825
826 static void
827 radv_queue_finish(struct radv_queue *queue)
828 {
829 if (queue->hw_ctx)
830 queue->device->ws->ctx_destroy(queue->hw_ctx);
831
832 if (queue->initial_preamble_cs)
833 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
834 if (queue->continue_preamble_cs)
835 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
836 if (queue->descriptor_bo)
837 queue->device->ws->buffer_destroy(queue->descriptor_bo);
838 if (queue->scratch_bo)
839 queue->device->ws->buffer_destroy(queue->scratch_bo);
840 if (queue->esgs_ring_bo)
841 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
842 if (queue->gsvs_ring_bo)
843 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
844 if (queue->compute_scratch_bo)
845 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
846 }
847
848 static void
849 radv_device_init_gs_info(struct radv_device *device)
850 {
851 switch (device->physical_device->rad_info.family) {
852 case CHIP_OLAND:
853 case CHIP_HAINAN:
854 case CHIP_KAVERI:
855 case CHIP_KABINI:
856 case CHIP_MULLINS:
857 case CHIP_ICELAND:
858 case CHIP_CARRIZO:
859 case CHIP_STONEY:
860 device->gs_table_depth = 16;
861 return;
862 case CHIP_TAHITI:
863 case CHIP_PITCAIRN:
864 case CHIP_VERDE:
865 case CHIP_BONAIRE:
866 case CHIP_HAWAII:
867 case CHIP_TONGA:
868 case CHIP_FIJI:
869 case CHIP_POLARIS10:
870 case CHIP_POLARIS11:
871 device->gs_table_depth = 32;
872 return;
873 default:
874 unreachable("unknown GPU");
875 }
876 }
877
878 VkResult radv_CreateDevice(
879 VkPhysicalDevice physicalDevice,
880 const VkDeviceCreateInfo* pCreateInfo,
881 const VkAllocationCallbacks* pAllocator,
882 VkDevice* pDevice)
883 {
884 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
885 VkResult result;
886 struct radv_device *device;
887
888 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
889 if (!is_extension_enabled(physical_device->extensions.ext_array,
890 physical_device->extensions.num_ext,
891 pCreateInfo->ppEnabledExtensionNames[i]))
892 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
893 }
894
895 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
896 sizeof(*device), 8,
897 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
898 if (!device)
899 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
900
901 memset(device, 0, sizeof(*device));
902
903 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
904 device->instance = physical_device->instance;
905 device->physical_device = physical_device;
906
907 device->debug_flags = device->instance->debug_flags;
908
909 device->ws = physical_device->ws;
910 if (pAllocator)
911 device->alloc = *pAllocator;
912 else
913 device->alloc = physical_device->instance->alloc;
914
915 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
916 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
917 uint32_t qfi = queue_create->queueFamilyIndex;
918
919 device->queues[qfi] = vk_alloc(&device->alloc,
920 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
921 if (!device->queues[qfi]) {
922 result = VK_ERROR_OUT_OF_HOST_MEMORY;
923 goto fail;
924 }
925
926 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
927
928 device->queue_count[qfi] = queue_create->queueCount;
929
930 for (unsigned q = 0; q < queue_create->queueCount; q++) {
931 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
932 if (result != VK_SUCCESS)
933 goto fail;
934 }
935 }
936
937 #if HAVE_LLVM < 0x0400
938 device->llvm_supports_spill = false;
939 #else
940 device->llvm_supports_spill = true;
941 #endif
942
943 /* The maximum number of scratch waves. Scratch space isn't divided
944 * evenly between CUs. The number is only a function of the number of CUs.
945 * We can decrease the constant to decrease the scratch buffer size.
946 *
947 * sctx->scratch_waves must be >= the maximum posible size of
948 * 1 threadgroup, so that the hw doesn't hang from being unable
949 * to start any.
950 *
951 * The recommended value is 4 per CU at most. Higher numbers don't
952 * bring much benefit, but they still occupy chip resources (think
953 * async compute). I've seen ~2% performance difference between 4 and 32.
954 */
955 uint32_t max_threads_per_block = 2048;
956 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
957 max_threads_per_block / 64);
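	/* Illustrative arithmetic (hypothetical part): a GPU with 16 compute units
	 * gets MAX2(32 * 16, 2048 / 64) = 512 scratch waves. */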
958
959 radv_device_init_gs_info(device);
960
961 result = radv_device_init_meta(device);
962 if (result != VK_SUCCESS)
963 goto fail;
964
965 radv_device_init_msaa(device);
966
967 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
968 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
969 switch (family) {
970 case RADV_QUEUE_GENERAL:
971 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
972 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
973 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
974 break;
975 case RADV_QUEUE_COMPUTE:
976 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
977 radeon_emit(device->empty_cs[family], 0);
978 break;
979 }
980 device->ws->cs_finalize(device->empty_cs[family]);
981
982 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
983 switch (family) {
984 case RADV_QUEUE_GENERAL:
985 case RADV_QUEUE_COMPUTE:
986 si_cs_emit_cache_flush(device->flush_cs[family],
987 device->physical_device->rad_info.chip_class,
988 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
989 RADV_CMD_FLAG_INV_ICACHE |
990 RADV_CMD_FLAG_INV_SMEM_L1 |
991 RADV_CMD_FLAG_INV_VMEM_L1 |
992 RADV_CMD_FLAG_INV_GLOBAL_L2);
993 break;
994 }
995 device->ws->cs_finalize(device->flush_cs[family]);
996 }
997
998 if (getenv("RADV_TRACE_FILE")) {
999 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1000 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1001 if (!device->trace_bo)
1002 goto fail;
1003
1004 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1005 if (!device->trace_id_ptr)
1006 goto fail;
1007 }
1008
1009 if (device->physical_device->rad_info.chip_class >= CIK)
1010 cik_create_gfx_config(device);
1011
1012 VkPipelineCacheCreateInfo ci;
1013 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1014 ci.pNext = NULL;
1015 ci.flags = 0;
1016 ci.pInitialData = NULL;
1017 ci.initialDataSize = 0;
1018 VkPipelineCache pc;
1019 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1020 &ci, NULL, &pc);
1021 if (result != VK_SUCCESS)
1022 goto fail;
1023
1024 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1025
1026 *pDevice = radv_device_to_handle(device);
1027 return VK_SUCCESS;
1028
1029 fail:
1030 if (device->trace_bo)
1031 device->ws->buffer_destroy(device->trace_bo);
1032
1033 if (device->gfx_init)
1034 device->ws->buffer_destroy(device->gfx_init);
1035
1036 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1037 for (unsigned q = 0; q < device->queue_count[i]; q++)
1038 radv_queue_finish(&device->queues[i][q]);
1039 if (device->queue_count[i])
1040 vk_free(&device->alloc, device->queues[i]);
1041 }
1042
1043 vk_free(&device->alloc, device);
1044 return result;
1045 }
1046
1047 void radv_DestroyDevice(
1048 VkDevice _device,
1049 const VkAllocationCallbacks* pAllocator)
1050 {
1051 RADV_FROM_HANDLE(radv_device, device, _device);
1052
1053 if (!device)
1054 return;
1055
1056 if (device->trace_bo)
1057 device->ws->buffer_destroy(device->trace_bo);
1058
1059 if (device->gfx_init)
1060 device->ws->buffer_destroy(device->gfx_init);
1061
1062 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1063 for (unsigned q = 0; q < device->queue_count[i]; q++)
1064 radv_queue_finish(&device->queues[i][q]);
1065 if (device->queue_count[i])
1066 vk_free(&device->alloc, device->queues[i]);
1067 if (device->empty_cs[i])
1068 device->ws->cs_destroy(device->empty_cs[i]);
1069 if (device->flush_cs[i])
1070 device->ws->cs_destroy(device->flush_cs[i]);
1071 }
1072 radv_device_finish_meta(device);
1073
1074 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1075 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1076
1077 vk_free(&device->alloc, device);
1078 }
1079
1080 VkResult radv_EnumerateInstanceExtensionProperties(
1081 const char* pLayerName,
1082 uint32_t* pPropertyCount,
1083 VkExtensionProperties* pProperties)
1084 {
1085 if (pProperties == NULL) {
1086 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1087 return VK_SUCCESS;
1088 }
1089
1090 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1091 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1092
1093 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1094 return VK_INCOMPLETE;
1095
1096 return VK_SUCCESS;
1097 }
1098
1099 VkResult radv_EnumerateDeviceExtensionProperties(
1100 VkPhysicalDevice physicalDevice,
1101 const char* pLayerName,
1102 uint32_t* pPropertyCount,
1103 VkExtensionProperties* pProperties)
1104 {
1105 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1106
1107 if (pProperties == NULL) {
1108 *pPropertyCount = pdevice->extensions.num_ext;
1109 return VK_SUCCESS;
1110 }
1111
1112 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1113 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1114
1115 if (*pPropertyCount < pdevice->extensions.num_ext)
1116 return VK_INCOMPLETE;
1117
1118 return VK_SUCCESS;
1119 }
1120
1121 VkResult radv_EnumerateInstanceLayerProperties(
1122 uint32_t* pPropertyCount,
1123 VkLayerProperties* pProperties)
1124 {
1125 if (pProperties == NULL) {
1126 *pPropertyCount = 0;
1127 return VK_SUCCESS;
1128 }
1129
1130 /* None supported at this time */
1131 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1132 }
1133
1134 VkResult radv_EnumerateDeviceLayerProperties(
1135 VkPhysicalDevice physicalDevice,
1136 uint32_t* pPropertyCount,
1137 VkLayerProperties* pProperties)
1138 {
1139 if (pProperties == NULL) {
1140 *pPropertyCount = 0;
1141 return VK_SUCCESS;
1142 }
1143
1144 /* None supported at this time */
1145 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1146 }
1147
1148 void radv_GetDeviceQueue(
1149 VkDevice _device,
1150 uint32_t queueFamilyIndex,
1151 uint32_t queueIndex,
1152 VkQueue* pQueue)
1153 {
1154 RADV_FROM_HANDLE(radv_device, device, _device);
1155
1156 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1157 }
1158
1159 static void radv_dump_trace(struct radv_device *device,
1160 struct radeon_winsys_cs *cs)
1161 {
1162 const char *filename = getenv("RADV_TRACE_FILE");
1163 FILE *f = fopen(filename, "w");
1164 if (!f) {
1165 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1166 return;
1167 }
1168
1169 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1170 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1171 fclose(f);
1172 }
1173
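/* Write the buffer descriptors for the ES->GS and GS->VS rings into the queue's
 * descriptor BO: roughly a write-side and a read-side view of each ring, in the
 * layout the generated shaders expect.
 */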
1174 static void
1175 fill_geom_rings(struct radv_queue *queue,
1176 uint32_t *map,
1177 uint32_t esgs_ring_size,
1178 struct radeon_winsys_bo *esgs_ring_bo,
1179 uint32_t gsvs_ring_size,
1180 struct radeon_winsys_bo *gsvs_ring_bo)
1181 {
1182 uint64_t esgs_va = 0, gsvs_va = 0;
1183 uint32_t *desc = &map[4];
1184
1185 if (esgs_ring_bo)
1186 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1187 if (gsvs_ring_bo)
1188 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1189
1190 /* stride 0, num records - size, add tid, swizzle, elsize4,
1191 index stride 64 */
1192 desc[0] = esgs_va;
1193 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1194 S_008F04_STRIDE(0) |
1195 S_008F04_SWIZZLE_ENABLE(true);
1196 desc[2] = esgs_ring_size;
1197 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1198 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1199 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1200 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1201 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1202 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1203 S_008F0C_ELEMENT_SIZE(1) |
1204 S_008F0C_INDEX_STRIDE(3) |
1205 S_008F0C_ADD_TID_ENABLE(true);
1206
1207 desc += 4;
1208 /* GS entry for ES->GS ring */
1209 /* stride 0, num records - size, elsize0,
1210 index stride 0 */
1211 desc[0] = esgs_va;
1212 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1213 S_008F04_STRIDE(0) |
1214 S_008F04_SWIZZLE_ENABLE(false);
1215 desc[2] = esgs_ring_size;
1216 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1217 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1218 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1219 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1220 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1221 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1222 S_008F0C_ELEMENT_SIZE(0) |
1223 S_008F0C_INDEX_STRIDE(0) |
1224 S_008F0C_ADD_TID_ENABLE(false);
1225
1226 desc += 4;
1227 /* VS entry for GS->VS ring */
1228 /* stride 0, num records - size, elsize0,
1229 index stride 0 */
1230 desc[0] = gsvs_va;
1231 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1232 S_008F04_STRIDE(0) |
1233 S_008F04_SWIZZLE_ENABLE(false);
1234 desc[2] = gsvs_ring_size;
1235 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1236 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1237 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1238 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1239 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1240 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1241 S_008F0C_ELEMENT_SIZE(0) |
1242 S_008F0C_INDEX_STRIDE(0) |
1243 S_008F0C_ADD_TID_ENABLE(false);
1244 desc += 4;
1245
1246 /* stride gsvs_itemsize, num records 64
1247 elsize 4, index stride 16 */
1248 /* shader will patch stride and desc[2] */
1249 desc[0] = gsvs_va;
1250 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1251 S_008F04_STRIDE(0) |
1252 S_008F04_SWIZZLE_ENABLE(true);
1253 desc[2] = 0;
1254 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1255 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1256 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1257 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1258 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1259 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1260 S_008F0C_ELEMENT_SIZE(1) |
1261 S_008F0C_INDEX_STRIDE(1) |
1262 S_008F0C_ADD_TID_ENABLE(true);
1263 }
1264
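/* Build (or reuse) the per-queue preamble command streams. The preamble binds
 * the graphics/compute scratch buffers and the ESGS/GSVS rings, and points the
 * shader user-data registers at a small descriptor BO describing them. Buffers
 * are only reallocated when a submission needs more space than the queue
 * already owns.
 */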
1265 static VkResult
1266 radv_get_preamble_cs(struct radv_queue *queue,
1267 uint32_t scratch_size,
1268 uint32_t compute_scratch_size,
1269 uint32_t esgs_ring_size,
1270 uint32_t gsvs_ring_size,
1271 struct radeon_winsys_cs **initial_preamble_cs,
1272 struct radeon_winsys_cs **continue_preamble_cs)
1273 {
1274 struct radeon_winsys_bo *scratch_bo = NULL;
1275 struct radeon_winsys_bo *descriptor_bo = NULL;
1276 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1277 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1278 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1279 struct radeon_winsys_cs *dest_cs[2] = {0};
1280
1281 if (scratch_size <= queue->scratch_size &&
1282 compute_scratch_size <= queue->compute_scratch_size &&
1283 esgs_ring_size <= queue->esgs_ring_size &&
1284 gsvs_ring_size <= queue->gsvs_ring_size &&
1285 queue->initial_preamble_cs) {
1286 *initial_preamble_cs = queue->initial_preamble_cs;
1287 *continue_preamble_cs = queue->continue_preamble_cs;
1288 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1289 *continue_preamble_cs = NULL;
1290 return VK_SUCCESS;
1291 }
1292
1293 if (scratch_size > queue->scratch_size) {
1294 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1295 scratch_size,
1296 4096,
1297 RADEON_DOMAIN_VRAM,
1298 RADEON_FLAG_NO_CPU_ACCESS);
1299 if (!scratch_bo)
1300 goto fail;
1301 } else
1302 scratch_bo = queue->scratch_bo;
1303
1304 if (compute_scratch_size > queue->compute_scratch_size) {
1305 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1306 compute_scratch_size,
1307 4096,
1308 RADEON_DOMAIN_VRAM,
1309 RADEON_FLAG_NO_CPU_ACCESS);
1310 if (!compute_scratch_bo)
1311 goto fail;
1312
1313 } else
1314 compute_scratch_bo = queue->compute_scratch_bo;
1315
1316 if (esgs_ring_size > queue->esgs_ring_size) {
1317 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1318 esgs_ring_size,
1319 4096,
1320 RADEON_DOMAIN_VRAM,
1321 RADEON_FLAG_NO_CPU_ACCESS);
1322 if (!esgs_ring_bo)
1323 goto fail;
1324 } else {
1325 esgs_ring_bo = queue->esgs_ring_bo;
1326 esgs_ring_size = queue->esgs_ring_size;
1327 }
1328
1329 if (gsvs_ring_size > queue->gsvs_ring_size) {
1330 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1331 gsvs_ring_size,
1332 4096,
1333 RADEON_DOMAIN_VRAM,
1334 RADEON_FLAG_NO_CPU_ACCESS);
1335 if (!gsvs_ring_bo)
1336 goto fail;
1337 } else {
1338 gsvs_ring_bo = queue->gsvs_ring_bo;
1339 gsvs_ring_size = queue->gsvs_ring_size;
1340 }
1341
1342 if (scratch_bo != queue->scratch_bo ||
1343 esgs_ring_bo != queue->esgs_ring_bo ||
1344 gsvs_ring_bo != queue->gsvs_ring_bo) {
1345 uint32_t size = 0;
1346 if (gsvs_ring_bo || esgs_ring_bo)
1347 size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
1348 else if (scratch_bo)
1349 size = 8; /* 2 dword */
1350
1351 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1352 size,
1353 4096,
1354 RADEON_DOMAIN_VRAM,
1355 RADEON_FLAG_CPU_ACCESS);
1356 if (!descriptor_bo)
1357 goto fail;
1358 } else
1359 descriptor_bo = queue->descriptor_bo;
1360
1361 for(int i = 0; i < 2; ++i) {
1362 struct radeon_winsys_cs *cs = NULL;
1363 cs = queue->device->ws->cs_create(queue->device->ws,
1364 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1365 if (!cs)
1366 goto fail;
1367
1368 dest_cs[i] = cs;
1369
1370 if (scratch_bo)
1371 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1372
1373 if (esgs_ring_bo)
1374 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1375
1376 if (gsvs_ring_bo)
1377 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1378
1379 if (descriptor_bo)
1380 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1381
1382 if (descriptor_bo != queue->descriptor_bo) {
1383 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1384
1385 if (scratch_bo) {
1386 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1387 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1388 S_008F04_SWIZZLE_ENABLE(1);
1389 map[0] = scratch_va;
1390 map[1] = rsrc1;
1391 }
1392
1393 if (esgs_ring_bo || gsvs_ring_bo)
1394 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1395
1396 queue->device->ws->buffer_unmap(descriptor_bo);
1397 }
1398
1399 if (esgs_ring_bo || gsvs_ring_bo) {
1400 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1401 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1402 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1403 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1404
1405 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1406 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1407 radeon_emit(cs, esgs_ring_size >> 8);
1408 radeon_emit(cs, gsvs_ring_size >> 8);
1409 } else {
1410 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1411 radeon_emit(cs, esgs_ring_size >> 8);
1412 radeon_emit(cs, gsvs_ring_size >> 8);
1413 }
1414 }
1415
1416 if (descriptor_bo) {
1417 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1418 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1419 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1420 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1421 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1422 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1423
1424 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1425
1426 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1427 radeon_set_sh_reg_seq(cs, regs[i], 2);
1428 radeon_emit(cs, va);
1429 radeon_emit(cs, va >> 32);
1430 }
1431 }
1432
1433 if (compute_scratch_bo) {
1434 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1435 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1436 S_008F04_SWIZZLE_ENABLE(1);
1437
1438 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1439
1440 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1441 radeon_emit(cs, scratch_va);
1442 radeon_emit(cs, rsrc1);
1443 }
1444
1445 if (!i) {
1446 si_cs_emit_cache_flush(cs,
1447 queue->device->physical_device->rad_info.chip_class,
1448 queue->queue_family_index == RING_COMPUTE &&
1449 queue->device->physical_device->rad_info.chip_class >= CIK,
1450 RADV_CMD_FLAG_INV_ICACHE |
1451 RADV_CMD_FLAG_INV_SMEM_L1 |
1452 RADV_CMD_FLAG_INV_VMEM_L1 |
1453 RADV_CMD_FLAG_INV_GLOBAL_L2);
1454 }
1455
1456 if (!queue->device->ws->cs_finalize(cs))
1457 goto fail;
1458 }
1459
1460 if (queue->initial_preamble_cs)
1461 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1462
1463 if (queue->continue_preamble_cs)
1464 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1465
1466 queue->initial_preamble_cs = dest_cs[0];
1467 queue->continue_preamble_cs = dest_cs[1];
1468
1469 if (scratch_bo != queue->scratch_bo) {
1470 if (queue->scratch_bo)
1471 queue->device->ws->buffer_destroy(queue->scratch_bo);
1472 queue->scratch_bo = scratch_bo;
1473 queue->scratch_size = scratch_size;
1474 }
1475
1476 if (compute_scratch_bo != queue->compute_scratch_bo) {
1477 if (queue->compute_scratch_bo)
1478 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1479 queue->compute_scratch_bo = compute_scratch_bo;
1480 queue->compute_scratch_size = compute_scratch_size;
1481 }
1482
1483 if (esgs_ring_bo != queue->esgs_ring_bo) {
1484 if (queue->esgs_ring_bo)
1485 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1486 queue->esgs_ring_bo = esgs_ring_bo;
1487 queue->esgs_ring_size = esgs_ring_size;
1488 }
1489
1490 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1491 if (queue->gsvs_ring_bo)
1492 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1493 queue->gsvs_ring_bo = gsvs_ring_bo;
1494 queue->gsvs_ring_size = gsvs_ring_size;
1495 }
1496
1497 if (descriptor_bo != queue->descriptor_bo) {
1498 if (queue->descriptor_bo)
1499 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1500
1501 queue->descriptor_bo = descriptor_bo;
1502 }
1503
1504 *initial_preamble_cs = queue->initial_preamble_cs;
1505 *continue_preamble_cs = queue->continue_preamble_cs;
1506 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1507 *continue_preamble_cs = NULL;
1508 return VK_SUCCESS;
1509 fail:
1510 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1511 if (dest_cs[i])
1512 queue->device->ws->cs_destroy(dest_cs[i]);
1513 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1514 queue->device->ws->buffer_destroy(descriptor_bo);
1515 if (scratch_bo && scratch_bo != queue->scratch_bo)
1516 queue->device->ws->buffer_destroy(scratch_bo);
1517 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1518 queue->device->ws->buffer_destroy(compute_scratch_bo);
1519 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1520 queue->device->ws->buffer_destroy(esgs_ring_bo);
1521 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1522 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1523 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1524 }
1525
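/* When RADV_TRACE_FILE is set, command buffers are submitted one IB at a time
 * and the context is waited on after each submission, so a hang can be
 * attributed to a single IB and dumped via radv_dump_trace().
 */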
1526 VkResult radv_QueueSubmit(
1527 VkQueue _queue,
1528 uint32_t submitCount,
1529 const VkSubmitInfo* pSubmits,
1530 VkFence _fence)
1531 {
1532 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1533 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1534 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1535 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1536 int ret;
1537 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1538 uint32_t scratch_size = 0;
1539 uint32_t compute_scratch_size = 0;
1540 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1541 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1542 VkResult result;
1543 bool fence_emitted = false;
1544
1545 /* Do this first so failing to allocate scratch buffers can't result in
1546 * partially executed submissions. */
1547 for (uint32_t i = 0; i < submitCount; i++) {
1548 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1549 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1550 pSubmits[i].pCommandBuffers[j]);
1551
1552 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1553 compute_scratch_size = MAX2(compute_scratch_size,
1554 cmd_buffer->compute_scratch_size_needed);
1555 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1556 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1557 }
1558 }
1559
1560 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1561 esgs_ring_size, gsvs_ring_size,
1562 &initial_preamble_cs, &continue_preamble_cs);
1563 if (result != VK_SUCCESS)
1564 return result;
1565
1566 for (uint32_t i = 0; i < submitCount; i++) {
1567 struct radeon_winsys_cs **cs_array;
1568 bool do_flush = !i;
1569 bool can_patch = !do_flush;
1570 uint32_t advance;
1571
1572 if (!pSubmits[i].commandBufferCount) {
1573 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1574 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1575 &queue->device->empty_cs[queue->queue_family_index],
1576 1, NULL, NULL,
1577 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1578 pSubmits[i].waitSemaphoreCount,
1579 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1580 pSubmits[i].signalSemaphoreCount,
1581 false, base_fence);
1582 if (ret) {
1583 radv_loge("failed to submit CS %d\n", i);
1584 abort();
1585 }
1586 fence_emitted = true;
1587 }
1588 continue;
1589 }
1590
1591 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1592 (pSubmits[i].commandBufferCount + do_flush));
1593
1594 if (do_flush)
1595 cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1596
1597 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1598 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1599 pSubmits[i].pCommandBuffers[j]);
1600 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1601
1602 cs_array[j + do_flush] = cmd_buffer->cs;
1603 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1604 can_patch = false;
1605 }
1606
1607 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1608 advance = MIN2(max_cs_submission,
1609 pSubmits[i].commandBufferCount + do_flush - j);
1610 bool b = j == 0;
1611 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1612
1613 if (queue->device->trace_bo)
1614 *queue->device->trace_id_ptr = 0;
1615
1616 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1617 advance, initial_preamble_cs, continue_preamble_cs,
1618 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1619 b ? pSubmits[i].waitSemaphoreCount : 0,
1620 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1621 e ? pSubmits[i].signalSemaphoreCount : 0,
1622 can_patch, base_fence);
1623
1624 if (ret) {
1625 radv_loge("failed to submit CS %d\n", i);
1626 abort();
1627 }
1628 fence_emitted = true;
1629 if (queue->device->trace_bo) {
1630 bool success = queue->device->ws->ctx_wait_idle(
1631 queue->hw_ctx,
1632 radv_queue_family_to_ring(
1633 queue->queue_family_index),
1634 queue->queue_idx);
1635
1636 if (!success) { /* Hang */
1637 radv_dump_trace(queue->device, cs_array[j]);
1638 abort();
1639 }
1640 }
1641 }
1642 free(cs_array);
1643 }
1644
1645 if (fence) {
1646 if (!fence_emitted)
1647 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1648 &queue->device->empty_cs[queue->queue_family_index],
1649 1, NULL, NULL, NULL, 0, NULL, 0,
1650 false, base_fence);
1651
1652 fence->submitted = true;
1653 }
1654
1655 return VK_SUCCESS;
1656 }
1657
1658 VkResult radv_QueueWaitIdle(
1659 VkQueue _queue)
1660 {
1661 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1662
1663 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1664 radv_queue_family_to_ring(queue->queue_family_index),
1665 queue->queue_idx);
1666 return VK_SUCCESS;
1667 }
1668
1669 VkResult radv_DeviceWaitIdle(
1670 VkDevice _device)
1671 {
1672 RADV_FROM_HANDLE(radv_device, device, _device);
1673
1674 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1675 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1676 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1677 }
1678 }
1679 return VK_SUCCESS;
1680 }
1681
1682 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1683 VkInstance instance,
1684 const char* pName)
1685 {
1686 return radv_lookup_entrypoint(pName);
1687 }
1688
1689 /* The loader wants us to expose a second GetInstanceProcAddr function
1690 * to work around certain LD_PRELOAD issues seen in apps.
1691 */
1692 PUBLIC
1693 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1694 VkInstance instance,
1695 const char* pName);
1696
1697 PUBLIC
1698 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1699 VkInstance instance,
1700 const char* pName)
1701 {
1702 return radv_GetInstanceProcAddr(instance, pName);
1703 }
1704
1705 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1706 VkDevice device,
1707 const char* pName)
1708 {
1709 return radv_lookup_entrypoint(pName);
1710 }
1711
1712 bool radv_get_memory_fd(struct radv_device *device,
1713 struct radv_device_memory *memory,
1714 int *pFD)
1715 {
1716 struct radeon_bo_metadata metadata;
1717
1718 if (memory->image) {
1719 radv_init_metadata(device, memory->image, &metadata);
1720 device->ws->buffer_set_metadata(memory->bo, &metadata);
1721 }
1722
1723 return device->ws->buffer_get_fd(device->ws, memory->bo,
1724 pFD);
1725 }
1726
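/* The memory type index picks the domain and flags: GTT types map to
 * RADEON_DOMAIN_GTT, everything else to VRAM; the pure-VRAM type disallows CPU
 * access and the write-combined GTT type adds RADEON_FLAG_GTT_WC.
 */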
1727 VkResult radv_AllocateMemory(
1728 VkDevice _device,
1729 const VkMemoryAllocateInfo* pAllocateInfo,
1730 const VkAllocationCallbacks* pAllocator,
1731 VkDeviceMemory* pMem)
1732 {
1733 RADV_FROM_HANDLE(radv_device, device, _device);
1734 struct radv_device_memory *mem;
1735 VkResult result;
1736 enum radeon_bo_domain domain;
1737 uint32_t flags = 0;
1738 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
1739 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1740
1741 if (pAllocateInfo->allocationSize == 0) {
1742 /* Apparently, this is allowed */
1743 *pMem = VK_NULL_HANDLE;
1744 return VK_SUCCESS;
1745 }
1746
1747 vk_foreach_struct(ext, pAllocateInfo->pNext) {
1748 switch (ext->sType) {
1749 case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
1750 dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
1751 break;
1752 default:
1753 break;
1754 }
1755 }
1756
1757 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1758 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1759 if (mem == NULL)
1760 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1761
1762 if (dedicate_info) {
1763 mem->image = radv_image_from_handle(dedicate_info->image);
1764 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
1765 } else {
1766 mem->image = NULL;
1767 mem->buffer = NULL;
1768 }
1769
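/* Derive the BO placement from the requested memory type: the two GTT
 * types live in system memory (write-combined or cached), everything
 * else in VRAM; device-local-only VRAM is not CPU mappable.
 */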
1770 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1771 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1772 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1773 domain = RADEON_DOMAIN_GTT;
1774 else
1775 domain = RADEON_DOMAIN_VRAM;
1776
1777 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1778 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1779 else
1780 flags |= RADEON_FLAG_CPU_ACCESS;
1781
1782 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1783 flags |= RADEON_FLAG_GTT_WC;
1784
1785 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1786 domain, flags);
1787
1788 if (!mem->bo) {
1789 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1790 goto fail;
1791 }
1792 mem->type_index = pAllocateInfo->memoryTypeIndex;
1793
1794 *pMem = radv_device_memory_to_handle(mem);
1795
1796 return VK_SUCCESS;
1797
1798 fail:
1799 vk_free2(&device->alloc, pAllocator, mem);
1800
1801 return result;
1802 }
1803
1804 void radv_FreeMemory(
1805 VkDevice _device,
1806 VkDeviceMemory _mem,
1807 const VkAllocationCallbacks* pAllocator)
1808 {
1809 RADV_FROM_HANDLE(radv_device, device, _device);
1810 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1811
1812 if (mem == NULL)
1813 return;
1814
1815 device->ws->buffer_destroy(mem->bo);
1816 mem->bo = NULL;
1817
1818 vk_free2(&device->alloc, pAllocator, mem);
1819 }
1820
1821 VkResult radv_MapMemory(
1822 VkDevice _device,
1823 VkDeviceMemory _memory,
1824 VkDeviceSize offset,
1825 VkDeviceSize size,
1826 VkMemoryMapFlags flags,
1827 void** ppData)
1828 {
1829 RADV_FROM_HANDLE(radv_device, device, _device);
1830 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1831
1832 if (mem == NULL) {
1833 *ppData = NULL;
1834 return VK_SUCCESS;
1835 }
1836
1837 *ppData = device->ws->buffer_map(mem->bo);
1838 if (*ppData) {
1839 *ppData += offset;
1840 return VK_SUCCESS;
1841 }
1842
1843 return VK_ERROR_MEMORY_MAP_FAILED;
1844 }
1845
1846 void radv_UnmapMemory(
1847 VkDevice _device,
1848 VkDeviceMemory _memory)
1849 {
1850 RADV_FROM_HANDLE(radv_device, device, _device);
1851 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1852
1853 if (mem == NULL)
1854 return;
1855
1856 device->ws->buffer_unmap(mem->bo);
1857 }
1858
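/* The host-visible memory types exposed by the driver are advertised as
 * host-coherent, so explicit flushes and invalidations are no-ops.
 */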
1859 VkResult radv_FlushMappedMemoryRanges(
1860 VkDevice _device,
1861 uint32_t memoryRangeCount,
1862 const VkMappedMemoryRange* pMemoryRanges)
1863 {
1864 return VK_SUCCESS;
1865 }
1866
1867 VkResult radv_InvalidateMappedMemoryRanges(
1868 VkDevice _device,
1869 uint32_t memoryRangeCount,
1870 const VkMappedMemoryRange* pMemoryRanges)
1871 {
1872 return VK_SUCCESS;
1873 }
1874
1875 void radv_GetBufferMemoryRequirements(
1876 VkDevice device,
1877 VkBuffer _buffer,
1878 VkMemoryRequirements* pMemoryRequirements)
1879 {
1880 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1881
1882 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1883
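/* Sparse buffers are bound at page granularity and therefore need 4 KiB
 * alignment; plain buffers only need a small fixed alignment.
 */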
1884 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
1885 pMemoryRequirements->alignment = 4096;
1886 else
1887 pMemoryRequirements->alignment = 16;
1888
1889 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
1890 }
1891
1892 void radv_GetImageMemoryRequirements(
1893 VkDevice device,
1894 VkImage _image,
1895 VkMemoryRequirements* pMemoryRequirements)
1896 {
1897 RADV_FROM_HANDLE(radv_image, image, _image);
1898
1899 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1900
1901 pMemoryRequirements->size = image->size;
1902 pMemoryRequirements->alignment = image->alignment;
1903 }
1904
1905 void radv_GetImageSparseMemoryRequirements(
1906 VkDevice device,
1907 VkImage image,
1908 uint32_t* pSparseMemoryRequirementCount,
1909 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1910 {
1911 stub();
1912 }
1913
1914 void radv_GetDeviceMemoryCommitment(
1915 VkDevice device,
1916 VkDeviceMemory memory,
1917 VkDeviceSize* pCommittedMemoryInBytes)
1918 {
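/* No lazily allocated memory types are exposed, so report zero committed bytes. */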
1919 *pCommittedMemoryInBytes = 0;
1920 }
1921
1922 VkResult radv_BindBufferMemory(
1923 VkDevice device,
1924 VkBuffer _buffer,
1925 VkDeviceMemory _memory,
1926 VkDeviceSize memoryOffset)
1927 {
1928 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1929 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1930
1931 if (mem) {
1932 buffer->bo = mem->bo;
1933 buffer->offset = memoryOffset;
1934 } else {
1935 buffer->bo = NULL;
1936 buffer->offset = 0;
1937 }
1938
1939 return VK_SUCCESS;
1940 }
1941
1942 VkResult radv_BindImageMemory(
1943 VkDevice device,
1944 VkImage _image,
1945 VkDeviceMemory _memory,
1946 VkDeviceSize memoryOffset)
1947 {
1948 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1949 RADV_FROM_HANDLE(radv_image, image, _image);
1950
1951 if (mem) {
1952 image->bo = mem->bo;
1953 image->offset = memoryOffset;
1954 } else {
1955 image->bo = NULL;
1956 image->offset = 0;
1957 }
1958
1959 return VK_SUCCESS;
1960 }
1961
1962 VkResult radv_QueueBindSparse(
1963 VkQueue queue,
1964 uint32_t bindInfoCount,
1965 const VkBindSparseInfo* pBindInfo,
1966 VkFence fence)
1967 {
1968 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1969 }
1970
1971 VkResult radv_CreateFence(
1972 VkDevice _device,
1973 const VkFenceCreateInfo* pCreateInfo,
1974 const VkAllocationCallbacks* pAllocator,
1975 VkFence* pFence)
1976 {
1977 RADV_FROM_HANDLE(radv_device, device, _device);
1978 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1979 sizeof(*fence), 8,
1980 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1981
1982 if (!fence)
1983 return VK_ERROR_OUT_OF_HOST_MEMORY;
1984
1985 memset(fence, 0, sizeof(*fence));
1986 fence->submitted = false;
1987 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1988 fence->fence = device->ws->create_fence();
1989 if (!fence->fence) {
1990 vk_free2(&device->alloc, pAllocator, fence);
1991 return VK_ERROR_OUT_OF_HOST_MEMORY;
1992 }
1993
1994 *pFence = radv_fence_to_handle(fence);
1995
1996 return VK_SUCCESS;
1997 }
1998
1999 void radv_DestroyFence(
2000 VkDevice _device,
2001 VkFence _fence,
2002 const VkAllocationCallbacks* pAllocator)
2003 {
2004 RADV_FROM_HANDLE(radv_device, device, _device);
2005 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2006
2007 if (!fence)
2008 return;
2009 device->ws->destroy_fence(fence->fence);
2010 vk_free2(&device->alloc, pAllocator, fence);
2011 }
2012
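/* Convert the relative timeout from the API into an absolute
 * CLOCK_MONOTONIC deadline, clamping so the addition cannot overflow.
 */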
2013 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2014 {
2015 uint64_t current_time;
2016 struct timespec tv;
2017
2018 clock_gettime(CLOCK_MONOTONIC, &tv);
2019 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2020
2021 timeout = MIN2(UINT64_MAX - current_time, timeout);
2022
2023 return current_time + timeout;
2024 }
2025
2026 VkResult radv_WaitForFences(
2027 VkDevice _device,
2028 uint32_t fenceCount,
2029 const VkFence* pFences,
2030 VkBool32 waitAll,
2031 uint64_t timeout)
2032 {
2033 RADV_FROM_HANDLE(radv_device, device, _device);
2034 timeout = radv_get_absolute_timeout(timeout);
2035
2036 if (!waitAll && fenceCount > 1) {
2037 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2038 }
2039
2040 for (uint32_t i = 0; i < fenceCount; ++i) {
2041 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2042 bool expired = false;
2043
2044 if (fence->signalled)
2045 continue;
2046
2047 if (!fence->submitted)
2048 return VK_TIMEOUT;
2049
2050 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2051 if (!expired)
2052 return VK_TIMEOUT;
2053
2054 fence->signalled = true;
2055 }
2056
2057 return VK_SUCCESS;
2058 }
2059
2060 VkResult radv_ResetFences(VkDevice device,
2061 uint32_t fenceCount,
2062 const VkFence *pFences)
2063 {
2064 for (unsigned i = 0; i < fenceCount; ++i) {
2065 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2066 fence->submitted = fence->signalled = false;
2067 }
2068
2069 return VK_SUCCESS;
2070 }
2071
2072 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2073 {
2074 RADV_FROM_HANDLE(radv_device, device, _device);
2075 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2076
2077 if (fence->signalled)
2078 return VK_SUCCESS;
2079 if (!fence->submitted)
2080 return VK_NOT_READY;
2081
2082 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2083 return VK_NOT_READY;
2084
2085 return VK_SUCCESS;
2086 }
2087
2088
2089 // Queue semaphore functions
2090
2091 VkResult radv_CreateSemaphore(
2092 VkDevice _device,
2093 const VkSemaphoreCreateInfo* pCreateInfo,
2094 const VkAllocationCallbacks* pAllocator,
2095 VkSemaphore* pSemaphore)
2096 {
2097 RADV_FROM_HANDLE(radv_device, device, _device);
2098 struct radeon_winsys_sem *sem;
2099
2100 sem = device->ws->create_sem(device->ws);
2101 if (!sem)
2102 return VK_ERROR_OUT_OF_HOST_MEMORY;
2103
2104 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2105 return VK_SUCCESS;
2106 }
2107
2108 void radv_DestroySemaphore(
2109 VkDevice _device,
2110 VkSemaphore _semaphore,
2111 const VkAllocationCallbacks* pAllocator)
2112 {
2113 RADV_FROM_HANDLE(radv_device, device, _device);
2114 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2115 if (!_semaphore)
2116 return;
2117
2118 device->ws->destroy_sem(sem);
2119 }
2120
2121 VkResult radv_CreateEvent(
2122 VkDevice _device,
2123 const VkEventCreateInfo* pCreateInfo,
2124 const VkAllocationCallbacks* pAllocator,
2125 VkEvent* pEvent)
2126 {
2127 RADV_FROM_HANDLE(radv_device, device, _device);
2128 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2129 sizeof(*event), 8,
2130 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2131
2132 if (!event)
2133 return VK_ERROR_OUT_OF_HOST_MEMORY;
2134
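/* Events are backed by a small CPU-visible GTT buffer; the status is a
 * 64-bit word that the host or the GPU writes and vkGetEventStatus polls.
 */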
2135 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2136 RADEON_DOMAIN_GTT,
2137 RADEON_FLAG_CPU_ACCESS);
2138 if (!event->bo) {
2139 vk_free2(&device->alloc, pAllocator, event);
2140 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2141 }
2142
2143 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2144
2145 *pEvent = radv_event_to_handle(event);
2146
2147 return VK_SUCCESS;
2148 }
2149
2150 void radv_DestroyEvent(
2151 VkDevice _device,
2152 VkEvent _event,
2153 const VkAllocationCallbacks* pAllocator)
2154 {
2155 RADV_FROM_HANDLE(radv_device, device, _device);
2156 RADV_FROM_HANDLE(radv_event, event, _event);
2157
2158 if (!event)
2159 return;
2160 device->ws->buffer_destroy(event->bo);
2161 vk_free2(&device->alloc, pAllocator, event);
2162 }
2163
2164 VkResult radv_GetEventStatus(
2165 VkDevice _device,
2166 VkEvent _event)
2167 {
2168 RADV_FROM_HANDLE(radv_event, event, _event);
2169
2170 if (*event->map == 1)
2171 return VK_EVENT_SET;
2172 return VK_EVENT_RESET;
2173 }
2174
2175 VkResult radv_SetEvent(
2176 VkDevice _device,
2177 VkEvent _event)
2178 {
2179 RADV_FROM_HANDLE(radv_event, event, _event);
2180 *event->map = 1;
2181
2182 return VK_SUCCESS;
2183 }
2184
2185 VkResult radv_ResetEvent(
2186 VkDevice _device,
2187 VkEvent _event)
2188 {
2189 RADV_FROM_HANDLE(radv_event, event, _event);
2190 *event->map = 0;
2191
2192 return VK_SUCCESS;
2193 }
2194
2195 VkResult radv_CreateBuffer(
2196 VkDevice _device,
2197 const VkBufferCreateInfo* pCreateInfo,
2198 const VkAllocationCallbacks* pAllocator,
2199 VkBuffer* pBuffer)
2200 {
2201 RADV_FROM_HANDLE(radv_device, device, _device);
2202 struct radv_buffer *buffer;
2203
2204 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2205
2206 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2207 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2208 if (buffer == NULL)
2209 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2210
2211 buffer->size = pCreateInfo->size;
2212 buffer->usage = pCreateInfo->usage;
2213 buffer->bo = NULL;
2214 buffer->offset = 0;
2215 buffer->flags = pCreateInfo->flags;
2216
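/* Sparse buffers get a virtual (initially unbacked) BO at creation time;
 * actual pages would be bound through vkQueueBindSparse, which is still
 * a stub above.
 */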
2217 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2218 buffer->bo = device->ws->buffer_create(device->ws,
2219 align64(buffer->size, 4096),
2220 4096, 0, RADEON_FLAG_VIRTUAL);
2221 if (!buffer->bo) {
2222 vk_free2(&device->alloc, pAllocator, buffer);
2223 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2224 }
2225 }
2226
2227 *pBuffer = radv_buffer_to_handle(buffer);
2228
2229 return VK_SUCCESS;
2230 }
2231
2232 void radv_DestroyBuffer(
2233 VkDevice _device,
2234 VkBuffer _buffer,
2235 const VkAllocationCallbacks* pAllocator)
2236 {
2237 RADV_FROM_HANDLE(radv_device, device, _device);
2238 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2239
2240 if (!buffer)
2241 return;
2242
2243 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2244 device->ws->buffer_destroy(buffer->bo);
2245
2246 vk_free2(&device->alloc, pAllocator, buffer);
2247 }
2248
2249 static inline unsigned
2250 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2251 {
2252 if (stencil)
2253 return image->surface.stencil_tiling_index[level];
2254 else
2255 return image->surface.tiling_index[level];
2256 }
2257
2258 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2259 {
2260 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2261 }
2262
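/* Fill the CB_COLOR* register state for a color attachment: surface,
 * CMASK, FMASK and DCC addresses, tiling indices, and the number
 * format/swap derived from the Vulkan format.
 */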
2263 static void
2264 radv_initialise_color_surface(struct radv_device *device,
2265 struct radv_color_buffer_info *cb,
2266 struct radv_image_view *iview)
2267 {
2268 const struct vk_format_description *desc;
2269 unsigned ntype, format, swap, endian;
2270 unsigned blend_clamp = 0, blend_bypass = 0;
2271 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2272 uint64_t va;
2273 const struct radeon_surf *surf = &iview->image->surface;
2274 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2275
2276 desc = vk_format_description(iview->vk_format);
2277
2278 memset(cb, 0, sizeof(*cb));
2279
2280 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2281 va += level_info->offset;
2282 cb->cb_color_base = va >> 8;
2283
2284 /* CMASK variables */
2285 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2286 va += iview->image->cmask.offset;
2287 cb->cb_color_cmask = va >> 8;
2288 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2289
2290 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2291 va += iview->image->dcc_offset;
2292 cb->cb_dcc_base = va >> 8;
2293
2294 uint32_t max_slice = radv_surface_layer_count(iview);
2295 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2296 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2297
2298 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2299 pitch_tile_max = level_info->nblk_x / 8 - 1;
2300 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2301 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2302
2303 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2304 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2305
2306 /* Intensity is implemented as Red, so treat it that way. */
2307 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2308 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2309
2310 if (iview->image->samples > 1) {
2311 unsigned log_samples = util_logbase2(iview->image->samples);
2312
2313 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2314 S_028C74_NUM_FRAGMENTS(log_samples);
2315 }
2316
2317 if (iview->image->fmask.size) {
2318 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2319 if (device->physical_device->rad_info.chip_class >= CIK)
2320 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2321 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2322 cb->cb_color_fmask = va >> 8;
2323 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2324 } else {
2325 /* This must be set for fast clear to work without FMASK. */
2326 if (device->physical_device->rad_info.chip_class >= CIK)
2327 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2328 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2329 cb->cb_color_fmask = cb->cb_color_base;
2330 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2331 }
2332
2333 ntype = radv_translate_color_numformat(iview->vk_format,
2334 desc,
2335 vk_format_get_first_non_void_channel(iview->vk_format));
2336 format = radv_translate_colorformat(iview->vk_format);
2337 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2338 radv_finishme("Illegal color\n");
2339 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2340 endian = radv_colorformat_endian_swap(format);
2341
2342 /* blend clamp should be set for all NORM/SRGB types */
2343 if (ntype == V_028C70_NUMBER_UNORM ||
2344 ntype == V_028C70_NUMBER_SNORM ||
2345 ntype == V_028C70_NUMBER_SRGB)
2346 blend_clamp = 1;
2347
2348 /* set blend bypass according to docs if SINT/UINT or
2349 8/24 COLOR variants */
2350 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2351 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2352 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2353 blend_clamp = 0;
2354 blend_bypass = 1;
2355 }
2356 #if 0
2357 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2358 (format == V_028C70_COLOR_8 ||
2359 format == V_028C70_COLOR_8_8 ||
2360 format == V_028C70_COLOR_8_8_8_8))
2361 ->color_is_int8 = true;
2362 #endif
2363 cb->cb_color_info = S_028C70_FORMAT(format) |
2364 S_028C70_COMP_SWAP(swap) |
2365 S_028C70_BLEND_CLAMP(blend_clamp) |
2366 S_028C70_BLEND_BYPASS(blend_bypass) |
2367 S_028C70_SIMPLE_FLOAT(1) |
2368 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2369 ntype != V_028C70_NUMBER_SNORM &&
2370 ntype != V_028C70_NUMBER_SRGB &&
2371 format != V_028C70_COLOR_8_24 &&
2372 format != V_028C70_COLOR_24_8) |
2373 S_028C70_NUMBER_TYPE(ntype) |
2374 S_028C70_ENDIAN(endian);
2375 if (iview->image->samples > 1)
2376 if (iview->image->fmask.size)
2377 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2378
2379 if (iview->image->cmask.size &&
2380 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2381 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2382
2383 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2384 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2385
2386 if (device->physical_device->rad_info.chip_class >= VI) {
2387 unsigned max_uncompressed_block_size = 2;
2388 if (iview->image->samples > 1) {
2389 if (iview->image->surface.bpe == 1)
2390 max_uncompressed_block_size = 0;
2391 else if (iview->image->surface.bpe == 2)
2392 max_uncompressed_block_size = 1;
2393 }
2394
2395 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2396 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2397 }
2398
2399 /* This must be set for fast clear to work without FMASK. */
2400 if (!iview->image->fmask.size &&
2401 device->physical_device->rad_info.chip_class == SI) {
2402 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2403 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2404 }
2405 }
2406
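/* Fill the DB_* register state for a depth/stencil attachment, including
 * tiling parameters and HTILE setup for the base mip level.
 */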
2407 static void
2408 radv_initialise_ds_surface(struct radv_device *device,
2409 struct radv_ds_buffer_info *ds,
2410 struct radv_image_view *iview)
2411 {
2412 unsigned level = iview->base_mip;
2413 unsigned format;
2414 uint64_t va, s_offs, z_offs;
2415 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2416 memset(ds, 0, sizeof(*ds));
2417 switch (iview->vk_format) {
2418 case VK_FORMAT_D24_UNORM_S8_UINT:
2419 case VK_FORMAT_X8_D24_UNORM_PACK32:
2420 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2421 ds->offset_scale = 2.0f;
2422 break;
2423 case VK_FORMAT_D16_UNORM:
2424 case VK_FORMAT_D16_UNORM_S8_UINT:
2425 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2426 ds->offset_scale = 4.0f;
2427 break;
2428 case VK_FORMAT_D32_SFLOAT:
2429 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2430 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2431 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2432 ds->offset_scale = 1.0f;
2433 break;
2434 default:
2435 break;
2436 }
2437
2438 format = radv_translate_dbformat(iview->vk_format);
2439
2440 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2441 s_offs = z_offs = va;
2442 z_offs += iview->image->surface.level[level].offset;
2443 s_offs += iview->image->surface.stencil_level[level].offset;
2444
2445 uint32_t max_slice = radv_surface_layer_count(iview);
2446 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2447 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2448 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2449 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2450
2451 if (iview->image->samples > 1)
2452 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2453
2454 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2455 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2456 else
2457 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2458
2459 if (device->physical_device->rad_info.chip_class >= CIK) {
2460 struct radeon_info *info = &device->physical_device->rad_info;
2461 unsigned tiling_index = iview->image->surface.tiling_index[level];
2462 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2463 unsigned macro_index = iview->image->surface.macro_tile_index;
2464 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2465 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2466 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2467
2468 ds->db_depth_info |=
2469 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2470 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2471 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2472 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2473 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2474 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2475 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2476 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2477 } else {
2478 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2479 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2480 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2481 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2482 }
2483
2484 if (iview->image->surface.htile_size && !level) {
2485 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2486 S_028040_ALLOW_EXPCLEAR(1);
2487
2488 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2489 /* Workaround: For a not yet understood reason, the
2490 * combination of MSAA, fast stencil clear and stencil
2491 * decompress messes with subsequent stencil buffer
2492 * uses. The problem was reproduced on Verde, Bonaire,
2493 * Tonga, and Carrizo.
2494 *
2495 * Disabling EXPCLEAR works around the problem.
2496 *
2497 * Check piglit's arb_texture_multisample-stencil-clear
2498 * test if you want to try changing this.
2499 */
2500 if (iview->image->samples <= 1)
2501 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2502 } else
2503 /* Use all of the htile_buffer for depth if there's no stencil. */
2504 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2505
2506 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2507 iview->image->htile_offset;
2508 ds->db_htile_data_base = va >> 8;
2509 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2510 } else {
2511 ds->db_htile_data_base = 0;
2512 ds->db_htile_surface = 0;
2513 }
2514
2515 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2516 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2517
2518 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2519 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2520 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2521 }
2522
2523 VkResult radv_CreateFramebuffer(
2524 VkDevice _device,
2525 const VkFramebufferCreateInfo* pCreateInfo,
2526 const VkAllocationCallbacks* pAllocator,
2527 VkFramebuffer* pFramebuffer)
2528 {
2529 RADV_FROM_HANDLE(radv_device, device, _device);
2530 struct radv_framebuffer *framebuffer;
2531
2532 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2533
2534 size_t size = sizeof(*framebuffer) +
2535 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2536 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2537 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2538 if (framebuffer == NULL)
2539 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2540
2541 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2542 framebuffer->width = pCreateInfo->width;
2543 framebuffer->height = pCreateInfo->height;
2544 framebuffer->layers = pCreateInfo->layers;
2545 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2546 VkImageView _iview = pCreateInfo->pAttachments[i];
2547 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2548 framebuffer->attachments[i].attachment = iview;
2549 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2550 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2551 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2552 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2553 }
2554 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2555 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2556 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2557 }
2558
2559 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2560 return VK_SUCCESS;
2561 }
2562
2563 void radv_DestroyFramebuffer(
2564 VkDevice _device,
2565 VkFramebuffer _fb,
2566 const VkAllocationCallbacks* pAllocator)
2567 {
2568 RADV_FROM_HANDLE(radv_device, device, _device);
2569 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2570
2571 if (!fb)
2572 return;
2573 vk_free2(&device->alloc, pAllocator, fb);
2574 }
2575
2576 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2577 {
2578 switch (address_mode) {
2579 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2580 return V_008F30_SQ_TEX_WRAP;
2581 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2582 return V_008F30_SQ_TEX_MIRROR;
2583 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2584 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2585 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2586 return V_008F30_SQ_TEX_CLAMP_BORDER;
2587 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2588 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2589 default:
2590 unreachable("illegal tex wrap mode");
2591 break;
2592 }
2593 }
2594
2595 static unsigned
2596 radv_tex_compare(VkCompareOp op)
2597 {
2598 switch (op) {
2599 case VK_COMPARE_OP_NEVER:
2600 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2601 case VK_COMPARE_OP_LESS:
2602 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2603 case VK_COMPARE_OP_EQUAL:
2604 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2605 case VK_COMPARE_OP_LESS_OR_EQUAL:
2606 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2607 case VK_COMPARE_OP_GREATER:
2608 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2609 case VK_COMPARE_OP_NOT_EQUAL:
2610 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2611 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2612 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2613 case VK_COMPARE_OP_ALWAYS:
2614 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2615 default:
2616 unreachable("illegal compare mode");
2617 break;
2618 }
2619 }
2620
2621 static unsigned
2622 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2623 {
2624 switch (filter) {
2625 case VK_FILTER_NEAREST:
2626 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2627 V_008F38_SQ_TEX_XY_FILTER_POINT);
2628 case VK_FILTER_LINEAR:
2629 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2630 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2631 case VK_FILTER_CUBIC_IMG:
2632 default:
2633 fprintf(stderr, "illegal texture filter\n");
2634 return 0;
2635 }
2636 }
2637
2638 static unsigned
2639 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2640 {
2641 switch (mode) {
2642 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2643 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2644 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2645 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2646 default:
2647 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2648 }
2649 }
2650
2651 static unsigned
2652 radv_tex_bordercolor(VkBorderColor bcolor)
2653 {
2654 switch (bcolor) {
2655 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2656 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2657 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2658 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2659 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2660 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2661 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2662 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2663 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2664 default:
2665 break;
2666 }
2667 return 0;
2668 }
2669
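/* Encode the maximum anisotropy as the hardware's log2 ratio field:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x and up -> 4.
 */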
2670 static unsigned
2671 radv_tex_aniso_filter(unsigned filter)
2672 {
2673 if (filter < 2)
2674 return 0;
2675 if (filter < 4)
2676 return 1;
2677 if (filter < 8)
2678 return 2;
2679 if (filter < 16)
2680 return 3;
2681 return 4;
2682 }
2683
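/* Translate VkSamplerCreateInfo into the four SQ_IMG_SAMP_WORD dwords
 * consumed by the texture units: wrap/compare/aniso in word 0, LOD range
 * in word 1, filters and LOD bias in word 2, border color in word 3.
 */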
2684 static void
2685 radv_init_sampler(struct radv_device *device,
2686 struct radv_sampler *sampler,
2687 const VkSamplerCreateInfo *pCreateInfo)
2688 {
2689 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2690 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2691 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2692 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2693
2694 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2695 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2696 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2697 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2698 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2699 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2700 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2701 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2702 S_008F30_DISABLE_CUBE_WRAP(0) |
2703 S_008F30_COMPAT_MODE(is_vi));
2704 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2705 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2706 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2707 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2708 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2709 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2710 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2711 S_008F38_MIP_POINT_PRECLAMP(0) |
2712 S_008F38_DISABLE_LSB_CEIL(1) |
2713 S_008F38_FILTER_PREC_FIX(1) |
2714 S_008F38_ANISO_OVERRIDE(is_vi));
2715 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2716 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2717 }
2718
2719 VkResult radv_CreateSampler(
2720 VkDevice _device,
2721 const VkSamplerCreateInfo* pCreateInfo,
2722 const VkAllocationCallbacks* pAllocator,
2723 VkSampler* pSampler)
2724 {
2725 RADV_FROM_HANDLE(radv_device, device, _device);
2726 struct radv_sampler *sampler;
2727
2728 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2729
2730 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2731 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2732 if (!sampler)
2733 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2734
2735 radv_init_sampler(device, sampler, pCreateInfo);
2736 *pSampler = radv_sampler_to_handle(sampler);
2737
2738 return VK_SUCCESS;
2739 }
2740
2741 void radv_DestroySampler(
2742 VkDevice _device,
2743 VkSampler _sampler,
2744 const VkAllocationCallbacks* pAllocator)
2745 {
2746 RADV_FROM_HANDLE(radv_device, device, _device);
2747 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2748
2749 if (!sampler)
2750 return;
2751 vk_free2(&device->alloc, pAllocator, sampler);
2752 }
2753
2754
2755 /* vk_icd.h does not declare this function, so we declare it here to
2756 * suppress Wmissing-prototypes.
2757 */
2758 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2759 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2760
2761 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2762 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2763 {
2764 /* For the full details on loader interface versioning, see
2765 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2766 * What follows is a condensed summary, to help you navigate the large and
2767 * confusing official doc.
2768 *
2769 * - Loader interface v0 is incompatible with later versions. We don't
2770 * support it.
2771 *
2772 * - In loader interface v1:
2773 * - The first ICD entrypoint called by the loader is
2774 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2775 * entrypoint.
2776 * - The ICD must statically expose no other Vulkan symbol unless it is
2777 * linked with -Bsymbolic.
2778 * - Each dispatchable Vulkan handle created by the ICD must be
2779 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2780 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2781 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2782 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2783 * such loader-managed surfaces.
2784 *
2785 * - Loader interface v2 differs from v1 in:
2786 * - The first ICD entrypoint called by the loader is
2787 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2788 * statically expose this entrypoint.
2789 *
2790 * - Loader interface v3 differs from v2 in:
2791 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2792 * vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
2793 * because the loader no longer does so.
2794 */
2795 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2796 return VK_SUCCESS;
2797 }