radv: implement VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <dlfcn.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include "radv_private.h"
35 #include "util/strtod.h"
36
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46 struct radv_dispatch_table dtable;
47
48 static int
49 radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
50 {
51 Dl_info info;
52 struct stat st;
53 if (!dladdr(ptr, &info) || !info.dli_fname) {
54 return -1;
55 }
56 if (stat(info.dli_fname, &st)) {
57 return -1;
58 }
59 *timestamp = st.st_mtim.tv_sec;
60 return 0;
61 }
62
63 static int
64 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
65 {
66 uint32_t mesa_timestamp, llvm_timestamp;
67 uint16_t f = family;
68 memset(uuid, 0, VK_UUID_SIZE);
69 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
70 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
71 return -1;
72
73 memcpy(uuid, &mesa_timestamp, 4);
74 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
75 memcpy((char*)uuid + 8, &f, 2);
76 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
77 return 0;
78 }
79
80 static const VkExtensionProperties instance_extensions[] = {
81 {
82 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
83 .specVersion = 25,
84 },
85 #ifdef VK_USE_PLATFORM_XCB_KHR
86 {
87 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
88 .specVersion = 6,
89 },
90 #endif
91 #ifdef VK_USE_PLATFORM_XLIB_KHR
92 {
93 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
94 .specVersion = 6,
95 },
96 #endif
97 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
98 {
99 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
100 .specVersion = 5,
101 },
102 #endif
103 };
104
105 static const VkExtensionProperties common_device_extensions[] = {
106 {
107 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
112 .specVersion = 68,
113 },
114 {
115 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
116 .specVersion = 1,
117 },
118 {
119 .extensionName = VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_EXTENSION_NAME,
120 .specVersion = 1,
121 },
122 {
123 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
124 .specVersion = 1,
125 },
126 };
127
128 static VkResult
129 radv_extensions_register(struct radv_instance *instance,
130 struct radv_extensions *extensions,
131 const VkExtensionProperties *new_ext,
132 uint32_t num_ext)
133 {
134 size_t new_size;
135 VkExtensionProperties *new_ptr;
136
137 assert(new_ext && num_ext > 0);
138
139 if (!new_ext)
140 return VK_ERROR_INITIALIZATION_FAILED;
141
142 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
143 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
144 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
145
146 /* Old array continues to be valid, update nothing */
147 if (!new_ptr)
148 return VK_ERROR_OUT_OF_HOST_MEMORY;
149
150 memcpy(&new_ptr[extensions->num_ext], new_ext,
151 num_ext * sizeof(VkExtensionProperties));
152 extensions->ext_array = new_ptr;
153 extensions->num_ext += num_ext;
154
155 return VK_SUCCESS;
156 }
157
158 static void
159 radv_extensions_finish(struct radv_instance *instance,
160 struct radv_extensions *extensions)
161 {
162 assert(extensions);
163
164 if (!extensions)
165 radv_loge("Attemted to free invalid extension struct\n");
166
167 if (extensions->ext_array)
168 vk_free(&instance->alloc, extensions->ext_array);
169 }
170
171 static bool
172 is_extension_enabled(const VkExtensionProperties *extensions,
173 size_t num_ext,
174 const char *name)
175 {
176 assert(extensions && name);
177
178 for (uint32_t i = 0; i < num_ext; i++) {
179 if (strcmp(name, extensions[i].extensionName) == 0)
180 return true;
181 }
182
183 return false;
184 }
185
186 static VkResult
187 radv_physical_device_init(struct radv_physical_device *device,
188 struct radv_instance *instance,
189 const char *path)
190 {
191 VkResult result;
192 drmVersionPtr version;
193 int fd;
194
195 fd = open(path, O_RDWR | O_CLOEXEC);
196 if (fd < 0)
197 return VK_ERROR_INCOMPATIBLE_DRIVER;
198
199 version = drmGetVersion(fd);
200 if (!version) {
201 close(fd);
202 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
203 "failed to get version %s: %m", path);
204 }
205
206 if (strcmp(version->name, "amdgpu")) {
207 drmFreeVersion(version);
208 close(fd);
209 return VK_ERROR_INCOMPATIBLE_DRIVER;
210 }
211 drmFreeVersion(version);
212
213 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
214 device->instance = instance;
215 assert(strlen(path) < ARRAY_SIZE(device->path));
216 strncpy(device->path, path, ARRAY_SIZE(device->path));
217
218 device->ws = radv_amdgpu_winsys_create(fd);
219 if (!device->ws) {
220 result = VK_ERROR_INCOMPATIBLE_DRIVER;
221 goto fail;
222 }
223 device->ws->query_info(device->ws, &device->rad_info);
224 result = radv_init_wsi(device);
225 if (result != VK_SUCCESS) {
226 device->ws->destroy(device->ws);
227 goto fail;
228 }
229
230 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
231 radv_finish_wsi(device);
232 device->ws->destroy(device->ws);
233 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
234 "cannot generate UUID");
235 goto fail;
236 }
237
238 result = radv_extensions_register(instance,
239 &device->extensions,
240 common_device_extensions,
241 ARRAY_SIZE(common_device_extensions));
242 if (result != VK_SUCCESS)
243 goto fail;
244
245 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
246 device->name = device->rad_info.name;
247 close(fd);
248 return VK_SUCCESS;
249
250 fail:
251 close(fd);
252 return result;
253 }
254
255 static void
256 radv_physical_device_finish(struct radv_physical_device *device)
257 {
258 radv_extensions_finish(device->instance, &device->extensions);
259 radv_finish_wsi(device);
260 device->ws->destroy(device->ws);
261 }
262
263
264 static void *
265 default_alloc_func(void *pUserData, size_t size, size_t align,
266 VkSystemAllocationScope allocationScope)
267 {
268 return malloc(size);
269 }
270
271 static void *
272 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
273 size_t align, VkSystemAllocationScope allocationScope)
274 {
275 return realloc(pOriginal, size);
276 }
277
/*
 * Fallback pfnFree callback: hands `pMemory` to free(). `pUserData` is not
 * used.
 */
static void
default_free_func(void *pUserData, void *pMemory)
{
	(void)pUserData;
	free(pMemory);
}
283
284 static const VkAllocationCallbacks default_alloc = {
285 .pUserData = NULL,
286 .pfnAllocation = default_alloc_func,
287 .pfnReallocation = default_realloc_func,
288 .pfnFree = default_free_func,
289 };
290
291 static const struct debug_control radv_debug_options[] = {
292 {"fastclears", RADV_DEBUG_FAST_CLEARS},
293 {"nodcc", RADV_DEBUG_NO_DCC},
294 {"shaders", RADV_DEBUG_DUMP_SHADERS},
295 {"nocache", RADV_DEBUG_NO_CACHE},
296 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
297 {"nohiz", RADV_DEBUG_NO_HIZ},
298 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
299 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
300 {NULL, 0}
301 };
302
303 VkResult radv_CreateInstance(
304 const VkInstanceCreateInfo* pCreateInfo,
305 const VkAllocationCallbacks* pAllocator,
306 VkInstance* pInstance)
307 {
308 struct radv_instance *instance;
309
310 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
311
312 uint32_t client_version;
313 if (pCreateInfo->pApplicationInfo &&
314 pCreateInfo->pApplicationInfo->apiVersion != 0) {
315 client_version = pCreateInfo->pApplicationInfo->apiVersion;
316 } else {
317 client_version = VK_MAKE_VERSION(1, 0, 0);
318 }
319
320 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
321 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
322 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
323 "Client requested version %d.%d.%d",
324 VK_VERSION_MAJOR(client_version),
325 VK_VERSION_MINOR(client_version),
326 VK_VERSION_PATCH(client_version));
327 }
328
329 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
330 if (!is_extension_enabled(instance_extensions,
331 ARRAY_SIZE(instance_extensions),
332 pCreateInfo->ppEnabledExtensionNames[i]))
333 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
334 }
335
336 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
337 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
338 if (!instance)
339 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
340
341 memset(instance, 0, sizeof(*instance));
342
343 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
344
345 if (pAllocator)
346 instance->alloc = *pAllocator;
347 else
348 instance->alloc = default_alloc;
349
350 instance->apiVersion = client_version;
351 instance->physicalDeviceCount = -1;
352
353 _mesa_locale_init();
354
355 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
356
357 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
358 radv_debug_options);
359
360 *pInstance = radv_instance_to_handle(instance);
361
362 return VK_SUCCESS;
363 }
364
365 void radv_DestroyInstance(
366 VkInstance _instance,
367 const VkAllocationCallbacks* pAllocator)
368 {
369 RADV_FROM_HANDLE(radv_instance, instance, _instance);
370
371 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
372 radv_physical_device_finish(instance->physicalDevices + i);
373 }
374
375 VG(VALGRIND_DESTROY_MEMPOOL(instance));
376
377 _mesa_locale_fini();
378
379 vk_free(&instance->alloc, instance);
380 }
381
382 VkResult radv_EnumeratePhysicalDevices(
383 VkInstance _instance,
384 uint32_t* pPhysicalDeviceCount,
385 VkPhysicalDevice* pPhysicalDevices)
386 {
387 RADV_FROM_HANDLE(radv_instance, instance, _instance);
388 VkResult result;
389
390 if (instance->physicalDeviceCount < 0) {
391 char path[20];
392 instance->physicalDeviceCount = 0;
393 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
394 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
395 result = radv_physical_device_init(instance->physicalDevices +
396 instance->physicalDeviceCount,
397 instance, path);
398 if (result == VK_SUCCESS)
399 ++instance->physicalDeviceCount;
400 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
401 return result;
402 }
403 }
404
405 if (!pPhysicalDevices) {
406 *pPhysicalDeviceCount = instance->physicalDeviceCount;
407 } else {
408 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
409 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
410 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
411 }
412
413 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
414 : VK_SUCCESS;
415 }
416
417 void radv_GetPhysicalDeviceFeatures(
418 VkPhysicalDevice physicalDevice,
419 VkPhysicalDeviceFeatures* pFeatures)
420 {
421 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
422
423 memset(pFeatures, 0, sizeof(*pFeatures));
424
425 *pFeatures = (VkPhysicalDeviceFeatures) {
426 .robustBufferAccess = true,
427 .fullDrawIndexUint32 = true,
428 .imageCubeArray = true,
429 .independentBlend = true,
430 .geometryShader = false,
431 .tessellationShader = false,
432 .sampleRateShading = false,
433 .dualSrcBlend = true,
434 .logicOp = true,
435 .multiDrawIndirect = true,
436 .drawIndirectFirstInstance = true,
437 .depthClamp = true,
438 .depthBiasClamp = true,
439 .fillModeNonSolid = true,
440 .depthBounds = true,
441 .wideLines = true,
442 .largePoints = true,
443 .alphaToOne = true,
444 .multiViewport = false,
445 .samplerAnisotropy = true,
446 .textureCompressionETC2 = false,
447 .textureCompressionASTC_LDR = false,
448 .textureCompressionBC = true,
449 .occlusionQueryPrecise = true,
450 .pipelineStatisticsQuery = false,
451 .vertexPipelineStoresAndAtomics = true,
452 .fragmentStoresAndAtomics = true,
453 .shaderTessellationAndGeometryPointSize = true,
454 .shaderImageGatherExtended = true,
455 .shaderStorageImageExtendedFormats = true,
456 .shaderStorageImageMultisample = false,
457 .shaderUniformBufferArrayDynamicIndexing = true,
458 .shaderSampledImageArrayDynamicIndexing = true,
459 .shaderStorageBufferArrayDynamicIndexing = true,
460 .shaderStorageImageArrayDynamicIndexing = true,
461 .shaderStorageImageReadWithoutFormat = false,
462 .shaderStorageImageWriteWithoutFormat = false,
463 .shaderClipDistance = true,
464 .shaderCullDistance = true,
465 .shaderFloat64 = false,
466 .shaderInt64 = false,
467 .shaderInt16 = false,
468 .alphaToOne = true,
469 .variableMultisampleRate = false,
470 .inheritedQueries = false,
471 };
472 }
473
474 void radv_GetPhysicalDeviceFeatures2KHR(
475 VkPhysicalDevice physicalDevice,
476 VkPhysicalDeviceFeatures2KHR *pFeatures)
477 {
478 return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
479 }
480
481 void radv_GetPhysicalDeviceProperties(
482 VkPhysicalDevice physicalDevice,
483 VkPhysicalDeviceProperties* pProperties)
484 {
485 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
486 VkSampleCountFlags sample_counts = 0xf;
487 VkPhysicalDeviceLimits limits = {
488 .maxImageDimension1D = (1 << 14),
489 .maxImageDimension2D = (1 << 14),
490 .maxImageDimension3D = (1 << 11),
491 .maxImageDimensionCube = (1 << 14),
492 .maxImageArrayLayers = (1 << 11),
493 .maxTexelBufferElements = 128 * 1024 * 1024,
494 .maxUniformBufferRange = UINT32_MAX,
495 .maxStorageBufferRange = UINT32_MAX,
496 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
497 .maxMemoryAllocationCount = UINT32_MAX,
498 .maxSamplerAllocationCount = 64 * 1024,
499 .bufferImageGranularity = 64, /* A cache line */
500 .sparseAddressSpaceSize = 0,
501 .maxBoundDescriptorSets = MAX_SETS,
502 .maxPerStageDescriptorSamplers = 64,
503 .maxPerStageDescriptorUniformBuffers = 64,
504 .maxPerStageDescriptorStorageBuffers = 64,
505 .maxPerStageDescriptorSampledImages = 64,
506 .maxPerStageDescriptorStorageImages = 64,
507 .maxPerStageDescriptorInputAttachments = 64,
508 .maxPerStageResources = 128,
509 .maxDescriptorSetSamplers = 256,
510 .maxDescriptorSetUniformBuffers = 256,
511 .maxDescriptorSetUniformBuffersDynamic = 256,
512 .maxDescriptorSetStorageBuffers = 256,
513 .maxDescriptorSetStorageBuffersDynamic = 256,
514 .maxDescriptorSetSampledImages = 256,
515 .maxDescriptorSetStorageImages = 256,
516 .maxDescriptorSetInputAttachments = 256,
517 .maxVertexInputAttributes = 32,
518 .maxVertexInputBindings = 32,
519 .maxVertexInputAttributeOffset = 2047,
520 .maxVertexInputBindingStride = 2048,
521 .maxVertexOutputComponents = 128,
522 .maxTessellationGenerationLevel = 0,
523 .maxTessellationPatchSize = 0,
524 .maxTessellationControlPerVertexInputComponents = 0,
525 .maxTessellationControlPerVertexOutputComponents = 0,
526 .maxTessellationControlPerPatchOutputComponents = 0,
527 .maxTessellationControlTotalOutputComponents = 0,
528 .maxTessellationEvaluationInputComponents = 0,
529 .maxTessellationEvaluationOutputComponents = 0,
530 .maxGeometryShaderInvocations = 32,
531 .maxGeometryInputComponents = 64,
532 .maxGeometryOutputComponents = 128,
533 .maxGeometryOutputVertices = 256,
534 .maxGeometryTotalOutputComponents = 1024,
535 .maxFragmentInputComponents = 128,
536 .maxFragmentOutputAttachments = 8,
537 .maxFragmentDualSrcAttachments = 1,
538 .maxFragmentCombinedOutputResources = 8,
539 .maxComputeSharedMemorySize = 32768,
540 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
541 .maxComputeWorkGroupInvocations = 2048,
542 .maxComputeWorkGroupSize = {
543 2048,
544 2048,
545 2048
546 },
547 .subPixelPrecisionBits = 4 /* FIXME */,
548 .subTexelPrecisionBits = 4 /* FIXME */,
549 .mipmapPrecisionBits = 4 /* FIXME */,
550 .maxDrawIndexedIndexValue = UINT32_MAX,
551 .maxDrawIndirectCount = UINT32_MAX,
552 .maxSamplerLodBias = 16,
553 .maxSamplerAnisotropy = 16,
554 .maxViewports = MAX_VIEWPORTS,
555 .maxViewportDimensions = { (1 << 14), (1 << 14) },
556 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
557 .viewportSubPixelBits = 13, /* We take a float? */
558 .minMemoryMapAlignment = 4096, /* A page */
559 .minTexelBufferOffsetAlignment = 1,
560 .minUniformBufferOffsetAlignment = 4,
561 .minStorageBufferOffsetAlignment = 4,
562 .minTexelOffset = -32,
563 .maxTexelOffset = 31,
564 .minTexelGatherOffset = -32,
565 .maxTexelGatherOffset = 31,
566 .minInterpolationOffset = -2,
567 .maxInterpolationOffset = 2,
568 .subPixelInterpolationOffsetBits = 8,
569 .maxFramebufferWidth = (1 << 14),
570 .maxFramebufferHeight = (1 << 14),
571 .maxFramebufferLayers = (1 << 10),
572 .framebufferColorSampleCounts = sample_counts,
573 .framebufferDepthSampleCounts = sample_counts,
574 .framebufferStencilSampleCounts = sample_counts,
575 .framebufferNoAttachmentsSampleCounts = sample_counts,
576 .maxColorAttachments = MAX_RTS,
577 .sampledImageColorSampleCounts = sample_counts,
578 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
579 .sampledImageDepthSampleCounts = sample_counts,
580 .sampledImageStencilSampleCounts = sample_counts,
581 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
582 .maxSampleMaskWords = 1,
583 .timestampComputeAndGraphics = false,
584 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
585 .maxClipDistances = 8,
586 .maxCullDistances = 8,
587 .maxCombinedClipAndCullDistances = 8,
588 .discreteQueuePriorities = 1,
589 .pointSizeRange = { 0.125, 255.875 },
590 .lineWidthRange = { 0.0, 7.9921875 },
591 .pointSizeGranularity = (1.0 / 8.0),
592 .lineWidthGranularity = (1.0 / 128.0),
593 .strictLines = false, /* FINISHME */
594 .standardSampleLocations = true,
595 .optimalBufferCopyOffsetAlignment = 128,
596 .optimalBufferCopyRowPitchAlignment = 128,
597 .nonCoherentAtomSize = 64,
598 };
599
600 *pProperties = (VkPhysicalDeviceProperties) {
601 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
602 .driverVersion = 1,
603 .vendorID = 0x1002,
604 .deviceID = pdevice->rad_info.pci_id,
605 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
606 .limits = limits,
607 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
608 };
609
610 strcpy(pProperties->deviceName, pdevice->name);
611 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
612 }
613
614 void radv_GetPhysicalDeviceProperties2KHR(
615 VkPhysicalDevice physicalDevice,
616 VkPhysicalDeviceProperties2KHR *pProperties)
617 {
618 return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
619 }
620
621 void radv_GetPhysicalDeviceQueueFamilyProperties(
622 VkPhysicalDevice physicalDevice,
623 uint32_t* pCount,
624 VkQueueFamilyProperties* pQueueFamilyProperties)
625 {
626 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
627 int num_queue_families = 1;
628 int idx;
629 if (pdevice->rad_info.compute_rings > 0 &&
630 pdevice->rad_info.chip_class >= CIK &&
631 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
632 num_queue_families++;
633
634 if (pQueueFamilyProperties == NULL) {
635 *pCount = num_queue_families;
636 return;
637 }
638
639 if (!*pCount)
640 return;
641
642 idx = 0;
643 if (*pCount >= 1) {
644 pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
645 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
646 VK_QUEUE_COMPUTE_BIT |
647 VK_QUEUE_TRANSFER_BIT,
648 .queueCount = 1,
649 .timestampValidBits = 64,
650 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
651 };
652 idx++;
653 }
654
655 if (pdevice->rad_info.compute_rings > 0 &&
656 pdevice->rad_info.chip_class >= CIK &&
657 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
658 if (*pCount > idx) {
659 pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
660 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
661 .queueCount = pdevice->rad_info.compute_rings,
662 .timestampValidBits = 64,
663 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
664 };
665 idx++;
666 }
667 }
668 *pCount = idx;
669 }
670
671 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
672 VkPhysicalDevice physicalDevice,
673 uint32_t* pCount,
674 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
675 {
676 return radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice,
677 pCount,
678 &pQueueFamilyProperties->queueFamilyProperties);
679 }
680
681 void radv_GetPhysicalDeviceMemoryProperties(
682 VkPhysicalDevice physicalDevice,
683 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
684 {
685 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
686
687 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
688
689 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
690 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
691 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
692 .heapIndex = RADV_MEM_HEAP_VRAM,
693 };
694 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
695 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
696 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
697 .heapIndex = RADV_MEM_HEAP_GTT,
698 };
699 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
700 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
701 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
702 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
703 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
704 };
705 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
706 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
707 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
708 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
709 .heapIndex = RADV_MEM_HEAP_GTT,
710 };
711
712 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
713
714 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
715 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
716 .size = physical_device->rad_info.vram_size -
717 physical_device->rad_info.visible_vram_size,
718 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
719 };
720 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
721 .size = physical_device->rad_info.visible_vram_size,
722 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
723 };
724 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
725 .size = physical_device->rad_info.gart_size,
726 .flags = 0,
727 };
728 }
729
730 void radv_GetPhysicalDeviceMemoryProperties2KHR(
731 VkPhysicalDevice physicalDevice,
732 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
733 {
734 return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
735 &pMemoryProperties->memoryProperties);
736 }
737
738 static int
739 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
740 int queue_family_index, int idx)
741 {
742 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
743 queue->device = device;
744 queue->queue_family_index = queue_family_index;
745 queue->queue_idx = idx;
746
747 queue->hw_ctx = device->ws->ctx_create(device->ws);
748 if (!queue->hw_ctx)
749 return VK_ERROR_OUT_OF_HOST_MEMORY;
750
751 return VK_SUCCESS;
752 }
753
754 static void
755 radv_queue_finish(struct radv_queue *queue)
756 {
757 if (queue->hw_ctx)
758 queue->device->ws->ctx_destroy(queue->hw_ctx);
759 }
760
761 VkResult radv_CreateDevice(
762 VkPhysicalDevice physicalDevice,
763 const VkDeviceCreateInfo* pCreateInfo,
764 const VkAllocationCallbacks* pAllocator,
765 VkDevice* pDevice)
766 {
767 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
768 VkResult result;
769 struct radv_device *device;
770
771 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
772 if (!is_extension_enabled(physical_device->extensions.ext_array,
773 physical_device->extensions.num_ext,
774 pCreateInfo->ppEnabledExtensionNames[i]))
775 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
776 }
777
778 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
779 sizeof(*device), 8,
780 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
781 if (!device)
782 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
783
784 memset(device, 0, sizeof(*device));
785
786 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
787 device->instance = physical_device->instance;
788 device->physical_device = physical_device;
789
790 device->debug_flags = device->instance->debug_flags;
791
792 device->ws = physical_device->ws;
793 if (pAllocator)
794 device->alloc = *pAllocator;
795 else
796 device->alloc = physical_device->instance->alloc;
797
798 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
799 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
800 uint32_t qfi = queue_create->queueFamilyIndex;
801
802 device->queues[qfi] = vk_alloc(&device->alloc,
803 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
804 if (!device->queues[qfi]) {
805 result = VK_ERROR_OUT_OF_HOST_MEMORY;
806 goto fail;
807 }
808
809 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
810
811 device->queue_count[qfi] = queue_create->queueCount;
812
813 for (unsigned q = 0; q < queue_create->queueCount; q++) {
814 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
815 if (result != VK_SUCCESS)
816 goto fail;
817 }
818 }
819
820 result = radv_device_init_meta(device);
821 if (result != VK_SUCCESS)
822 goto fail;
823
824 radv_device_init_msaa(device);
825
826 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
827 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
828 switch (family) {
829 case RADV_QUEUE_GENERAL:
830 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
831 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
832 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
833 break;
834 case RADV_QUEUE_COMPUTE:
835 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
836 radeon_emit(device->empty_cs[family], 0);
837 break;
838 }
839 device->ws->cs_finalize(device->empty_cs[family]);
840 }
841
842 if (getenv("RADV_TRACE_FILE")) {
843 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
844 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
845 if (!device->trace_bo)
846 goto fail;
847
848 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
849 if (!device->trace_id_ptr)
850 goto fail;
851 }
852
853 *pDevice = radv_device_to_handle(device);
854 return VK_SUCCESS;
855
856 fail:
857 if (device->trace_bo)
858 device->ws->buffer_destroy(device->trace_bo);
859
860 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
861 for (unsigned q = 0; q < device->queue_count[i]; q++)
862 radv_queue_finish(&device->queues[i][q]);
863 if (device->queue_count[i])
864 vk_free(&device->alloc, device->queues[i]);
865 }
866
867 vk_free(&device->alloc, device);
868 return result;
869 }
870
871 void radv_DestroyDevice(
872 VkDevice _device,
873 const VkAllocationCallbacks* pAllocator)
874 {
875 RADV_FROM_HANDLE(radv_device, device, _device);
876
877 if (device->trace_bo)
878 device->ws->buffer_destroy(device->trace_bo);
879
880 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
881 for (unsigned q = 0; q < device->queue_count[i]; q++)
882 radv_queue_finish(&device->queues[i][q]);
883 if (device->queue_count[i])
884 vk_free(&device->alloc, device->queues[i]);
885 }
886 radv_device_finish_meta(device);
887
888 vk_free(&device->alloc, device);
889 }
890
891 VkResult radv_EnumerateInstanceExtensionProperties(
892 const char* pLayerName,
893 uint32_t* pPropertyCount,
894 VkExtensionProperties* pProperties)
895 {
896 if (pProperties == NULL) {
897 *pPropertyCount = ARRAY_SIZE(instance_extensions);
898 return VK_SUCCESS;
899 }
900
901 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
902 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
903
904 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
905 return VK_INCOMPLETE;
906
907 return VK_SUCCESS;
908 }
909
910 VkResult radv_EnumerateDeviceExtensionProperties(
911 VkPhysicalDevice physicalDevice,
912 const char* pLayerName,
913 uint32_t* pPropertyCount,
914 VkExtensionProperties* pProperties)
915 {
916 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
917
918 if (pProperties == NULL) {
919 *pPropertyCount = pdevice->extensions.num_ext;
920 return VK_SUCCESS;
921 }
922
923 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
924 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
925
926 if (*pPropertyCount < pdevice->extensions.num_ext)
927 return VK_INCOMPLETE;
928
929 return VK_SUCCESS;
930 }
931
932 VkResult radv_EnumerateInstanceLayerProperties(
933 uint32_t* pPropertyCount,
934 VkLayerProperties* pProperties)
935 {
936 if (pProperties == NULL) {
937 *pPropertyCount = 0;
938 return VK_SUCCESS;
939 }
940
941 /* None supported at this time */
942 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
943 }
944
945 VkResult radv_EnumerateDeviceLayerProperties(
946 VkPhysicalDevice physicalDevice,
947 uint32_t* pPropertyCount,
948 VkLayerProperties* pProperties)
949 {
950 if (pProperties == NULL) {
951 *pPropertyCount = 0;
952 return VK_SUCCESS;
953 }
954
955 /* None supported at this time */
956 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
957 }
958
959 void radv_GetDeviceQueue(
960 VkDevice _device,
961 uint32_t queueFamilyIndex,
962 uint32_t queueIndex,
963 VkQueue* pQueue)
964 {
965 RADV_FROM_HANDLE(radv_device, device, _device);
966
967 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
968 }
969
970 static void radv_dump_trace(struct radv_device *device,
971 struct radeon_winsys_cs *cs)
972 {
973 const char *filename = getenv("RADV_TRACE_FILE");
974 FILE *f = fopen(filename, "w");
975 if (!f) {
976 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
977 return;
978 }
979
980 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
981 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
982 fclose(f);
983 }
984
985 VkResult radv_QueueSubmit(
986 VkQueue _queue,
987 uint32_t submitCount,
988 const VkSubmitInfo* pSubmits,
989 VkFence _fence)
990 {
991 RADV_FROM_HANDLE(radv_queue, queue, _queue);
992 RADV_FROM_HANDLE(radv_fence, fence, _fence);
993 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
994 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
995 int ret;
996 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
997
998 for (uint32_t i = 0; i < submitCount; i++) {
999 struct radeon_winsys_cs **cs_array;
1000 bool can_patch = true;
1001 uint32_t advance;
1002
1003 if (!pSubmits[i].commandBufferCount)
1004 continue;
1005
1006 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1007 pSubmits[i].commandBufferCount);
1008
1009 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1010 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1011 pSubmits[i].pCommandBuffers[j]);
1012 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1013
1014 cs_array[j] = cmd_buffer->cs;
1015 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1016 can_patch = false;
1017 }
1018
1019 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
1020 advance = MIN2(max_cs_submission,
1021 pSubmits[i].commandBufferCount - j);
1022 bool b = j == 0;
1023 bool e = j + advance == pSubmits[i].commandBufferCount;
1024
1025 if (queue->device->trace_bo)
1026 *queue->device->trace_id_ptr = 0;
1027
1028 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
1029 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1030 b ? pSubmits[i].waitSemaphoreCount : 0,
1031 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1032 e ? pSubmits[i].signalSemaphoreCount : 0,
1033 can_patch, base_fence);
1034
1035 if (ret) {
1036 radv_loge("failed to submit CS %d\n", i);
1037 abort();
1038 }
1039 if (queue->device->trace_bo) {
1040 bool success = queue->device->ws->ctx_wait_idle(
1041 queue->hw_ctx,
1042 radv_queue_family_to_ring(
1043 queue->queue_family_index),
1044 queue->queue_idx);
1045
1046 if (!success) { /* Hang */
1047 radv_dump_trace(queue->device, cs_array[j]);
1048 abort();
1049 }
1050 }
1051 }
1052 free(cs_array);
1053 }
1054
1055 if (fence) {
1056 if (!submitCount)
1057 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1058 &queue->device->empty_cs[queue->queue_family_index],
1059 1, NULL, 0, NULL, 0, false, base_fence);
1060
1061 fence->submitted = true;
1062 }
1063
1064 return VK_SUCCESS;
1065 }
1066
1067 VkResult radv_QueueWaitIdle(
1068 VkQueue _queue)
1069 {
1070 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1071
1072 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1073 radv_queue_family_to_ring(queue->queue_family_index),
1074 queue->queue_idx);
1075 return VK_SUCCESS;
1076 }
1077
1078 VkResult radv_DeviceWaitIdle(
1079 VkDevice _device)
1080 {
1081 RADV_FROM_HANDLE(radv_device, device, _device);
1082
1083 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1084 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1085 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1086 }
1087 }
1088 return VK_SUCCESS;
1089 }
1090
1091 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1092 VkInstance instance,
1093 const char* pName)
1094 {
1095 return radv_lookup_entrypoint(pName);
1096 }
1097
1098 /* The loader wants us to expose a second GetInstanceProcAddr function
1099 * to work around certain LD_PRELOAD issues seen in apps.
1100 */
1101 PUBLIC
1102 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1103 VkInstance instance,
1104 const char* pName);
1105
1106 PUBLIC
1107 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1108 VkInstance instance,
1109 const char* pName)
1110 {
1111 return radv_GetInstanceProcAddr(instance, pName);
1112 }
1113
1114 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1115 VkDevice device,
1116 const char* pName)
1117 {
1118 return radv_lookup_entrypoint(pName);
1119 }
1120
1121 VkResult radv_AllocateMemory(
1122 VkDevice _device,
1123 const VkMemoryAllocateInfo* pAllocateInfo,
1124 const VkAllocationCallbacks* pAllocator,
1125 VkDeviceMemory* pMem)
1126 {
1127 RADV_FROM_HANDLE(radv_device, device, _device);
1128 struct radv_device_memory *mem;
1129 VkResult result;
1130 enum radeon_bo_domain domain;
1131 uint32_t flags = 0;
1132 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1133
1134 if (pAllocateInfo->allocationSize == 0) {
1135 /* Apparently, this is allowed */
1136 *pMem = VK_NULL_HANDLE;
1137 return VK_SUCCESS;
1138 }
1139
1140 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1141 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1142 if (mem == NULL)
1143 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1144
1145 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1146 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1147 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1148 domain = RADEON_DOMAIN_GTT;
1149 else
1150 domain = RADEON_DOMAIN_VRAM;
1151
1152 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1153 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1154 else
1155 flags |= RADEON_FLAG_CPU_ACCESS;
1156
1157 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1158 flags |= RADEON_FLAG_GTT_WC;
1159
1160 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
1161 domain, flags);
1162
1163 if (!mem->bo) {
1164 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1165 goto fail;
1166 }
1167 mem->type_index = pAllocateInfo->memoryTypeIndex;
1168
1169 *pMem = radv_device_memory_to_handle(mem);
1170
1171 return VK_SUCCESS;
1172
1173 fail:
1174 vk_free2(&device->alloc, pAllocator, mem);
1175
1176 return result;
1177 }
1178
1179 void radv_FreeMemory(
1180 VkDevice _device,
1181 VkDeviceMemory _mem,
1182 const VkAllocationCallbacks* pAllocator)
1183 {
1184 RADV_FROM_HANDLE(radv_device, device, _device);
1185 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1186
1187 if (mem == NULL)
1188 return;
1189
1190 device->ws->buffer_destroy(mem->bo);
1191 mem->bo = NULL;
1192
1193 vk_free2(&device->alloc, pAllocator, mem);
1194 }
1195
1196 VkResult radv_MapMemory(
1197 VkDevice _device,
1198 VkDeviceMemory _memory,
1199 VkDeviceSize offset,
1200 VkDeviceSize size,
1201 VkMemoryMapFlags flags,
1202 void** ppData)
1203 {
1204 RADV_FROM_HANDLE(radv_device, device, _device);
1205 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1206
1207 if (mem == NULL) {
1208 *ppData = NULL;
1209 return VK_SUCCESS;
1210 }
1211
1212 *ppData = device->ws->buffer_map(mem->bo);
1213 if (*ppData) {
1214 *ppData += offset;
1215 return VK_SUCCESS;
1216 }
1217
1218 return VK_ERROR_MEMORY_MAP_FAILED;
1219 }
1220
1221 void radv_UnmapMemory(
1222 VkDevice _device,
1223 VkDeviceMemory _memory)
1224 {
1225 RADV_FROM_HANDLE(radv_device, device, _device);
1226 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1227
1228 if (mem == NULL)
1229 return;
1230
1231 device->ws->buffer_unmap(mem->bo);
1232 }
1233
1234 VkResult radv_FlushMappedMemoryRanges(
1235 VkDevice _device,
1236 uint32_t memoryRangeCount,
1237 const VkMappedMemoryRange* pMemoryRanges)
1238 {
1239 return VK_SUCCESS;
1240 }
1241
1242 VkResult radv_InvalidateMappedMemoryRanges(
1243 VkDevice _device,
1244 uint32_t memoryRangeCount,
1245 const VkMappedMemoryRange* pMemoryRanges)
1246 {
1247 return VK_SUCCESS;
1248 }
1249
1250 void radv_GetBufferMemoryRequirements(
1251 VkDevice device,
1252 VkBuffer _buffer,
1253 VkMemoryRequirements* pMemoryRequirements)
1254 {
1255 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1256
1257 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1258
1259 pMemoryRequirements->size = buffer->size;
1260 pMemoryRequirements->alignment = 16;
1261 }
1262
1263 void radv_GetImageMemoryRequirements(
1264 VkDevice device,
1265 VkImage _image,
1266 VkMemoryRequirements* pMemoryRequirements)
1267 {
1268 RADV_FROM_HANDLE(radv_image, image, _image);
1269
1270 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1271
1272 pMemoryRequirements->size = image->size;
1273 pMemoryRequirements->alignment = image->alignment;
1274 }
1275
1276 void radv_GetImageSparseMemoryRequirements(
1277 VkDevice device,
1278 VkImage image,
1279 uint32_t* pSparseMemoryRequirementCount,
1280 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1281 {
1282 stub();
1283 }
1284
1285 void radv_GetDeviceMemoryCommitment(
1286 VkDevice device,
1287 VkDeviceMemory memory,
1288 VkDeviceSize* pCommittedMemoryInBytes)
1289 {
1290 *pCommittedMemoryInBytes = 0;
1291 }
1292
1293 VkResult radv_BindBufferMemory(
1294 VkDevice device,
1295 VkBuffer _buffer,
1296 VkDeviceMemory _memory,
1297 VkDeviceSize memoryOffset)
1298 {
1299 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1300 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1301
1302 if (mem) {
1303 buffer->bo = mem->bo;
1304 buffer->offset = memoryOffset;
1305 } else {
1306 buffer->bo = NULL;
1307 buffer->offset = 0;
1308 }
1309
1310 return VK_SUCCESS;
1311 }
1312
1313 VkResult radv_BindImageMemory(
1314 VkDevice device,
1315 VkImage _image,
1316 VkDeviceMemory _memory,
1317 VkDeviceSize memoryOffset)
1318 {
1319 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1320 RADV_FROM_HANDLE(radv_image, image, _image);
1321
1322 if (mem) {
1323 image->bo = mem->bo;
1324 image->offset = memoryOffset;
1325 } else {
1326 image->bo = NULL;
1327 image->offset = 0;
1328 }
1329
1330 return VK_SUCCESS;
1331 }
1332
1333 VkResult radv_QueueBindSparse(
1334 VkQueue queue,
1335 uint32_t bindInfoCount,
1336 const VkBindSparseInfo* pBindInfo,
1337 VkFence fence)
1338 {
1339 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1340 }
1341
1342 VkResult radv_CreateFence(
1343 VkDevice _device,
1344 const VkFenceCreateInfo* pCreateInfo,
1345 const VkAllocationCallbacks* pAllocator,
1346 VkFence* pFence)
1347 {
1348 RADV_FROM_HANDLE(radv_device, device, _device);
1349 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1350 sizeof(*fence), 8,
1351 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1352
1353 if (!fence)
1354 return VK_ERROR_OUT_OF_HOST_MEMORY;
1355
1356 memset(fence, 0, sizeof(*fence));
1357 fence->submitted = false;
1358 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1359 fence->fence = device->ws->create_fence();
1360 if (!fence->fence) {
1361 vk_free2(&device->alloc, pAllocator, fence);
1362 return VK_ERROR_OUT_OF_HOST_MEMORY;
1363 }
1364
1365 *pFence = radv_fence_to_handle(fence);
1366
1367 return VK_SUCCESS;
1368 }
1369
1370 void radv_DestroyFence(
1371 VkDevice _device,
1372 VkFence _fence,
1373 const VkAllocationCallbacks* pAllocator)
1374 {
1375 RADV_FROM_HANDLE(radv_device, device, _device);
1376 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1377
1378 if (!fence)
1379 return;
1380 device->ws->destroy_fence(fence->fence);
1381 vk_free2(&device->alloc, pAllocator, fence);
1382 }
1383
1384 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1385 {
1386 uint64_t current_time;
1387 struct timespec tv;
1388
1389 clock_gettime(CLOCK_MONOTONIC, &tv);
1390 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1391
1392 timeout = MIN2(UINT64_MAX - current_time, timeout);
1393
1394 return current_time + timeout;
1395 }
1396
1397 VkResult radv_WaitForFences(
1398 VkDevice _device,
1399 uint32_t fenceCount,
1400 const VkFence* pFences,
1401 VkBool32 waitAll,
1402 uint64_t timeout)
1403 {
1404 RADV_FROM_HANDLE(radv_device, device, _device);
1405 timeout = radv_get_absolute_timeout(timeout);
1406
1407 if (!waitAll && fenceCount > 1) {
1408 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1409 }
1410
1411 for (uint32_t i = 0; i < fenceCount; ++i) {
1412 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1413 bool expired = false;
1414
1415 if (fence->signalled)
1416 continue;
1417
1418 if (!fence->submitted)
1419 return VK_TIMEOUT;
1420
1421 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1422 if (!expired)
1423 return VK_TIMEOUT;
1424
1425 fence->signalled = true;
1426 }
1427
1428 return VK_SUCCESS;
1429 }
1430
1431 VkResult radv_ResetFences(VkDevice device,
1432 uint32_t fenceCount,
1433 const VkFence *pFences)
1434 {
1435 for (unsigned i = 0; i < fenceCount; ++i) {
1436 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1437 fence->submitted = fence->signalled = false;
1438 }
1439
1440 return VK_SUCCESS;
1441 }
1442
1443 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1444 {
1445 RADV_FROM_HANDLE(radv_device, device, _device);
1446 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1447
1448 if (fence->signalled)
1449 return VK_SUCCESS;
1450 if (!fence->submitted)
1451 return VK_NOT_READY;
1452
1453 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1454 return VK_NOT_READY;
1455
1456 return VK_SUCCESS;
1457 }
1458
1459
1460 // Queue semaphore functions
1461
1462 VkResult radv_CreateSemaphore(
1463 VkDevice _device,
1464 const VkSemaphoreCreateInfo* pCreateInfo,
1465 const VkAllocationCallbacks* pAllocator,
1466 VkSemaphore* pSemaphore)
1467 {
1468 RADV_FROM_HANDLE(radv_device, device, _device);
1469 struct radeon_winsys_sem *sem;
1470
1471 sem = device->ws->create_sem(device->ws);
1472 if (!sem)
1473 return VK_ERROR_OUT_OF_HOST_MEMORY;
1474
1475 *pSemaphore = (VkSemaphore)sem;
1476 return VK_SUCCESS;
1477 }
1478
1479 void radv_DestroySemaphore(
1480 VkDevice _device,
1481 VkSemaphore _semaphore,
1482 const VkAllocationCallbacks* pAllocator)
1483 {
1484 RADV_FROM_HANDLE(radv_device, device, _device);
1485 struct radeon_winsys_sem *sem;
1486 if (!_semaphore)
1487 return;
1488
1489 sem = (struct radeon_winsys_sem *)_semaphore;
1490 device->ws->destroy_sem(sem);
1491 }
1492
1493 VkResult radv_CreateEvent(
1494 VkDevice _device,
1495 const VkEventCreateInfo* pCreateInfo,
1496 const VkAllocationCallbacks* pAllocator,
1497 VkEvent* pEvent)
1498 {
1499 RADV_FROM_HANDLE(radv_device, device, _device);
1500 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
1501 sizeof(*event), 8,
1502 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1503
1504 if (!event)
1505 return VK_ERROR_OUT_OF_HOST_MEMORY;
1506
1507 event->bo = device->ws->buffer_create(device->ws, 8, 8,
1508 RADEON_DOMAIN_GTT,
1509 RADEON_FLAG_CPU_ACCESS);
1510 if (!event->bo) {
1511 vk_free2(&device->alloc, pAllocator, event);
1512 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1513 }
1514
1515 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
1516
1517 *pEvent = radv_event_to_handle(event);
1518
1519 return VK_SUCCESS;
1520 }
1521
1522 void radv_DestroyEvent(
1523 VkDevice _device,
1524 VkEvent _event,
1525 const VkAllocationCallbacks* pAllocator)
1526 {
1527 RADV_FROM_HANDLE(radv_device, device, _device);
1528 RADV_FROM_HANDLE(radv_event, event, _event);
1529
1530 if (!event)
1531 return;
1532 device->ws->buffer_destroy(event->bo);
1533 vk_free2(&device->alloc, pAllocator, event);
1534 }
1535
1536 VkResult radv_GetEventStatus(
1537 VkDevice _device,
1538 VkEvent _event)
1539 {
1540 RADV_FROM_HANDLE(radv_event, event, _event);
1541
1542 if (*event->map == 1)
1543 return VK_EVENT_SET;
1544 return VK_EVENT_RESET;
1545 }
1546
1547 VkResult radv_SetEvent(
1548 VkDevice _device,
1549 VkEvent _event)
1550 {
1551 RADV_FROM_HANDLE(radv_event, event, _event);
1552 *event->map = 1;
1553
1554 return VK_SUCCESS;
1555 }
1556
1557 VkResult radv_ResetEvent(
1558 VkDevice _device,
1559 VkEvent _event)
1560 {
1561 RADV_FROM_HANDLE(radv_event, event, _event);
1562 *event->map = 0;
1563
1564 return VK_SUCCESS;
1565 }
1566
1567 VkResult radv_CreateBuffer(
1568 VkDevice _device,
1569 const VkBufferCreateInfo* pCreateInfo,
1570 const VkAllocationCallbacks* pAllocator,
1571 VkBuffer* pBuffer)
1572 {
1573 RADV_FROM_HANDLE(radv_device, device, _device);
1574 struct radv_buffer *buffer;
1575
1576 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1577
1578 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1579 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1580 if (buffer == NULL)
1581 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1582
1583 buffer->size = pCreateInfo->size;
1584 buffer->usage = pCreateInfo->usage;
1585 buffer->bo = NULL;
1586 buffer->offset = 0;
1587
1588 *pBuffer = radv_buffer_to_handle(buffer);
1589
1590 return VK_SUCCESS;
1591 }
1592
1593 void radv_DestroyBuffer(
1594 VkDevice _device,
1595 VkBuffer _buffer,
1596 const VkAllocationCallbacks* pAllocator)
1597 {
1598 RADV_FROM_HANDLE(radv_device, device, _device);
1599 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1600
1601 if (!buffer)
1602 return;
1603
1604 vk_free2(&device->alloc, pAllocator, buffer);
1605 }
1606
1607 static inline unsigned
1608 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
1609 {
1610 if (stencil)
1611 return image->surface.stencil_tiling_index[level];
1612 else
1613 return image->surface.tiling_index[level];
1614 }
1615
1616 static void
1617 radv_initialise_color_surface(struct radv_device *device,
1618 struct radv_color_buffer_info *cb,
1619 struct radv_image_view *iview)
1620 {
1621 const struct vk_format_description *desc;
1622 unsigned ntype, format, swap, endian;
1623 unsigned blend_clamp = 0, blend_bypass = 0;
1624 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
1625 uint64_t va;
1626 const struct radeon_surf *surf = &iview->image->surface;
1627 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
1628
1629 desc = vk_format_description(iview->vk_format);
1630
1631 memset(cb, 0, sizeof(*cb));
1632
1633 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1634 va += level_info->offset;
1635 cb->cb_color_base = va >> 8;
1636
1637 /* CMASK variables */
1638 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1639 va += iview->image->cmask.offset;
1640 cb->cb_color_cmask = va >> 8;
1641 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
1642
1643 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1644 va += iview->image->dcc_offset;
1645 cb->cb_dcc_base = va >> 8;
1646
1647 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
1648 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
1649 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
1650
1651 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
1652 pitch_tile_max = level_info->nblk_x / 8 - 1;
1653 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1654 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
1655
1656 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1657 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1658
1659 /* Intensity is implemented as Red, so treat it that way. */
1660 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
1661 S_028C74_TILE_MODE_INDEX(tile_mode_index);
1662
1663 if (iview->image->samples > 1) {
1664 unsigned log_samples = util_logbase2(iview->image->samples);
1665
1666 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1667 S_028C74_NUM_FRAGMENTS(log_samples);
1668 }
1669
1670 if (iview->image->fmask.size) {
1671 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
1672 if (device->physical_device->rad_info.chip_class >= CIK)
1673 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
1674 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
1675 cb->cb_color_fmask = va >> 8;
1676 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
1677 } else {
1678 /* This must be set for fast clear to work without FMASK. */
1679 if (device->physical_device->rad_info.chip_class >= CIK)
1680 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1681 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1682 cb->cb_color_fmask = cb->cb_color_base;
1683 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1684 }
1685
1686 ntype = radv_translate_color_numformat(iview->vk_format,
1687 desc,
1688 vk_format_get_first_non_void_channel(iview->vk_format));
1689 format = radv_translate_colorformat(iview->vk_format);
1690 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
1691 radv_finishme("Illegal color\n");
1692 swap = radv_translate_colorswap(iview->vk_format, FALSE);
1693 endian = radv_colorformat_endian_swap(format);
1694
1695 /* blend clamp should be set for all NORM/SRGB types */
1696 if (ntype == V_028C70_NUMBER_UNORM ||
1697 ntype == V_028C70_NUMBER_SNORM ||
1698 ntype == V_028C70_NUMBER_SRGB)
1699 blend_clamp = 1;
1700
1701 /* set blend bypass according to docs if SINT/UINT or
1702 8/24 COLOR variants */
1703 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1704 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1705 format == V_028C70_COLOR_X24_8_32_FLOAT) {
1706 blend_clamp = 0;
1707 blend_bypass = 1;
1708 }
1709 #if 0
1710 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
1711 (format == V_028C70_COLOR_8 ||
1712 format == V_028C70_COLOR_8_8 ||
1713 format == V_028C70_COLOR_8_8_8_8))
1714 ->color_is_int8 = true;
1715 #endif
1716 cb->cb_color_info = S_028C70_FORMAT(format) |
1717 S_028C70_COMP_SWAP(swap) |
1718 S_028C70_BLEND_CLAMP(blend_clamp) |
1719 S_028C70_BLEND_BYPASS(blend_bypass) |
1720 S_028C70_SIMPLE_FLOAT(1) |
1721 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
1722 ntype != V_028C70_NUMBER_SNORM &&
1723 ntype != V_028C70_NUMBER_SRGB &&
1724 format != V_028C70_COLOR_8_24 &&
1725 format != V_028C70_COLOR_24_8) |
1726 S_028C70_NUMBER_TYPE(ntype) |
1727 S_028C70_ENDIAN(endian);
1728 if (iview->image->samples > 1)
1729 if (iview->image->fmask.size)
1730 cb->cb_color_info |= S_028C70_COMPRESSION(1);
1731
1732 if (iview->image->cmask.size &&
1733 (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
1734 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
1735
1736 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
1737 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
1738
1739 if (device->physical_device->rad_info.chip_class >= VI) {
1740 unsigned max_uncompressed_block_size = 2;
1741 if (iview->image->samples > 1) {
1742 if (iview->image->surface.bpe == 1)
1743 max_uncompressed_block_size = 0;
1744 else if (iview->image->surface.bpe == 2)
1745 max_uncompressed_block_size = 1;
1746 }
1747
1748 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1749 S_028C78_INDEPENDENT_64B_BLOCKS(1);
1750 }
1751
1752 /* This must be set for fast clear to work without FMASK. */
1753 if (!iview->image->fmask.size &&
1754 device->physical_device->rad_info.chip_class == SI) {
1755 unsigned bankh = util_logbase2(iview->image->surface.bankh);
1756 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1757 }
1758 }
1759
1760 static void
1761 radv_initialise_ds_surface(struct radv_device *device,
1762 struct radv_ds_buffer_info *ds,
1763 struct radv_image_view *iview)
1764 {
1765 unsigned level = iview->base_mip;
1766 unsigned format;
1767 uint64_t va, s_offs, z_offs;
1768 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
1769 memset(ds, 0, sizeof(*ds));
1770 switch (iview->vk_format) {
1771 case VK_FORMAT_D24_UNORM_S8_UINT:
1772 case VK_FORMAT_X8_D24_UNORM_PACK32:
1773 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1774 ds->offset_scale = 2.0f;
1775 break;
1776 case VK_FORMAT_D16_UNORM:
1777 case VK_FORMAT_D16_UNORM_S8_UINT:
1778 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1779 ds->offset_scale = 4.0f;
1780 break;
1781 case VK_FORMAT_D32_SFLOAT:
1782 case VK_FORMAT_D32_SFLOAT_S8_UINT:
1783 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1784 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1785 ds->offset_scale = 1.0f;
1786 break;
1787 default:
1788 break;
1789 }
1790
1791 format = radv_translate_dbformat(iview->vk_format);
1792 if (format == V_028040_Z_INVALID) {
1793 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
1794 }
1795
1796 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1797 s_offs = z_offs = va;
1798 z_offs += iview->image->surface.level[level].offset;
1799 s_offs += iview->image->surface.stencil_level[level].offset;
1800
1801 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
1802 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
1803 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
1804 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1805 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
1806
1807 if (iview->image->samples > 1)
1808 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
1809
1810 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
1811 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
1812 else
1813 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1814
1815 if (device->physical_device->rad_info.chip_class >= CIK) {
1816 struct radeon_info *info = &device->physical_device->rad_info;
1817 unsigned tiling_index = iview->image->surface.tiling_index[level];
1818 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
1819 unsigned macro_index = iview->image->surface.macro_tile_index;
1820 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
1821 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
1822 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
1823
1824 ds->db_depth_info |=
1825 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
1826 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
1827 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
1828 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
1829 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
1830 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
1831 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
1832 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
1833 } else {
1834 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
1835 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1836 tile_mode_index = si_tile_mode_index(iview->image, level, true);
1837 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1838 }
1839
1840 if (iview->image->htile.size && !level) {
1841 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
1842 S_028040_ALLOW_EXPCLEAR(1);
1843
1844 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
1845 /* Workaround: For a not yet understood reason, the
1846 * combination of MSAA, fast stencil clear and stencil
1847 * decompress messes with subsequent stencil buffer
1848 * uses. Problem was reproduced on Verde, Bonaire,
1849 * Tonga, and Carrizo.
1850 *
1851 * Disabling EXPCLEAR works around the problem.
1852 *
1853 * Check piglit's arb_texture_multisample-stencil-clear
1854 * test if you want to try changing this.
1855 */
1856 if (iview->image->samples <= 1)
1857 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
1858 } else
1859 /* Use all of the htile_buffer for depth if there's no stencil. */
1860 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
1861
1862 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
1863 iview->image->htile.offset;
1864 ds->db_htile_data_base = va >> 8;
1865 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
1866 } else {
1867 ds->db_htile_data_base = 0;
1868 ds->db_htile_surface = 0;
1869 }
1870
1871 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
1872 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
1873
1874 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
1875 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
1876 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
1877 }
1878
1879 VkResult radv_CreateFramebuffer(
1880 VkDevice _device,
1881 const VkFramebufferCreateInfo* pCreateInfo,
1882 const VkAllocationCallbacks* pAllocator,
1883 VkFramebuffer* pFramebuffer)
1884 {
1885 RADV_FROM_HANDLE(radv_device, device, _device);
1886 struct radv_framebuffer *framebuffer;
1887
1888 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
1889
1890 size_t size = sizeof(*framebuffer) +
1891 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
1892 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
1893 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1894 if (framebuffer == NULL)
1895 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1896
1897 framebuffer->attachment_count = pCreateInfo->attachmentCount;
1898 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
1899 VkImageView _iview = pCreateInfo->pAttachments[i];
1900 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
1901 framebuffer->attachments[i].attachment = iview;
1902 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
1903 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
1904 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1905 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
1906 }
1907 }
1908
1909 framebuffer->width = pCreateInfo->width;
1910 framebuffer->height = pCreateInfo->height;
1911 framebuffer->layers = pCreateInfo->layers;
1912
1913 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
1914 return VK_SUCCESS;
1915 }
1916
1917 void radv_DestroyFramebuffer(
1918 VkDevice _device,
1919 VkFramebuffer _fb,
1920 const VkAllocationCallbacks* pAllocator)
1921 {
1922 RADV_FROM_HANDLE(radv_device, device, _device);
1923 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
1924
1925 if (!fb)
1926 return;
1927 vk_free2(&device->alloc, pAllocator, fb);
1928 }
1929
1930 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
1931 {
1932 switch (address_mode) {
1933 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
1934 return V_008F30_SQ_TEX_WRAP;
1935 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1936 return V_008F30_SQ_TEX_MIRROR;
1937 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1938 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1939 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
1940 return V_008F30_SQ_TEX_CLAMP_BORDER;
1941 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1942 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1943 default:
1944 unreachable("illegal tex wrap mode");
1945 break;
1946 }
1947 }
1948
1949 static unsigned
1950 radv_tex_compare(VkCompareOp op)
1951 {
1952 switch (op) {
1953 case VK_COMPARE_OP_NEVER:
1954 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1955 case VK_COMPARE_OP_LESS:
1956 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1957 case VK_COMPARE_OP_EQUAL:
1958 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1959 case VK_COMPARE_OP_LESS_OR_EQUAL:
1960 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1961 case VK_COMPARE_OP_GREATER:
1962 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1963 case VK_COMPARE_OP_NOT_EQUAL:
1964 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1965 case VK_COMPARE_OP_GREATER_OR_EQUAL:
1966 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1967 case VK_COMPARE_OP_ALWAYS:
1968 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1969 default:
1970 unreachable("illegal compare mode");
1971 break;
1972 }
1973 }
1974
1975 static unsigned
1976 radv_tex_filter(VkFilter filter, unsigned max_ansio)
1977 {
1978 switch (filter) {
1979 case VK_FILTER_NEAREST:
1980 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
1981 V_008F38_SQ_TEX_XY_FILTER_POINT);
1982 case VK_FILTER_LINEAR:
1983 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
1984 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
1985 case VK_FILTER_CUBIC_IMG:
1986 default:
1987 fprintf(stderr, "illegal texture filter");
1988 return 0;
1989 }
1990 }
1991
1992 static unsigned
1993 radv_tex_mipfilter(VkSamplerMipmapMode mode)
1994 {
1995 switch (mode) {
1996 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
1997 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1998 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
1999 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2000 default:
2001 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2002 }
2003 }
2004
2005 static unsigned
2006 radv_tex_bordercolor(VkBorderColor bcolor)
2007 {
2008 switch (bcolor) {
2009 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2010 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2011 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2012 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2013 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2014 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2015 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2016 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2017 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2018 default:
2019 break;
2020 }
2021 return 0;
2022 }
2023
/*
 * Reduce a maximum anisotropy value to a step in the range 0..4:
 * one step for each of the thresholds 2, 4, 8 and 16 that 'filter'
 * reaches (so <2 -> 0, 2..3 -> 1, 4..7 -> 2, 8..15 -> 3, >=16 -> 4).
 */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned step = 0;
	unsigned threshold;

	for (threshold = 2; threshold <= 16; threshold *= 2) {
		if (filter < threshold)
			break;
		step++;
	}

	return step;
}
2037
2038 static void
2039 radv_init_sampler(struct radv_device *device,
2040 struct radv_sampler *sampler,
2041 const VkSamplerCreateInfo *pCreateInfo)
2042 {
2043 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2044 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2045 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2046 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2047
2048 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2049 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2050 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2051 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2052 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2053 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2054 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2055 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2056 S_008F30_DISABLE_CUBE_WRAP(0) |
2057 S_008F30_COMPAT_MODE(is_vi));
2058 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2059 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2060 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2061 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2062 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2063 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2064 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2065 S_008F38_MIP_POINT_PRECLAMP(1) |
2066 S_008F38_DISABLE_LSB_CEIL(1) |
2067 S_008F38_FILTER_PREC_FIX(1) |
2068 S_008F38_ANISO_OVERRIDE(is_vi));
2069 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2070 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2071 }
2072
2073 VkResult radv_CreateSampler(
2074 VkDevice _device,
2075 const VkSamplerCreateInfo* pCreateInfo,
2076 const VkAllocationCallbacks* pAllocator,
2077 VkSampler* pSampler)
2078 {
2079 RADV_FROM_HANDLE(radv_device, device, _device);
2080 struct radv_sampler *sampler;
2081
2082 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2083
2084 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2085 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2086 if (!sampler)
2087 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2088
2089 radv_init_sampler(device, sampler, pCreateInfo);
2090 *pSampler = radv_sampler_to_handle(sampler);
2091
2092 return VK_SUCCESS;
2093 }
2094
2095 void radv_DestroySampler(
2096 VkDevice _device,
2097 VkSampler _sampler,
2098 const VkAllocationCallbacks* pAllocator)
2099 {
2100 RADV_FROM_HANDLE(radv_device, device, _device);
2101 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2102
2103 if (!sampler)
2104 return;
2105 vk_free2(&device->alloc, pAllocator, sampler);
2106 }
2107
2108
2109 /* vk_icd.h does not declare this function, so we declare it here to
2110 * suppress Wmissing-prototypes.
2111 */
2112 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2113 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2114
2115 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2116 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2117 {
2118 /* For the full details on loader interface versioning, see
2119 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2120 * What follows is a condensed summary, to help you navigate the large and
2121 * confusing official doc.
2122 *
2123 * - Loader interface v0 is incompatible with later versions. We don't
2124 * support it.
2125 *
2126 * - In loader interface v1:
2127 * - The first ICD entrypoint called by the loader is
2128 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2129 * entrypoint.
2130 * - The ICD must statically expose no other Vulkan symbol unless it is
2131 * linked with -Bsymbolic.
2132 * - Each dispatchable Vulkan handle created by the ICD must be
2133 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2134 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2135 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2136 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2137 * such loader-managed surfaces.
2138 *
2139 * - Loader interface v2 differs from v1 in:
2140 * - The first ICD entrypoint called by the loader is
2141 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2142 * statically expose this entrypoint.
2143 *
2144 * - Loader interface v3 differs from v2 in:
2145 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2146 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
2147 * because the loader no longer does so.
2148 */
2149 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2150 return VK_SUCCESS;
2151 }