radv: make device extension setup dynamic
src/amd/vulkan/radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <dlfcn.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "radv_private.h"
#include "util/strtod.h"

#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

struct radv_dispatch_table dtable;

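/* The pipeline cache UUID is derived from the modification times of the radv
 * and LLVM shared objects plus the GPU family, so cached pipelines are
 * invalidated whenever either component is rebuilt or the chip changes. */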
static int
radv_get_function_timestamp(void *ptr, uint32_t *timestamp)
{
	Dl_info info;
	struct stat st;
	if (!dladdr(ptr, &info) || !info.dli_fname) {
		return -1;
	}
	if (stat(info.dli_fname, &st)) {
		return -1;
	}
	*timestamp = st.st_mtim.tv_sec;
	return 0;
}

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char *)uuid + 4, &llvm_timestamp, 4);
	memcpy((char *)uuid + 8, &f, 2);
	snprintf((char *)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

static const VkExtensionProperties instance_extensions[] = {
	{
		.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
		.specVersion = 25,
	},
#ifdef VK_USE_PLATFORM_XCB_KHR
	{
		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
	{
		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
	{
		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
		.specVersion = 5,
	},
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
	{
		.extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
		.specVersion = 68,
	},
	{
		.extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_EXTENSION_NAME,
		.specVersion = 1,
	},
};

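/* Device extensions are collected in a dynamically grown per-physical-device
 * array at init time (see radv_physical_device_init) rather than in a single
 * static table, so extra extensions can be registered conditionally. */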
static VkResult
radv_extensions_register(struct radv_instance *instance,
			 struct radv_extensions *extensions,
			 const VkExtensionProperties *new_ext,
			 uint32_t num_ext)
{
	size_t new_size;
	VkExtensionProperties *new_ptr;

	assert(new_ext && num_ext > 0);

	if (!new_ext)
		return VK_ERROR_INITIALIZATION_FAILED;

	new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
	new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
			     new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

	/* Old array continues to be valid, update nothing */
	if (!new_ptr)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memcpy(&new_ptr[extensions->num_ext], new_ext,
	       num_ext * sizeof(VkExtensionProperties));
	extensions->ext_array = new_ptr;
	extensions->num_ext += num_ext;

	return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
		       struct radv_extensions *extensions)
{
	assert(extensions);

	if (!extensions)
		radv_loge("Attempted to free invalid extension struct\n");

	if (extensions->ext_array)
		vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
		     size_t num_ext,
		     const char *name)
{
	assert(extensions && name);

	for (uint32_t i = 0; i < num_ext; i++) {
		if (strcmp(name, extensions[i].extensionName) == 0)
			return true;
	}

	return false;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  const char *path)
{
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to open %s: %m", path);

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}
	device->ws->query_info(device->ws, &device->rad_info);
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
		radv_finish_wsi(device);
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	result = radv_extensions_register(instance,
					  &device->extensions,
					  common_device_extensions,
					  ARRAY_SIZE(common_device_extensions));
	if (result != VK_SUCCESS)
		goto fail;

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
	device->name = device->rad_info.name;
	close(fd);
	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_extensions_finish(device->instance, &device->extensions);
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

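/* Debug flags accepted, comma-separated, in the RADV_DEBUG environment
 * variable. */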
static const struct debug_control radv_debug_options[] = {
	{"fastclears", RADV_DEBUG_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{NULL, 0}
};

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo *pCreateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkInstance *pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(instance_extensions,
					  ARRAY_SIZE(instance_extensions),
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			     VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(instance, 0, sizeof(*instance));

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (instance->physicalDeviceCount > 0) {
		/* We support at most one physical device. */
		assert(instance->physicalDeviceCount == 1);
		radv_physical_device_finish(&instance->physicalDevice);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t *pPhysicalDeviceCount,
	VkPhysicalDevice *pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		char path[20];
		for (unsigned i = 0; i < 8; i++) {
			snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
			result = radv_physical_device_init(&instance->physicalDevice,
							   instance, path);
			if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}

		if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
			instance->physicalDeviceCount = 0;
		} else if (result == VK_SUCCESS) {
			instance->physicalDeviceCount = 1;
		} else {
			return result;
		}
	}

	/* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
	 * otherwise it's an inout parameter.
	 *
	 * The Vulkan spec (git aaed022) says:
	 *
	 *    pPhysicalDeviceCount is a pointer to an unsigned integer variable
	 *    that is initialized with the number of devices the application is
	 *    prepared to receive handles to. pname:pPhysicalDevices is pointer to
	 *    an array of at least this many VkPhysicalDevice handles [...].
	 *
	 *    Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
	 *    overwrites the contents of the variable pointed to by
	 *    pPhysicalDeviceCount with the number of physical devices in the
	 *    instance; otherwise, vkEnumeratePhysicalDevices overwrites
	 *    pPhysicalDeviceCount with the number of physical handles written to
	 *    pPhysicalDevices.
	 */
	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else if (*pPhysicalDeviceCount >= 1) {
		pPhysicalDevices[0] = radv_physical_device_to_handle(&instance->physicalDevice);
		*pPhysicalDeviceCount = 1;
	} else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
		return VK_INCOMPLETE;
	} else {
		*pPhysicalDeviceCount = 0;
	}

	return VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures *pFeatures)
{
	//   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = false,
		.tessellationShader = false,
		.sampleRateShading = false,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = false,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = false,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = false,
		.shaderStorageImageWriteWithoutFormat = false,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = false,
		.shaderInt64 = false,
		.shaderInt16 = false,
		.variableMultisampleRate = false,
		.inheritedQueries = false,
	};
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;
	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0,
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = 64,
		.maxPerStageDescriptorUniformBuffers = 64,
		.maxPerStageDescriptorStorageBuffers = 64,
		.maxPerStageDescriptorSampledImages = 64,
		.maxPerStageDescriptorStorageImages = 64,
		.maxPerStageDescriptorInputAttachments = 64,
		.maxPerStageResources = 128,
		.maxDescriptorSetSamplers = 256,
		.maxDescriptorSetUniformBuffers = 256,
		.maxDescriptorSetUniformBuffersDynamic = 256,
		.maxDescriptorSetStorageBuffers = 256,
		.maxDescriptorSetStorageBuffersDynamic = 256,
		.maxDescriptorSetSampledImages = 256,
		.maxDescriptorSetStorageImages = 256,
		.maxDescriptorSetInputAttachments = 256,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 0,
		.maxTessellationPatchSize = 0,
		.maxTessellationControlPerVertexInputComponents = 0,
		.maxTessellationControlPerVertexOutputComponents = 0,
		.maxTessellationControlPerPatchOutputComponents = 0,
		.maxTessellationControlTotalOutputComponents = 0,
		.maxTessellationEvaluationInputComponents = 0,
		.maxTessellationEvaluationOutputComponents = 0,
		.maxGeometryShaderInvocations = 32,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = false,
		.timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = VK_MAKE_VERSION(1, 0, 5),
		.driverVersion = 1,
		.vendorID = 0x1002,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
		.limits = limits,
		.sparseProperties = {0}, /* FINISHME: sparse resources not implemented yet. */
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

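/* The general (GFX) queue family is always exposed. On CIK and newer chips
 * that report compute rings, a second compute-only family is exposed as well,
 * unless it was disabled with RADV_DEBUG=nocompute. */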
void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t *pCount,
	VkQueueFamilyProperties *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
				.queueCount = pdevice->rad_info.compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

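/* Four memory types are exposed: device-local VRAM, write-combined GTT,
 * CPU-visible VRAM and cached GTT. They map onto three heaps: CPU-invisible
 * VRAM, the CPU-visible VRAM window and GTT. */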
void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
				 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
				 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};

	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
		.size = physical_device->rad_info.vram_size -
			physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
		.size = physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
		.size = physical_device->rad_info.gart_size,
		.flags = 0,
	};
}

static VkResult
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		int queue_family_index, int idx)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;

	queue->hw_ctx = device->ws->ctx_create(device->ws);
	if (!queue->hw_ctx)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);
}

VkResult radv_CreateDevice(
	VkPhysicalDevice physicalDevice,
	const VkDeviceCreateInfo *pCreateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkDevice *pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(physical_device->extensions.ext_array,
					  physical_device->extensions.num_ext,
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
			   sizeof(*device), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(device, 0, sizeof(*device));

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;

	device->debug_flags = device->instance->debug_flags;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (getenv("RADV_TRACE_FILE")) {
		device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
							     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
		if (!device->trace_bo) {
			/* result still holds VK_SUCCESS from radv_device_init_meta()
			 * here, so set an explicit error before bailing out. */
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}

		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
		if (!device->trace_id_ptr) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
	}

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice _device,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}
	radv_device_finish_meta(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
	const char *pLayerName,
	uint32_t *pPropertyCount,
	VkExtensionProperties *pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = ARRAY_SIZE(instance_extensions);
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
	typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

	if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
	VkPhysicalDevice physicalDevice,
	const char *pLayerName,
	uint32_t *pPropertyCount,
	VkExtensionProperties *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pProperties == NULL) {
		*pPropertyCount = pdevice->extensions.num_ext;
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
	typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

	if (*pPropertyCount < pdevice->extensions.num_ext)
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t *pPropertyCount,
	VkLayerProperties *pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t *pPropertyCount,
	VkLayerProperties *pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice _device,
	uint32_t queueFamilyIndex,
	uint32_t queueIndex,
	VkQueue *pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

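/* When RADV_TRACE_FILE is set, submissions are throttled to one command
 * buffer at a time and a trace id is written from the CS into trace_bo; if
 * the subsequent wait for idle fails (a likely hang), radv_dump_trace()
 * writes the last id that reached the GPU together with an annotated dump of
 * the offending command stream. */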
static void radv_dump_trace(struct radv_device *device,
			    struct radeon_winsys_cs *cs)
{
	const char *filename = getenv("RADV_TRACE_FILE");
	FILE *f = fopen(filename, "w");
	if (!f) {
		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
		return;
	}

	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
	device->ws->cs_dump(cs, f, *device->trace_id_ptr);
	fclose(f);
}

VkResult radv_QueueSubmit(
	VkQueue _queue,
	uint32_t submitCount,
	const VkSubmitInfo *pSubmits,
	VkFence _fence)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);
	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
	int ret;
	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;

	for (uint32_t i = 0; i < submitCount; i++) {
		struct radeon_winsys_cs **cs_array;
		bool can_patch = true;
		uint32_t advance;

		if (!pSubmits[i].commandBufferCount)
			continue;

		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
				  pSubmits[i].commandBufferCount);
		if (!cs_array)
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);
			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

			cs_array[j] = cmd_buffer->cs;
			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
				can_patch = false;
		}

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
			advance = MIN2(max_cs_submission,
				       pSubmits[i].commandBufferCount - j);
			bool b = j == 0;
			bool e = j + advance == pSubmits[i].commandBufferCount;

			if (queue->device->trace_bo)
				*queue->device->trace_id_ptr = 0;

			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
							   pSubmits[i].commandBufferCount,
							   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
							   b ? pSubmits[i].waitSemaphoreCount : 0,
							   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
							   e ? pSubmits[i].signalSemaphoreCount : 0,
							   can_patch, base_fence);

			if (ret) {
				radv_loge("failed to submit CS %d\n", i);
				abort();
			}
			if (queue->device->trace_bo) {
				bool success = queue->device->ws->ctx_wait_idle(
							queue->hw_ctx,
							radv_queue_family_to_ring(
								queue->queue_family_index),
							queue->queue_idx);

				if (!success) { /* Hang */
					radv_dump_trace(queue->device, cs_array[j]);
					abort();
				}
			}
		}
		free(cs_array);
	}

	if (fence) {
		if (!submitCount)
			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
							   &queue->device->empty_cs[queue->queue_family_index],
							   1, NULL, 0, NULL, 0, false, base_fence);

		fence->submitted = true;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
	VkQueue _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
					 radv_queue_family_to_ring(queue->queue_family_index),
					 queue->queue_idx);
	return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
	VkDevice _device)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++) {
			radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
		}
	}
	return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance instance,
	const char *pName)
{
	return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char *pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char *pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice device,
	const char *pName)
{
	return radv_lookup_entrypoint(pName);
}

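/* The application's memoryTypeIndex maps directly onto a radeon domain (VRAM
 * vs. GTT) and buffer flags (CPU access, write-combining); the allocation is
 * padded to a 4 KiB page and aligned to 32 KiB. */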
VkResult radv_AllocateMemory(
	VkDevice _device,
	const VkMemoryAllocateInfo *pAllocateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkDeviceMemory *pMem)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;
	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
		domain = RADEON_DOMAIN_GTT;
	else
		domain = RADEON_DOMAIN_VRAM;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
		flags |= RADEON_FLAG_NO_CPU_ACCESS;
	else
		flags |= RADEON_FLAG_CPU_ACCESS;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
		flags |= RADEON_FLAG_GTT_WC;

	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
					    domain, flags);

	if (!mem->bo) {
		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
		goto fail;
	}
	mem->type_index = pAllocateInfo->memoryTypeIndex;

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}

void radv_FreeMemory(
	VkDevice _device,
	VkDeviceMemory _mem,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

	if (mem == NULL)
		return;

	device->ws->buffer_destroy(mem->bo);
	mem->bo = NULL;

	vk_free2(&device->alloc, pAllocator, mem);
}

VkResult radv_MapMemory(
	VkDevice _device,
	VkDeviceMemory _memory,
	VkDeviceSize offset,
	VkDeviceSize size,
	VkMemoryMapFlags flags,
	void **ppData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL) {
		*ppData = NULL;
		return VK_SUCCESS;
	}

	*ppData = device->ws->buffer_map(mem->bo);
	if (*ppData) {
		*ppData += offset;
		return VK_SUCCESS;
	}

	return VK_ERROR_MEMORY_MAP_FAILED;
}

void radv_UnmapMemory(
	VkDevice _device,
	VkDeviceMemory _memory)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL)
		return;

	device->ws->buffer_unmap(mem->bo);
}

VkResult radv_FlushMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange *pMemoryRanges)
{
	return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange *pMemoryRanges)
{
	return VK_SUCCESS;
}

void radv_GetBufferMemoryRequirements(
	VkDevice device,
	VkBuffer _buffer,
	VkMemoryRequirements *pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = buffer->size;
	pMemoryRequirements->alignment = 16;
}

void radv_GetImageMemoryRequirements(
	VkDevice device,
	VkImage _image,
	VkMemoryRequirements *pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = image->size;
	pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageSparseMemoryRequirements(
	VkDevice device,
	VkImage image,
	uint32_t *pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
{
	stub();
}

void radv_GetDeviceMemoryCommitment(
	VkDevice device,
	VkDeviceMemory memory,
	VkDeviceSize *pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory(
	VkDevice device,
	VkBuffer _buffer,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (mem) {
		buffer->bo = mem->bo;
		buffer->offset = memoryOffset;
	} else {
		buffer->bo = NULL;
		buffer->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
	VkDevice device,
	VkImage _image,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (mem) {
		image->bo = mem->bo;
		image->offset = memoryOffset;
	} else {
		image->bo = NULL;
		image->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueBindSparse(
	VkQueue queue,
	uint32_t bindInfoCount,
	const VkBindSparseInfo *pBindInfo,
	VkFence fence)
{
	stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

VkResult radv_CreateFence(
	VkDevice _device,
	const VkFenceCreateInfo *pCreateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkFence *pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memset(fence, 0, sizeof(*fence));
	fence->submitted = false;
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->fence = device->ws->create_fence();
	if (!fence->fence) {
		vk_free2(&device->alloc, pAllocator, fence);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

void radv_DestroyFence(
	VkDevice _device,
	VkFence _fence,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;
	device->ws->destroy_fence(fence->fence);
	vk_free2(&device->alloc, pAllocator, fence);
}

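/* Vulkan fence waits take a relative timeout, while the winsys wait below
 * expects an absolute CLOCK_MONOTONIC deadline in nanoseconds; the addition
 * is clamped so a timeout of UINT64_MAX ("wait forever") cannot overflow. */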
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time;
	struct timespec tv;

	clock_gettime(CLOCK_MONOTONIC, &tv);
	current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}

VkResult radv_WaitForFences(
	VkDevice _device,
	uint32_t fenceCount,
	const VkFence *pFences,
	VkBool32 waitAll,
	uint64_t timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	timeout = radv_get_absolute_timeout(timeout);

	if (!waitAll && fenceCount > 1) {
		fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
	}

	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		if (fence->signalled)
			continue;

		if (!fence->submitted)
			return VK_TIMEOUT;

		expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
		if (!expired)
			return VK_TIMEOUT;

		fence->signalled = true;
	}

	return VK_SUCCESS;
}

VkResult radv_ResetFences(VkDevice device,
			  uint32_t fenceCount,
			  const VkFence *pFences)
{
	for (unsigned i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		fence->submitted = fence->signalled = false;
	}

	return VK_SUCCESS;
}

VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (fence->signalled)
		return VK_SUCCESS;
	if (!fence->submitted)
		return VK_NOT_READY;

	if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
		return VK_NOT_READY;

	return VK_SUCCESS;
}


// Queue semaphore functions

VkResult radv_CreateSemaphore(
	VkDevice _device,
	const VkSemaphoreCreateInfo *pCreateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkSemaphore *pSemaphore)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_winsys_sem *sem;

	sem = device->ws->create_sem(device->ws);
	if (!sem)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	*pSemaphore = (VkSemaphore)sem;
	return VK_SUCCESS;
}

void radv_DestroySemaphore(
	VkDevice _device,
	VkSemaphore _semaphore,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_winsys_sem *sem;
	if (!_semaphore)
		return;

	sem = (struct radeon_winsys_sem *)_semaphore;
	device->ws->destroy_sem(sem);
}

VkResult radv_CreateEvent(
	VkDevice _device,
	const VkEventCreateInfo *pCreateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkEvent *pEvent)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*event), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!event)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	event->bo = device->ws->buffer_create(device->ws, 8, 8,
					      RADEON_DOMAIN_GTT,
					      RADEON_FLAG_CPU_ACCESS);
	if (!event->bo) {
		vk_free2(&device->alloc, pAllocator, event);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}

	event->map = (uint64_t *)device->ws->buffer_map(event->bo);

	*pEvent = radv_event_to_handle(event);

	return VK_SUCCESS;
}

void radv_DestroyEvent(
	VkDevice _device,
	VkEvent _event,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (!event)
		return;
	device->ws->buffer_destroy(event->bo);
	vk_free2(&device->alloc, pAllocator, event);
}

VkResult radv_GetEventStatus(
	VkDevice _device,
	VkEvent _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (*event->map == 1)
		return VK_EVENT_SET;
	return VK_EVENT_RESET;
}

VkResult radv_SetEvent(
	VkDevice _device,
	VkEvent _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 1;

	return VK_SUCCESS;
}

VkResult radv_ResetEvent(
	VkDevice _device,
	VkEvent _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 0;

	return VK_SUCCESS;
}

VkResult radv_CreateBuffer(
	VkDevice _device,
	const VkBufferCreateInfo *pCreateInfo,
	const VkAllocationCallbacks *pAllocator,
	VkBuffer *pBuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer *buffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

	buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (buffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	buffer->size = pCreateInfo->size;
	buffer->usage = pCreateInfo->usage;
	buffer->bo = NULL;
	buffer->offset = 0;

	*pBuffer = radv_buffer_to_handle(buffer);

	return VK_SUCCESS;
}

void radv_DestroyBuffer(
	VkDevice _device,
	VkBuffer _buffer,
	const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (!buffer)
		return;

	vk_free2(&device->alloc, pAllocator, buffer);
}

static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
	if (stencil)
		return image->surface.stencil_tiling_index[level];
	else
		return image->surface.tiling_index[level];
}

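/* Fill the CB_COLOR* register values (base address, pitch/slice tiling,
 * CMASK/FMASK/DCC state and number format) for a color attachment view. */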
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
	uint64_t va;
	const struct radeon_surf *surf = &iview->image->surface;
	const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += level_info->offset;
	cb->cb_color_base = va >> 8;

	/* CMASK variables */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;
	cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;

	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);

	cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
	pitch_tile_max = level_info->nblk_x / 8 - 1;
	slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
	tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);

	cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
	cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
		S_028C74_TILE_MODE_INDEX(tile_mode_index);

	if (iview->image->samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (iview->image->fmask.size) {
		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		if (device->instance->physicalDevice.rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		if (device->instance->physicalDevice.rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
		cb->cb_color_fmask = cb->cb_color_base;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (iview->image->samples > 1)
		if (iview->image->fmask.size)
			cb->cb_color_info |= S_028C70_COMPRESSION(1);

	if (iview->image->cmask.size &&
	    (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (iview->image->surface.dcc_size && level_info->dcc_enabled)
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	if (device->instance->physicalDevice.rad_info.chip_class >= VI) {
		unsigned max_uncompressed_block_size = 2;
		if (iview->image->samples > 1) {
			if (iview->image->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (iview->image->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
			S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!iview->image->fmask.size &&
	    device->instance->physicalDevice.rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(iview->image->surface.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}
}

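/* Fill the DB_* register values (Z/stencil base addresses, tiling info and
 * HTILE state) for a depth/stencil attachment view. */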
1749 static void
1750 radv_initialise_ds_surface(struct radv_device *device,
1751 struct radv_ds_buffer_info *ds,
1752 struct radv_image_view *iview)
1753 {
1754 unsigned level = iview->base_mip;
1755 unsigned format;
1756 uint64_t va, s_offs, z_offs;
1757 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
1758 memset(ds, 0, sizeof(*ds));
1759 switch (iview->vk_format) {
1760 case VK_FORMAT_D24_UNORM_S8_UINT:
1761 case VK_FORMAT_X8_D24_UNORM_PACK32:
1762 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1763 ds->offset_scale = 2.0f;
1764 break;
1765 case VK_FORMAT_D16_UNORM:
1766 case VK_FORMAT_D16_UNORM_S8_UINT:
1767 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1768 ds->offset_scale = 4.0f;
1769 break;
1770 case VK_FORMAT_D32_SFLOAT:
1771 case VK_FORMAT_D32_SFLOAT_S8_UINT:
1772 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1773 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1774 ds->offset_scale = 1.0f;
1775 break;
1776 default:
1777 break;
1778 }
1779
1780 format = radv_translate_dbformat(iview->vk_format);
1781 if (format == V_028040_Z_INVALID) {
1782 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
1783 }
1784
1785 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1786 s_offs = z_offs = va;
1787 z_offs += iview->image->surface.level[level].offset;
1788 s_offs += iview->image->surface.stencil_level[level].offset;
1789
1790 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
1791 S_028008_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);
1792 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
1793 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
1794
1795 if (iview->image->samples > 1)
1796 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
1797
1798 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
1799 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
1800 else
1801 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
1802
1803 if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
1804 struct radeon_info *info = &device->instance->physicalDevice.rad_info;
1805 unsigned tiling_index = iview->image->surface.tiling_index[level];
1806 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
1807 unsigned macro_index = iview->image->surface.macro_tile_index;
1808 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
1809 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
1810 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
1811
1812 ds->db_depth_info |=
1813 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
1814 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
1815 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
1816 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
1817 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
1818 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
1819 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
1820 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
1821 } else {
1822 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
1823 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1824 tile_mode_index = si_tile_mode_index(iview->image, level, true);
1825 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1826 }
1827
1828 if (iview->image->htile.size && !level) {
1829 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
1830 S_028040_ALLOW_EXPCLEAR(1);
1831
1832 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
1833 /* Workaround: for a reason that is not yet understood,
1834 * the combination of MSAA, fast stencil clear and stencil
1835 * decompress messes with subsequent stencil buffer uses.
1836 * The problem was reproduced on Verde, Bonaire, Tonga,
1837 * and Carrizo.
1838 *
1839 * Disabling EXPCLEAR works around the problem.
1840 *
1841 * Check piglit's arb_texture_multisample-stencil-clear
1842 * test if you want to try changing this.
1843 */
1844 if (iview->image->samples <= 1)
1845 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
1846 } else
1847 /* Use all of the htile_buffer for depth if there's no stencil. */
1848 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
1849
1850 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
1851 iview->image->htile.offset;
1852 ds->db_htile_data_base = va >> 8;
1853 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
1854 } else {
1855 ds->db_htile_data_base = 0;
1856 ds->db_htile_surface = 0;
1857 }
1858
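/* The depth/stencil/HTILE base registers hold byte addresses in
 * 256-byte units, hence the >> 8 shifts here and above.
 */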
1859 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
1860 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
1861
1862 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
1863 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
1864 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
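/* Worked example of the size encoding above, with illustrative numbers
 * only: the DB counts in 8x8 pixel tiles and stores "count - 1", so a
 * mip level with nblk_x = nblk_y = 512 gives
 *
 *   PITCH_TILE_MAX  = 512 / 8 - 1        = 63
 *   HEIGHT_TILE_MAX = 512 / 8 - 1        = 63
 *   SLICE_TILE_MAX  = 512 * 512 / 64 - 1 = 4095
 */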
1865 }
1866
1867 VkResult radv_CreateFramebuffer(
1868 VkDevice _device,
1869 const VkFramebufferCreateInfo* pCreateInfo,
1870 const VkAllocationCallbacks* pAllocator,
1871 VkFramebuffer* pFramebuffer)
1872 {
1873 RADV_FROM_HANDLE(radv_device, device, _device);
1874 struct radv_framebuffer *framebuffer;
1875
1876 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
1877
1878 size_t size = sizeof(*framebuffer) +
1879 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
1880 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
1881 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1882 if (framebuffer == NULL)
1883 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1884
1885 framebuffer->attachment_count = pCreateInfo->attachmentCount;
1886 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
1887 VkImageView _iview = pCreateInfo->pAttachments[i];
1888 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
1889 framebuffer->attachments[i].attachment = iview;
1890 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
1891 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
1892 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1893 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
1894 }
1895 }
1896
1897 framebuffer->width = pCreateInfo->width;
1898 framebuffer->height = pCreateInfo->height;
1899 framebuffer->layers = pCreateInfo->layers;
1900
1901 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
1902 return VK_SUCCESS;
1903 }
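/* A hypothetical application-side sketch, for illustration only: the
 * attachment array below is what radv_CreateFramebuffer() walks to
 * classify each view as color or depth/stencil.  The device, render
 * pass, and image view handles are assumed to exist already.
 */
#if 0
VkFramebufferCreateInfo fb_info = {
	.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
	.renderPass = render_pass,
	.attachmentCount = 2,
	.pAttachments = (VkImageView[]) { color_view, depth_view },
	.width = 1920,
	.height = 1080,
	.layers = 1,
};
VkFramebuffer fb;
VkResult result = vkCreateFramebuffer(device, &fb_info, NULL, &fb);
#endif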
1904
1905 void radv_DestroyFramebuffer(
1906 VkDevice _device,
1907 VkFramebuffer _fb,
1908 const VkAllocationCallbacks* pAllocator)
1909 {
1910 RADV_FROM_HANDLE(radv_device, device, _device);
1911 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
1912
1913 if (!fb)
1914 return;
1915 vk_free2(&device->alloc, pAllocator, fb);
1916 }
1917
1918 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
1919 {
1920 switch (address_mode) {
1921 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
1922 return V_008F30_SQ_TEX_WRAP;
1923 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1924 return V_008F30_SQ_TEX_MIRROR;
1925 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1926 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
1927 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
1928 return V_008F30_SQ_TEX_CLAMP_BORDER;
1929 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1930 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
1931 default:
1932 unreachable("illegal tex wrap mode");
1933 break;
1934 }
1935 }
1936
1937 static unsigned
1938 radv_tex_compare(VkCompareOp op)
1939 {
1940 switch (op) {
1941 case VK_COMPARE_OP_NEVER:
1942 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
1943 case VK_COMPARE_OP_LESS:
1944 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
1945 case VK_COMPARE_OP_EQUAL:
1946 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
1947 case VK_COMPARE_OP_LESS_OR_EQUAL:
1948 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
1949 case VK_COMPARE_OP_GREATER:
1950 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
1951 case VK_COMPARE_OP_NOT_EQUAL:
1952 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
1953 case VK_COMPARE_OP_GREATER_OR_EQUAL:
1954 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
1955 case VK_COMPARE_OP_ALWAYS:
1956 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
1957 default:
1958 unreachable("illegal compare mode");
1959 break;
1960 }
1961 }
1962
1963 static unsigned
1964 radv_tex_filter(VkFilter filter, unsigned max_aniso)
1965 {
1966 switch (filter) {
1967 case VK_FILTER_NEAREST:
1968 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
1969 V_008F38_SQ_TEX_XY_FILTER_POINT);
1970 case VK_FILTER_LINEAR:
1971 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
1972 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
1973 case VK_FILTER_CUBIC_IMG:
1974 default:
1975 fprintf(stderr, "illegal texture filter\n");
1976 return 0;
1977 }
1978 }
1979
1980 static unsigned
1981 radv_tex_mipfilter(VkSamplerMipmapMode mode)
1982 {
1983 switch (mode) {
1984 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
1985 return V_008F38_SQ_TEX_Z_FILTER_POINT;
1986 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
1987 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
1988 default:
1989 return V_008F38_SQ_TEX_Z_FILTER_NONE;
1990 }
1991 }
1992
1993 static unsigned
1994 radv_tex_bordercolor(VkBorderColor bcolor)
1995 {
1996 switch (bcolor) {
1997 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
1998 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
1999 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2000 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2001 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2002 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2003 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2004 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2005 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2006 default:
2007 break;
2008 }
2009 return 0;
2010 }
2011
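/* Map a maxAnisotropy value to the log2-encoded MAX_ANISO_RATIO field:
 * 1x -> 0, 2-3x -> 1, 4-7x -> 2, 8-15x -> 3, 16x -> 4.  For example, a
 * requested maxAnisotropy of 8.0 yields 3, i.e. a 2^3 = 8:1 ratio.
 */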
2012 static unsigned
2013 radv_tex_aniso_filter(unsigned filter)
2014 {
2015 if (filter < 2)
2016 return 0;
2017 if (filter < 4)
2018 return 1;
2019 if (filter < 8)
2020 return 2;
2021 if (filter < 16)
2022 return 3;
2023 return 4;
2024 }
2025
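/* The LOD-related fields below are fixed point with 8 fractional bits,
 * assuming S_FIXED(v, n) scales by 2^n as its name suggests: e.g. a
 * minLod of 2.5 is clamped to [0, 15] and encoded as 2.5 * 256 = 640
 * (0x280).  mipLodBias is clamped to [-16, 16] before encoding.
 */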
2026 static void
2027 radv_init_sampler(struct radv_device *device,
2028 struct radv_sampler *sampler,
2029 const VkSamplerCreateInfo *pCreateInfo)
2030 {
2031 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2032 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2033 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2034 bool is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI);
2035
2036 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2037 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2038 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2039 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2040 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2041 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2042 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2043 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2044 S_008F30_DISABLE_CUBE_WRAP(0) |
2045 S_008F30_COMPAT_MODE(is_vi));
2046 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2047 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2048 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2049 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2050 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2051 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2052 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2053 S_008F38_MIP_POINT_PRECLAMP(1) |
2054 S_008F38_DISABLE_LSB_CEIL(1) |
2055 S_008F38_FILTER_PREC_FIX(1) |
2056 S_008F38_ANISO_OVERRIDE(is_vi));
2057 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2058 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2059 }
2060
2061 VkResult radv_CreateSampler(
2062 VkDevice _device,
2063 const VkSamplerCreateInfo* pCreateInfo,
2064 const VkAllocationCallbacks* pAllocator,
2065 VkSampler* pSampler)
2066 {
2067 RADV_FROM_HANDLE(radv_device, device, _device);
2068 struct radv_sampler *sampler;
2069
2070 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2071
2072 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2073 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2074 if (!sampler)
2075 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2076
2077 radv_init_sampler(device, sampler, pCreateInfo);
2078 *pSampler = radv_sampler_to_handle(sampler);
2079
2080 return VK_SUCCESS;
2081 }
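/* A hypothetical application-side sketch, for illustration only: every
 * field consumed by radv_init_sampler() above originates from this
 * create-info struct.  An existing VkDevice handle is assumed.
 */
#if 0
VkSamplerCreateInfo sampler_info = {
	.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
	.magFilter = VK_FILTER_LINEAR,
	.minFilter = VK_FILTER_LINEAR,
	.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
	.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
	.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
	.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
	.anisotropyEnable = VK_TRUE,
	.maxAnisotropy = 16.0f,      /* radv_tex_aniso_filter() maps this to 4 */
	.minLod = 0.0f,
	.maxLod = VK_LOD_CLAMP_NONE, /* clamped to 15 by radv_init_sampler() */
	.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
};
VkSampler sampler;
VkResult result = vkCreateSampler(device, &sampler_info, NULL, &sampler);
#endif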
2082
2083 void radv_DestroySampler(
2084 VkDevice _device,
2085 VkSampler _sampler,
2086 const VkAllocationCallbacks* pAllocator)
2087 {
2088 RADV_FROM_HANDLE(radv_device, device, _device);
2089 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2090
2091 if (!sampler)
2092 return;
2093 vk_free2(&device->alloc, pAllocator, sampler);
2094 }
2095
2096
2097 /* vk_icd.h does not declare this function, so we declare it here to
2098 * suppress -Wmissing-prototypes.
2099 */
2100 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2101 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2102
2103 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2104 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2105 {
2106 /* For the full details on loader interface versioning, see
2107 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2108 * What follows is a condensed summary, to help you navigate the large and
2109 * confusing official doc.
2110 *
2111 * - Loader interface v0 is incompatible with later versions. We don't
2112 * support it.
2113 *
2114 * - In loader interface v1:
2115 * - The first ICD entrypoint called by the loader is
2116 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2117 * entrypoint.
2118 * - The ICD must statically expose no other Vulkan symbol unless it is
2119 * linked with -Bsymbolic.
2120 * - Each dispatchable Vulkan handle created by the ICD must be
2121 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2122 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2123 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2124 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2125 * such loader-managed surfaces.
2126 *
2127 * - Loader interface v2 differs from v1 in:
2128 * - The first ICD entrypoint called by the loader is
2129 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2130 * statically expose this entrypoint.
2131 *
2132 * - Loader interface v3 differs from v2 in:
2133 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2134 * vkDestroySurfaceKHR(), and all other APIs that use VkSurfaceKHR,
2135 * because the loader no longer does so.
2136 */
2137 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2138 return VK_SUCCESS;
2139 }
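/* A hypothetical, standalone sketch of the loader's side of this
 * handshake, for illustration only.  The ICD library name is an
 * assumption; everything else follows the v2+ contract described in
 * the comment above.
 */
#if 0
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>

typedef VkResult (*negotiate_fn)(uint32_t *pSupportedVersion);

int negotiate_with_icd(void)
{
	void *icd = dlopen("libvulkan_radeon.so", RTLD_NOW | RTLD_LOCAL);
	if (!icd)
		return -1;

	negotiate_fn negotiate = (negotiate_fn)
		dlsym(icd, "vk_icdNegotiateLoaderICDInterfaceVersion");

	/* Pass in the highest version the loader supports; the ICD
	 * lowers it to MIN(loader, ICD), here at most 3. */
	uint32_t version = 3;
	if (negotiate && negotiate(&version) == VK_SUCCESS)
		printf("agreed on loader interface v%u\n", version);

	dlclose(icd);
	return 0;
}
#endif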