radv: add support for some device specific tess information.
[mesa.git] / src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46
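/* Build the pipeline cache UUID from the Mesa and LLVM build timestamps
 * plus the GPU family, so cached shaders are invalidated whenever the
 * compiler stack or the target device changes.
 */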
47 static int
48 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
49 {
50 uint32_t mesa_timestamp, llvm_timestamp;
51 uint16_t f = family;
52 memset(uuid, 0, VK_UUID_SIZE);
53 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
54 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
55 return -1;
56
57 memcpy(uuid, &mesa_timestamp, 4);
58 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
59 memcpy((char*)uuid + 8, &f, 2);
60 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
61 return 0;
62 }
63
64 static const VkExtensionProperties instance_extensions[] = {
65 {
66 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
67 .specVersion = 25,
68 },
69 #ifdef VK_USE_PLATFORM_XCB_KHR
70 {
71 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
72 .specVersion = 6,
73 },
74 #endif
75 #ifdef VK_USE_PLATFORM_XLIB_KHR
76 {
77 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
78 .specVersion = 6,
79 },
80 #endif
81 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
82 {
83 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
84 .specVersion = 5,
85 },
86 #endif
87 {
88 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
89 .specVersion = 1,
90 },
91 };
92
93 static const VkExtensionProperties common_device_extensions[] = {
94 {
95 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
96 .specVersion = 1,
97 },
98 {
99 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
100 .specVersion = 1,
101 },
102 {
103 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
104 .specVersion = 68,
105 },
106 {
107 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
112 .specVersion = 1,
113 },
114 {
115 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
116 .specVersion = 1,
117 },
118 };
119
120 static VkResult
121 radv_extensions_register(struct radv_instance *instance,
122 struct radv_extensions *extensions,
123 const VkExtensionProperties *new_ext,
124 uint32_t num_ext)
125 {
126 size_t new_size;
127 VkExtensionProperties *new_ptr;
128
129 assert(new_ext && num_ext > 0);
130
131 if (!new_ext)
132 return VK_ERROR_INITIALIZATION_FAILED;
133
134 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
135 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
136 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
137
138 /* Old array continues to be valid, update nothing */
139 if (!new_ptr)
140 return VK_ERROR_OUT_OF_HOST_MEMORY;
141
142 memcpy(&new_ptr[extensions->num_ext], new_ext,
143 num_ext * sizeof(VkExtensionProperties));
144 extensions->ext_array = new_ptr;
145 extensions->num_ext += num_ext;
146
147 return VK_SUCCESS;
148 }
149
150 static void
151 radv_extensions_finish(struct radv_instance *instance,
152 struct radv_extensions *extensions)
153 {
154 assert(extensions);
155
156 if (!extensions)
157 		radv_loge("Attempted to free invalid extension struct\n");
158
159 if (extensions->ext_array)
160 vk_free(&instance->alloc, extensions->ext_array);
161 }
162
163 static bool
164 is_extension_enabled(const VkExtensionProperties *extensions,
165 size_t num_ext,
166 const char *name)
167 {
168 assert(extensions && name);
169
170 for (uint32_t i = 0; i < num_ext; i++) {
171 if (strcmp(name, extensions[i].extensionName) == 0)
172 return true;
173 }
174
175 return false;
176 }
177
178 static VkResult
179 radv_physical_device_init(struct radv_physical_device *device,
180 struct radv_instance *instance,
181 const char *path)
182 {
183 VkResult result;
184 drmVersionPtr version;
185 int fd;
186
187 fd = open(path, O_RDWR | O_CLOEXEC);
188 if (fd < 0)
189 return VK_ERROR_INCOMPATIBLE_DRIVER;
190
191 version = drmGetVersion(fd);
192 if (!version) {
193 close(fd);
194 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
195 "failed to get version %s: %m", path);
196 }
197
198 if (strcmp(version->name, "amdgpu")) {
199 drmFreeVersion(version);
200 close(fd);
201 return VK_ERROR_INCOMPATIBLE_DRIVER;
202 }
203 drmFreeVersion(version);
204
205 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
206 device->instance = instance;
207 assert(strlen(path) < ARRAY_SIZE(device->path));
208 strncpy(device->path, path, ARRAY_SIZE(device->path));
209
210 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
211 if (!device->ws) {
212 result = VK_ERROR_INCOMPATIBLE_DRIVER;
213 goto fail;
214 }
215
216 device->local_fd = fd;
217 device->ws->query_info(device->ws, &device->rad_info);
218 result = radv_init_wsi(device);
219 if (result != VK_SUCCESS) {
220 device->ws->destroy(device->ws);
221 goto fail;
222 }
223
224 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
225 radv_finish_wsi(device);
226 device->ws->destroy(device->ws);
227 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
228 "cannot generate UUID");
229 goto fail;
230 }
231
232 result = radv_extensions_register(instance,
233 &device->extensions,
234 common_device_extensions,
235 ARRAY_SIZE(common_device_extensions));
236 if (result != VK_SUCCESS)
237 goto fail;
238
239 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
240 device->name = device->rad_info.name;
241
242 return VK_SUCCESS;
243
244 fail:
245 close(fd);
246 return result;
247 }
248
249 static void
250 radv_physical_device_finish(struct radv_physical_device *device)
251 {
252 radv_extensions_finish(device->instance, &device->extensions);
253 radv_finish_wsi(device);
254 device->ws->destroy(device->ws);
255 close(device->local_fd);
256 }
257
258
259 static void *
260 default_alloc_func(void *pUserData, size_t size, size_t align,
261 VkSystemAllocationScope allocationScope)
262 {
263 return malloc(size);
264 }
265
266 static void *
267 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
268 size_t align, VkSystemAllocationScope allocationScope)
269 {
270 return realloc(pOriginal, size);
271 }
272
273 static void
274 default_free_func(void *pUserData, void *pMemory)
275 {
276 free(pMemory);
277 }
278
279 static const VkAllocationCallbacks default_alloc = {
280 .pUserData = NULL,
281 .pfnAllocation = default_alloc_func,
282 .pfnReallocation = default_realloc_func,
283 .pfnFree = default_free_func,
284 };
285
286 static const struct debug_control radv_debug_options[] = {
287 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
288 {"nodcc", RADV_DEBUG_NO_DCC},
289 {"shaders", RADV_DEBUG_DUMP_SHADERS},
290 {"nocache", RADV_DEBUG_NO_CACHE},
291 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
292 {"nohiz", RADV_DEBUG_NO_HIZ},
293 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
294 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
295 {"allbos", RADV_DEBUG_ALL_BOS},
296 {"noibs", RADV_DEBUG_NO_IBS},
297 {NULL, 0}
298 };
299
300 VkResult radv_CreateInstance(
301 const VkInstanceCreateInfo* pCreateInfo,
302 const VkAllocationCallbacks* pAllocator,
303 VkInstance* pInstance)
304 {
305 struct radv_instance *instance;
306
307 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
308
309 uint32_t client_version;
310 if (pCreateInfo->pApplicationInfo &&
311 pCreateInfo->pApplicationInfo->apiVersion != 0) {
312 client_version = pCreateInfo->pApplicationInfo->apiVersion;
313 } else {
314 client_version = VK_MAKE_VERSION(1, 0, 0);
315 }
316
317 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
318 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
319 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
320 "Client requested version %d.%d.%d",
321 VK_VERSION_MAJOR(client_version),
322 VK_VERSION_MINOR(client_version),
323 VK_VERSION_PATCH(client_version));
324 }
325
326 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
327 if (!is_extension_enabled(instance_extensions,
328 ARRAY_SIZE(instance_extensions),
329 pCreateInfo->ppEnabledExtensionNames[i]))
330 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
331 }
332
333 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
334 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
335 if (!instance)
336 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
337
338 memset(instance, 0, sizeof(*instance));
339
340 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
341
342 if (pAllocator)
343 instance->alloc = *pAllocator;
344 else
345 instance->alloc = default_alloc;
346
347 instance->apiVersion = client_version;
348 instance->physicalDeviceCount = -1;
349
350 _mesa_locale_init();
351
352 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
353
354 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
355 radv_debug_options);
356
357 *pInstance = radv_instance_to_handle(instance);
358
359 return VK_SUCCESS;
360 }
361
362 void radv_DestroyInstance(
363 VkInstance _instance,
364 const VkAllocationCallbacks* pAllocator)
365 {
366 RADV_FROM_HANDLE(radv_instance, instance, _instance);
367
368 if (!instance)
369 return;
370
371 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
372 radv_physical_device_finish(instance->physicalDevices + i);
373 }
374
375 VG(VALGRIND_DESTROY_MEMPOOL(instance));
376
377 _mesa_locale_fini();
378
379 vk_free(&instance->alloc, instance);
380 }
381
382 static VkResult
383 radv_enumerate_devices(struct radv_instance *instance)
384 {
385 	/* TODO: Check for more devices? */
386 drmDevicePtr devices[8];
387 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
388 int max_devices;
389
390 instance->physicalDeviceCount = 0;
391
392 	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
393 if (max_devices < 1)
394 return VK_ERROR_INCOMPATIBLE_DRIVER;
395
396 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
397 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
398 devices[i]->bustype == DRM_BUS_PCI &&
399 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
400
401 result = radv_physical_device_init(instance->physicalDevices +
402 instance->physicalDeviceCount,
403 instance,
404 devices[i]->nodes[DRM_NODE_RENDER]);
405 if (result == VK_SUCCESS)
406 ++instance->physicalDeviceCount;
407 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
408 return result;
409 }
410 }
411 return result;
412 }
413
414 VkResult radv_EnumeratePhysicalDevices(
415 VkInstance _instance,
416 uint32_t* pPhysicalDeviceCount,
417 VkPhysicalDevice* pPhysicalDevices)
418 {
419 RADV_FROM_HANDLE(radv_instance, instance, _instance);
420 VkResult result;
421
422 if (instance->physicalDeviceCount < 0) {
423 result = radv_enumerate_devices(instance);
424 if (result != VK_SUCCESS &&
425 result != VK_ERROR_INCOMPATIBLE_DRIVER)
426 return result;
427 }
428
429 if (!pPhysicalDevices) {
430 *pPhysicalDeviceCount = instance->physicalDeviceCount;
431 } else {
432 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
433 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
434 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
435 }
436
437 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
438 : VK_SUCCESS;
439 }
440
441 void radv_GetPhysicalDeviceFeatures(
442 VkPhysicalDevice physicalDevice,
443 VkPhysicalDeviceFeatures* pFeatures)
444 {
445 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
446
447 memset(pFeatures, 0, sizeof(*pFeatures));
448
449 *pFeatures = (VkPhysicalDeviceFeatures) {
450 .robustBufferAccess = true,
451 .fullDrawIndexUint32 = true,
452 .imageCubeArray = true,
453 .independentBlend = true,
454 .geometryShader = true,
455 .tessellationShader = false,
456 .sampleRateShading = false,
457 .dualSrcBlend = true,
458 .logicOp = true,
459 .multiDrawIndirect = true,
460 .drawIndirectFirstInstance = true,
461 .depthClamp = true,
462 .depthBiasClamp = true,
463 .fillModeNonSolid = true,
464 .depthBounds = true,
465 .wideLines = true,
466 .largePoints = true,
467 .alphaToOne = true,
468 .multiViewport = true,
469 .samplerAnisotropy = true,
470 .textureCompressionETC2 = false,
471 .textureCompressionASTC_LDR = false,
472 .textureCompressionBC = true,
473 .occlusionQueryPrecise = true,
474 .pipelineStatisticsQuery = false,
475 .vertexPipelineStoresAndAtomics = true,
476 .fragmentStoresAndAtomics = true,
477 .shaderTessellationAndGeometryPointSize = true,
478 .shaderImageGatherExtended = true,
479 .shaderStorageImageExtendedFormats = true,
480 .shaderStorageImageMultisample = false,
481 .shaderUniformBufferArrayDynamicIndexing = true,
482 .shaderSampledImageArrayDynamicIndexing = true,
483 .shaderStorageBufferArrayDynamicIndexing = true,
484 .shaderStorageImageArrayDynamicIndexing = true,
485 .shaderStorageImageReadWithoutFormat = true,
486 .shaderStorageImageWriteWithoutFormat = true,
487 .shaderClipDistance = true,
488 .shaderCullDistance = true,
489 .shaderFloat64 = true,
490 .shaderInt64 = false,
491 .shaderInt16 = false,
492 .sparseBinding = true,
493 .variableMultisampleRate = false,
494 .inheritedQueries = false,
495 };
496 }
497
498 void radv_GetPhysicalDeviceFeatures2KHR(
499 VkPhysicalDevice physicalDevice,
500 VkPhysicalDeviceFeatures2KHR *pFeatures)
501 {
502 return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
503 }
504
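/* Parse the Mesa VERSION string (e.g. "17.1.0-devel") into a packed
 * Vulkan version number. For -devel builds the version is decremented
 * so a development build never claims a release it does not contain.
 */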
505 static uint32_t radv_get_driver_version(void)
506 {
507 	const char *minor_string = strchr(VERSION, '.');
508 	const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
509 int major = atoi(VERSION);
510 int minor = minor_string ? atoi(minor_string + 1) : 0;
511 int patch = patch_string ? atoi(patch_string + 1) : 0;
512 if (strstr(VERSION, "devel")) {
513 if (patch == 0) {
514 patch = 99;
515 if (minor == 0) {
516 minor = 99;
517 --major;
518 } else
519 --minor;
520 } else
521 --patch;
522 }
523 uint32_t version = VK_MAKE_VERSION(major, minor, patch);
524 return version;
525 }
526
527 void radv_GetPhysicalDeviceProperties(
528 VkPhysicalDevice physicalDevice,
529 VkPhysicalDeviceProperties* pProperties)
530 {
531 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
532 VkSampleCountFlags sample_counts = 0xf;
533 VkPhysicalDeviceLimits limits = {
534 .maxImageDimension1D = (1 << 14),
535 .maxImageDimension2D = (1 << 14),
536 .maxImageDimension3D = (1 << 11),
537 .maxImageDimensionCube = (1 << 14),
538 .maxImageArrayLayers = (1 << 11),
539 .maxTexelBufferElements = 128 * 1024 * 1024,
540 .maxUniformBufferRange = UINT32_MAX,
541 .maxStorageBufferRange = UINT32_MAX,
542 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
543 .maxMemoryAllocationCount = UINT32_MAX,
544 .maxSamplerAllocationCount = 64 * 1024,
545 .bufferImageGranularity = 64, /* A cache line */
546 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
547 .maxBoundDescriptorSets = MAX_SETS,
548 .maxPerStageDescriptorSamplers = 64,
549 .maxPerStageDescriptorUniformBuffers = 64,
550 .maxPerStageDescriptorStorageBuffers = 64,
551 .maxPerStageDescriptorSampledImages = 64,
552 .maxPerStageDescriptorStorageImages = 64,
553 .maxPerStageDescriptorInputAttachments = 64,
554 .maxPerStageResources = 128,
555 .maxDescriptorSetSamplers = 256,
556 .maxDescriptorSetUniformBuffers = 256,
557 .maxDescriptorSetUniformBuffersDynamic = 256,
558 .maxDescriptorSetStorageBuffers = 256,
559 .maxDescriptorSetStorageBuffersDynamic = 256,
560 .maxDescriptorSetSampledImages = 256,
561 .maxDescriptorSetStorageImages = 256,
562 .maxDescriptorSetInputAttachments = 256,
563 .maxVertexInputAttributes = 32,
564 .maxVertexInputBindings = 32,
565 .maxVertexInputAttributeOffset = 2047,
566 .maxVertexInputBindingStride = 2048,
567 .maxVertexOutputComponents = 128,
568 .maxTessellationGenerationLevel = 0,
569 .maxTessellationPatchSize = 0,
570 .maxTessellationControlPerVertexInputComponents = 0,
571 .maxTessellationControlPerVertexOutputComponents = 0,
572 .maxTessellationControlPerPatchOutputComponents = 0,
573 .maxTessellationControlTotalOutputComponents = 0,
574 .maxTessellationEvaluationInputComponents = 0,
575 .maxTessellationEvaluationOutputComponents = 0,
576 .maxGeometryShaderInvocations = 32,
577 .maxGeometryInputComponents = 64,
578 .maxGeometryOutputComponents = 128,
579 .maxGeometryOutputVertices = 256,
580 .maxGeometryTotalOutputComponents = 1024,
581 .maxFragmentInputComponents = 128,
582 .maxFragmentOutputAttachments = 8,
583 .maxFragmentDualSrcAttachments = 1,
584 .maxFragmentCombinedOutputResources = 8,
585 .maxComputeSharedMemorySize = 32768,
586 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
587 .maxComputeWorkGroupInvocations = 2048,
588 .maxComputeWorkGroupSize = {
589 2048,
590 2048,
591 2048
592 },
593 .subPixelPrecisionBits = 4 /* FIXME */,
594 .subTexelPrecisionBits = 4 /* FIXME */,
595 .mipmapPrecisionBits = 4 /* FIXME */,
596 .maxDrawIndexedIndexValue = UINT32_MAX,
597 .maxDrawIndirectCount = UINT32_MAX,
598 .maxSamplerLodBias = 16,
599 .maxSamplerAnisotropy = 16,
600 .maxViewports = MAX_VIEWPORTS,
601 .maxViewportDimensions = { (1 << 14), (1 << 14) },
602 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
603 .viewportSubPixelBits = 13, /* We take a float? */
604 .minMemoryMapAlignment = 4096, /* A page */
605 .minTexelBufferOffsetAlignment = 1,
606 .minUniformBufferOffsetAlignment = 4,
607 .minStorageBufferOffsetAlignment = 4,
608 .minTexelOffset = -32,
609 .maxTexelOffset = 31,
610 .minTexelGatherOffset = -32,
611 .maxTexelGatherOffset = 31,
612 .minInterpolationOffset = -2,
613 .maxInterpolationOffset = 2,
614 .subPixelInterpolationOffsetBits = 8,
615 .maxFramebufferWidth = (1 << 14),
616 .maxFramebufferHeight = (1 << 14),
617 .maxFramebufferLayers = (1 << 10),
618 .framebufferColorSampleCounts = sample_counts,
619 .framebufferDepthSampleCounts = sample_counts,
620 .framebufferStencilSampleCounts = sample_counts,
621 .framebufferNoAttachmentsSampleCounts = sample_counts,
622 .maxColorAttachments = MAX_RTS,
623 .sampledImageColorSampleCounts = sample_counts,
624 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
625 .sampledImageDepthSampleCounts = sample_counts,
626 .sampledImageStencilSampleCounts = sample_counts,
627 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
628 .maxSampleMaskWords = 1,
629 .timestampComputeAndGraphics = false,
630 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
631 .maxClipDistances = 8,
632 .maxCullDistances = 8,
633 .maxCombinedClipAndCullDistances = 8,
634 .discreteQueuePriorities = 1,
635 .pointSizeRange = { 0.125, 255.875 },
636 .lineWidthRange = { 0.0, 7.9921875 },
637 .pointSizeGranularity = (1.0 / 8.0),
638 .lineWidthGranularity = (1.0 / 128.0),
639 .strictLines = false, /* FINISHME */
640 .standardSampleLocations = true,
641 .optimalBufferCopyOffsetAlignment = 128,
642 .optimalBufferCopyRowPitchAlignment = 128,
643 .nonCoherentAtomSize = 64,
644 };
645
646 *pProperties = (VkPhysicalDeviceProperties) {
647 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
648 .driverVersion = radv_get_driver_version(),
649 .vendorID = 0x1002,
650 .deviceID = pdevice->rad_info.pci_id,
651 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
652 .limits = limits,
653 		.sparseProperties = {0}, /* No sparse residency properties exposed yet. */
654 };
655
656 strcpy(pProperties->deviceName, pdevice->name);
657 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
658 }
659
660 void radv_GetPhysicalDeviceProperties2KHR(
661 VkPhysicalDevice physicalDevice,
662 VkPhysicalDeviceProperties2KHR *pProperties)
663 {
664 return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
665 }
666
667 static void radv_get_physical_device_queue_family_properties(
668 struct radv_physical_device* pdevice,
669 uint32_t* pCount,
670 VkQueueFamilyProperties** pQueueFamilyProperties)
671 {
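	/* The general (GFX) family is always exposed; a separate compute
	 * family is added only on CIK+ parts that have compute rings and
	 * only when it has not been disabled for debugging.
	 */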
672 int num_queue_families = 1;
673 int idx;
674 if (pdevice->rad_info.compute_rings > 0 &&
675 pdevice->rad_info.chip_class >= CIK &&
676 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
677 num_queue_families++;
678
679 if (pQueueFamilyProperties == NULL) {
680 *pCount = num_queue_families;
681 return;
682 }
683
684 if (!*pCount)
685 return;
686
687 idx = 0;
688 if (*pCount >= 1) {
689 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
690 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
691 VK_QUEUE_COMPUTE_BIT |
692 VK_QUEUE_TRANSFER_BIT |
693 VK_QUEUE_SPARSE_BINDING_BIT,
694 .queueCount = 1,
695 .timestampValidBits = 64,
696 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
697 };
698 idx++;
699 }
700
701 if (pdevice->rad_info.compute_rings > 0 &&
702 pdevice->rad_info.chip_class >= CIK &&
703 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
704 if (*pCount > idx) {
705 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
706 .queueFlags = VK_QUEUE_COMPUTE_BIT |
707 VK_QUEUE_TRANSFER_BIT |
708 VK_QUEUE_SPARSE_BINDING_BIT,
709 .queueCount = pdevice->rad_info.compute_rings,
710 .timestampValidBits = 64,
711 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
712 };
713 idx++;
714 }
715 }
716 *pCount = idx;
717 }
718
719 void radv_GetPhysicalDeviceQueueFamilyProperties(
720 VkPhysicalDevice physicalDevice,
721 uint32_t* pCount,
722 VkQueueFamilyProperties* pQueueFamilyProperties)
723 {
724 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
725 if (!pQueueFamilyProperties) {
726 		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
727 		return;
728 }
729 VkQueueFamilyProperties *properties[] = {
730 pQueueFamilyProperties + 0,
731 pQueueFamilyProperties + 1,
732 pQueueFamilyProperties + 2,
733 };
734 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
735 assert(*pCount <= 3);
736 }
737
738 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
739 VkPhysicalDevice physicalDevice,
740 uint32_t* pCount,
741 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
742 {
743 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
744 if (!pQueueFamilyProperties) {
745 		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
746 		return;
747 }
748 VkQueueFamilyProperties *properties[] = {
749 &pQueueFamilyProperties[0].queueFamilyProperties,
750 &pQueueFamilyProperties[1].queueFamilyProperties,
751 &pQueueFamilyProperties[2].queueFamilyProperties,
752 };
753 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
754 assert(*pCount <= 3);
755 }
756
757 void radv_GetPhysicalDeviceMemoryProperties(
758 VkPhysicalDevice physicalDevice,
759 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
760 {
761 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
762
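	/* Four memory types map onto three heaps: CPU-invisible VRAM,
	 * CPU-visible VRAM, and GTT in write-combined and cached flavours.
	 */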
763 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
764
765 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
766 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
767 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
768 .heapIndex = RADV_MEM_HEAP_VRAM,
769 };
770 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
771 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
772 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
773 .heapIndex = RADV_MEM_HEAP_GTT,
774 };
775 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
776 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
777 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
778 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
779 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
780 };
781 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
782 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
783 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
784 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
785 .heapIndex = RADV_MEM_HEAP_GTT,
786 };
787
788 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
789
790 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
791 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
792 .size = physical_device->rad_info.vram_size -
793 physical_device->rad_info.visible_vram_size,
794 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
795 };
796 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
797 .size = physical_device->rad_info.visible_vram_size,
798 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
799 };
800 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
801 .size = physical_device->rad_info.gart_size,
802 .flags = 0,
803 };
804 }
805
806 void radv_GetPhysicalDeviceMemoryProperties2KHR(
807 VkPhysicalDevice physicalDevice,
808 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
809 {
810 return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
811 &pMemoryProperties->memoryProperties);
812 }
813
814 static int
815 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
816 int queue_family_index, int idx)
817 {
818 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
819 queue->device = device;
820 queue->queue_family_index = queue_family_index;
821 queue->queue_idx = idx;
822
823 queue->hw_ctx = device->ws->ctx_create(device->ws);
824 if (!queue->hw_ctx)
825 return VK_ERROR_OUT_OF_HOST_MEMORY;
826
827 return VK_SUCCESS;
828 }
829
830 static void
831 radv_queue_finish(struct radv_queue *queue)
832 {
833 if (queue->hw_ctx)
834 queue->device->ws->ctx_destroy(queue->hw_ctx);
835
836 if (queue->initial_preamble_cs)
837 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
838 if (queue->continue_preamble_cs)
839 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
840 if (queue->descriptor_bo)
841 queue->device->ws->buffer_destroy(queue->descriptor_bo);
842 if (queue->scratch_bo)
843 queue->device->ws->buffer_destroy(queue->scratch_bo);
844 if (queue->esgs_ring_bo)
845 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
846 if (queue->gsvs_ring_bo)
847 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
848 if (queue->compute_scratch_bo)
849 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
850 }
851
852 static void
853 radv_device_init_gs_info(struct radv_device *device)
854 {
855 switch (device->physical_device->rad_info.family) {
856 case CHIP_OLAND:
857 case CHIP_HAINAN:
858 case CHIP_KAVERI:
859 case CHIP_KABINI:
860 case CHIP_MULLINS:
861 case CHIP_ICELAND:
862 case CHIP_CARRIZO:
863 case CHIP_STONEY:
864 device->gs_table_depth = 16;
865 return;
866 case CHIP_TAHITI:
867 case CHIP_PITCAIRN:
868 case CHIP_VERDE:
869 case CHIP_BONAIRE:
870 case CHIP_HAWAII:
871 case CHIP_TONGA:
872 case CHIP_FIJI:
873 case CHIP_POLARIS10:
874 case CHIP_POLARIS11:
875 device->gs_table_depth = 32;
876 return;
877 default:
878 unreachable("unknown GPU");
879 }
880 }
881
882 VkResult radv_CreateDevice(
883 VkPhysicalDevice physicalDevice,
884 const VkDeviceCreateInfo* pCreateInfo,
885 const VkAllocationCallbacks* pAllocator,
886 VkDevice* pDevice)
887 {
888 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
889 VkResult result;
890 struct radv_device *device;
891
892 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
893 if (!is_extension_enabled(physical_device->extensions.ext_array,
894 physical_device->extensions.num_ext,
895 pCreateInfo->ppEnabledExtensionNames[i]))
896 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
897 }
898
899 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
900 sizeof(*device), 8,
901 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
902 if (!device)
903 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
904
905 memset(device, 0, sizeof(*device));
906
907 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
908 device->instance = physical_device->instance;
909 device->physical_device = physical_device;
910
911 device->debug_flags = device->instance->debug_flags;
912
913 device->ws = physical_device->ws;
914 if (pAllocator)
915 device->alloc = *pAllocator;
916 else
917 device->alloc = physical_device->instance->alloc;
918
919 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
920 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
921 uint32_t qfi = queue_create->queueFamilyIndex;
922
923 device->queues[qfi] = vk_alloc(&device->alloc,
924 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
925 if (!device->queues[qfi]) {
926 result = VK_ERROR_OUT_OF_HOST_MEMORY;
927 goto fail;
928 }
929
930 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
931
932 device->queue_count[qfi] = queue_create->queueCount;
933
934 for (unsigned q = 0; q < queue_create->queueCount; q++) {
935 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
936 if (result != VK_SUCCESS)
937 goto fail;
938 }
939 }
940
941 #if HAVE_LLVM < 0x0400
942 device->llvm_supports_spill = false;
943 #else
944 device->llvm_supports_spill = true;
945 #endif
946
947 /* The maximum number of scratch waves. Scratch space isn't divided
948 * evenly between CUs. The number is only a function of the number of CUs.
949 * We can decrease the constant to decrease the scratch buffer size.
950 *
951 	 * scratch_waves must be >= the maximum possible size of
952 * 1 threadgroup, so that the hw doesn't hang from being unable
953 * to start any.
954 *
955 * The recommended value is 4 per CU at most. Higher numbers don't
956 * bring much benefit, but they still occupy chip resources (think
957 * async compute). I've seen ~2% performance difference between 4 and 32.
958 */
959 uint32_t max_threads_per_block = 2048;
960 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
961 max_threads_per_block / 64);
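	/* As an illustration (hypothetical part): with 36 CUs this yields
	 * MAX2(32 * 36, 2048 / 64) = 1152 scratch waves.
	 */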
962
963 radv_device_init_gs_info(device);
964
965 device->tess_offchip_block_dw_size =
966 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
967 device->has_distributed_tess =
968 device->physical_device->rad_info.chip_class >= VI &&
969 device->physical_device->rad_info.max_se >= 2;
970
971 result = radv_device_init_meta(device);
972 if (result != VK_SUCCESS)
973 goto fail;
974
975 radv_device_init_msaa(device);
976
977 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
978 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
979 switch (family) {
980 case RADV_QUEUE_GENERAL:
981 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
982 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
983 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
984 break;
985 case RADV_QUEUE_COMPUTE:
986 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
987 radeon_emit(device->empty_cs[family], 0);
988 break;
989 }
990 device->ws->cs_finalize(device->empty_cs[family]);
991
992 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
993 switch (family) {
994 case RADV_QUEUE_GENERAL:
995 case RADV_QUEUE_COMPUTE:
996 si_cs_emit_cache_flush(device->flush_cs[family],
997 device->physical_device->rad_info.chip_class,
998 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
999 RADV_CMD_FLAG_INV_ICACHE |
1000 RADV_CMD_FLAG_INV_SMEM_L1 |
1001 RADV_CMD_FLAG_INV_VMEM_L1 |
1002 RADV_CMD_FLAG_INV_GLOBAL_L2);
1003 break;
1004 }
1005 device->ws->cs_finalize(device->flush_cs[family]);
1006 }
1007
1008 if (getenv("RADV_TRACE_FILE")) {
1009 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1010 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
		if (!device->trace_bo) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}

		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
		if (!device->trace_id_ptr) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
1017 }
1018
1019 if (device->physical_device->rad_info.chip_class >= CIK)
1020 cik_create_gfx_config(device);
1021
1022 VkPipelineCacheCreateInfo ci;
1023 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1024 ci.pNext = NULL;
1025 ci.flags = 0;
1026 ci.pInitialData = NULL;
1027 ci.initialDataSize = 0;
1028 VkPipelineCache pc;
1029 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1030 &ci, NULL, &pc);
1031 if (result != VK_SUCCESS)
1032 goto fail;
1033
1034 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1035
1036 *pDevice = radv_device_to_handle(device);
1037 return VK_SUCCESS;
1038
1039 fail:
1040 if (device->trace_bo)
1041 device->ws->buffer_destroy(device->trace_bo);
1042
1043 if (device->gfx_init)
1044 device->ws->buffer_destroy(device->gfx_init);
1045
1046 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1047 for (unsigned q = 0; q < device->queue_count[i]; q++)
1048 radv_queue_finish(&device->queues[i][q]);
1049 if (device->queue_count[i])
1050 vk_free(&device->alloc, device->queues[i]);
1051 }
1052
1053 vk_free(&device->alloc, device);
1054 return result;
1055 }
1056
1057 void radv_DestroyDevice(
1058 VkDevice _device,
1059 const VkAllocationCallbacks* pAllocator)
1060 {
1061 RADV_FROM_HANDLE(radv_device, device, _device);
1062
1063 if (!device)
1064 return;
1065
1066 if (device->trace_bo)
1067 device->ws->buffer_destroy(device->trace_bo);
1068
1069 if (device->gfx_init)
1070 device->ws->buffer_destroy(device->gfx_init);
1071
1072 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1073 for (unsigned q = 0; q < device->queue_count[i]; q++)
1074 radv_queue_finish(&device->queues[i][q]);
1075 if (device->queue_count[i])
1076 vk_free(&device->alloc, device->queues[i]);
1077 if (device->empty_cs[i])
1078 device->ws->cs_destroy(device->empty_cs[i]);
1079 if (device->flush_cs[i])
1080 device->ws->cs_destroy(device->flush_cs[i]);
1081 }
1082 radv_device_finish_meta(device);
1083
1084 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1085 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1086
1087 vk_free(&device->alloc, device);
1088 }
1089
1090 VkResult radv_EnumerateInstanceExtensionProperties(
1091 const char* pLayerName,
1092 uint32_t* pPropertyCount,
1093 VkExtensionProperties* pProperties)
1094 {
1095 if (pProperties == NULL) {
1096 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1097 return VK_SUCCESS;
1098 }
1099
1100 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1101 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1102
1103 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1104 return VK_INCOMPLETE;
1105
1106 return VK_SUCCESS;
1107 }
1108
1109 VkResult radv_EnumerateDeviceExtensionProperties(
1110 VkPhysicalDevice physicalDevice,
1111 const char* pLayerName,
1112 uint32_t* pPropertyCount,
1113 VkExtensionProperties* pProperties)
1114 {
1115 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1116
1117 if (pProperties == NULL) {
1118 *pPropertyCount = pdevice->extensions.num_ext;
1119 return VK_SUCCESS;
1120 }
1121
1122 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1123 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1124
1125 if (*pPropertyCount < pdevice->extensions.num_ext)
1126 return VK_INCOMPLETE;
1127
1128 return VK_SUCCESS;
1129 }
1130
1131 VkResult radv_EnumerateInstanceLayerProperties(
1132 uint32_t* pPropertyCount,
1133 VkLayerProperties* pProperties)
1134 {
1135 if (pProperties == NULL) {
1136 *pPropertyCount = 0;
1137 return VK_SUCCESS;
1138 }
1139
1140 /* None supported at this time */
1141 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1142 }
1143
1144 VkResult radv_EnumerateDeviceLayerProperties(
1145 VkPhysicalDevice physicalDevice,
1146 uint32_t* pPropertyCount,
1147 VkLayerProperties* pProperties)
1148 {
1149 if (pProperties == NULL) {
1150 *pPropertyCount = 0;
1151 return VK_SUCCESS;
1152 }
1153
1154 /* None supported at this time */
1155 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1156 }
1157
1158 void radv_GetDeviceQueue(
1159 VkDevice _device,
1160 uint32_t queueFamilyIndex,
1161 uint32_t queueIndex,
1162 VkQueue* pQueue)
1163 {
1164 RADV_FROM_HANDLE(radv_device, device, _device);
1165
1166 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1167 }
1168
1169 static void radv_dump_trace(struct radv_device *device,
1170 struct radeon_winsys_cs *cs)
1171 {
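	/* When RADV_TRACE_FILE is set, command buffers write an ID into the
	 * trace BO as they execute; on a hang the last ID reached is dumped
	 * here together with the annotated command stream.
	 */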
1172 const char *filename = getenv("RADV_TRACE_FILE");
1173 FILE *f = fopen(filename, "w");
1174 if (!f) {
1175 		fprintf(stderr, "Failed to open trace file %s for writing\n", filename);
1176 return;
1177 }
1178
1179 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1180 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1181 fclose(f);
1182 }
1183
1184 static void
1185 fill_geom_rings(struct radv_queue *queue,
1186 uint32_t *map,
1187 uint32_t esgs_ring_size,
1188 struct radeon_winsys_bo *esgs_ring_bo,
1189 uint32_t gsvs_ring_size,
1190 struct radeon_winsys_bo *gsvs_ring_bo)
1191 {
1192 uint64_t esgs_va = 0, gsvs_va = 0;
1193 uint32_t *desc = &map[4];
1194
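	/* map[0..3] holds the scratch rsrc (plus padding) written by the
	 * caller; below are four 4-dword buffer descriptors: the ES and GS
	 * views of the ES->GS ring and the VS and GS views of the GS->VS
	 * ring.
	 */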
1195 if (esgs_ring_bo)
1196 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1197 if (gsvs_ring_bo)
1198 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1199
1200 	/* ES entry for ES->GS ring: stride 0, num records = size,
1201 	   add tid, swizzle, elsize4, index stride 64 */
1202 desc[0] = esgs_va;
1203 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1204 S_008F04_STRIDE(0) |
1205 S_008F04_SWIZZLE_ENABLE(true);
1206 desc[2] = esgs_ring_size;
1207 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1208 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1209 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1210 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1211 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1212 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1213 S_008F0C_ELEMENT_SIZE(1) |
1214 S_008F0C_INDEX_STRIDE(3) |
1215 S_008F0C_ADD_TID_ENABLE(true);
1216
1217 desc += 4;
1218 /* GS entry for ES->GS ring */
1219 	/* stride 0, num records = size, elsize0,
1220 	   index stride 0 */
1221 desc[0] = esgs_va;
1222 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1223 S_008F04_STRIDE(0) |
1224 S_008F04_SWIZZLE_ENABLE(false);
1225 desc[2] = esgs_ring_size;
1226 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1227 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1228 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1229 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1230 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1231 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1232 S_008F0C_ELEMENT_SIZE(0) |
1233 S_008F0C_INDEX_STRIDE(0) |
1234 S_008F0C_ADD_TID_ENABLE(false);
1235
1236 desc += 4;
1237 /* VS entry for GS->VS ring */
1238 	/* stride 0, num records = size, elsize0,
1239 	   index stride 0 */
1240 desc[0] = gsvs_va;
1241 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1242 S_008F04_STRIDE(0) |
1243 S_008F04_SWIZZLE_ENABLE(false);
1244 desc[2] = gsvs_ring_size;
1245 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1246 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1247 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1248 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1249 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1250 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1251 S_008F0C_ELEMENT_SIZE(0) |
1252 S_008F0C_INDEX_STRIDE(0) |
1253 S_008F0C_ADD_TID_ENABLE(false);
1254 desc += 4;
1255
1256 	/* GS entry for GS->VS ring: stride gsvs_itemsize,
1257 	   num records 64, elsize 4, index stride 16 */
1258 /* shader will patch stride and desc[2] */
1259 desc[0] = gsvs_va;
1260 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1261 S_008F04_STRIDE(0) |
1262 S_008F04_SWIZZLE_ENABLE(true);
1263 desc[2] = 0;
1264 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1265 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1266 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1267 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1268 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1269 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1270 S_008F0C_ELEMENT_SIZE(1) |
1271 S_008F0C_INDEX_STRIDE(1) |
1272 S_008F0C_ADD_TID_ENABLE(true);
1273 }
1274
1275 static VkResult
1276 radv_get_preamble_cs(struct radv_queue *queue,
1277 uint32_t scratch_size,
1278 uint32_t compute_scratch_size,
1279 uint32_t esgs_ring_size,
1280 uint32_t gsvs_ring_size,
1281 struct radeon_winsys_cs **initial_preamble_cs,
1282 struct radeon_winsys_cs **continue_preamble_cs)
1283 {
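	/* Two preambles are built per queue: the initial preamble (index 0)
	 * additionally flushes and invalidates caches for the first IB of a
	 * submission, while the continue preamble only re-emits the scratch
	 * and ring state needed by the remaining IBs.
	 */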
1284 struct radeon_winsys_bo *scratch_bo = NULL;
1285 struct radeon_winsys_bo *descriptor_bo = NULL;
1286 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1287 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1288 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1289 struct radeon_winsys_cs *dest_cs[2] = {0};
1290
1291 if (scratch_size <= queue->scratch_size &&
1292 compute_scratch_size <= queue->compute_scratch_size &&
1293 esgs_ring_size <= queue->esgs_ring_size &&
1294 gsvs_ring_size <= queue->gsvs_ring_size &&
1295 queue->initial_preamble_cs) {
1296 *initial_preamble_cs = queue->initial_preamble_cs;
1297 *continue_preamble_cs = queue->continue_preamble_cs;
1298 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1299 *continue_preamble_cs = NULL;
1300 return VK_SUCCESS;
1301 }
1302
1303 if (scratch_size > queue->scratch_size) {
1304 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1305 scratch_size,
1306 4096,
1307 RADEON_DOMAIN_VRAM,
1308 RADEON_FLAG_NO_CPU_ACCESS);
1309 if (!scratch_bo)
1310 goto fail;
1311 } else
1312 scratch_bo = queue->scratch_bo;
1313
1314 if (compute_scratch_size > queue->compute_scratch_size) {
1315 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1316 compute_scratch_size,
1317 4096,
1318 RADEON_DOMAIN_VRAM,
1319 RADEON_FLAG_NO_CPU_ACCESS);
1320 if (!compute_scratch_bo)
1321 goto fail;
1322
1323 } else
1324 compute_scratch_bo = queue->compute_scratch_bo;
1325
1326 if (esgs_ring_size > queue->esgs_ring_size) {
1327 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1328 esgs_ring_size,
1329 4096,
1330 RADEON_DOMAIN_VRAM,
1331 RADEON_FLAG_NO_CPU_ACCESS);
1332 if (!esgs_ring_bo)
1333 goto fail;
1334 } else {
1335 esgs_ring_bo = queue->esgs_ring_bo;
1336 esgs_ring_size = queue->esgs_ring_size;
1337 }
1338
1339 if (gsvs_ring_size > queue->gsvs_ring_size) {
1340 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1341 gsvs_ring_size,
1342 4096,
1343 RADEON_DOMAIN_VRAM,
1344 RADEON_FLAG_NO_CPU_ACCESS);
1345 if (!gsvs_ring_bo)
1346 goto fail;
1347 } else {
1348 gsvs_ring_bo = queue->gsvs_ring_bo;
1349 gsvs_ring_size = queue->gsvs_ring_size;
1350 }
1351
1352 if (scratch_bo != queue->scratch_bo ||
1353 esgs_ring_bo != queue->esgs_ring_bo ||
1354 gsvs_ring_bo != queue->gsvs_ring_bo) {
1355 uint32_t size = 0;
1356 if (gsvs_ring_bo || esgs_ring_bo)
1357 			size = 80; /* 20 dwords: scratch rsrc (2) + padding (2) + four 4-dword ring descriptors */
1358 		else if (scratch_bo)
1359 			size = 8; /* scratch rsrc only: 2 dwords */
1360
1361 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1362 size,
1363 4096,
1364 RADEON_DOMAIN_VRAM,
1365 RADEON_FLAG_CPU_ACCESS);
1366 if (!descriptor_bo)
1367 goto fail;
1368 } else
1369 descriptor_bo = queue->descriptor_bo;
1370
1371 for(int i = 0; i < 2; ++i) {
1372 struct radeon_winsys_cs *cs = NULL;
1373 cs = queue->device->ws->cs_create(queue->device->ws,
1374 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1375 if (!cs)
1376 goto fail;
1377
1378 dest_cs[i] = cs;
1379
1380 if (scratch_bo)
1381 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1382
1383 if (esgs_ring_bo)
1384 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1385
1386 if (gsvs_ring_bo)
1387 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1388
1389 if (descriptor_bo)
1390 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1391
1392 if (descriptor_bo != queue->descriptor_bo) {
1393 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1394
1395 if (scratch_bo) {
1396 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1397 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1398 S_008F04_SWIZZLE_ENABLE(1);
1399 map[0] = scratch_va;
1400 map[1] = rsrc1;
1401 }
1402
1403 if (esgs_ring_bo || gsvs_ring_bo)
1404 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1405
1406 queue->device->ws->buffer_unmap(descriptor_bo);
1407 }
1408
1409 if (esgs_ring_bo || gsvs_ring_bo) {
1410 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1411 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1412 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1413 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1414
1415 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1416 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1417 radeon_emit(cs, esgs_ring_size >> 8);
1418 radeon_emit(cs, gsvs_ring_size >> 8);
1419 } else {
1420 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1421 radeon_emit(cs, esgs_ring_size >> 8);
1422 radeon_emit(cs, gsvs_ring_size >> 8);
1423 }
1424 }
1425
1426 if (descriptor_bo) {
1427 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1428 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1429 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1430 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1431 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1432 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1433
1434 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1435
1436 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1437 radeon_set_sh_reg_seq(cs, regs[i], 2);
1438 radeon_emit(cs, va);
1439 radeon_emit(cs, va >> 32);
1440 }
1441 }
1442
1443 if (compute_scratch_bo) {
1444 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1445 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1446 S_008F04_SWIZZLE_ENABLE(1);
1447
1448 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1449
1450 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1451 radeon_emit(cs, scratch_va);
1452 radeon_emit(cs, rsrc1);
1453 }
1454
1455 if (!i) {
1456 si_cs_emit_cache_flush(cs,
1457 queue->device->physical_device->rad_info.chip_class,
1458 queue->queue_family_index == RING_COMPUTE &&
1459 queue->device->physical_device->rad_info.chip_class >= CIK,
1460 RADV_CMD_FLAG_INV_ICACHE |
1461 RADV_CMD_FLAG_INV_SMEM_L1 |
1462 RADV_CMD_FLAG_INV_VMEM_L1 |
1463 RADV_CMD_FLAG_INV_GLOBAL_L2);
1464 }
1465
1466 if (!queue->device->ws->cs_finalize(cs))
1467 goto fail;
1468 }
1469
1470 if (queue->initial_preamble_cs)
1471 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1472
1473 if (queue->continue_preamble_cs)
1474 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1475
1476 queue->initial_preamble_cs = dest_cs[0];
1477 queue->continue_preamble_cs = dest_cs[1];
1478
1479 if (scratch_bo != queue->scratch_bo) {
1480 if (queue->scratch_bo)
1481 queue->device->ws->buffer_destroy(queue->scratch_bo);
1482 queue->scratch_bo = scratch_bo;
1483 queue->scratch_size = scratch_size;
1484 }
1485
1486 if (compute_scratch_bo != queue->compute_scratch_bo) {
1487 if (queue->compute_scratch_bo)
1488 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1489 queue->compute_scratch_bo = compute_scratch_bo;
1490 queue->compute_scratch_size = compute_scratch_size;
1491 }
1492
1493 if (esgs_ring_bo != queue->esgs_ring_bo) {
1494 if (queue->esgs_ring_bo)
1495 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1496 queue->esgs_ring_bo = esgs_ring_bo;
1497 queue->esgs_ring_size = esgs_ring_size;
1498 }
1499
1500 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1501 if (queue->gsvs_ring_bo)
1502 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1503 queue->gsvs_ring_bo = gsvs_ring_bo;
1504 queue->gsvs_ring_size = gsvs_ring_size;
1505 }
1506
1507 if (descriptor_bo != queue->descriptor_bo) {
1508 if (queue->descriptor_bo)
1509 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1510
1511 queue->descriptor_bo = descriptor_bo;
1512 }
1513
1514 *initial_preamble_cs = queue->initial_preamble_cs;
1515 *continue_preamble_cs = queue->continue_preamble_cs;
1516 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1517 *continue_preamble_cs = NULL;
1518 return VK_SUCCESS;
1519 fail:
1520 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1521 if (dest_cs[i])
1522 queue->device->ws->cs_destroy(dest_cs[i]);
1523 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1524 queue->device->ws->buffer_destroy(descriptor_bo);
1525 if (scratch_bo && scratch_bo != queue->scratch_bo)
1526 queue->device->ws->buffer_destroy(scratch_bo);
1527 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1528 queue->device->ws->buffer_destroy(compute_scratch_bo);
1529 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1530 queue->device->ws->buffer_destroy(esgs_ring_bo);
1531 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1532 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1533 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1534 }
1535
1536 VkResult radv_QueueSubmit(
1537 VkQueue _queue,
1538 uint32_t submitCount,
1539 const VkSubmitInfo* pSubmits,
1540 VkFence _fence)
1541 {
1542 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1543 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1544 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1545 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1546 int ret;
1547 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1548 uint32_t scratch_size = 0;
1549 uint32_t compute_scratch_size = 0;
1550 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1551 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1552 VkResult result;
1553 bool fence_emitted = false;
1554
1555 /* Do this first so failing to allocate scratch buffers can't result in
1556 * partially executed submissions. */
1557 for (uint32_t i = 0; i < submitCount; i++) {
1558 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1559 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1560 pSubmits[i].pCommandBuffers[j]);
1561
1562 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1563 compute_scratch_size = MAX2(compute_scratch_size,
1564 cmd_buffer->compute_scratch_size_needed);
1565 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1566 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1567 }
1568 }
1569
1570 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1571 esgs_ring_size, gsvs_ring_size,
1572 &initial_preamble_cs, &continue_preamble_cs);
1573 if (result != VK_SUCCESS)
1574 return result;
1575
1576 for (uint32_t i = 0; i < submitCount; i++) {
1577 struct radeon_winsys_cs **cs_array;
1578 bool do_flush = !i;
1579 bool can_patch = !do_flush;
1580 uint32_t advance;
1581
1582 if (!pSubmits[i].commandBufferCount) {
1583 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1584 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1585 &queue->device->empty_cs[queue->queue_family_index],
1586 1, NULL, NULL,
1587 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1588 pSubmits[i].waitSemaphoreCount,
1589 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1590 pSubmits[i].signalSemaphoreCount,
1591 false, base_fence);
1592 if (ret) {
1593 radv_loge("failed to submit CS %d\n", i);
1594 abort();
1595 }
1596 fence_emitted = true;
1597 }
1598 continue;
1599 }
1600
1601 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1602 (pSubmits[i].commandBufferCount + do_flush));
1603
1604 		if (do_flush)
1605 cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1606
1607 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1608 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1609 pSubmits[i].pCommandBuffers[j]);
1610 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1611
1612 cs_array[j + do_flush] = cmd_buffer->cs;
1613 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1614 can_patch = false;
1615 }
1616
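		/* Submit in chunks of at most max_cs_submission IBs (just one
		 * when tracing, so a hang can be pinned to a single IB); wait
		 * semaphores attach only to the first chunk and signal
		 * semaphores only to the last.
		 */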
1617 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1618 advance = MIN2(max_cs_submission,
1619 pSubmits[i].commandBufferCount + do_flush - j);
1620 bool b = j == 0;
1621 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1622
1623 if (queue->device->trace_bo)
1624 *queue->device->trace_id_ptr = 0;
1625
1626 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1627 advance, initial_preamble_cs, continue_preamble_cs,
1628 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1629 b ? pSubmits[i].waitSemaphoreCount : 0,
1630 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1631 e ? pSubmits[i].signalSemaphoreCount : 0,
1632 can_patch, base_fence);
1633
1634 if (ret) {
1635 radv_loge("failed to submit CS %d\n", i);
1636 abort();
1637 }
1638 fence_emitted = true;
1639 if (queue->device->trace_bo) {
1640 bool success = queue->device->ws->ctx_wait_idle(
1641 queue->hw_ctx,
1642 radv_queue_family_to_ring(
1643 queue->queue_family_index),
1644 queue->queue_idx);
1645
1646 if (!success) { /* Hang */
1647 radv_dump_trace(queue->device, cs_array[j]);
1648 abort();
1649 }
1650 }
1651 }
1652 free(cs_array);
1653 }
1654
1655 if (fence) {
1656 if (!fence_emitted)
1657 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1658 &queue->device->empty_cs[queue->queue_family_index],
1659 1, NULL, NULL, NULL, 0, NULL, 0,
1660 false, base_fence);
1661
1662 fence->submitted = true;
1663 }
1664
1665 return VK_SUCCESS;
1666 }
1667
1668 VkResult radv_QueueWaitIdle(
1669 VkQueue _queue)
1670 {
1671 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1672
1673 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1674 radv_queue_family_to_ring(queue->queue_family_index),
1675 queue->queue_idx);
1676 return VK_SUCCESS;
1677 }
1678
1679 VkResult radv_DeviceWaitIdle(
1680 VkDevice _device)
1681 {
1682 RADV_FROM_HANDLE(radv_device, device, _device);
1683
1684 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1685 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1686 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1687 }
1688 }
1689 return VK_SUCCESS;
1690 }
1691
1692 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1693 VkInstance instance,
1694 const char* pName)
1695 {
1696 return radv_lookup_entrypoint(pName);
1697 }
1698
1699 /* The loader wants us to expose a second GetInstanceProcAddr function
1700 * to work around certain LD_PRELOAD issues seen in apps.
1701 */
1702 PUBLIC
1703 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1704 VkInstance instance,
1705 const char* pName);
1706
1707 PUBLIC
1708 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1709 VkInstance instance,
1710 const char* pName)
1711 {
1712 return radv_GetInstanceProcAddr(instance, pName);
1713 }
1714
1715 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1716 VkDevice device,
1717 const char* pName)
1718 {
1719 return radv_lookup_entrypoint(pName);
1720 }
1721
1722 bool radv_get_memory_fd(struct radv_device *device,
1723 struct radv_device_memory *memory,
1724 int *pFD)
1725 {
1726 struct radeon_bo_metadata metadata;
1727
1728 if (memory->image) {
1729 radv_init_metadata(device, memory->image, &metadata);
1730 device->ws->buffer_set_metadata(memory->bo, &metadata);
1731 }
1732
1733 return device->ws->buffer_get_fd(device->ws, memory->bo,
1734 pFD);
1735 }
1736
1737 VkResult radv_AllocateMemory(
1738 VkDevice _device,
1739 const VkMemoryAllocateInfo* pAllocateInfo,
1740 const VkAllocationCallbacks* pAllocator,
1741 VkDeviceMemory* pMem)
1742 {
1743 RADV_FROM_HANDLE(radv_device, device, _device);
1744 struct radv_device_memory *mem;
1745 VkResult result;
1746 enum radeon_bo_domain domain;
1747 uint32_t flags = 0;
1748 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
1749 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1750
1751 if (pAllocateInfo->allocationSize == 0) {
1752 /* Apparently, this is allowed */
1753 *pMem = VK_NULL_HANDLE;
1754 return VK_SUCCESS;
1755 }
1756
1757 vk_foreach_struct(ext, pAllocateInfo->pNext) {
1758 switch (ext->sType) {
1759 case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
1760 dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
1761 break;
1762 default:
1763 break;
1764 }
1765 }
1766
1767 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1768 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1769 if (mem == NULL)
1770 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1771
1772 if (dedicate_info) {
1773 mem->image = radv_image_from_handle(dedicate_info->image);
1774 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
1775 } else {
1776 mem->image = NULL;
1777 mem->buffer = NULL;
1778 }
1779
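	/* Translate the Vulkan memory type into a winsys domain and flags:
	 * the GTT types go to the GTT domain (optionally write-combined),
	 * everything else to VRAM, with CPU access disabled for the pure
	 * VRAM type.
	 */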
1780 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1781 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1782 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1783 domain = RADEON_DOMAIN_GTT;
1784 else
1785 domain = RADEON_DOMAIN_VRAM;
1786
1787 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1788 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1789 else
1790 flags |= RADEON_FLAG_CPU_ACCESS;
1791
1792 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1793 flags |= RADEON_FLAG_GTT_WC;
1794
1795 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1796 domain, flags);
1797
1798 if (!mem->bo) {
1799 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1800 goto fail;
1801 }
1802 mem->type_index = pAllocateInfo->memoryTypeIndex;
1803
1804 *pMem = radv_device_memory_to_handle(mem);
1805
1806 return VK_SUCCESS;
1807
1808 fail:
1809 vk_free2(&device->alloc, pAllocator, mem);
1810
1811 return result;
1812 }
1813
1814 void radv_FreeMemory(
1815 VkDevice _device,
1816 VkDeviceMemory _mem,
1817 const VkAllocationCallbacks* pAllocator)
1818 {
1819 RADV_FROM_HANDLE(radv_device, device, _device);
1820 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1821
1822 if (mem == NULL)
1823 return;
1824
1825 device->ws->buffer_destroy(mem->bo);
1826 mem->bo = NULL;
1827
1828 vk_free2(&device->alloc, pAllocator, mem);
1829 }
1830
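/* Map the whole underlying BO and offset the returned pointer; the size
 * parameter is unused because the winsys always maps the entire buffer.
 */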
1831 VkResult radv_MapMemory(
1832 VkDevice _device,
1833 VkDeviceMemory _memory,
1834 VkDeviceSize offset,
1835 VkDeviceSize size,
1836 VkMemoryMapFlags flags,
1837 void** ppData)
1838 {
1839 RADV_FROM_HANDLE(radv_device, device, _device);
1840 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1841
1842 if (mem == NULL) {
1843 *ppData = NULL;
1844 return VK_SUCCESS;
1845 }
1846
1847 *ppData = device->ws->buffer_map(mem->bo);
1848 if (*ppData) {
1849 		*ppData = (uint8_t *)*ppData + offset;

1850 return VK_SUCCESS;
1851 }
1852
1853 return VK_ERROR_MEMORY_MAP_FAILED;
1854 }
1855
1856 void radv_UnmapMemory(
1857 VkDevice _device,
1858 VkDeviceMemory _memory)
1859 {
1860 RADV_FROM_HANDLE(radv_device, device, _device);
1861 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1862
1863 if (mem == NULL)
1864 return;
1865
1866 device->ws->buffer_unmap(mem->bo);
1867 }
1868
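/* Flush/invalidate are no-ops, presumably because all host-visible memory
 * types exposed by radv are also host-coherent.
 */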
1869 VkResult radv_FlushMappedMemoryRanges(
1870 VkDevice _device,
1871 uint32_t memoryRangeCount,
1872 const VkMappedMemoryRange* pMemoryRanges)
1873 {
1874 return VK_SUCCESS;
1875 }
1876
1877 VkResult radv_InvalidateMappedMemoryRanges(
1878 VkDevice _device,
1879 uint32_t memoryRangeCount,
1880 const VkMappedMemoryRange* pMemoryRanges)
1881 {
1882 return VK_SUCCESS;
1883 }
1884
1885 void radv_GetBufferMemoryRequirements(
1886 VkDevice device,
1887 VkBuffer _buffer,
1888 VkMemoryRequirements* pMemoryRequirements)
1889 {
1890 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1891
1892 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1893
1894 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
1895 pMemoryRequirements->alignment = 4096;
1896 else
1897 pMemoryRequirements->alignment = 16;
1898
1899 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
1900 }
1901
1902 void radv_GetImageMemoryRequirements(
1903 VkDevice device,
1904 VkImage _image,
1905 VkMemoryRequirements* pMemoryRequirements)
1906 {
1907 RADV_FROM_HANDLE(radv_image, image, _image);
1908
1909 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1910
1911 pMemoryRequirements->size = image->size;
1912 pMemoryRequirements->alignment = image->alignment;
1913 }
1914
1915 void radv_GetImageSparseMemoryRequirements(
1916 VkDevice device,
1917 VkImage image,
1918 uint32_t* pSparseMemoryRequirementCount,
1919 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1920 {
1921 stub();
1922 }
1923
1924 void radv_GetDeviceMemoryCommitment(
1925 VkDevice device,
1926 VkDeviceMemory memory,
1927 VkDeviceSize* pCommittedMemoryInBytes)
1928 {
1929 *pCommittedMemoryInBytes = 0;
1930 }
1931
1932 VkResult radv_BindBufferMemory(
1933 VkDevice device,
1934 VkBuffer _buffer,
1935 VkDeviceMemory _memory,
1936 VkDeviceSize memoryOffset)
1937 {
1938 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1939 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1940
1941 if (mem) {
1942 buffer->bo = mem->bo;
1943 buffer->offset = memoryOffset;
1944 } else {
1945 buffer->bo = NULL;
1946 buffer->offset = 0;
1947 }
1948
1949 return VK_SUCCESS;
1950 }
1951
1952 VkResult radv_BindImageMemory(
1953 VkDevice device,
1954 VkImage _image,
1955 VkDeviceMemory _memory,
1956 VkDeviceSize memoryOffset)
1957 {
1958 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1959 RADV_FROM_HANDLE(radv_image, image, _image);
1960
1961 if (mem) {
1962 image->bo = mem->bo;
1963 image->offset = memoryOffset;
1964 } else {
1965 image->bo = NULL;
1966 image->offset = 0;
1967 }
1968
1969 return VK_SUCCESS;
1970 }
1971
1972
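/* Bind (or unbind, when mem is NULL) ranges of a sparse resource's virtual
 * address space to pages of the backing memory via the winsys.
 */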
1973 static void
1974 radv_sparse_buffer_bind_memory(struct radv_device *device,
1975 const VkSparseBufferMemoryBindInfo *bind)
1976 {
1977 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
1978
1979 for (uint32_t i = 0; i < bind->bindCount; ++i) {
1980 struct radv_device_memory *mem = NULL;
1981
1982 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
1983 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
1984
1985 device->ws->buffer_virtual_bind(buffer->bo,
1986 bind->pBinds[i].resourceOffset,
1987 bind->pBinds[i].size,
1988 mem ? mem->bo : NULL,
1989 bind->pBinds[i].memoryOffset);
1990 }
1991 }
1992
1993 static void
1994 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
1995 const VkSparseImageOpaqueMemoryBindInfo *bind)
1996 {
1997 RADV_FROM_HANDLE(radv_image, image, bind->image);
1998
1999 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2000 struct radv_device_memory *mem = NULL;
2001
2002 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2003 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2004
2005 device->ws->buffer_virtual_bind(image->bo,
2006 bind->pBinds[i].resourceOffset,
2007 bind->pBinds[i].size,
2008 mem ? mem->bo : NULL,
2009 bind->pBinds[i].memoryOffset);
2010 }
2011 }
2012
2013 VkResult radv_QueueBindSparse(
2014 VkQueue _queue,
2015 uint32_t bindInfoCount,
2016 const VkBindSparseInfo* pBindInfo,
2017 VkFence _fence)
2018 {
2019 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2020 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2021 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2022 bool fence_emitted = false;
2023
2024 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2025 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2026 radv_sparse_buffer_bind_memory(queue->device,
2027 pBindInfo[i].pBufferBinds + j);
2028 }
2029
2030 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2031 radv_sparse_image_opaque_bind_memory(queue->device,
2032 pBindInfo[i].pImageOpaqueBinds + j);
2033 }
2034
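		/* The binds above are applied synchronously on the CPU, so
		 * the semaphores only need an otherwise empty CS submission
		 * to be waited on and signalled.
		 */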
2035 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2036 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2037 &queue->device->empty_cs[queue->queue_family_index],
2038 1, NULL, NULL,
2039 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2040 pBindInfo[i].waitSemaphoreCount,
2041 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2042 pBindInfo[i].signalSemaphoreCount,
2043 false, base_fence);
2044 fence_emitted = true;
2045 if (fence)
2046 fence->submitted = true;
2047 }
2048 }
2049
2050 if (fence && !fence_emitted) {
2051 fence->signalled = true;
2052 }
2053
2054 return VK_SUCCESS;
2055 }
2056
2057 VkResult radv_CreateFence(
2058 VkDevice _device,
2059 const VkFenceCreateInfo* pCreateInfo,
2060 const VkAllocationCallbacks* pAllocator,
2061 VkFence* pFence)
2062 {
2063 RADV_FROM_HANDLE(radv_device, device, _device);
2064 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2065 sizeof(*fence), 8,
2066 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2067
2068 if (!fence)
2069 return VK_ERROR_OUT_OF_HOST_MEMORY;
2070
2071 memset(fence, 0, sizeof(*fence));
2072 fence->submitted = false;
2073 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2074 fence->fence = device->ws->create_fence();
2075 if (!fence->fence) {
2076 vk_free2(&device->alloc, pAllocator, fence);
2077 return VK_ERROR_OUT_OF_HOST_MEMORY;
2078 }
2079
2080 *pFence = radv_fence_to_handle(fence);
2081
2082 return VK_SUCCESS;
2083 }
2084
2085 void radv_DestroyFence(
2086 VkDevice _device,
2087 VkFence _fence,
2088 const VkAllocationCallbacks* pAllocator)
2089 {
2090 RADV_FROM_HANDLE(radv_device, device, _device);
2091 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2092
2093 if (!fence)
2094 return;
2095 device->ws->destroy_fence(fence->fence);
2096 vk_free2(&device->alloc, pAllocator, fence);
2097 }
2098
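/* Convert a relative timeout into an absolute CLOCK_MONOTONIC time,
 * clamping so that the addition cannot overflow UINT64_MAX.
 */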
2099 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2100 {
2101 uint64_t current_time;
2102 struct timespec tv;
2103
2104 clock_gettime(CLOCK_MONOTONIC, &tv);
2105 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2106
2107 timeout = MIN2(UINT64_MAX - current_time, timeout);
2108
2109 return current_time + timeout;
2110 }
2111
2112 VkResult radv_WaitForFences(
2113 VkDevice _device,
2114 uint32_t fenceCount,
2115 const VkFence* pFences,
2116 VkBool32 waitAll,
2117 uint64_t timeout)
2118 {
2119 RADV_FROM_HANDLE(radv_device, device, _device);
2120 timeout = radv_get_absolute_timeout(timeout);
2121
2122 if (!waitAll && fenceCount > 1) {
2123 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2124 }
2125
2126 for (uint32_t i = 0; i < fenceCount; ++i) {
2127 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2128 bool expired = false;
2129
2130 if (fence->signalled)
2131 continue;
2132
2133 if (!fence->submitted)
2134 return VK_TIMEOUT;
2135
2136 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2137 if (!expired)
2138 return VK_TIMEOUT;
2139
2140 fence->signalled = true;
2141 }
2142
2143 return VK_SUCCESS;
2144 }
2145
2146 VkResult radv_ResetFences(VkDevice device,
2147 uint32_t fenceCount,
2148 const VkFence *pFences)
2149 {
2150 for (unsigned i = 0; i < fenceCount; ++i) {
2151 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2152 fence->submitted = fence->signalled = false;
2153 }
2154
2155 return VK_SUCCESS;
2156 }
2157
2158 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2159 {
2160 RADV_FROM_HANDLE(radv_device, device, _device);
2161 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2162
2163 if (fence->signalled)
2164 return VK_SUCCESS;
2165 if (!fence->submitted)
2166 return VK_NOT_READY;
2167
2168 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2169 return VK_NOT_READY;
2170
2171 return VK_SUCCESS;
2172 }
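
/* Illustrative sketch only (not driver code): busy-polling a fence with
 * radv_GetFenceStatus(); example_poll_fence is a hypothetical caller and
 * sched_yield() stands in for whatever back-off a real one would use.
 */
#if 0
#include <sched.h>

static void example_poll_fence(VkDevice dev, VkFence fence)
{
	while (radv_GetFenceStatus(dev, fence) == VK_NOT_READY)
		sched_yield(); /* avoid spinning at full speed */
}
#endif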
2173
2174
2175 // Queue semaphore functions
2176
2177 VkResult radv_CreateSemaphore(
2178 VkDevice _device,
2179 const VkSemaphoreCreateInfo* pCreateInfo,
2180 const VkAllocationCallbacks* pAllocator,
2181 VkSemaphore* pSemaphore)
2182 {
2183 RADV_FROM_HANDLE(radv_device, device, _device);
2184 struct radeon_winsys_sem *sem;
2185
2186 sem = device->ws->create_sem(device->ws);
2187 if (!sem)
2188 return VK_ERROR_OUT_OF_HOST_MEMORY;
2189
2190 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2191 return VK_SUCCESS;
2192 }
2193
2194 void radv_DestroySemaphore(
2195 VkDevice _device,
2196 VkSemaphore _semaphore,
2197 const VkAllocationCallbacks* pAllocator)
2198 {
2199 RADV_FROM_HANDLE(radv_device, device, _device);
2200 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2201 if (!_semaphore)
2202 return;
2203
2204 device->ws->destroy_sem(sem);
2205 }
2206
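/* Events are implemented as a small GTT buffer that stays mapped for the
 * lifetime of the event: 1 in the mapped word means "set", 0 means "reset",
 * and the GPU can write the same location from a command buffer as well
 * (e.g. for vkCmdSetEvent).
 */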
2207 VkResult radv_CreateEvent(
2208 VkDevice _device,
2209 const VkEventCreateInfo* pCreateInfo,
2210 const VkAllocationCallbacks* pAllocator,
2211 VkEvent* pEvent)
2212 {
2213 RADV_FROM_HANDLE(radv_device, device, _device);
2214 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2215 sizeof(*event), 8,
2216 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2217
2218 if (!event)
2219 return VK_ERROR_OUT_OF_HOST_MEMORY;
2220
2221 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2222 RADEON_DOMAIN_GTT,
2223 RADEON_FLAG_CPU_ACCESS);
2224 if (!event->bo) {
2225 vk_free2(&device->alloc, pAllocator, event);
2226 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2227 }
2228
2229 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2230
2231 *pEvent = radv_event_to_handle(event);
2232
2233 return VK_SUCCESS;
2234 }
2235
2236 void radv_DestroyEvent(
2237 VkDevice _device,
2238 VkEvent _event,
2239 const VkAllocationCallbacks* pAllocator)
2240 {
2241 RADV_FROM_HANDLE(radv_device, device, _device);
2242 RADV_FROM_HANDLE(radv_event, event, _event);
2243
2244 if (!event)
2245 return;
2246 device->ws->buffer_destroy(event->bo);
2247 vk_free2(&device->alloc, pAllocator, event);
2248 }
2249
2250 VkResult radv_GetEventStatus(
2251 VkDevice _device,
2252 VkEvent _event)
2253 {
2254 RADV_FROM_HANDLE(radv_event, event, _event);
2255
2256 if (*event->map == 1)
2257 return VK_EVENT_SET;
2258 return VK_EVENT_RESET;
2259 }
2260
2261 VkResult radv_SetEvent(
2262 VkDevice _device,
2263 VkEvent _event)
2264 {
2265 RADV_FROM_HANDLE(radv_event, event, _event);
2266 *event->map = 1;
2267
2268 return VK_SUCCESS;
2269 }
2270
2271 VkResult radv_ResetEvent(
2272 VkDevice _device,
2273 VkEvent _event)
2274 {
2275 RADV_FROM_HANDLE(radv_event, event, _event);
2276 *event->map = 0;
2277
2278 return VK_SUCCESS;
2279 }
2280
2281 VkResult radv_CreateBuffer(
2282 VkDevice _device,
2283 const VkBufferCreateInfo* pCreateInfo,
2284 const VkAllocationCallbacks* pAllocator,
2285 VkBuffer* pBuffer)
2286 {
2287 RADV_FROM_HANDLE(radv_device, device, _device);
2288 struct radv_buffer *buffer;
2289
2290 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2291
2292 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2293 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2294 if (buffer == NULL)
2295 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2296
2297 buffer->size = pCreateInfo->size;
2298 buffer->usage = pCreateInfo->usage;
2299 buffer->bo = NULL;
2300 buffer->offset = 0;
2301 buffer->flags = pCreateInfo->flags;
2302
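	/* Sparse buffers get a virtual-only allocation up front; actual
	 * pages are attached later through radv_QueueBindSparse().
	 */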
2303 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2304 buffer->bo = device->ws->buffer_create(device->ws,
2305 align64(buffer->size, 4096),
2306 4096, 0, RADEON_FLAG_VIRTUAL);
2307 if (!buffer->bo) {
2308 vk_free2(&device->alloc, pAllocator, buffer);
2309 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2310 }
2311 }
2312
2313 *pBuffer = radv_buffer_to_handle(buffer);
2314
2315 return VK_SUCCESS;
2316 }
2317
2318 void radv_DestroyBuffer(
2319 VkDevice _device,
2320 VkBuffer _buffer,
2321 const VkAllocationCallbacks* pAllocator)
2322 {
2323 RADV_FROM_HANDLE(radv_device, device, _device);
2324 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2325
2326 if (!buffer)
2327 return;
2328
2329 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2330 device->ws->buffer_destroy(buffer->bo);
2331
2332 vk_free2(&device->alloc, pAllocator, buffer);
2333 }
2334
2335 static inline unsigned
2336 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2337 {
2338 if (stencil)
2339 return image->surface.stencil_tiling_index[level];
2340 else
2341 return image->surface.tiling_index[level];
2342 }
2343
2344 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2345 {
2346 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2347 }
2348
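/* Fill the CB_COLOR* register values for a color attachment: base, CMASK,
 * FMASK and DCC addresses, tiling, format, swap and blend-related fields.
 */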
2349 static void
2350 radv_initialise_color_surface(struct radv_device *device,
2351 struct radv_color_buffer_info *cb,
2352 struct radv_image_view *iview)
2353 {
2354 const struct vk_format_description *desc;
2355 unsigned ntype, format, swap, endian;
2356 unsigned blend_clamp = 0, blend_bypass = 0;
2357 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2358 uint64_t va;
2359 const struct radeon_surf *surf = &iview->image->surface;
2360 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2361
2362 desc = vk_format_description(iview->vk_format);
2363
2364 memset(cb, 0, sizeof(*cb));
2365
2366 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2367 va += level_info->offset;
2368 cb->cb_color_base = va >> 8;
2369
2370 /* CMASK variables */
2371 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2372 va += iview->image->cmask.offset;
2373 cb->cb_color_cmask = va >> 8;
2374 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2375
2376 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2377 va += iview->image->dcc_offset;
2378 cb->cb_dcc_base = va >> 8;
2379
2380 uint32_t max_slice = radv_surface_layer_count(iview);
2381 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2382 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2383
2384 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2385 pitch_tile_max = level_info->nblk_x / 8 - 1;
2386 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2387 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2388
2389 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2390 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2391
2392 /* Intensity is implemented as Red, so treat it that way. */
2393 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2394 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2395
2396 if (iview->image->samples > 1) {
2397 unsigned log_samples = util_logbase2(iview->image->samples);
2398
2399 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2400 S_028C74_NUM_FRAGMENTS(log_samples);
2401 }
2402
2403 if (iview->image->fmask.size) {
2404 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2405 if (device->physical_device->rad_info.chip_class >= CIK)
2406 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2407 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2408 cb->cb_color_fmask = va >> 8;
2409 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2410 } else {
2411 /* This must be set for fast clear to work without FMASK. */
2412 if (device->physical_device->rad_info.chip_class >= CIK)
2413 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2414 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2415 cb->cb_color_fmask = cb->cb_color_base;
2416 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2417 }
2418
2419 ntype = radv_translate_color_numformat(iview->vk_format,
2420 desc,
2421 vk_format_get_first_non_void_channel(iview->vk_format));
2422 format = radv_translate_colorformat(iview->vk_format);
2423 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2424 radv_finishme("Illegal color\n");
2425 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2426 endian = radv_colorformat_endian_swap(format);
2427
2428 /* blend clamp should be set for all NORM/SRGB types */
2429 if (ntype == V_028C70_NUMBER_UNORM ||
2430 ntype == V_028C70_NUMBER_SNORM ||
2431 ntype == V_028C70_NUMBER_SRGB)
2432 blend_clamp = 1;
2433
2434 	/* Set blend bypass according to the docs for SINT/UINT and the
2435 	 * 8/24 COLOR variants. */
2436 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2437 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2438 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2439 blend_clamp = 0;
2440 blend_bypass = 1;
2441 }
2442 #if 0
2443 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2444 (format == V_028C70_COLOR_8 ||
2445 format == V_028C70_COLOR_8_8 ||
2446 format == V_028C70_COLOR_8_8_8_8))
2447 ->color_is_int8 = true;
2448 #endif
2449 cb->cb_color_info = S_028C70_FORMAT(format) |
2450 S_028C70_COMP_SWAP(swap) |
2451 S_028C70_BLEND_CLAMP(blend_clamp) |
2452 S_028C70_BLEND_BYPASS(blend_bypass) |
2453 S_028C70_SIMPLE_FLOAT(1) |
2454 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2455 ntype != V_028C70_NUMBER_SNORM &&
2456 ntype != V_028C70_NUMBER_SRGB &&
2457 format != V_028C70_COLOR_8_24 &&
2458 format != V_028C70_COLOR_24_8) |
2459 S_028C70_NUMBER_TYPE(ntype) |
2460 S_028C70_ENDIAN(endian);
2461 	if (iview->image->samples > 1 && iview->image->fmask.size)
2462 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
2464
2465 if (iview->image->cmask.size &&
2466 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2467 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2468
2469 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2470 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2471
2472 if (device->physical_device->rad_info.chip_class >= VI) {
2473 unsigned max_uncompressed_block_size = 2;
2474 if (iview->image->samples > 1) {
2475 if (iview->image->surface.bpe == 1)
2476 max_uncompressed_block_size = 0;
2477 else if (iview->image->surface.bpe == 2)
2478 max_uncompressed_block_size = 1;
2479 }
2480
2481 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2482 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2483 }
2484
2485 /* This must be set for fast clear to work without FMASK. */
2486 if (!iview->image->fmask.size &&
2487 device->physical_device->rad_info.chip_class == SI) {
2488 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2489 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2490 }
2491 }
2492
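/* Fill the DB_* register values for a depth/stencil attachment, including
 * the per-generation tiling fields (SI vs. CIK+) and the HTILE setup for
 * the base mip level.
 */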
2493 static void
2494 radv_initialise_ds_surface(struct radv_device *device,
2495 struct radv_ds_buffer_info *ds,
2496 struct radv_image_view *iview)
2497 {
2498 unsigned level = iview->base_mip;
2499 unsigned format;
2500 uint64_t va, s_offs, z_offs;
2501 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2502 memset(ds, 0, sizeof(*ds));
2503 switch (iview->vk_format) {
2504 case VK_FORMAT_D24_UNORM_S8_UINT:
2505 case VK_FORMAT_X8_D24_UNORM_PACK32:
2506 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2507 ds->offset_scale = 2.0f;
2508 break;
2509 case VK_FORMAT_D16_UNORM:
2510 case VK_FORMAT_D16_UNORM_S8_UINT:
2511 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2512 ds->offset_scale = 4.0f;
2513 break;
2514 case VK_FORMAT_D32_SFLOAT:
2515 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2516 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2517 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2518 ds->offset_scale = 1.0f;
2519 break;
2520 default:
2521 break;
2522 }
2523
2524 format = radv_translate_dbformat(iview->vk_format);
2525
2526 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2527 s_offs = z_offs = va;
2528 z_offs += iview->image->surface.level[level].offset;
2529 s_offs += iview->image->surface.stencil_level[level].offset;
2530
2531 uint32_t max_slice = radv_surface_layer_count(iview);
2532 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2533 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2534 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2535 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2536
2537 if (iview->image->samples > 1)
2538 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2539
2540 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2541 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2542 else
2543 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2544
2545 if (device->physical_device->rad_info.chip_class >= CIK) {
2546 struct radeon_info *info = &device->physical_device->rad_info;
2547 unsigned tiling_index = iview->image->surface.tiling_index[level];
2548 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2549 unsigned macro_index = iview->image->surface.macro_tile_index;
2550 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2551 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2552 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2553
2554 ds->db_depth_info |=
2555 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2556 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2557 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2558 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2559 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2560 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2561 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2562 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2563 } else {
2564 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2565 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2566 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2567 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2568 }
2569
2570 if (iview->image->surface.htile_size && !level) {
2571 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2572 S_028040_ALLOW_EXPCLEAR(1);
2573
2574 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2575 			/* Workaround: For reasons not yet understood, the
2576 			 * combination of MSAA, fast stencil clear and stencil
2577 			 * decompress messes with subsequent stencil buffer
2578 			 * uses. The problem was reproduced on Verde, Bonaire,
2579 			 * Tonga, and Carrizo.
2580 *
2581 * Disabling EXPCLEAR works around the problem.
2582 *
2583 * Check piglit's arb_texture_multisample-stencil-clear
2584 * test if you want to try changing this.
2585 */
2586 if (iview->image->samples <= 1)
2587 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2588 } else
2589 /* Use all of the htile_buffer for depth if there's no stencil. */
2590 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2591
2592 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2593 iview->image->htile_offset;
2594 ds->db_htile_data_base = va >> 8;
2595 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2596 } else {
2597 ds->db_htile_data_base = 0;
2598 ds->db_htile_surface = 0;
2599 }
2600
2601 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2602 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2603
2604 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2605 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2606 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2607 }
2608
2609 VkResult radv_CreateFramebuffer(
2610 VkDevice _device,
2611 const VkFramebufferCreateInfo* pCreateInfo,
2612 const VkAllocationCallbacks* pAllocator,
2613 VkFramebuffer* pFramebuffer)
2614 {
2615 RADV_FROM_HANDLE(radv_device, device, _device);
2616 struct radv_framebuffer *framebuffer;
2617
2618 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2619
2620 size_t size = sizeof(*framebuffer) +
2621 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2622 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2623 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2624 if (framebuffer == NULL)
2625 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2626
2627 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2628 framebuffer->width = pCreateInfo->width;
2629 framebuffer->height = pCreateInfo->height;
2630 framebuffer->layers = pCreateInfo->layers;
2631 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2632 VkImageView _iview = pCreateInfo->pAttachments[i];
2633 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2634 framebuffer->attachments[i].attachment = iview;
2635 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2636 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2637 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2638 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2639 }
2640 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2641 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2642 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2643 }
2644
2645 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2646 return VK_SUCCESS;
2647 }
2648
2649 void radv_DestroyFramebuffer(
2650 VkDevice _device,
2651 VkFramebuffer _fb,
2652 const VkAllocationCallbacks* pAllocator)
2653 {
2654 RADV_FROM_HANDLE(radv_device, device, _device);
2655 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2656
2657 if (!fb)
2658 return;
2659 vk_free2(&device->alloc, pAllocator, fb);
2660 }
2661
2662 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2663 {
2664 switch (address_mode) {
2665 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2666 return V_008F30_SQ_TEX_WRAP;
2667 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2668 return V_008F30_SQ_TEX_MIRROR;
2669 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2670 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2671 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2672 return V_008F30_SQ_TEX_CLAMP_BORDER;
2673 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2674 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2675 default:
2676 unreachable("illegal tex wrap mode");
2677 break;
2678 }
2679 }
2680
2681 static unsigned
2682 radv_tex_compare(VkCompareOp op)
2683 {
2684 switch (op) {
2685 case VK_COMPARE_OP_NEVER:
2686 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2687 case VK_COMPARE_OP_LESS:
2688 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2689 case VK_COMPARE_OP_EQUAL:
2690 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2691 case VK_COMPARE_OP_LESS_OR_EQUAL:
2692 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2693 case VK_COMPARE_OP_GREATER:
2694 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2695 case VK_COMPARE_OP_NOT_EQUAL:
2696 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2697 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2698 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2699 case VK_COMPARE_OP_ALWAYS:
2700 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2701 default:
2702 unreachable("illegal compare mode");
2703 break;
2704 }
2705 }
2706
2707 static unsigned
2708 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2709 {
2710 	switch (filter) {
2711 	case VK_FILTER_NEAREST:
2712 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2713 			V_008F38_SQ_TEX_XY_FILTER_POINT);
2714 	case VK_FILTER_LINEAR:
2715 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2716 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2717 	case VK_FILTER_CUBIC_IMG:
2718 	default:
2719 		fprintf(stderr, "illegal texture filter\n");
2720 return 0;
2721 }
2722 }
2723
2724 static unsigned
2725 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2726 {
2727 switch (mode) {
2728 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2729 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2730 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2731 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2732 default:
2733 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2734 }
2735 }
2736
2737 static unsigned
2738 radv_tex_bordercolor(VkBorderColor bcolor)
2739 {
2740 switch (bcolor) {
2741 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2742 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2743 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2744 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2745 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2746 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2747 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2748 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2749 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2750 default:
2751 break;
2752 }
2753 return 0;
2754 }
2755
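/* Map a maxAnisotropy value to the hardware ratio field, i.e. roughly the
 * log2 of the ratio: 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4.
 */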
2756 static unsigned
2757 radv_tex_aniso_filter(unsigned filter)
2758 {
2759 if (filter < 2)
2760 return 0;
2761 if (filter < 4)
2762 return 1;
2763 if (filter < 8)
2764 return 2;
2765 if (filter < 16)
2766 return 3;
2767 return 4;
2768 }
2769
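/* Pack the Vulkan sampler state into the four hardware sampler dwords
 * (the S_008F30..S_008F3C fields consumed directly by image sample
 * instructions).
 */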
2770 static void
2771 radv_init_sampler(struct radv_device *device,
2772 struct radv_sampler *sampler,
2773 const VkSamplerCreateInfo *pCreateInfo)
2774 {
2775 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2776 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2777 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2778 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2779
2780 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2781 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2782 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2783 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2784 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2785 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2786 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2787 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2788 S_008F30_DISABLE_CUBE_WRAP(0) |
2789 S_008F30_COMPAT_MODE(is_vi));
2790 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2791 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2792 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2793 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2794 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2795 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2796 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2797 S_008F38_MIP_POINT_PRECLAMP(0) |
2798 S_008F38_DISABLE_LSB_CEIL(1) |
2799 S_008F38_FILTER_PREC_FIX(1) |
2800 S_008F38_ANISO_OVERRIDE(is_vi));
2801 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2802 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2803 }
2804
2805 VkResult radv_CreateSampler(
2806 VkDevice _device,
2807 const VkSamplerCreateInfo* pCreateInfo,
2808 const VkAllocationCallbacks* pAllocator,
2809 VkSampler* pSampler)
2810 {
2811 RADV_FROM_HANDLE(radv_device, device, _device);
2812 struct radv_sampler *sampler;
2813
2814 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2815
2816 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2817 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2818 if (!sampler)
2819 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2820
2821 radv_init_sampler(device, sampler, pCreateInfo);
2822 *pSampler = radv_sampler_to_handle(sampler);
2823
2824 return VK_SUCCESS;
2825 }
2826
2827 void radv_DestroySampler(
2828 VkDevice _device,
2829 VkSampler _sampler,
2830 const VkAllocationCallbacks* pAllocator)
2831 {
2832 RADV_FROM_HANDLE(radv_device, device, _device);
2833 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2834
2835 if (!sampler)
2836 return;
2837 vk_free2(&device->alloc, pAllocator, sampler);
2838 }
2839
2840
2841 /* vk_icd.h does not declare this function, so we declare it here to
2842 * suppress Wmissing-prototypes.
2843 */
2844 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2845 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2846
2847 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2848 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2849 {
2850 /* For the full details on loader interface versioning, see
2851 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2852 * What follows is a condensed summary, to help you navigate the large and
2853 * confusing official doc.
2854 *
2855 * - Loader interface v0 is incompatible with later versions. We don't
2856 * support it.
2857 *
2858 * - In loader interface v1:
2859 * - The first ICD entrypoint called by the loader is
2860 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2861 * entrypoint.
2862 * - The ICD must statically expose no other Vulkan symbol unless it is
2863 * linked with -Bsymbolic.
2864 * - Each dispatchable Vulkan handle created by the ICD must be
2865 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2866 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2867 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2868 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2869 * such loader-managed surfaces.
2870 *
2871 * - Loader interface v2 differs from v1 in:
2872 * - The first ICD entrypoint called by the loader is
2873 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2874 * statically expose this entrypoint.
2875 *
2876 * - Loader interface v3 differs from v2 in:
2877 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2878 	 *      vkDestroySurfaceKHR(), and other API that uses VkSurfaceKHR,
2879 * because the loader no longer does so.
2880 */
2881 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2882 return VK_SUCCESS;
2883 }