radv: Enable sparseBinding feature.
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46
47 static int
48 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
49 {
50 uint32_t mesa_timestamp, llvm_timestamp;
51 uint16_t f = family;
52 memset(uuid, 0, VK_UUID_SIZE);
53 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
54 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
55 return -1;
56
57 memcpy(uuid, &mesa_timestamp, 4);
58 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
59 memcpy((char*)uuid + 8, &f, 2);
60 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
61 return 0;
62 }
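/* Resulting pipeline-cache UUID layout, as built above:
 *   bytes 0-3   Mesa build timestamp
 *   bytes 4-7   LLVM AMDGPU target build timestamp
 *   bytes 8-9   radeon_family value
 *   bytes 10+   the literal string "radv" (remaining bytes stay zero from the memset)
 */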
63
64 static const VkExtensionProperties instance_extensions[] = {
65 {
66 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
67 .specVersion = 25,
68 },
69 #ifdef VK_USE_PLATFORM_XCB_KHR
70 {
71 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
72 .specVersion = 6,
73 },
74 #endif
75 #ifdef VK_USE_PLATFORM_XLIB_KHR
76 {
77 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
78 .specVersion = 6,
79 },
80 #endif
81 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
82 {
83 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
84 .specVersion = 5,
85 },
86 #endif
87 {
88 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
89 .specVersion = 1,
90 },
91 };
92
93 static const VkExtensionProperties common_device_extensions[] = {
94 {
95 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
96 .specVersion = 1,
97 },
98 {
99 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
100 .specVersion = 1,
101 },
102 {
103 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
104 .specVersion = 68,
105 },
106 {
107 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
112 .specVersion = 1,
113 },
114 {
115 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
116 .specVersion = 1,
117 },
118 };
119
120 static VkResult
121 radv_extensions_register(struct radv_instance *instance,
122 struct radv_extensions *extensions,
123 const VkExtensionProperties *new_ext,
124 uint32_t num_ext)
125 {
126 size_t new_size;
127 VkExtensionProperties *new_ptr;
128
129 assert(new_ext && num_ext > 0);
130
131 if (!new_ext)
132 return VK_ERROR_INITIALIZATION_FAILED;
133
134 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
135 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
136 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
137
138 /* Old array continues to be valid, update nothing */
139 if (!new_ptr)
140 return VK_ERROR_OUT_OF_HOST_MEMORY;
141
142 memcpy(&new_ptr[extensions->num_ext], new_ext,
143 num_ext * sizeof(VkExtensionProperties));
144 extensions->ext_array = new_ptr;
145 extensions->num_ext += num_ext;
146
147 return VK_SUCCESS;
148 }
149
150 static void
151 radv_extensions_finish(struct radv_instance *instance,
152 struct radv_extensions *extensions)
153 {
154 assert(extensions);
155
156 if (!extensions)
157 radv_loge("Attempted to free invalid extension struct\n");
158
159 if (extensions->ext_array)
160 vk_free(&instance->alloc, extensions->ext_array);
161 }
162
163 static bool
164 is_extension_enabled(const VkExtensionProperties *extensions,
165 size_t num_ext,
166 const char *name)
167 {
168 assert(extensions && name);
169
170 for (uint32_t i = 0; i < num_ext; i++) {
171 if (strcmp(name, extensions[i].extensionName) == 0)
172 return true;
173 }
174
175 return false;
176 }
177
178 static VkResult
179 radv_physical_device_init(struct radv_physical_device *device,
180 struct radv_instance *instance,
181 const char *path)
182 {
183 VkResult result;
184 drmVersionPtr version;
185 int fd;
186
187 fd = open(path, O_RDWR | O_CLOEXEC);
188 if (fd < 0)
189 return VK_ERROR_INCOMPATIBLE_DRIVER;
190
191 version = drmGetVersion(fd);
192 if (!version) {
193 close(fd);
194 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
195 "failed to get version %s: %m", path);
196 }
197
198 if (strcmp(version->name, "amdgpu")) {
199 drmFreeVersion(version);
200 close(fd);
201 return VK_ERROR_INCOMPATIBLE_DRIVER;
202 }
203 drmFreeVersion(version);
204
205 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
206 device->instance = instance;
207 assert(strlen(path) < ARRAY_SIZE(device->path));
208 strncpy(device->path, path, ARRAY_SIZE(device->path));
209
210 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
211 if (!device->ws) {
212 result = VK_ERROR_INCOMPATIBLE_DRIVER;
213 goto fail;
214 }
215
216 device->local_fd = fd;
217 device->ws->query_info(device->ws, &device->rad_info);
218 result = radv_init_wsi(device);
219 if (result != VK_SUCCESS) {
220 device->ws->destroy(device->ws);
221 goto fail;
222 }
223
224 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
225 radv_finish_wsi(device);
226 device->ws->destroy(device->ws);
227 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
228 "cannot generate UUID");
229 goto fail;
230 }
231
232 result = radv_extensions_register(instance,
233 &device->extensions,
234 common_device_extensions,
235 ARRAY_SIZE(common_device_extensions));
236 if (result != VK_SUCCESS)
237 goto fail;
238
239 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
240 device->name = device->rad_info.name;
241
242 return VK_SUCCESS;
243
244 fail:
245 close(fd);
246 return result;
247 }
248
249 static void
250 radv_physical_device_finish(struct radv_physical_device *device)
251 {
252 radv_extensions_finish(device->instance, &device->extensions);
253 radv_finish_wsi(device);
254 device->ws->destroy(device->ws);
255 close(device->local_fd);
256 }
257
258
259 static void *
260 default_alloc_func(void *pUserData, size_t size, size_t align,
261 VkSystemAllocationScope allocationScope)
262 {
263 return malloc(size);
264 }
265
266 static void *
267 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
268 size_t align, VkSystemAllocationScope allocationScope)
269 {
270 return realloc(pOriginal, size);
271 }
272
273 static void
274 default_free_func(void *pUserData, void *pMemory)
275 {
276 free(pMemory);
277 }
278
279 static const VkAllocationCallbacks default_alloc = {
280 .pUserData = NULL,
281 .pfnAllocation = default_alloc_func,
282 .pfnReallocation = default_realloc_func,
283 .pfnFree = default_free_func,
284 };
285
286 static const struct debug_control radv_debug_options[] = {
287 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
288 {"nodcc", RADV_DEBUG_NO_DCC},
289 {"shaders", RADV_DEBUG_DUMP_SHADERS},
290 {"nocache", RADV_DEBUG_NO_CACHE},
291 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
292 {"nohiz", RADV_DEBUG_NO_HIZ},
293 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
294 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
295 {"allbos", RADV_DEBUG_ALL_BOS},
296 {"noibs", RADV_DEBUG_NO_IBS},
297 {NULL, 0}
298 };
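/* These flags are matched against the RADV_DEBUG environment variable,
 * which is parsed in radv_CreateInstance() below. */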
299
300 VkResult radv_CreateInstance(
301 const VkInstanceCreateInfo* pCreateInfo,
302 const VkAllocationCallbacks* pAllocator,
303 VkInstance* pInstance)
304 {
305 struct radv_instance *instance;
306
307 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
308
309 uint32_t client_version;
310 if (pCreateInfo->pApplicationInfo &&
311 pCreateInfo->pApplicationInfo->apiVersion != 0) {
312 client_version = pCreateInfo->pApplicationInfo->apiVersion;
313 } else {
314 client_version = VK_MAKE_VERSION(1, 0, 0);
315 }
316
317 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
318 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
319 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
320 "Client requested version %d.%d.%d",
321 VK_VERSION_MAJOR(client_version),
322 VK_VERSION_MINOR(client_version),
323 VK_VERSION_PATCH(client_version));
324 }
325
326 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
327 if (!is_extension_enabled(instance_extensions,
328 ARRAY_SIZE(instance_extensions),
329 pCreateInfo->ppEnabledExtensionNames[i]))
330 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
331 }
332
333 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
334 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
335 if (!instance)
336 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
337
338 memset(instance, 0, sizeof(*instance));
339
340 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
341
342 if (pAllocator)
343 instance->alloc = *pAllocator;
344 else
345 instance->alloc = default_alloc;
346
347 instance->apiVersion = client_version;
348 instance->physicalDeviceCount = -1;
349
350 _mesa_locale_init();
351
352 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
353
354 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
355 radv_debug_options);
356
357 *pInstance = radv_instance_to_handle(instance);
358
359 return VK_SUCCESS;
360 }
361
362 void radv_DestroyInstance(
363 VkInstance _instance,
364 const VkAllocationCallbacks* pAllocator)
365 {
366 RADV_FROM_HANDLE(radv_instance, instance, _instance);
367
368 if (!instance)
369 return;
370
371 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
372 radv_physical_device_finish(instance->physicalDevices + i);
373 }
374
375 VG(VALGRIND_DESTROY_MEMPOOL(instance));
376
377 _mesa_locale_fini();
378
379 vk_free(&instance->alloc, instance);
380 }
381
382 static VkResult
383 radv_enumerate_devices(struct radv_instance *instance)
384 {
385 /* TODO: Check for more devices ? */
386 drmDevicePtr devices[8];
387 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
388 int max_devices;
389
390 instance->physicalDeviceCount = 0;
391
392 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
393 if (max_devices < 1)
394 return VK_ERROR_INCOMPATIBLE_DRIVER;
395
396 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
397 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
398 devices[i]->bustype == DRM_BUS_PCI &&
399 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
400
401 result = radv_physical_device_init(instance->physicalDevices +
402 instance->physicalDeviceCount,
403 instance,
404 devices[i]->nodes[DRM_NODE_RENDER]);
405 if (result == VK_SUCCESS)
406 ++instance->physicalDeviceCount;
407 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
408 return result;
409 }
410 }
411 return result;
412 }
413
414 VkResult radv_EnumeratePhysicalDevices(
415 VkInstance _instance,
416 uint32_t* pPhysicalDeviceCount,
417 VkPhysicalDevice* pPhysicalDevices)
418 {
419 RADV_FROM_HANDLE(radv_instance, instance, _instance);
420 VkResult result;
421
422 if (instance->physicalDeviceCount < 0) {
423 result = radv_enumerate_devices(instance);
424 if (result != VK_SUCCESS &&
425 result != VK_ERROR_INCOMPATIBLE_DRIVER)
426 return result;
427 }
428
429 if (!pPhysicalDevices) {
430 *pPhysicalDeviceCount = instance->physicalDeviceCount;
431 } else {
432 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
433 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
434 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
435 }
436
437 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
438 : VK_SUCCESS;
439 }
440
441 void radv_GetPhysicalDeviceFeatures(
442 VkPhysicalDevice physicalDevice,
443 VkPhysicalDeviceFeatures* pFeatures)
444 {
445 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
446
447 memset(pFeatures, 0, sizeof(*pFeatures));
448
449 *pFeatures = (VkPhysicalDeviceFeatures) {
450 .robustBufferAccess = true,
451 .fullDrawIndexUint32 = true,
452 .imageCubeArray = true,
453 .independentBlend = true,
454 .geometryShader = true,
455 .tessellationShader = false,
456 .sampleRateShading = false,
457 .dualSrcBlend = true,
458 .logicOp = true,
459 .multiDrawIndirect = true,
460 .drawIndirectFirstInstance = true,
461 .depthClamp = true,
462 .depthBiasClamp = true,
463 .fillModeNonSolid = true,
464 .depthBounds = true,
465 .wideLines = true,
466 .largePoints = true,
467 .alphaToOne = true,
468 .multiViewport = true,
469 .samplerAnisotropy = true,
470 .textureCompressionETC2 = false,
471 .textureCompressionASTC_LDR = false,
472 .textureCompressionBC = true,
473 .occlusionQueryPrecise = true,
474 .pipelineStatisticsQuery = false,
475 .vertexPipelineStoresAndAtomics = true,
476 .fragmentStoresAndAtomics = true,
477 .shaderTessellationAndGeometryPointSize = true,
478 .shaderImageGatherExtended = true,
479 .shaderStorageImageExtendedFormats = true,
480 .shaderStorageImageMultisample = false,
481 .shaderUniformBufferArrayDynamicIndexing = true,
482 .shaderSampledImageArrayDynamicIndexing = true,
483 .shaderStorageBufferArrayDynamicIndexing = true,
484 .shaderStorageImageArrayDynamicIndexing = true,
485 .shaderStorageImageReadWithoutFormat = true,
486 .shaderStorageImageWriteWithoutFormat = true,
487 .shaderClipDistance = true,
488 .shaderCullDistance = true,
489 .shaderFloat64 = true,
490 .shaderInt64 = false,
491 .shaderInt16 = false,
492 .sparseBinding = true,
493 .variableMultisampleRate = false,
494 .inheritedQueries = false,
495 };
496 }
497
498 void radv_GetPhysicalDeviceFeatures2KHR(
499 VkPhysicalDevice physicalDevice,
500 VkPhysicalDeviceFeatures2KHR *pFeatures)
501 {
502 return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
503 }
504
505 static uint32_t radv_get_driver_version(void)
506 {
507 const char *minor_string = strchr(VERSION, '.');
508 const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
509 int major = atoi(VERSION);
510 int minor = minor_string ? atoi(minor_string + 1) : 0;
511 int patch = patch_string ? atoi(patch_string + 1) : 0;
512 if (strstr(VERSION, "devel")) {
513 if (patch == 0) {
514 patch = 99;
515 if (minor == 0) {
516 minor = 99;
517 --major;
518 } else
519 --minor;
520 } else
521 --patch;
522 }
523 uint32_t version = VK_MAKE_VERSION(major, minor, patch);
524 return version;
525 }
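/* Illustrative example (hypothetical VERSION string, not taken from this tree):
 *   VERSION = "17.1.0-devel" -> major 17, minor 1, patch 0;
 *   "devel" is present and patch == 0, so patch becomes 99 and minor drops to 0,
 *   giving VK_MAKE_VERSION(17, 0, 99).
 */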
526
527 void radv_GetPhysicalDeviceProperties(
528 VkPhysicalDevice physicalDevice,
529 VkPhysicalDeviceProperties* pProperties)
530 {
531 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
532 VkSampleCountFlags sample_counts = 0xf;
533 VkPhysicalDeviceLimits limits = {
534 .maxImageDimension1D = (1 << 14),
535 .maxImageDimension2D = (1 << 14),
536 .maxImageDimension3D = (1 << 11),
537 .maxImageDimensionCube = (1 << 14),
538 .maxImageArrayLayers = (1 << 11),
539 .maxTexelBufferElements = 128 * 1024 * 1024,
540 .maxUniformBufferRange = UINT32_MAX,
541 .maxStorageBufferRange = UINT32_MAX,
542 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
543 .maxMemoryAllocationCount = UINT32_MAX,
544 .maxSamplerAllocationCount = 64 * 1024,
545 .bufferImageGranularity = 64, /* A cache line */
546 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
547 .maxBoundDescriptorSets = MAX_SETS,
548 .maxPerStageDescriptorSamplers = 64,
549 .maxPerStageDescriptorUniformBuffers = 64,
550 .maxPerStageDescriptorStorageBuffers = 64,
551 .maxPerStageDescriptorSampledImages = 64,
552 .maxPerStageDescriptorStorageImages = 64,
553 .maxPerStageDescriptorInputAttachments = 64,
554 .maxPerStageResources = 128,
555 .maxDescriptorSetSamplers = 256,
556 .maxDescriptorSetUniformBuffers = 256,
557 .maxDescriptorSetUniformBuffersDynamic = 256,
558 .maxDescriptorSetStorageBuffers = 256,
559 .maxDescriptorSetStorageBuffersDynamic = 256,
560 .maxDescriptorSetSampledImages = 256,
561 .maxDescriptorSetStorageImages = 256,
562 .maxDescriptorSetInputAttachments = 256,
563 .maxVertexInputAttributes = 32,
564 .maxVertexInputBindings = 32,
565 .maxVertexInputAttributeOffset = 2047,
566 .maxVertexInputBindingStride = 2048,
567 .maxVertexOutputComponents = 128,
568 .maxTessellationGenerationLevel = 0,
569 .maxTessellationPatchSize = 0,
570 .maxTessellationControlPerVertexInputComponents = 0,
571 .maxTessellationControlPerVertexOutputComponents = 0,
572 .maxTessellationControlPerPatchOutputComponents = 0,
573 .maxTessellationControlTotalOutputComponents = 0,
574 .maxTessellationEvaluationInputComponents = 0,
575 .maxTessellationEvaluationOutputComponents = 0,
576 .maxGeometryShaderInvocations = 32,
577 .maxGeometryInputComponents = 64,
578 .maxGeometryOutputComponents = 128,
579 .maxGeometryOutputVertices = 256,
580 .maxGeometryTotalOutputComponents = 1024,
581 .maxFragmentInputComponents = 128,
582 .maxFragmentOutputAttachments = 8,
583 .maxFragmentDualSrcAttachments = 1,
584 .maxFragmentCombinedOutputResources = 8,
585 .maxComputeSharedMemorySize = 32768,
586 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
587 .maxComputeWorkGroupInvocations = 2048,
588 .maxComputeWorkGroupSize = {
589 2048,
590 2048,
591 2048
592 },
593 .subPixelPrecisionBits = 4 /* FIXME */,
594 .subTexelPrecisionBits = 4 /* FIXME */,
595 .mipmapPrecisionBits = 4 /* FIXME */,
596 .maxDrawIndexedIndexValue = UINT32_MAX,
597 .maxDrawIndirectCount = UINT32_MAX,
598 .maxSamplerLodBias = 16,
599 .maxSamplerAnisotropy = 16,
600 .maxViewports = MAX_VIEWPORTS,
601 .maxViewportDimensions = { (1 << 14), (1 << 14) },
602 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
603 .viewportSubPixelBits = 13, /* We take a float? */
604 .minMemoryMapAlignment = 4096, /* A page */
605 .minTexelBufferOffsetAlignment = 1,
606 .minUniformBufferOffsetAlignment = 4,
607 .minStorageBufferOffsetAlignment = 4,
608 .minTexelOffset = -32,
609 .maxTexelOffset = 31,
610 .minTexelGatherOffset = -32,
611 .maxTexelGatherOffset = 31,
612 .minInterpolationOffset = -2,
613 .maxInterpolationOffset = 2,
614 .subPixelInterpolationOffsetBits = 8,
615 .maxFramebufferWidth = (1 << 14),
616 .maxFramebufferHeight = (1 << 14),
617 .maxFramebufferLayers = (1 << 10),
618 .framebufferColorSampleCounts = sample_counts,
619 .framebufferDepthSampleCounts = sample_counts,
620 .framebufferStencilSampleCounts = sample_counts,
621 .framebufferNoAttachmentsSampleCounts = sample_counts,
622 .maxColorAttachments = MAX_RTS,
623 .sampledImageColorSampleCounts = sample_counts,
624 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
625 .sampledImageDepthSampleCounts = sample_counts,
626 .sampledImageStencilSampleCounts = sample_counts,
627 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
628 .maxSampleMaskWords = 1,
629 .timestampComputeAndGraphics = false,
630 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
631 .maxClipDistances = 8,
632 .maxCullDistances = 8,
633 .maxCombinedClipAndCullDistances = 8,
634 .discreteQueuePriorities = 1,
635 .pointSizeRange = { 0.125, 255.875 },
636 .lineWidthRange = { 0.0, 7.9921875 },
637 .pointSizeGranularity = (1.0 / 8.0),
638 .lineWidthGranularity = (1.0 / 128.0),
639 .strictLines = false, /* FINISHME */
640 .standardSampleLocations = true,
641 .optimalBufferCopyOffsetAlignment = 128,
642 .optimalBufferCopyRowPitchAlignment = 128,
643 .nonCoherentAtomSize = 64,
644 };
645
646 *pProperties = (VkPhysicalDeviceProperties) {
647 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
648 .driverVersion = radv_get_driver_version(),
649 .vendorID = 0x1002,
650 .deviceID = pdevice->rad_info.pci_id,
651 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
652 .limits = limits,
653 .sparseProperties = {0}, /* Only sparseBinding is exposed; no sparse residency properties to report. */
654 };
655
656 strcpy(pProperties->deviceName, pdevice->name);
657 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
658 }
659
660 void radv_GetPhysicalDeviceProperties2KHR(
661 VkPhysicalDevice physicalDevice,
662 VkPhysicalDeviceProperties2KHR *pProperties)
663 {
664 return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
665 }
666
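/* Standard Vulkan two-call idiom: when pQueueFamilyProperties is NULL only the
 * number of queue families is written to *pCount; otherwise at most *pCount
 * entries are filled in and *pCount is set to the number actually written. */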
667 static void radv_get_physical_device_queue_family_properties(
668 struct radv_physical_device* pdevice,
669 uint32_t* pCount,
670 VkQueueFamilyProperties** pQueueFamilyProperties)
671 {
672 int num_queue_families = 1;
673 int idx;
674 if (pdevice->rad_info.compute_rings > 0 &&
675 pdevice->rad_info.chip_class >= CIK &&
676 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
677 num_queue_families++;
678
679 if (pQueueFamilyProperties == NULL) {
680 *pCount = num_queue_families;
681 return;
682 }
683
684 if (!*pCount)
685 return;
686
687 idx = 0;
688 if (*pCount >= 1) {
689 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
690 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
691 VK_QUEUE_COMPUTE_BIT |
692 VK_QUEUE_TRANSFER_BIT |
693 VK_QUEUE_SPARSE_BINDING_BIT,
694 .queueCount = 1,
695 .timestampValidBits = 64,
696 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
697 };
698 idx++;
699 }
700
701 if (pdevice->rad_info.compute_rings > 0 &&
702 pdevice->rad_info.chip_class >= CIK &&
703 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
704 if (*pCount > idx) {
705 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
706 .queueFlags = VK_QUEUE_COMPUTE_BIT |
707 VK_QUEUE_TRANSFER_BIT |
708 VK_QUEUE_SPARSE_BINDING_BIT,
709 .queueCount = pdevice->rad_info.compute_rings,
710 .timestampValidBits = 64,
711 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
712 };
713 idx++;
714 }
715 }
716 *pCount = idx;
717 }
718
719 void radv_GetPhysicalDeviceQueueFamilyProperties(
720 VkPhysicalDevice physicalDevice,
721 uint32_t* pCount,
722 VkQueueFamilyProperties* pQueueFamilyProperties)
723 {
724 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
725 if (!pQueueFamilyProperties) {
726 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
727 return;
728 }
729 VkQueueFamilyProperties *properties[] = {
730 pQueueFamilyProperties + 0,
731 pQueueFamilyProperties + 1,
732 pQueueFamilyProperties + 2,
733 };
734 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
735 assert(*pCount <= 3);
736 }
737
738 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
739 VkPhysicalDevice physicalDevice,
740 uint32_t* pCount,
741 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
742 {
743 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
744 if (!pQueueFamilyProperties) {
745 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
746 return;
747 }
748 VkQueueFamilyProperties *properties[] = {
749 &pQueueFamilyProperties[0].queueFamilyProperties,
750 &pQueueFamilyProperties[1].queueFamilyProperties,
751 &pQueueFamilyProperties[2].queueFamilyProperties,
752 };
753 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
754 assert(*pCount <= 3);
755 }
756
757 void radv_GetPhysicalDeviceMemoryProperties(
758 VkPhysicalDevice physicalDevice,
759 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
760 {
761 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
762
763 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
764
765 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
766 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
767 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
768 .heapIndex = RADV_MEM_HEAP_VRAM,
769 };
770 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
771 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
772 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
773 .heapIndex = RADV_MEM_HEAP_GTT,
774 };
775 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
776 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
777 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
778 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
779 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
780 };
781 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
782 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
783 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
784 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
785 .heapIndex = RADV_MEM_HEAP_GTT,
786 };
787
788 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
789
790 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
791 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
792 .size = physical_device->rad_info.vram_size -
793 physical_device->rad_info.visible_vram_size,
794 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
795 };
796 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
797 .size = physical_device->rad_info.visible_vram_size,
798 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
799 };
800 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
801 .size = physical_device->rad_info.gart_size,
802 .flags = 0,
803 };
804 }
805
806 void radv_GetPhysicalDeviceMemoryProperties2KHR(
807 VkPhysicalDevice physicalDevice,
808 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
809 {
810 return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
811 &pMemoryProperties->memoryProperties);
812 }
813
814 static int
815 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
816 int queue_family_index, int idx)
817 {
818 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
819 queue->device = device;
820 queue->queue_family_index = queue_family_index;
821 queue->queue_idx = idx;
822
823 queue->hw_ctx = device->ws->ctx_create(device->ws);
824 if (!queue->hw_ctx)
825 return VK_ERROR_OUT_OF_HOST_MEMORY;
826
827 return VK_SUCCESS;
828 }
829
830 static void
831 radv_queue_finish(struct radv_queue *queue)
832 {
833 if (queue->hw_ctx)
834 queue->device->ws->ctx_destroy(queue->hw_ctx);
835
836 if (queue->initial_preamble_cs)
837 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
838 if (queue->continue_preamble_cs)
839 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
840 if (queue->descriptor_bo)
841 queue->device->ws->buffer_destroy(queue->descriptor_bo);
842 if (queue->scratch_bo)
843 queue->device->ws->buffer_destroy(queue->scratch_bo);
844 if (queue->esgs_ring_bo)
845 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
846 if (queue->gsvs_ring_bo)
847 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
848 if (queue->compute_scratch_bo)
849 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
850 }
851
852 static void
853 radv_device_init_gs_info(struct radv_device *device)
854 {
855 switch (device->physical_device->rad_info.family) {
856 case CHIP_OLAND:
857 case CHIP_HAINAN:
858 case CHIP_KAVERI:
859 case CHIP_KABINI:
860 case CHIP_MULLINS:
861 case CHIP_ICELAND:
862 case CHIP_CARRIZO:
863 case CHIP_STONEY:
864 device->gs_table_depth = 16;
865 return;
866 case CHIP_TAHITI:
867 case CHIP_PITCAIRN:
868 case CHIP_VERDE:
869 case CHIP_BONAIRE:
870 case CHIP_HAWAII:
871 case CHIP_TONGA:
872 case CHIP_FIJI:
873 case CHIP_POLARIS10:
874 case CHIP_POLARIS11:
875 device->gs_table_depth = 32;
876 return;
877 default:
878 unreachable("unknown GPU");
879 }
880 }
881
882 VkResult radv_CreateDevice(
883 VkPhysicalDevice physicalDevice,
884 const VkDeviceCreateInfo* pCreateInfo,
885 const VkAllocationCallbacks* pAllocator,
886 VkDevice* pDevice)
887 {
888 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
889 VkResult result;
890 struct radv_device *device;
891
892 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
893 if (!is_extension_enabled(physical_device->extensions.ext_array,
894 physical_device->extensions.num_ext,
895 pCreateInfo->ppEnabledExtensionNames[i]))
896 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
897 }
898
899 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
900 sizeof(*device), 8,
901 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
902 if (!device)
903 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
904
905 memset(device, 0, sizeof(*device));
906
907 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
908 device->instance = physical_device->instance;
909 device->physical_device = physical_device;
910
911 device->debug_flags = device->instance->debug_flags;
912
913 device->ws = physical_device->ws;
914 if (pAllocator)
915 device->alloc = *pAllocator;
916 else
917 device->alloc = physical_device->instance->alloc;
918
919 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
920 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
921 uint32_t qfi = queue_create->queueFamilyIndex;
922
923 device->queues[qfi] = vk_alloc(&device->alloc,
924 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
925 if (!device->queues[qfi]) {
926 result = VK_ERROR_OUT_OF_HOST_MEMORY;
927 goto fail;
928 }
929
930 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
931
932 device->queue_count[qfi] = queue_create->queueCount;
933
934 for (unsigned q = 0; q < queue_create->queueCount; q++) {
935 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
936 if (result != VK_SUCCESS)
937 goto fail;
938 }
939 }
940
941 #if HAVE_LLVM < 0x0400
942 device->llvm_supports_spill = false;
943 #else
944 device->llvm_supports_spill = true;
945 #endif
946
947 /* The maximum number of scratch waves. Scratch space isn't divided
948 * evenly between CUs. The number is only a function of the number of CUs.
949 * We can decrease the constant to decrease the scratch buffer size.
950 *
951 * scratch_waves must be >= the maximum possible size of
952 * 1 threadgroup, so that the hw doesn't hang from being unable
953 * to start any.
954 *
955 * The recommended value is 4 per CU at most. Higher numbers don't
956 * bring much benefit, but they still occupy chip resources (think
957 * async compute). I've seen ~2% performance difference between 4 and 32.
958 */
959 uint32_t max_threads_per_block = 2048;
960 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
961 max_threads_per_block / 64);
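/* Illustration with hypothetical numbers: for a GPU with 36 compute units this
 * is MAX2(32 * 36, 2048 / 64) = MAX2(1152, 32) = 1152 scratch waves, so the
 * per-CU term dominates in practice. */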
962
963 radv_device_init_gs_info(device);
964
965 result = radv_device_init_meta(device);
966 if (result != VK_SUCCESS)
967 goto fail;
968
969 radv_device_init_msaa(device);
970
971 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
972 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
973 switch (family) {
974 case RADV_QUEUE_GENERAL:
975 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
976 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
977 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
978 break;
979 case RADV_QUEUE_COMPUTE:
980 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
981 radeon_emit(device->empty_cs[family], 0);
982 break;
983 }
984 device->ws->cs_finalize(device->empty_cs[family]);
985
986 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
987 switch (family) {
988 case RADV_QUEUE_GENERAL:
989 case RADV_QUEUE_COMPUTE:
990 si_cs_emit_cache_flush(device->flush_cs[family],
991 device->physical_device->rad_info.chip_class,
992 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
993 RADV_CMD_FLAG_INV_ICACHE |
994 RADV_CMD_FLAG_INV_SMEM_L1 |
995 RADV_CMD_FLAG_INV_VMEM_L1 |
996 RADV_CMD_FLAG_INV_GLOBAL_L2);
997 break;
998 }
999 device->ws->cs_finalize(device->flush_cs[family]);
1000 }
1001
1002 if (getenv("RADV_TRACE_FILE")) {
1003 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1004 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1005 if (!device->trace_bo)
1006 goto fail;
1007
1008 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1009 if (!device->trace_id_ptr)
1010 goto fail;
1011 }
1012
1013 if (device->physical_device->rad_info.chip_class >= CIK)
1014 cik_create_gfx_config(device);
1015
1016 VkPipelineCacheCreateInfo ci;
1017 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1018 ci.pNext = NULL;
1019 ci.flags = 0;
1020 ci.pInitialData = NULL;
1021 ci.initialDataSize = 0;
1022 VkPipelineCache pc;
1023 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1024 &ci, NULL, &pc);
1025 if (result != VK_SUCCESS)
1026 goto fail;
1027
1028 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1029
1030 *pDevice = radv_device_to_handle(device);
1031 return VK_SUCCESS;
1032
1033 fail:
1034 if (device->trace_bo)
1035 device->ws->buffer_destroy(device->trace_bo);
1036
1037 if (device->gfx_init)
1038 device->ws->buffer_destroy(device->gfx_init);
1039
1040 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1041 for (unsigned q = 0; q < device->queue_count[i]; q++)
1042 radv_queue_finish(&device->queues[i][q]);
1043 if (device->queue_count[i])
1044 vk_free(&device->alloc, device->queues[i]);
1045 }
1046
1047 vk_free(&device->alloc, device);
1048 return result;
1049 }
1050
1051 void radv_DestroyDevice(
1052 VkDevice _device,
1053 const VkAllocationCallbacks* pAllocator)
1054 {
1055 RADV_FROM_HANDLE(radv_device, device, _device);
1056
1057 if (!device)
1058 return;
1059
1060 if (device->trace_bo)
1061 device->ws->buffer_destroy(device->trace_bo);
1062
1063 if (device->gfx_init)
1064 device->ws->buffer_destroy(device->gfx_init);
1065
1066 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1067 for (unsigned q = 0; q < device->queue_count[i]; q++)
1068 radv_queue_finish(&device->queues[i][q]);
1069 if (device->queue_count[i])
1070 vk_free(&device->alloc, device->queues[i]);
1071 if (device->empty_cs[i])
1072 device->ws->cs_destroy(device->empty_cs[i]);
1073 if (device->flush_cs[i])
1074 device->ws->cs_destroy(device->flush_cs[i]);
1075 }
1076 radv_device_finish_meta(device);
1077
1078 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1079 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1080
1081 vk_free(&device->alloc, device);
1082 }
1083
1084 VkResult radv_EnumerateInstanceExtensionProperties(
1085 const char* pLayerName,
1086 uint32_t* pPropertyCount,
1087 VkExtensionProperties* pProperties)
1088 {
1089 if (pProperties == NULL) {
1090 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1091 return VK_SUCCESS;
1092 }
1093
1094 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1095 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1096
1097 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1098 return VK_INCOMPLETE;
1099
1100 return VK_SUCCESS;
1101 }
1102
1103 VkResult radv_EnumerateDeviceExtensionProperties(
1104 VkPhysicalDevice physicalDevice,
1105 const char* pLayerName,
1106 uint32_t* pPropertyCount,
1107 VkExtensionProperties* pProperties)
1108 {
1109 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1110
1111 if (pProperties == NULL) {
1112 *pPropertyCount = pdevice->extensions.num_ext;
1113 return VK_SUCCESS;
1114 }
1115
1116 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1117 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1118
1119 if (*pPropertyCount < pdevice->extensions.num_ext)
1120 return VK_INCOMPLETE;
1121
1122 return VK_SUCCESS;
1123 }
1124
1125 VkResult radv_EnumerateInstanceLayerProperties(
1126 uint32_t* pPropertyCount,
1127 VkLayerProperties* pProperties)
1128 {
1129 if (pProperties == NULL) {
1130 *pPropertyCount = 0;
1131 return VK_SUCCESS;
1132 }
1133
1134 /* None supported at this time */
1135 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1136 }
1137
1138 VkResult radv_EnumerateDeviceLayerProperties(
1139 VkPhysicalDevice physicalDevice,
1140 uint32_t* pPropertyCount,
1141 VkLayerProperties* pProperties)
1142 {
1143 if (pProperties == NULL) {
1144 *pPropertyCount = 0;
1145 return VK_SUCCESS;
1146 }
1147
1148 /* None supported at this time */
1149 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1150 }
1151
1152 void radv_GetDeviceQueue(
1153 VkDevice _device,
1154 uint32_t queueFamilyIndex,
1155 uint32_t queueIndex,
1156 VkQueue* pQueue)
1157 {
1158 RADV_FROM_HANDLE(radv_device, device, _device);
1159
1160 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1161 }
1162
1163 static void radv_dump_trace(struct radv_device *device,
1164 struct radeon_winsys_cs *cs)
1165 {
1166 const char *filename = getenv("RADV_TRACE_FILE");
1167 FILE *f = fopen(filename, "w");
1168 if (!f) {
1169 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1170 return;
1171 }
1172
1173 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1174 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1175 fclose(f);
1176 }
1177
1178 static void
1179 fill_geom_rings(struct radv_queue *queue,
1180 uint32_t *map,
1181 uint32_t esgs_ring_size,
1182 struct radeon_winsys_bo *esgs_ring_bo,
1183 uint32_t gsvs_ring_size,
1184 struct radeon_winsys_bo *gsvs_ring_bo)
1185 {
1186 uint64_t esgs_va = 0, gsvs_va = 0;
1187 uint32_t *desc = &map[4];
1188
1189 if (esgs_ring_bo)
1190 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1191 if (gsvs_ring_bo)
1192 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1193
1194 /* stride 0, num records - size, add tid, swizzle, elsize4,
1195 index stride 64 */
1196 desc[0] = esgs_va;
1197 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1198 S_008F04_STRIDE(0) |
1199 S_008F04_SWIZZLE_ENABLE(true);
1200 desc[2] = esgs_ring_size;
1201 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1202 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1203 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1204 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1205 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1206 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1207 S_008F0C_ELEMENT_SIZE(1) |
1208 S_008F0C_INDEX_STRIDE(3) |
1209 S_008F0C_ADD_TID_ENABLE(true);
1210
1211 desc += 4;
1212 /* GS entry for ES->GS ring */
1213 /* stride 0, num records - size, elsize0,
1214 index stride 0 */
1215 desc[0] = esgs_va;
1216 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1217 S_008F04_STRIDE(0) |
1218 S_008F04_SWIZZLE_ENABLE(false);
1219 desc[2] = esgs_ring_size;
1220 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1221 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1222 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1223 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1224 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1225 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1226 S_008F0C_ELEMENT_SIZE(0) |
1227 S_008F0C_INDEX_STRIDE(0) |
1228 S_008F0C_ADD_TID_ENABLE(false);
1229
1230 desc += 4;
1231 /* VS entry for GS->VS ring */
1232 /* stride 0, num records - size, elsize0,
1233 index stride 0 */
1234 desc[0] = gsvs_va;
1235 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1236 S_008F04_STRIDE(0) |
1237 S_008F04_SWIZZLE_ENABLE(false);
1238 desc[2] = gsvs_ring_size;
1239 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1240 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1241 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1242 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1243 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1244 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1245 S_008F0C_ELEMENT_SIZE(0) |
1246 S_008F0C_INDEX_STRIDE(0) |
1247 S_008F0C_ADD_TID_ENABLE(false);
1248 desc += 4;
1249
1250 /* stride gsvs_itemsize, num records 64
1251 elsize 4, index stride 16 */
1252 /* shader will patch stride and desc[2] */
1253 desc[0] = gsvs_va;
1254 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1255 S_008F04_STRIDE(0) |
1256 S_008F04_SWIZZLE_ENABLE(true);
1257 desc[2] = 0;
1258 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1259 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1260 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1261 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1262 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1263 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1264 S_008F0C_ELEMENT_SIZE(1) |
1265 S_008F0C_INDEX_STRIDE(1) |
1266 S_008F0C_ADD_TID_ENABLE(true);
1267 }
1268
1269 static VkResult
1270 radv_get_preamble_cs(struct radv_queue *queue,
1271 uint32_t scratch_size,
1272 uint32_t compute_scratch_size,
1273 uint32_t esgs_ring_size,
1274 uint32_t gsvs_ring_size,
1275 struct radeon_winsys_cs **initial_preamble_cs,
1276 struct radeon_winsys_cs **continue_preamble_cs)
1277 {
1278 struct radeon_winsys_bo *scratch_bo = NULL;
1279 struct radeon_winsys_bo *descriptor_bo = NULL;
1280 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1281 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1282 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1283 struct radeon_winsys_cs *dest_cs[2] = {0};
1284
1285 if (scratch_size <= queue->scratch_size &&
1286 compute_scratch_size <= queue->compute_scratch_size &&
1287 esgs_ring_size <= queue->esgs_ring_size &&
1288 gsvs_ring_size <= queue->gsvs_ring_size &&
1289 queue->initial_preamble_cs) {
1290 *initial_preamble_cs = queue->initial_preamble_cs;
1291 *continue_preamble_cs = queue->continue_preamble_cs;
1292 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1293 *continue_preamble_cs = NULL;
1294 return VK_SUCCESS;
1295 }
1296
1297 if (scratch_size > queue->scratch_size) {
1298 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1299 scratch_size,
1300 4096,
1301 RADEON_DOMAIN_VRAM,
1302 RADEON_FLAG_NO_CPU_ACCESS);
1303 if (!scratch_bo)
1304 goto fail;
1305 } else
1306 scratch_bo = queue->scratch_bo;
1307
1308 if (compute_scratch_size > queue->compute_scratch_size) {
1309 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1310 compute_scratch_size,
1311 4096,
1312 RADEON_DOMAIN_VRAM,
1313 RADEON_FLAG_NO_CPU_ACCESS);
1314 if (!compute_scratch_bo)
1315 goto fail;
1316
1317 } else
1318 compute_scratch_bo = queue->compute_scratch_bo;
1319
1320 if (esgs_ring_size > queue->esgs_ring_size) {
1321 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1322 esgs_ring_size,
1323 4096,
1324 RADEON_DOMAIN_VRAM,
1325 RADEON_FLAG_NO_CPU_ACCESS);
1326 if (!esgs_ring_bo)
1327 goto fail;
1328 } else {
1329 esgs_ring_bo = queue->esgs_ring_bo;
1330 esgs_ring_size = queue->esgs_ring_size;
1331 }
1332
1333 if (gsvs_ring_size > queue->gsvs_ring_size) {
1334 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1335 gsvs_ring_size,
1336 4096,
1337 RADEON_DOMAIN_VRAM,
1338 RADEON_FLAG_NO_CPU_ACCESS);
1339 if (!gsvs_ring_bo)
1340 goto fail;
1341 } else {
1342 gsvs_ring_bo = queue->gsvs_ring_bo;
1343 gsvs_ring_size = queue->gsvs_ring_size;
1344 }
1345
1346 if (scratch_bo != queue->scratch_bo ||
1347 esgs_ring_bo != queue->esgs_ring_bo ||
1348 gsvs_ring_bo != queue->gsvs_ring_bo) {
1349 uint32_t size = 0;
1350 if (gsvs_ring_bo || esgs_ring_bo)
1351 size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
1352 else if (scratch_bo)
1353 size = 8; /* 2 dword */
1354
1355 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1356 size,
1357 4096,
1358 RADEON_DOMAIN_VRAM,
1359 RADEON_FLAG_CPU_ACCESS);
1360 if (!descriptor_bo)
1361 goto fail;
1362 } else
1363 descriptor_bo = queue->descriptor_bo;
1364
1365 for(int i = 0; i < 2; ++i) {
1366 struct radeon_winsys_cs *cs = NULL;
1367 cs = queue->device->ws->cs_create(queue->device->ws,
1368 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1369 if (!cs)
1370 goto fail;
1371
1372 dest_cs[i] = cs;
1373
1374 if (scratch_bo)
1375 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1376
1377 if (esgs_ring_bo)
1378 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1379
1380 if (gsvs_ring_bo)
1381 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1382
1383 if (descriptor_bo)
1384 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1385
1386 if (descriptor_bo != queue->descriptor_bo) {
1387 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1388
1389 if (scratch_bo) {
1390 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1391 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1392 S_008F04_SWIZZLE_ENABLE(1);
1393 map[0] = scratch_va;
1394 map[1] = rsrc1;
1395 }
1396
1397 if (esgs_ring_bo || gsvs_ring_bo)
1398 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1399
1400 queue->device->ws->buffer_unmap(descriptor_bo);
1401 }
1402
1403 if (esgs_ring_bo || gsvs_ring_bo) {
1404 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1405 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1406 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1407 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1408
1409 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1410 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1411 radeon_emit(cs, esgs_ring_size >> 8);
1412 radeon_emit(cs, gsvs_ring_size >> 8);
1413 } else {
1414 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1415 radeon_emit(cs, esgs_ring_size >> 8);
1416 radeon_emit(cs, gsvs_ring_size >> 8);
1417 }
1418 }
1419
1420 if (descriptor_bo) {
1421 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1422 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1423 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1424 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1425 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1426 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1427
1428 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1429
1430 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1431 radeon_set_sh_reg_seq(cs, regs[i], 2);
1432 radeon_emit(cs, va);
1433 radeon_emit(cs, va >> 32);
1434 }
1435 }
1436
1437 if (compute_scratch_bo) {
1438 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1439 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1440 S_008F04_SWIZZLE_ENABLE(1);
1441
1442 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1443
1444 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1445 radeon_emit(cs, scratch_va);
1446 radeon_emit(cs, rsrc1);
1447 }
1448
1449 if (!i) {
1450 si_cs_emit_cache_flush(cs,
1451 queue->device->physical_device->rad_info.chip_class,
1452 queue->queue_family_index == RING_COMPUTE &&
1453 queue->device->physical_device->rad_info.chip_class >= CIK,
1454 RADV_CMD_FLAG_INV_ICACHE |
1455 RADV_CMD_FLAG_INV_SMEM_L1 |
1456 RADV_CMD_FLAG_INV_VMEM_L1 |
1457 RADV_CMD_FLAG_INV_GLOBAL_L2);
1458 }
1459
1460 if (!queue->device->ws->cs_finalize(cs))
1461 goto fail;
1462 }
1463
1464 if (queue->initial_preamble_cs)
1465 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1466
1467 if (queue->continue_preamble_cs)
1468 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1469
1470 queue->initial_preamble_cs = dest_cs[0];
1471 queue->continue_preamble_cs = dest_cs[1];
1472
1473 if (scratch_bo != queue->scratch_bo) {
1474 if (queue->scratch_bo)
1475 queue->device->ws->buffer_destroy(queue->scratch_bo);
1476 queue->scratch_bo = scratch_bo;
1477 queue->scratch_size = scratch_size;
1478 }
1479
1480 if (compute_scratch_bo != queue->compute_scratch_bo) {
1481 if (queue->compute_scratch_bo)
1482 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1483 queue->compute_scratch_bo = compute_scratch_bo;
1484 queue->compute_scratch_size = compute_scratch_size;
1485 }
1486
1487 if (esgs_ring_bo != queue->esgs_ring_bo) {
1488 if (queue->esgs_ring_bo)
1489 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1490 queue->esgs_ring_bo = esgs_ring_bo;
1491 queue->esgs_ring_size = esgs_ring_size;
1492 }
1493
1494 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1495 if (queue->gsvs_ring_bo)
1496 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1497 queue->gsvs_ring_bo = gsvs_ring_bo;
1498 queue->gsvs_ring_size = gsvs_ring_size;
1499 }
1500
1501 if (descriptor_bo != queue->descriptor_bo) {
1502 if (queue->descriptor_bo)
1503 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1504
1505 queue->descriptor_bo = descriptor_bo;
1506 }
1507
1508 *initial_preamble_cs = queue->initial_preamble_cs;
1509 *continue_preamble_cs = queue->continue_preamble_cs;
1510 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1511 *continue_preamble_cs = NULL;
1512 return VK_SUCCESS;
1513 fail:
1514 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1515 if (dest_cs[i])
1516 queue->device->ws->cs_destroy(dest_cs[i]);
1517 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1518 queue->device->ws->buffer_destroy(descriptor_bo);
1519 if (scratch_bo && scratch_bo != queue->scratch_bo)
1520 queue->device->ws->buffer_destroy(scratch_bo);
1521 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1522 queue->device->ws->buffer_destroy(compute_scratch_bo);
1523 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1524 queue->device->ws->buffer_destroy(esgs_ring_bo);
1525 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1526 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1527 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1528 }
1529
1530 VkResult radv_QueueSubmit(
1531 VkQueue _queue,
1532 uint32_t submitCount,
1533 const VkSubmitInfo* pSubmits,
1534 VkFence _fence)
1535 {
1536 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1537 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1538 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1539 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1540 int ret;
1541 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
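/* With RADV_TRACE_FILE set (trace_bo != NULL) command streams are submitted one
 * at a time, so a GPU hang can be attributed to a single CS and dumped via
 * radv_dump_trace() below. */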
1542 uint32_t scratch_size = 0;
1543 uint32_t compute_scratch_size = 0;
1544 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1545 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1546 VkResult result;
1547 bool fence_emitted = false;
1548
1549 /* Do this first so failing to allocate scratch buffers can't result in
1550 * partially executed submissions. */
1551 for (uint32_t i = 0; i < submitCount; i++) {
1552 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1553 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1554 pSubmits[i].pCommandBuffers[j]);
1555
1556 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1557 compute_scratch_size = MAX2(compute_scratch_size,
1558 cmd_buffer->compute_scratch_size_needed);
1559 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1560 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1561 }
1562 }
1563
1564 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1565 esgs_ring_size, gsvs_ring_size,
1566 &initial_preamble_cs, &continue_preamble_cs);
1567 if (result != VK_SUCCESS)
1568 return result;
1569
1570 for (uint32_t i = 0; i < submitCount; i++) {
1571 struct radeon_winsys_cs **cs_array;
1572 bool do_flush = !i;
1573 bool can_patch = !do_flush;
1574 uint32_t advance;
1575
1576 if (!pSubmits[i].commandBufferCount) {
1577 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1578 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1579 &queue->device->empty_cs[queue->queue_family_index],
1580 1, NULL, NULL,
1581 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1582 pSubmits[i].waitSemaphoreCount,
1583 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1584 pSubmits[i].signalSemaphoreCount,
1585 false, base_fence);
1586 if (ret) {
1587 radv_loge("failed to submit CS %d\n", i);
1588 abort();
1589 }
1590 fence_emitted = true;
1591 }
1592 continue;
1593 }
1594
1595 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1596 (pSubmits[i].commandBufferCount + do_flush));
1597
1598 if(do_flush)
1599 cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1600
1601 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1602 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1603 pSubmits[i].pCommandBuffers[j]);
1604 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1605
1606 cs_array[j + do_flush] = cmd_buffer->cs;
1607 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1608 can_patch = false;
1609 }
1610
1611 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1612 advance = MIN2(max_cs_submission,
1613 pSubmits[i].commandBufferCount + do_flush - j);
1614 bool b = j == 0;
1615 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1616
1617 if (queue->device->trace_bo)
1618 *queue->device->trace_id_ptr = 0;
1619
1620 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1621 advance, initial_preamble_cs, continue_preamble_cs,
1622 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1623 b ? pSubmits[i].waitSemaphoreCount : 0,
1624 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1625 e ? pSubmits[i].signalSemaphoreCount : 0,
1626 can_patch, base_fence);
1627
1628 if (ret) {
1629 radv_loge("failed to submit CS %d\n", i);
1630 abort();
1631 }
1632 fence_emitted = true;
1633 if (queue->device->trace_bo) {
1634 bool success = queue->device->ws->ctx_wait_idle(
1635 queue->hw_ctx,
1636 radv_queue_family_to_ring(
1637 queue->queue_family_index),
1638 queue->queue_idx);
1639
1640 if (!success) { /* Hang */
1641 radv_dump_trace(queue->device, cs_array[j]);
1642 abort();
1643 }
1644 }
1645 }
1646 free(cs_array);
1647 }
1648
1649 if (fence) {
1650 if (!fence_emitted)
1651 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1652 &queue->device->empty_cs[queue->queue_family_index],
1653 1, NULL, NULL, NULL, 0, NULL, 0,
1654 false, base_fence);
1655
1656 fence->submitted = true;
1657 }
1658
1659 return VK_SUCCESS;
1660 }
1661
1662 VkResult radv_QueueWaitIdle(
1663 VkQueue _queue)
1664 {
1665 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1666
1667 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1668 radv_queue_family_to_ring(queue->queue_family_index),
1669 queue->queue_idx);
1670 return VK_SUCCESS;
1671 }
1672
1673 VkResult radv_DeviceWaitIdle(
1674 VkDevice _device)
1675 {
1676 RADV_FROM_HANDLE(radv_device, device, _device);
1677
1678 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1679 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1680 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1681 }
1682 }
1683 return VK_SUCCESS;
1684 }
1685
1686 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1687 VkInstance instance,
1688 const char* pName)
1689 {
1690 return radv_lookup_entrypoint(pName);
1691 }
1692
1693 /* The loader wants us to expose a second GetInstanceProcAddr function
1694 * to work around certain LD_PRELOAD issues seen in apps.
1695 */
1696 PUBLIC
1697 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1698 VkInstance instance,
1699 const char* pName);
1700
1701 PUBLIC
1702 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1703 VkInstance instance,
1704 const char* pName)
1705 {
1706 return radv_GetInstanceProcAddr(instance, pName);
1707 }
1708
1709 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1710 VkDevice device,
1711 const char* pName)
1712 {
1713 return radv_lookup_entrypoint(pName);
1714 }
1715
1716 bool radv_get_memory_fd(struct radv_device *device,
1717 struct radv_device_memory *memory,
1718 int *pFD)
1719 {
1720 struct radeon_bo_metadata metadata;
1721
1722 if (memory->image) {
1723 radv_init_metadata(device, memory->image, &metadata);
1724 device->ws->buffer_set_metadata(memory->bo, &metadata);
1725 }
1726
1727 return device->ws->buffer_get_fd(device->ws, memory->bo,
1728 pFD);
1729 }
1730
1731 VkResult radv_AllocateMemory(
1732 VkDevice _device,
1733 const VkMemoryAllocateInfo* pAllocateInfo,
1734 const VkAllocationCallbacks* pAllocator,
1735 VkDeviceMemory* pMem)
1736 {
1737 RADV_FROM_HANDLE(radv_device, device, _device);
1738 struct radv_device_memory *mem;
1739 VkResult result;
1740 enum radeon_bo_domain domain;
1741 uint32_t flags = 0;
1742 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
1743 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1744
1745 if (pAllocateInfo->allocationSize == 0) {
1746 /* Apparently, this is allowed */
1747 *pMem = VK_NULL_HANDLE;
1748 return VK_SUCCESS;
1749 }
1750
1751 vk_foreach_struct(ext, pAllocateInfo->pNext) {
1752 switch (ext->sType) {
1753 case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
1754 dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
1755 break;
1756 default:
1757 break;
1758 }
1759 }
1760
1761 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1762 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1763 if (mem == NULL)
1764 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1765
1766 if (dedicate_info) {
1767 mem->image = radv_image_from_handle(dedicate_info->image);
1768 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
1769 } else {
1770 mem->image = NULL;
1771 mem->buffer = NULL;
1772 }
1773
1774 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1775 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1776 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1777 domain = RADEON_DOMAIN_GTT;
1778 else
1779 domain = RADEON_DOMAIN_VRAM;
1780
1781 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1782 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1783 else
1784 flags |= RADEON_FLAG_CPU_ACCESS;
1785
1786 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1787 flags |= RADEON_FLAG_GTT_WC;
1788
1789 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1790 domain, flags);
1791
1792 if (!mem->bo) {
1793 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1794 goto fail;
1795 }
1796 mem->type_index = pAllocateInfo->memoryTypeIndex;
1797
1798 *pMem = radv_device_memory_to_handle(mem);
1799
1800 return VK_SUCCESS;
1801
1802 fail:
1803 vk_free2(&device->alloc, pAllocator, mem);
1804
1805 return result;
1806 }
1807
1808 void radv_FreeMemory(
1809 VkDevice _device,
1810 VkDeviceMemory _mem,
1811 const VkAllocationCallbacks* pAllocator)
1812 {
1813 RADV_FROM_HANDLE(radv_device, device, _device);
1814 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1815
1816 if (mem == NULL)
1817 return;
1818
1819 device->ws->buffer_destroy(mem->bo);
1820 mem->bo = NULL;
1821
1822 vk_free2(&device->alloc, pAllocator, mem);
1823 }
1824
1825 VkResult radv_MapMemory(
1826 VkDevice _device,
1827 VkDeviceMemory _memory,
1828 VkDeviceSize offset,
1829 VkDeviceSize size,
1830 VkMemoryMapFlags flags,
1831 void** ppData)
1832 {
1833 RADV_FROM_HANDLE(radv_device, device, _device);
1834 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1835
1836 if (mem == NULL) {
1837 *ppData = NULL;
1838 return VK_SUCCESS;
1839 }
1840
1841 *ppData = device->ws->buffer_map(mem->bo);
1842 if (*ppData) {
1843 		*ppData = (char *)*ppData + offset;
1844 return VK_SUCCESS;
1845 }
1846
1847 return VK_ERROR_MEMORY_MAP_FAILED;
1848 }
1849
1850 void radv_UnmapMemory(
1851 VkDevice _device,
1852 VkDeviceMemory _memory)
1853 {
1854 RADV_FROM_HANDLE(radv_device, device, _device);
1855 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1856
1857 if (mem == NULL)
1858 return;
1859
1860 device->ws->buffer_unmap(mem->bo);
1861 }
1862
1863 VkResult radv_FlushMappedMemoryRanges(
1864 VkDevice _device,
1865 uint32_t memoryRangeCount,
1866 const VkMappedMemoryRange* pMemoryRanges)
1867 {
1868 return VK_SUCCESS;
1869 }
1870
1871 VkResult radv_InvalidateMappedMemoryRanges(
1872 VkDevice _device,
1873 uint32_t memoryRangeCount,
1874 const VkMappedMemoryRange* pMemoryRanges)
1875 {
1876 return VK_SUCCESS;
1877 }
1878
1879 void radv_GetBufferMemoryRequirements(
1880 VkDevice device,
1881 VkBuffer _buffer,
1882 VkMemoryRequirements* pMemoryRequirements)
1883 {
1884 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1885
1886 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1887
1888 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
1889 pMemoryRequirements->alignment = 4096;
1890 else
1891 pMemoryRequirements->alignment = 16;
1892
1893 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
1894 }
1895
1896 void radv_GetImageMemoryRequirements(
1897 VkDevice device,
1898 VkImage _image,
1899 VkMemoryRequirements* pMemoryRequirements)
1900 {
1901 RADV_FROM_HANDLE(radv_image, image, _image);
1902
1903 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1904
1905 pMemoryRequirements->size = image->size;
1906 pMemoryRequirements->alignment = image->alignment;
1907 }
1908
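/* Sparse image residency is not implemented; only buffer and opaque-image
 * sparse binding is handled further below, so this entry point remains a
 * stub.
 */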
1909 void radv_GetImageSparseMemoryRequirements(
1910 VkDevice device,
1911 VkImage image,
1912 uint32_t* pSparseMemoryRequirementCount,
1913 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1914 {
1915 stub();
1916 }
1917
1918 void radv_GetDeviceMemoryCommitment(
1919 VkDevice device,
1920 VkDeviceMemory memory,
1921 VkDeviceSize* pCommittedMemoryInBytes)
1922 {
1923 *pCommittedMemoryInBytes = 0;
1924 }
1925
1926 VkResult radv_BindBufferMemory(
1927 VkDevice device,
1928 VkBuffer _buffer,
1929 VkDeviceMemory _memory,
1930 VkDeviceSize memoryOffset)
1931 {
1932 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1933 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1934
1935 if (mem) {
1936 buffer->bo = mem->bo;
1937 buffer->offset = memoryOffset;
1938 } else {
1939 buffer->bo = NULL;
1940 buffer->offset = 0;
1941 }
1942
1943 return VK_SUCCESS;
1944 }
1945
1946 VkResult radv_BindImageMemory(
1947 VkDevice device,
1948 VkImage _image,
1949 VkDeviceMemory _memory,
1950 VkDeviceSize memoryOffset)
1951 {
1952 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1953 RADV_FROM_HANDLE(radv_image, image, _image);
1954
1955 if (mem) {
1956 image->bo = mem->bo;
1957 image->offset = memoryOffset;
1958 } else {
1959 image->bo = NULL;
1960 image->offset = 0;
1961 }
1962
1963 return VK_SUCCESS;
1964 }
1965
1966
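/* Sparse binding helpers: each VkSparseMemoryBind maps a range of the
 * resource's virtual BO onto a range of the backing BO (or unmaps it when
 * memory is VK_NULL_HANDLE) via the winsys buffer_virtual_bind hook.
 */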
1967 static void
1968 radv_sparse_buffer_bind_memory(struct radv_device *device,
1969 const VkSparseBufferMemoryBindInfo *bind)
1970 {
1971 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
1972
1973 for (uint32_t i = 0; i < bind->bindCount; ++i) {
1974 struct radv_device_memory *mem = NULL;
1975
1976 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
1977 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
1978
1979 device->ws->buffer_virtual_bind(buffer->bo,
1980 bind->pBinds[i].resourceOffset,
1981 bind->pBinds[i].size,
1982 mem ? mem->bo : NULL,
1983 bind->pBinds[i].memoryOffset);
1984 }
1985 }
1986
1987 static void
1988 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
1989 const VkSparseImageOpaqueMemoryBindInfo *bind)
1990 {
1991 RADV_FROM_HANDLE(radv_image, image, bind->image);
1992
1993 for (uint32_t i = 0; i < bind->bindCount; ++i) {
1994 struct radv_device_memory *mem = NULL;
1995
1996 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
1997 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
1998
1999 device->ws->buffer_virtual_bind(image->bo,
2000 bind->pBinds[i].resourceOffset,
2001 bind->pBinds[i].size,
2002 mem ? mem->bo : NULL,
2003 bind->pBinds[i].memoryOffset);
2004 }
2005 }
2006
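/* vkQueueBindSparse: apply all buffer and opaque-image binds, then submit
 * an empty command stream only when semaphores must be waited on or
 * signalled; if nothing was submitted, a provided fence is marked signalled
 * directly.
 */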
2007 VkResult radv_QueueBindSparse(
2008 VkQueue _queue,
2009 uint32_t bindInfoCount,
2010 const VkBindSparseInfo* pBindInfo,
2011 VkFence _fence)
2012 {
2013 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2014 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2015 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2016 bool fence_emitted = false;
2017
2018 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2019 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2020 radv_sparse_buffer_bind_memory(queue->device,
2021 pBindInfo[i].pBufferBinds + j);
2022 }
2023
2024 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2025 radv_sparse_image_opaque_bind_memory(queue->device,
2026 pBindInfo[i].pImageOpaqueBinds + j);
2027 }
2028
2029 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2030 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2031 &queue->device->empty_cs[queue->queue_family_index],
2032 1, NULL, NULL,
2033 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2034 pBindInfo[i].waitSemaphoreCount,
2035 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2036 pBindInfo[i].signalSemaphoreCount,
2037 false, base_fence);
2038 fence_emitted = true;
2039 if (fence)
2040 fence->submitted = true;
2041 }
2042 }
2043
2044 if (fence && !fence_emitted) {
2045 fence->signalled = true;
2046 }
2047
2048 return VK_SUCCESS;
2049 }
2050
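/* Fences wrap a winsys fence plus two CPU-side flags: 'submitted' (some
 * submission referencing the fence has been made) and 'signalled' (the
 * fence is known to have completed). These let GetFenceStatus and
 * WaitForFences answer without touching the winsys when possible.
 */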
2051 VkResult radv_CreateFence(
2052 VkDevice _device,
2053 const VkFenceCreateInfo* pCreateInfo,
2054 const VkAllocationCallbacks* pAllocator,
2055 VkFence* pFence)
2056 {
2057 RADV_FROM_HANDLE(radv_device, device, _device);
2058 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2059 sizeof(*fence), 8,
2060 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2061
2062 if (!fence)
2063 return VK_ERROR_OUT_OF_HOST_MEMORY;
2064
2065 memset(fence, 0, sizeof(*fence));
2066 fence->submitted = false;
2067 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2068 fence->fence = device->ws->create_fence();
2069 if (!fence->fence) {
2070 vk_free2(&device->alloc, pAllocator, fence);
2071 return VK_ERROR_OUT_OF_HOST_MEMORY;
2072 }
2073
2074 *pFence = radv_fence_to_handle(fence);
2075
2076 return VK_SUCCESS;
2077 }
2078
2079 void radv_DestroyFence(
2080 VkDevice _device,
2081 VkFence _fence,
2082 const VkAllocationCallbacks* pAllocator)
2083 {
2084 RADV_FROM_HANDLE(radv_device, device, _device);
2085 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2086
2087 if (!fence)
2088 return;
2089 device->ws->destroy_fence(fence->fence);
2090 vk_free2(&device->alloc, pAllocator, fence);
2091 }
2092
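/* Convert the relative timeout passed to vkWaitForFences into an absolute
 * CLOCK_MONOTONIC deadline, clamping so the addition cannot overflow.
 */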
2093 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2094 {
2095 uint64_t current_time;
2096 struct timespec tv;
2097
2098 clock_gettime(CLOCK_MONOTONIC, &tv);
2099 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2100
2101 timeout = MIN2(UINT64_MAX - current_time, timeout);
2102
2103 return current_time + timeout;
2104 }
2105
2106 VkResult radv_WaitForFences(
2107 VkDevice _device,
2108 uint32_t fenceCount,
2109 const VkFence* pFences,
2110 VkBool32 waitAll,
2111 uint64_t timeout)
2112 {
2113 RADV_FROM_HANDLE(radv_device, device, _device);
2114 timeout = radv_get_absolute_timeout(timeout);
2115
2116 if (!waitAll && fenceCount > 1) {
2117 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2118 }
2119
2120 for (uint32_t i = 0; i < fenceCount; ++i) {
2121 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2122 bool expired = false;
2123
2124 if (fence->signalled)
2125 continue;
2126
2127 if (!fence->submitted)
2128 return VK_TIMEOUT;
2129
2130 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2131 if (!expired)
2132 return VK_TIMEOUT;
2133
2134 fence->signalled = true;
2135 }
2136
2137 return VK_SUCCESS;
2138 }
2139
2140 VkResult radv_ResetFences(VkDevice device,
2141 uint32_t fenceCount,
2142 const VkFence *pFences)
2143 {
2144 for (unsigned i = 0; i < fenceCount; ++i) {
2145 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2146 fence->submitted = fence->signalled = false;
2147 }
2148
2149 return VK_SUCCESS;
2150 }
2151
2152 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2153 {
2154 RADV_FROM_HANDLE(radv_device, device, _device);
2155 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2156
2157 if (fence->signalled)
2158 return VK_SUCCESS;
2159 if (!fence->submitted)
2160 return VK_NOT_READY;
2161
2162 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2163 return VK_NOT_READY;
2164
2165 return VK_SUCCESS;
2166 }
2167
2168
2169 // Queue semaphore functions
2170
2171 VkResult radv_CreateSemaphore(
2172 VkDevice _device,
2173 const VkSemaphoreCreateInfo* pCreateInfo,
2174 const VkAllocationCallbacks* pAllocator,
2175 VkSemaphore* pSemaphore)
2176 {
2177 RADV_FROM_HANDLE(radv_device, device, _device);
2178 struct radeon_winsys_sem *sem;
2179
2180 sem = device->ws->create_sem(device->ws);
2181 if (!sem)
2182 return VK_ERROR_OUT_OF_HOST_MEMORY;
2183
2184 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2185 return VK_SUCCESS;
2186 }
2187
2188 void radv_DestroySemaphore(
2189 VkDevice _device,
2190 VkSemaphore _semaphore,
2191 const VkAllocationCallbacks* pAllocator)
2192 {
2193 RADV_FROM_HANDLE(radv_device, device, _device);
2194 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2195 if (!_semaphore)
2196 return;
2197
2198 device->ws->destroy_sem(sem);
2199 }
2200
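/* Events are backed by a small, persistently mapped GTT BO holding a
 * single status word: 1 means set, 0 means reset. The host-side
 * Get/Set/Reset entry points simply read or write that mapping.
 */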
2201 VkResult radv_CreateEvent(
2202 VkDevice _device,
2203 const VkEventCreateInfo* pCreateInfo,
2204 const VkAllocationCallbacks* pAllocator,
2205 VkEvent* pEvent)
2206 {
2207 RADV_FROM_HANDLE(radv_device, device, _device);
2208 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2209 sizeof(*event), 8,
2210 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2211
2212 if (!event)
2213 return VK_ERROR_OUT_OF_HOST_MEMORY;
2214
2215 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2216 RADEON_DOMAIN_GTT,
2217 RADEON_FLAG_CPU_ACCESS);
2218 if (!event->bo) {
2219 vk_free2(&device->alloc, pAllocator, event);
2220 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2221 }
2222
2223 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2224
2225 *pEvent = radv_event_to_handle(event);
2226
2227 return VK_SUCCESS;
2228 }
2229
2230 void radv_DestroyEvent(
2231 VkDevice _device,
2232 VkEvent _event,
2233 const VkAllocationCallbacks* pAllocator)
2234 {
2235 RADV_FROM_HANDLE(radv_device, device, _device);
2236 RADV_FROM_HANDLE(radv_event, event, _event);
2237
2238 if (!event)
2239 return;
2240 device->ws->buffer_destroy(event->bo);
2241 vk_free2(&device->alloc, pAllocator, event);
2242 }
2243
2244 VkResult radv_GetEventStatus(
2245 VkDevice _device,
2246 VkEvent _event)
2247 {
2248 RADV_FROM_HANDLE(radv_event, event, _event);
2249
2250 if (*event->map == 1)
2251 return VK_EVENT_SET;
2252 return VK_EVENT_RESET;
2253 }
2254
2255 VkResult radv_SetEvent(
2256 VkDevice _device,
2257 VkEvent _event)
2258 {
2259 RADV_FROM_HANDLE(radv_event, event, _event);
2260 *event->map = 1;
2261
2262 return VK_SUCCESS;
2263 }
2264
2265 VkResult radv_ResetEvent(
2266 VkDevice _device,
2267 VkEvent _event)
2268 {
2269 RADV_FROM_HANDLE(radv_event, event, _event);
2270 *event->map = 0;
2271
2272 return VK_SUCCESS;
2273 }
2274
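/* Buffers created with VK_BUFFER_CREATE_SPARSE_BINDING_BIT get a virtual
 * BO up front (RADEON_FLAG_VIRTUAL) so vkQueueBindSparse can remap pages
 * later; ordinary buffers receive their BO at vkBindBufferMemory time.
 */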
2275 VkResult radv_CreateBuffer(
2276 VkDevice _device,
2277 const VkBufferCreateInfo* pCreateInfo,
2278 const VkAllocationCallbacks* pAllocator,
2279 VkBuffer* pBuffer)
2280 {
2281 RADV_FROM_HANDLE(radv_device, device, _device);
2282 struct radv_buffer *buffer;
2283
2284 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2285
2286 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2287 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2288 if (buffer == NULL)
2289 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2290
2291 buffer->size = pCreateInfo->size;
2292 buffer->usage = pCreateInfo->usage;
2293 buffer->bo = NULL;
2294 buffer->offset = 0;
2295 buffer->flags = pCreateInfo->flags;
2296
2297 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2298 buffer->bo = device->ws->buffer_create(device->ws,
2299 align64(buffer->size, 4096),
2300 4096, 0, RADEON_FLAG_VIRTUAL);
2301 if (!buffer->bo) {
2302 vk_free2(&device->alloc, pAllocator, buffer);
2303 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2304 }
2305 }
2306
2307 *pBuffer = radv_buffer_to_handle(buffer);
2308
2309 return VK_SUCCESS;
2310 }
2311
2312 void radv_DestroyBuffer(
2313 VkDevice _device,
2314 VkBuffer _buffer,
2315 const VkAllocationCallbacks* pAllocator)
2316 {
2317 RADV_FROM_HANDLE(radv_device, device, _device);
2318 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2319
2320 if (!buffer)
2321 return;
2322
2323 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2324 device->ws->buffer_destroy(buffer->bo);
2325
2326 vk_free2(&device->alloc, pAllocator, buffer);
2327 }
2328
2329 static inline unsigned
2330 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2331 {
2332 if (stencil)
2333 return image->surface.stencil_tiling_index[level];
2334 else
2335 return image->surface.tiling_index[level];
2336 }
2337
2338 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2339 {
2340 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2341 }
2342
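/* Fill the CB_* register values for a color attachment: base, CMASK, DCC
 * and FMASK addresses, tiling indices, and the number format / component
 * swap derived from the view format.
 */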
2343 static void
2344 radv_initialise_color_surface(struct radv_device *device,
2345 struct radv_color_buffer_info *cb,
2346 struct radv_image_view *iview)
2347 {
2348 const struct vk_format_description *desc;
2349 unsigned ntype, format, swap, endian;
2350 unsigned blend_clamp = 0, blend_bypass = 0;
2351 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2352 uint64_t va;
2353 const struct radeon_surf *surf = &iview->image->surface;
2354 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2355
2356 desc = vk_format_description(iview->vk_format);
2357
2358 memset(cb, 0, sizeof(*cb));
2359
2360 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2361 va += level_info->offset;
2362 cb->cb_color_base = va >> 8;
2363
2364 /* CMASK variables */
2365 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2366 va += iview->image->cmask.offset;
2367 cb->cb_color_cmask = va >> 8;
2368 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2369
2370 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2371 va += iview->image->dcc_offset;
2372 cb->cb_dcc_base = va >> 8;
2373
2374 uint32_t max_slice = radv_surface_layer_count(iview);
2375 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2376 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2377
2378 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2379 pitch_tile_max = level_info->nblk_x / 8 - 1;
2380 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2381 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2382
2383 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2384 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2385
2386 /* Intensity is implemented as Red, so treat it that way. */
2387 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2388 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2389
2390 if (iview->image->samples > 1) {
2391 unsigned log_samples = util_logbase2(iview->image->samples);
2392
2393 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2394 S_028C74_NUM_FRAGMENTS(log_samples);
2395 }
2396
2397 if (iview->image->fmask.size) {
2398 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2399 if (device->physical_device->rad_info.chip_class >= CIK)
2400 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2401 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2402 cb->cb_color_fmask = va >> 8;
2403 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2404 } else {
2405 /* This must be set for fast clear to work without FMASK. */
2406 if (device->physical_device->rad_info.chip_class >= CIK)
2407 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2408 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2409 cb->cb_color_fmask = cb->cb_color_base;
2410 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2411 }
2412
2413 ntype = radv_translate_color_numformat(iview->vk_format,
2414 desc,
2415 vk_format_get_first_non_void_channel(iview->vk_format));
2416 format = radv_translate_colorformat(iview->vk_format);
2417 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2418 radv_finishme("Illegal color\n");
2419 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2420 endian = radv_colorformat_endian_swap(format);
2421
2422 /* blend clamp should be set for all NORM/SRGB types */
2423 if (ntype == V_028C70_NUMBER_UNORM ||
2424 ntype == V_028C70_NUMBER_SNORM ||
2425 ntype == V_028C70_NUMBER_SRGB)
2426 blend_clamp = 1;
2427
2428 /* set blend bypass according to docs if SINT/UINT or
2429 8/24 COLOR variants */
2430 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2431 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2432 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2433 blend_clamp = 0;
2434 blend_bypass = 1;
2435 }
2436 #if 0
2437 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2438 (format == V_028C70_COLOR_8 ||
2439 format == V_028C70_COLOR_8_8 ||
2440 format == V_028C70_COLOR_8_8_8_8))
2441 ->color_is_int8 = true;
2442 #endif
2443 cb->cb_color_info = S_028C70_FORMAT(format) |
2444 S_028C70_COMP_SWAP(swap) |
2445 S_028C70_BLEND_CLAMP(blend_clamp) |
2446 S_028C70_BLEND_BYPASS(blend_bypass) |
2447 S_028C70_SIMPLE_FLOAT(1) |
2448 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2449 ntype != V_028C70_NUMBER_SNORM &&
2450 ntype != V_028C70_NUMBER_SRGB &&
2451 format != V_028C70_COLOR_8_24 &&
2452 format != V_028C70_COLOR_24_8) |
2453 S_028C70_NUMBER_TYPE(ntype) |
2454 S_028C70_ENDIAN(endian);
2455 	if (iview->image->samples > 1 &&
2456 	    iview->image->fmask.size)
2457 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
2458
2459 if (iview->image->cmask.size &&
2460 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2461 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2462
2463 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2464 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2465
2466 if (device->physical_device->rad_info.chip_class >= VI) {
2467 unsigned max_uncompressed_block_size = 2;
2468 if (iview->image->samples > 1) {
2469 if (iview->image->surface.bpe == 1)
2470 max_uncompressed_block_size = 0;
2471 else if (iview->image->surface.bpe == 2)
2472 max_uncompressed_block_size = 1;
2473 }
2474
2475 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2476 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2477 }
2478
2479 /* This must be set for fast clear to work without FMASK. */
2480 if (!iview->image->fmask.size &&
2481 device->physical_device->rad_info.chip_class == SI) {
2482 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2483 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2484 }
2485 }
2486
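/* Fill the DB_* register values for a depth/stencil attachment, including
 * the polygon-offset scale for the format, per-generation tiling setup,
 * and HTILE state (enabled only for the base mip level).
 */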
2487 static void
2488 radv_initialise_ds_surface(struct radv_device *device,
2489 struct radv_ds_buffer_info *ds,
2490 struct radv_image_view *iview)
2491 {
2492 unsigned level = iview->base_mip;
2493 unsigned format;
2494 uint64_t va, s_offs, z_offs;
2495 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2496 memset(ds, 0, sizeof(*ds));
2497 switch (iview->vk_format) {
2498 case VK_FORMAT_D24_UNORM_S8_UINT:
2499 case VK_FORMAT_X8_D24_UNORM_PACK32:
2500 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2501 ds->offset_scale = 2.0f;
2502 break;
2503 case VK_FORMAT_D16_UNORM:
2504 case VK_FORMAT_D16_UNORM_S8_UINT:
2505 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2506 ds->offset_scale = 4.0f;
2507 break;
2508 case VK_FORMAT_D32_SFLOAT:
2509 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2510 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2511 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2512 ds->offset_scale = 1.0f;
2513 break;
2514 default:
2515 break;
2516 }
2517
2518 format = radv_translate_dbformat(iview->vk_format);
2519
2520 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2521 s_offs = z_offs = va;
2522 z_offs += iview->image->surface.level[level].offset;
2523 s_offs += iview->image->surface.stencil_level[level].offset;
2524
2525 uint32_t max_slice = radv_surface_layer_count(iview);
2526 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2527 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2528 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2529 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2530
2531 if (iview->image->samples > 1)
2532 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2533
2534 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2535 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2536 else
2537 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2538
2539 if (device->physical_device->rad_info.chip_class >= CIK) {
2540 struct radeon_info *info = &device->physical_device->rad_info;
2541 unsigned tiling_index = iview->image->surface.tiling_index[level];
2542 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2543 unsigned macro_index = iview->image->surface.macro_tile_index;
2544 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2545 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2546 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2547
2548 ds->db_depth_info |=
2549 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2550 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2551 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2552 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2553 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2554 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2555 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2556 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2557 } else {
2558 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2559 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2560 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2561 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2562 }
2563
2564 if (iview->image->surface.htile_size && !level) {
2565 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2566 S_028040_ALLOW_EXPCLEAR(1);
2567
2568 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2569 /* Workaround: For a not yet understood reason, the
2570 * combination of MSAA, fast stencil clear and stencil
2571 * decompress messes with subsequent stencil buffer
2572 * uses. Problem was reproduced on Verde, Bonaire,
2573 * Tonga, and Carrizo.
2574 *
2575 * Disabling EXPCLEAR works around the problem.
2576 *
2577 * Check piglit's arb_texture_multisample-stencil-clear
2578 * test if you want to try changing this.
2579 */
2580 if (iview->image->samples <= 1)
2581 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2582 } else
2583 /* Use all of the htile_buffer for depth if there's no stencil. */
2584 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2585
2586 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2587 iview->image->htile_offset;
2588 ds->db_htile_data_base = va >> 8;
2589 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2590 } else {
2591 ds->db_htile_data_base = 0;
2592 ds->db_htile_surface = 0;
2593 }
2594
2595 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2596 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2597
2598 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2599 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2600 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2601 }
2602
2603 VkResult radv_CreateFramebuffer(
2604 VkDevice _device,
2605 const VkFramebufferCreateInfo* pCreateInfo,
2606 const VkAllocationCallbacks* pAllocator,
2607 VkFramebuffer* pFramebuffer)
2608 {
2609 RADV_FROM_HANDLE(radv_device, device, _device);
2610 struct radv_framebuffer *framebuffer;
2611
2612 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2613
2614 size_t size = sizeof(*framebuffer) +
2615 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2616 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2617 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2618 if (framebuffer == NULL)
2619 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2620
2621 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2622 framebuffer->width = pCreateInfo->width;
2623 framebuffer->height = pCreateInfo->height;
2624 framebuffer->layers = pCreateInfo->layers;
2625 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2626 VkImageView _iview = pCreateInfo->pAttachments[i];
2627 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2628 framebuffer->attachments[i].attachment = iview;
2629 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2630 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2631 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2632 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2633 }
2634 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2635 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2636 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2637 }
2638
2639 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2640 return VK_SUCCESS;
2641 }
2642
2643 void radv_DestroyFramebuffer(
2644 VkDevice _device,
2645 VkFramebuffer _fb,
2646 const VkAllocationCallbacks* pAllocator)
2647 {
2648 RADV_FROM_HANDLE(radv_device, device, _device);
2649 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2650
2651 if (!fb)
2652 return;
2653 vk_free2(&device->alloc, pAllocator, fb);
2654 }
2655
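/* The helpers below translate Vulkan sampler state enums into the SQ_TEX
 * hardware encodings packed into the sampler descriptor by
 * radv_init_sampler().
 */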
2656 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2657 {
2658 switch (address_mode) {
2659 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2660 return V_008F30_SQ_TEX_WRAP;
2661 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2662 return V_008F30_SQ_TEX_MIRROR;
2663 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2664 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2665 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2666 return V_008F30_SQ_TEX_CLAMP_BORDER;
2667 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2668 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2669 default:
2670 unreachable("illegal tex wrap mode");
2671 break;
2672 }
2673 }
2674
2675 static unsigned
2676 radv_tex_compare(VkCompareOp op)
2677 {
2678 switch (op) {
2679 case VK_COMPARE_OP_NEVER:
2680 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2681 case VK_COMPARE_OP_LESS:
2682 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2683 case VK_COMPARE_OP_EQUAL:
2684 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2685 case VK_COMPARE_OP_LESS_OR_EQUAL:
2686 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2687 case VK_COMPARE_OP_GREATER:
2688 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2689 case VK_COMPARE_OP_NOT_EQUAL:
2690 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2691 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2692 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2693 case VK_COMPARE_OP_ALWAYS:
2694 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2695 default:
2696 unreachable("illegal compare mode");
2697 break;
2698 }
2699 }
2700
2701 static unsigned
2702 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2703 {
2704 	switch (filter) {
2705 	case VK_FILTER_NEAREST:
2706 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2707 			V_008F38_SQ_TEX_XY_FILTER_POINT);
2708 	case VK_FILTER_LINEAR:
2709 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2710 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2711 	case VK_FILTER_CUBIC_IMG:
2712 	default:
2713 		fprintf(stderr, "illegal texture filter\n");
2714 		return 0;
2715 }
2716 }
2717
2718 static unsigned
2719 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2720 {
2721 switch (mode) {
2722 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2723 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2724 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2725 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2726 default:
2727 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2728 }
2729 }
2730
2731 static unsigned
2732 radv_tex_bordercolor(VkBorderColor bcolor)
2733 {
2734 switch (bcolor) {
2735 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2736 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2737 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2738 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2739 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2740 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2741 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2742 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2743 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2744 default:
2745 break;
2746 }
2747 return 0;
2748 }
2749
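/* Map VkSamplerCreateInfo::maxAnisotropy to the MAX_ANISO_RATIO field:
 * roughly log2 of the 2x/4x/8x/16x anisotropy level.
 */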
2750 static unsigned
2751 radv_tex_aniso_filter(unsigned filter)
2752 {
2753 if (filter < 2)
2754 return 0;
2755 if (filter < 4)
2756 return 1;
2757 if (filter < 8)
2758 return 2;
2759 if (filter < 16)
2760 return 3;
2761 return 4;
2762 }
2763
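/* Pack the four sampler state dwords: word 0 holds address modes,
 * anisotropy and depth compare; word 1 the min/max LOD clamps in fixed
 * point (8 fractional bits); word 2 the filters and LOD bias; word 3 the
 * border color type.
 */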
2764 static void
2765 radv_init_sampler(struct radv_device *device,
2766 struct radv_sampler *sampler,
2767 const VkSamplerCreateInfo *pCreateInfo)
2768 {
2769 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2770 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2771 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2772 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2773
2774 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2775 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2776 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2777 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2778 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2779 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2780 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2781 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2782 S_008F30_DISABLE_CUBE_WRAP(0) |
2783 S_008F30_COMPAT_MODE(is_vi));
2784 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2785 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2786 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2787 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2788 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2789 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2790 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2791 S_008F38_MIP_POINT_PRECLAMP(0) |
2792 S_008F38_DISABLE_LSB_CEIL(1) |
2793 S_008F38_FILTER_PREC_FIX(1) |
2794 S_008F38_ANISO_OVERRIDE(is_vi));
2795 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2796 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2797 }
2798
2799 VkResult radv_CreateSampler(
2800 VkDevice _device,
2801 const VkSamplerCreateInfo* pCreateInfo,
2802 const VkAllocationCallbacks* pAllocator,
2803 VkSampler* pSampler)
2804 {
2805 RADV_FROM_HANDLE(radv_device, device, _device);
2806 struct radv_sampler *sampler;
2807
2808 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2809
2810 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2811 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2812 if (!sampler)
2813 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2814
2815 radv_init_sampler(device, sampler, pCreateInfo);
2816 *pSampler = radv_sampler_to_handle(sampler);
2817
2818 return VK_SUCCESS;
2819 }
2820
2821 void radv_DestroySampler(
2822 VkDevice _device,
2823 VkSampler _sampler,
2824 const VkAllocationCallbacks* pAllocator)
2825 {
2826 RADV_FROM_HANDLE(radv_device, device, _device);
2827 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2828
2829 if (!sampler)
2830 return;
2831 vk_free2(&device->alloc, pAllocator, sampler);
2832 }
2833
2834
2835 /* vk_icd.h does not declare this function, so we declare it here to
2836  * suppress -Wmissing-prototypes.
2837 */
2838 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2839 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2840
2841 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2842 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2843 {
2844 /* For the full details on loader interface versioning, see
2845 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2846 * What follows is a condensed summary, to help you navigate the large and
2847 * confusing official doc.
2848 *
2849 * - Loader interface v0 is incompatible with later versions. We don't
2850 * support it.
2851 *
2852 * - In loader interface v1:
2853 * - The first ICD entrypoint called by the loader is
2854 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2855 * entrypoint.
2856 * - The ICD must statically expose no other Vulkan symbol unless it is
2857 * linked with -Bsymbolic.
2858 * - Each dispatchable Vulkan handle created by the ICD must be
2859 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2860 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2861 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2862 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2863 * such loader-managed surfaces.
2864 *
2865 * - Loader interface v2 differs from v1 in:
2866 * - The first ICD entrypoint called by the loader is
2867 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2868 * statically expose this entrypoint.
2869 *
2870 * - Loader interface v3 differs from v2 in:
2871 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2872  *    vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
2873 * because the loader no longer does so.
2874 */
2875 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2876 return VK_SUCCESS;
2877 }