radv: use ac_surface data structures
src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46
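/* Build the 16-byte pipeline cache UUID from the Mesa and LLVM build
 * timestamps plus the GPU family, so stale caches are rejected whenever
 * the driver, the compiler or the target GPU changes. */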
47 static int
48 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
49 {
50 uint32_t mesa_timestamp, llvm_timestamp;
51 uint16_t f = family;
52 memset(uuid, 0, VK_UUID_SIZE);
53 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
54 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
55 return -1;
56
57 memcpy(uuid, &mesa_timestamp, 4);
58 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
59 memcpy((char*)uuid + 8, &f, 2);
60 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
61 return 0;
62 }
63
64 static void
65 radv_get_device_uuid(drmDevicePtr device, void *uuid) {
66 memset(uuid, 0, VK_UUID_SIZE);
67 memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
68 memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
69 memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
70 memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
71 }
72
73 static const VkExtensionProperties instance_extensions[] = {
74 {
75 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
76 .specVersion = 25,
77 },
78 #ifdef VK_USE_PLATFORM_XCB_KHR
79 {
80 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
81 .specVersion = 6,
82 },
83 #endif
84 #ifdef VK_USE_PLATFORM_XLIB_KHR
85 {
86 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
87 .specVersion = 6,
88 },
89 #endif
90 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
91 {
92 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
93 .specVersion = 5,
94 },
95 #endif
96 {
97 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
98 .specVersion = 1,
99 },
100 {
101 .extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
102 .specVersion = 1,
103 },
104 };
105
106 static const VkExtensionProperties common_device_extensions[] = {
107 {
108 .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
109 .specVersion = 1,
110 },
111 {
112 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
113 .specVersion = 1,
114 },
115 {
116 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
117 .specVersion = 1,
118 },
119 {
120 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
121 .specVersion = 1,
122 },
123 {
124 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
125 .specVersion = 1,
126 },
127 {
128 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
129 .specVersion = 68,
130 },
131 {
132 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
133 .specVersion = 1,
134 },
135 {
136 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
137 .specVersion = 1,
138 },
139 {
140 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
141 .specVersion = 1,
142 },
143 {
144 .extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
145 .specVersion = 1,
146 },
147 {
148 .extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
149 .specVersion = 1,
150 },
151 };
152
153 static VkResult
154 radv_extensions_register(struct radv_instance *instance,
155 struct radv_extensions *extensions,
156 const VkExtensionProperties *new_ext,
157 uint32_t num_ext)
158 {
159 size_t new_size;
160 VkExtensionProperties *new_ptr;
161
162 assert(new_ext && num_ext > 0);
163
164 if (!new_ext)
165 return VK_ERROR_INITIALIZATION_FAILED;
166
167 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
168 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
169 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
170
171 /* Old array continues to be valid, update nothing */
172 if (!new_ptr)
173 return VK_ERROR_OUT_OF_HOST_MEMORY;
174
175 memcpy(&new_ptr[extensions->num_ext], new_ext,
176 num_ext * sizeof(VkExtensionProperties));
177 extensions->ext_array = new_ptr;
178 extensions->num_ext += num_ext;
179
180 return VK_SUCCESS;
181 }
182
183 static void
184 radv_extensions_finish(struct radv_instance *instance,
185 struct radv_extensions *extensions)
186 {
187 assert(extensions);
188
189 if (!extensions)
190 radv_loge("Attempted to free invalid extension struct\n");
191
192 if (extensions->ext_array)
193 vk_free(&instance->alloc, extensions->ext_array);
194 }
195
196 static bool
197 is_extension_enabled(const VkExtensionProperties *extensions,
198 size_t num_ext,
199 const char *name)
200 {
201 assert(extensions && name);
202
203 for (uint32_t i = 0; i < num_ext; i++) {
204 if (strcmp(name, extensions[i].extensionName) == 0)
205 return true;
206 }
207
208 return false;
209 }
210
211 static VkResult
212 radv_physical_device_init(struct radv_physical_device *device,
213 struct radv_instance *instance,
214 drmDevicePtr drm_device)
215 {
216 const char *path = drm_device->nodes[DRM_NODE_RENDER];
217 VkResult result;
218 drmVersionPtr version;
219 int fd;
220
221 fd = open(path, O_RDWR | O_CLOEXEC);
222 if (fd < 0)
223 return VK_ERROR_INCOMPATIBLE_DRIVER;
224
225 version = drmGetVersion(fd);
226 if (!version) {
227 close(fd);
228 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
229 "failed to get version %s: %m", path);
230 }
231
232 if (strcmp(version->name, "amdgpu")) {
233 drmFreeVersion(version);
234 close(fd);
235 return VK_ERROR_INCOMPATIBLE_DRIVER;
236 }
237 drmFreeVersion(version);
238
239 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
240 device->instance = instance;
241 assert(strlen(path) < ARRAY_SIZE(device->path));
242 strncpy(device->path, path, ARRAY_SIZE(device->path));
243
244 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
245 if (!device->ws) {
246 result = VK_ERROR_INCOMPATIBLE_DRIVER;
247 goto fail;
248 }
249
250 device->local_fd = fd;
251 device->ws->query_info(device->ws, &device->rad_info);
252 result = radv_init_wsi(device);
253 if (result != VK_SUCCESS) {
254 device->ws->destroy(device->ws);
255 goto fail;
256 }
257
258 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
259 radv_finish_wsi(device);
260 device->ws->destroy(device->ws);
261 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
262 "cannot generate UUID");
263 goto fail;
264 }
265
266 result = radv_extensions_register(instance,
267 &device->extensions,
268 common_device_extensions,
269 ARRAY_SIZE(common_device_extensions));
270 if (result != VK_SUCCESS)
271 goto fail;
272
273 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
274 device->name = device->rad_info.name;
275
276 radv_get_device_uuid(drm_device, device->device_uuid);
277
278 return VK_SUCCESS;
279
280 fail:
281 close(fd);
282 return result;
283 }
284
285 static void
286 radv_physical_device_finish(struct radv_physical_device *device)
287 {
288 radv_extensions_finish(device->instance, &device->extensions);
289 radv_finish_wsi(device);
290 device->ws->destroy(device->ws);
291 close(device->local_fd);
292 }
293
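/* Default allocation callbacks, used when the application does not supply
 * its own. Note that these ignore the requested alignment and rely on
 * malloc/realloc's default alignment being sufficient. */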
294 static void *
295 default_alloc_func(void *pUserData, size_t size, size_t align,
296 VkSystemAllocationScope allocationScope)
297 {
298 return malloc(size);
299 }
300
301 static void *
302 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
303 size_t align, VkSystemAllocationScope allocationScope)
304 {
305 return realloc(pOriginal, size);
306 }
307
308 static void
309 default_free_func(void *pUserData, void *pMemory)
310 {
311 free(pMemory);
312 }
313
314 static const VkAllocationCallbacks default_alloc = {
315 .pUserData = NULL,
316 .pfnAllocation = default_alloc_func,
317 .pfnReallocation = default_realloc_func,
318 .pfnFree = default_free_func,
319 };
320
321 static const struct debug_control radv_debug_options[] = {
322 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
323 {"nodcc", RADV_DEBUG_NO_DCC},
324 {"shaders", RADV_DEBUG_DUMP_SHADERS},
325 {"nocache", RADV_DEBUG_NO_CACHE},
326 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
327 {"nohiz", RADV_DEBUG_NO_HIZ},
328 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
329 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
330 {"allbos", RADV_DEBUG_ALL_BOS},
331 {"noibs", RADV_DEBUG_NO_IBS},
332 {NULL, 0}
333 };
334
335 VkResult radv_CreateInstance(
336 const VkInstanceCreateInfo* pCreateInfo,
337 const VkAllocationCallbacks* pAllocator,
338 VkInstance* pInstance)
339 {
340 struct radv_instance *instance;
341
342 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
343
344 uint32_t client_version;
345 if (pCreateInfo->pApplicationInfo &&
346 pCreateInfo->pApplicationInfo->apiVersion != 0) {
347 client_version = pCreateInfo->pApplicationInfo->apiVersion;
348 } else {
349 client_version = VK_MAKE_VERSION(1, 0, 0);
350 }
351
352 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
353 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
354 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
355 "Client requested version %d.%d.%d",
356 VK_VERSION_MAJOR(client_version),
357 VK_VERSION_MINOR(client_version),
358 VK_VERSION_PATCH(client_version));
359 }
360
361 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
362 if (!is_extension_enabled(instance_extensions,
363 ARRAY_SIZE(instance_extensions),
364 pCreateInfo->ppEnabledExtensionNames[i]))
365 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
366 }
367
368 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
369 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
370 if (!instance)
371 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
372
373 memset(instance, 0, sizeof(*instance));
374
375 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
376
377 if (pAllocator)
378 instance->alloc = *pAllocator;
379 else
380 instance->alloc = default_alloc;
381
382 instance->apiVersion = client_version;
383 instance->physicalDeviceCount = -1;
384
385 _mesa_locale_init();
386
387 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
388
389 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
390 radv_debug_options);
391
392 *pInstance = radv_instance_to_handle(instance);
393
394 return VK_SUCCESS;
395 }
396
397 void radv_DestroyInstance(
398 VkInstance _instance,
399 const VkAllocationCallbacks* pAllocator)
400 {
401 RADV_FROM_HANDLE(radv_instance, instance, _instance);
402
403 if (!instance)
404 return;
405
406 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
407 radv_physical_device_finish(instance->physicalDevices + i);
408 }
409
410 VG(VALGRIND_DESTROY_MEMPOOL(instance));
411
412 _mesa_locale_fini();
413
414 vk_free(&instance->alloc, instance);
415 }
416
417 static VkResult
418 radv_enumerate_devices(struct radv_instance *instance)
419 {
420 /* TODO: Check for more devices? */
421 drmDevicePtr devices[8];
422 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
423 int max_devices;
424
425 instance->physicalDeviceCount = 0;
426
427 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
428 if (max_devices < 1)
429 return VK_ERROR_INCOMPATIBLE_DRIVER;
430
431 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
432 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
433 devices[i]->bustype == DRM_BUS_PCI &&
434 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
435
436 result = radv_physical_device_init(instance->physicalDevices +
437 instance->physicalDeviceCount,
438 instance,
439 devices[i]);
440 if (result == VK_SUCCESS)
441 ++instance->physicalDeviceCount;
442 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
443 break;
444 }
445 }
446 drmFreeDevices(devices, max_devices);
447
448 return result;
449 }
450
451 VkResult radv_EnumeratePhysicalDevices(
452 VkInstance _instance,
453 uint32_t* pPhysicalDeviceCount,
454 VkPhysicalDevice* pPhysicalDevices)
455 {
456 RADV_FROM_HANDLE(radv_instance, instance, _instance);
457 VkResult result;
458
459 if (instance->physicalDeviceCount < 0) {
460 result = radv_enumerate_devices(instance);
461 if (result != VK_SUCCESS &&
462 result != VK_ERROR_INCOMPATIBLE_DRIVER)
463 return result;
464 }
465
466 if (!pPhysicalDevices) {
467 *pPhysicalDeviceCount = instance->physicalDeviceCount;
468 } else {
469 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
470 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
471 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
472 }
473
474 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
475 : VK_SUCCESS;
476 }
477
478 void radv_GetPhysicalDeviceFeatures(
479 VkPhysicalDevice physicalDevice,
480 VkPhysicalDeviceFeatures* pFeatures)
481 {
482 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
483
484 memset(pFeatures, 0, sizeof(*pFeatures));
485
486 *pFeatures = (VkPhysicalDeviceFeatures) {
487 .robustBufferAccess = true,
488 .fullDrawIndexUint32 = true,
489 .imageCubeArray = true,
490 .independentBlend = true,
491 .geometryShader = true,
492 .tessellationShader = true,
493 .sampleRateShading = false,
494 .dualSrcBlend = true,
495 .logicOp = true,
496 .multiDrawIndirect = true,
497 .drawIndirectFirstInstance = true,
498 .depthClamp = true,
499 .depthBiasClamp = true,
500 .fillModeNonSolid = true,
501 .depthBounds = true,
502 .wideLines = true,
503 .largePoints = true,
504 .alphaToOne = true,
505 .multiViewport = true,
506 .samplerAnisotropy = true,
507 .textureCompressionETC2 = false,
508 .textureCompressionASTC_LDR = false,
509 .textureCompressionBC = true,
510 .occlusionQueryPrecise = true,
511 .pipelineStatisticsQuery = true,
512 .vertexPipelineStoresAndAtomics = true,
513 .fragmentStoresAndAtomics = true,
514 .shaderTessellationAndGeometryPointSize = true,
515 .shaderImageGatherExtended = true,
516 .shaderStorageImageExtendedFormats = true,
517 .shaderStorageImageMultisample = false,
518 .shaderUniformBufferArrayDynamicIndexing = true,
519 .shaderSampledImageArrayDynamicIndexing = true,
520 .shaderStorageBufferArrayDynamicIndexing = true,
521 .shaderStorageImageArrayDynamicIndexing = true,
522 .shaderStorageImageReadWithoutFormat = true,
523 .shaderStorageImageWriteWithoutFormat = true,
524 .shaderClipDistance = true,
525 .shaderCullDistance = true,
526 .shaderFloat64 = true,
527 .shaderInt64 = false,
528 .shaderInt16 = false,
529 .sparseBinding = true,
530 .variableMultisampleRate = true,
531 .inheritedQueries = true,
532 };
533 }
534
535 void radv_GetPhysicalDeviceFeatures2KHR(
536 VkPhysicalDevice physicalDevice,
537 VkPhysicalDeviceFeatures2KHR *pFeatures)
538 {
539 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
540 }
541
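/* Parse the Mesa VERSION string ("major.minor.patch"). For "devel" builds
 * the version is decremented so that we report the last actual release. */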
542 static uint32_t radv_get_driver_version(void)
543 {
544 const char *minor_string = strchr(VERSION, '.');
545 const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
546 int major = atoi(VERSION);
547 int minor = minor_string ? atoi(minor_string + 1) : 0;
548 int patch = patch_string ? atoi(patch_string + 1) : 0;
549 if (strstr(VERSION, "devel")) {
550 if (patch == 0) {
551 patch = 99;
552 if (minor == 0) {
553 minor = 99;
554 --major;
555 } else
556 --minor;
557 } else
558 --patch;
559 }
560 uint32_t version = VK_MAKE_VERSION(major, minor, patch);
561 return version;
562 }
563
564 void radv_GetPhysicalDeviceProperties(
565 VkPhysicalDevice physicalDevice,
566 VkPhysicalDeviceProperties* pProperties)
567 {
568 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
569 VkSampleCountFlags sample_counts = 0xf;
570
571 /* Make sure that the entire descriptor set is addressable with a signed
572 * 32-bit int. So the sum of all limits scaled by descriptor size has to
573 * be at most 2 GiB. The combined image & sampler object counts as one of
574 * both. This limit is for the pipeline layout, not for the set layout, but
575 * there is no set limit, so we just set a pipeline limit. I don't think
576 * any app is going to hit this soon. */
577 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
578 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
579 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
580 32 /* sampler, largest when combined with image */ +
581 64 /* sampled image */ +
582 64 /* storage image */);
583
584 VkPhysicalDeviceLimits limits = {
585 .maxImageDimension1D = (1 << 14),
586 .maxImageDimension2D = (1 << 14),
587 .maxImageDimension3D = (1 << 11),
588 .maxImageDimensionCube = (1 << 14),
589 .maxImageArrayLayers = (1 << 11),
590 .maxTexelBufferElements = 128 * 1024 * 1024,
591 .maxUniformBufferRange = UINT32_MAX,
592 .maxStorageBufferRange = UINT32_MAX,
593 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
594 .maxMemoryAllocationCount = UINT32_MAX,
595 .maxSamplerAllocationCount = 64 * 1024,
596 .bufferImageGranularity = 64, /* A cache line */
597 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
598 .maxBoundDescriptorSets = MAX_SETS,
599 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
600 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
601 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
602 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
603 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
604 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
605 .maxPerStageResources = max_descriptor_set_size,
606 .maxDescriptorSetSamplers = max_descriptor_set_size,
607 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
608 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
609 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
610 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
611 .maxDescriptorSetSampledImages = max_descriptor_set_size,
612 .maxDescriptorSetStorageImages = max_descriptor_set_size,
613 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
614 .maxVertexInputAttributes = 32,
615 .maxVertexInputBindings = 32,
616 .maxVertexInputAttributeOffset = 2047,
617 .maxVertexInputBindingStride = 2048,
618 .maxVertexOutputComponents = 128,
619 .maxTessellationGenerationLevel = 64,
620 .maxTessellationPatchSize = 32,
621 .maxTessellationControlPerVertexInputComponents = 128,
622 .maxTessellationControlPerVertexOutputComponents = 128,
623 .maxTessellationControlPerPatchOutputComponents = 120,
624 .maxTessellationControlTotalOutputComponents = 4096,
625 .maxTessellationEvaluationInputComponents = 128,
626 .maxTessellationEvaluationOutputComponents = 128,
627 .maxGeometryShaderInvocations = 127,
628 .maxGeometryInputComponents = 64,
629 .maxGeometryOutputComponents = 128,
630 .maxGeometryOutputVertices = 256,
631 .maxGeometryTotalOutputComponents = 1024,
632 .maxFragmentInputComponents = 128,
633 .maxFragmentOutputAttachments = 8,
634 .maxFragmentDualSrcAttachments = 1,
635 .maxFragmentCombinedOutputResources = 8,
636 .maxComputeSharedMemorySize = 32768,
637 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
638 .maxComputeWorkGroupInvocations = 2048,
639 .maxComputeWorkGroupSize = {
640 2048,
641 2048,
642 2048
643 },
644 .subPixelPrecisionBits = 4 /* FIXME */,
645 .subTexelPrecisionBits = 4 /* FIXME */,
646 .mipmapPrecisionBits = 4 /* FIXME */,
647 .maxDrawIndexedIndexValue = UINT32_MAX,
648 .maxDrawIndirectCount = UINT32_MAX,
649 .maxSamplerLodBias = 16,
650 .maxSamplerAnisotropy = 16,
651 .maxViewports = MAX_VIEWPORTS,
652 .maxViewportDimensions = { (1 << 14), (1 << 14) },
653 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
654 .viewportSubPixelBits = 13, /* We take a float? */
655 .minMemoryMapAlignment = 4096, /* A page */
656 .minTexelBufferOffsetAlignment = 1,
657 .minUniformBufferOffsetAlignment = 4,
658 .minStorageBufferOffsetAlignment = 4,
659 .minTexelOffset = -32,
660 .maxTexelOffset = 31,
661 .minTexelGatherOffset = -32,
662 .maxTexelGatherOffset = 31,
663 .minInterpolationOffset = -2,
664 .maxInterpolationOffset = 2,
665 .subPixelInterpolationOffsetBits = 8,
666 .maxFramebufferWidth = (1 << 14),
667 .maxFramebufferHeight = (1 << 14),
668 .maxFramebufferLayers = (1 << 10),
669 .framebufferColorSampleCounts = sample_counts,
670 .framebufferDepthSampleCounts = sample_counts,
671 .framebufferStencilSampleCounts = sample_counts,
672 .framebufferNoAttachmentsSampleCounts = sample_counts,
673 .maxColorAttachments = MAX_RTS,
674 .sampledImageColorSampleCounts = sample_counts,
675 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
676 .sampledImageDepthSampleCounts = sample_counts,
677 .sampledImageStencilSampleCounts = sample_counts,
678 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
679 .maxSampleMaskWords = 1,
680 .timestampComputeAndGraphics = true,
681 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
682 .maxClipDistances = 8,
683 .maxCullDistances = 8,
684 .maxCombinedClipAndCullDistances = 8,
685 .discreteQueuePriorities = 1,
686 .pointSizeRange = { 0.125, 255.875 },
687 .lineWidthRange = { 0.0, 7.9921875 },
688 .pointSizeGranularity = (1.0 / 8.0),
689 .lineWidthGranularity = (1.0 / 128.0),
690 .strictLines = false, /* FINISHME */
691 .standardSampleLocations = true,
692 .optimalBufferCopyOffsetAlignment = 128,
693 .optimalBufferCopyRowPitchAlignment = 128,
694 .nonCoherentAtomSize = 64,
695 };
696
697 *pProperties = (VkPhysicalDeviceProperties) {
698 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
699 .driverVersion = radv_get_driver_version(),
700 .vendorID = 0x1002,
701 .deviceID = pdevice->rad_info.pci_id,
702 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
703 .limits = limits,
704 .sparseProperties = {0},
705 };
706
707 strcpy(pProperties->deviceName, pdevice->name);
708 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
709 }
710
711 void radv_GetPhysicalDeviceProperties2KHR(
712 VkPhysicalDevice physicalDevice,
713 VkPhysicalDeviceProperties2KHR *pProperties)
714 {
715 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
716 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
717
718 vk_foreach_struct(ext, pProperties->pNext) {
719 switch (ext->sType) {
720 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
721 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
722 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
723 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
724 break;
725 }
726 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
727 VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
728 radv_device_get_cache_uuid(0, properties->driverUUID);
729 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
730 properties->deviceLUIDValid = false;
731 break;
732 }
733 default:
734 break;
735 }
736 }
737 }
738
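/* Queue family 0 is the general family (graphics, compute, transfer and
 * sparse binding). A second compute-only family is exposed on CIK and newer
 * when the kernel reports compute rings and RADV_DEBUG=nocompute is unset. */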
739 static void radv_get_physical_device_queue_family_properties(
740 struct radv_physical_device* pdevice,
741 uint32_t* pCount,
742 VkQueueFamilyProperties** pQueueFamilyProperties)
743 {
744 int num_queue_families = 1;
745 int idx;
746 if (pdevice->rad_info.compute_rings > 0 &&
747 pdevice->rad_info.chip_class >= CIK &&
748 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
749 num_queue_families++;
750
751 if (pQueueFamilyProperties == NULL) {
752 *pCount = num_queue_families;
753 return;
754 }
755
756 if (!*pCount)
757 return;
758
759 idx = 0;
760 if (*pCount >= 1) {
761 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
762 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
763 VK_QUEUE_COMPUTE_BIT |
764 VK_QUEUE_TRANSFER_BIT |
765 VK_QUEUE_SPARSE_BINDING_BIT,
766 .queueCount = 1,
767 .timestampValidBits = 64,
768 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
769 };
770 idx++;
771 }
772
773 if (pdevice->rad_info.compute_rings > 0 &&
774 pdevice->rad_info.chip_class >= CIK &&
775 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
776 if (*pCount > idx) {
777 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
778 .queueFlags = VK_QUEUE_COMPUTE_BIT |
779 VK_QUEUE_TRANSFER_BIT |
780 VK_QUEUE_SPARSE_BINDING_BIT,
781 .queueCount = pdevice->rad_info.compute_rings,
782 .timestampValidBits = 64,
783 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
784 };
785 idx++;
786 }
787 }
788 *pCount = idx;
789 }
790
791 void radv_GetPhysicalDeviceQueueFamilyProperties(
792 VkPhysicalDevice physicalDevice,
793 uint32_t* pCount,
794 VkQueueFamilyProperties* pQueueFamilyProperties)
795 {
796 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
797 if (!pQueueFamilyProperties) {
798 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
799 return;
800 }
801 VkQueueFamilyProperties *properties[] = {
802 pQueueFamilyProperties + 0,
803 pQueueFamilyProperties + 1,
804 pQueueFamilyProperties + 2,
805 };
806 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
807 assert(*pCount <= 3);
808 }
809
810 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
811 VkPhysicalDevice physicalDevice,
812 uint32_t* pCount,
813 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
814 {
815 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
816 if (!pQueueFamilyProperties) {
817 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
818 return;
819 }
820 VkQueueFamilyProperties *properties[] = {
821 &pQueueFamilyProperties[0].queueFamilyProperties,
822 &pQueueFamilyProperties[1].queueFamilyProperties,
823 &pQueueFamilyProperties[2].queueFamilyProperties,
824 };
825 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
826 assert(*pCount <= 3);
827 }
828
829 void radv_GetPhysicalDeviceMemoryProperties(
830 VkPhysicalDevice physicalDevice,
831 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
832 {
833 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
834
835 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
836
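/* Four memory types over three heaps: device-local VRAM, write-combined
 * GTT, CPU-visible VRAM and cached GTT. The main VRAM heap excludes the
 * CPU-visible slice, which is reported as a separate heap below. */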
837 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
838 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
839 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
840 .heapIndex = RADV_MEM_HEAP_VRAM,
841 };
842 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
843 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
844 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
845 .heapIndex = RADV_MEM_HEAP_GTT,
846 };
847 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
848 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
849 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
850 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
851 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
852 };
853 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
854 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
855 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
856 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
857 .heapIndex = RADV_MEM_HEAP_GTT,
858 };
859
860 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
861
862 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
863 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
864 .size = physical_device->rad_info.vram_size -
865 physical_device->rad_info.visible_vram_size,
866 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
867 };
868 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
869 .size = physical_device->rad_info.visible_vram_size,
870 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
871 };
872 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
873 .size = physical_device->rad_info.gart_size,
874 .flags = 0,
875 };
876 }
877
878 void radv_GetPhysicalDeviceMemoryProperties2KHR(
879 VkPhysicalDevice physicalDevice,
880 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
881 {
882 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
883 &pMemoryProperties->memoryProperties);
884 }
885
886 static int
887 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
888 int queue_family_index, int idx)
889 {
890 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
891 queue->device = device;
892 queue->queue_family_index = queue_family_index;
893 queue->queue_idx = idx;
894
895 queue->hw_ctx = device->ws->ctx_create(device->ws);
896 if (!queue->hw_ctx)
897 return VK_ERROR_OUT_OF_HOST_MEMORY;
898
899 return VK_SUCCESS;
900 }
901
902 static void
903 radv_queue_finish(struct radv_queue *queue)
904 {
905 if (queue->hw_ctx)
906 queue->device->ws->ctx_destroy(queue->hw_ctx);
907
908 if (queue->initial_preamble_cs)
909 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
910 if (queue->continue_preamble_cs)
911 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
912 if (queue->descriptor_bo)
913 queue->device->ws->buffer_destroy(queue->descriptor_bo);
914 if (queue->scratch_bo)
915 queue->device->ws->buffer_destroy(queue->scratch_bo);
916 if (queue->esgs_ring_bo)
917 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
918 if (queue->gsvs_ring_bo)
919 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
920 if (queue->tess_factor_ring_bo)
921 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
922 if (queue->tess_offchip_ring_bo)
923 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
924 if (queue->compute_scratch_bo)
925 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
926 }
927
928 static void
929 radv_device_init_gs_info(struct radv_device *device)
930 {
931 switch (device->physical_device->rad_info.family) {
932 case CHIP_OLAND:
933 case CHIP_HAINAN:
934 case CHIP_KAVERI:
935 case CHIP_KABINI:
936 case CHIP_MULLINS:
937 case CHIP_ICELAND:
938 case CHIP_CARRIZO:
939 case CHIP_STONEY:
940 device->gs_table_depth = 16;
941 return;
942 case CHIP_TAHITI:
943 case CHIP_PITCAIRN:
944 case CHIP_VERDE:
945 case CHIP_BONAIRE:
946 case CHIP_HAWAII:
947 case CHIP_TONGA:
948 case CHIP_FIJI:
949 case CHIP_POLARIS10:
950 case CHIP_POLARIS11:
951 case CHIP_POLARIS12:
952 device->gs_table_depth = 32;
953 return;
954 default:
955 unreachable("unknown GPU");
956 }
957 }
958
959 VkResult radv_CreateDevice(
960 VkPhysicalDevice physicalDevice,
961 const VkDeviceCreateInfo* pCreateInfo,
962 const VkAllocationCallbacks* pAllocator,
963 VkDevice* pDevice)
964 {
965 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
966 VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY; /* covers goto fail paths that don't set it */
967 struct radv_device *device;
968
969 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
970 if (!is_extension_enabled(physical_device->extensions.ext_array,
971 physical_device->extensions.num_ext,
972 pCreateInfo->ppEnabledExtensionNames[i]))
973 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
974 }
975
976 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
977 sizeof(*device), 8,
978 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
979 if (!device)
980 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
981
982 memset(device, 0, sizeof(*device));
983
984 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
985 device->instance = physical_device->instance;
986 device->physical_device = physical_device;
987
988 device->debug_flags = device->instance->debug_flags;
989
990 device->ws = physical_device->ws;
991 if (pAllocator)
992 device->alloc = *pAllocator;
993 else
994 device->alloc = physical_device->instance->alloc;
995
996 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
997 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
998 uint32_t qfi = queue_create->queueFamilyIndex;
999
1000 device->queues[qfi] = vk_alloc(&device->alloc,
1001 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1002 if (!device->queues[qfi]) {
1003 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1004 goto fail;
1005 }
1006
1007 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1008
1009 device->queue_count[qfi] = queue_create->queueCount;
1010
1011 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1012 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1013 if (result != VK_SUCCESS)
1014 goto fail;
1015 }
1016 }
1017
1018 #if HAVE_LLVM < 0x0400
1019 device->llvm_supports_spill = false;
1020 #else
1021 device->llvm_supports_spill = true;
1022 #endif
1023
1024 /* The maximum number of scratch waves. Scratch space isn't divided
1025 * evenly between CUs. The number is only a function of the number of CUs.
1026 * We can decrease the constant to decrease the scratch buffer size.
1027 *
1028 * sctx->scratch_waves must be >= the maximum possible size of
1029 * 1 threadgroup, so that the hw doesn't hang from being unable
1030 * to start any.
1031 *
1032 * The recommended value is 4 per CU at most. Higher numbers don't
1033 * bring much benefit, but they still occupy chip resources (think
1034 * async compute). I've seen ~2% performance difference between 4 and 32.
1035 */
1036 uint32_t max_threads_per_block = 2048;
1037 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1038 max_threads_per_block / 64);
1039
1040 radv_device_init_gs_info(device);
1041
1042 device->tess_offchip_block_dw_size =
1043 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1044 device->has_distributed_tess =
1045 device->physical_device->rad_info.chip_class >= VI &&
1046 device->physical_device->rad_info.max_se >= 2;
1047
1048 result = radv_device_init_meta(device);
1049 if (result != VK_SUCCESS)
1050 goto fail;
1051
1052 radv_device_init_msaa(device);
1053
1054 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1055 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1056 switch (family) {
1057 case RADV_QUEUE_GENERAL:
1058 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1059 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1060 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1061 break;
1062 case RADV_QUEUE_COMPUTE:
1063 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1064 radeon_emit(device->empty_cs[family], 0);
1065 break;
1066 }
1067 device->ws->cs_finalize(device->empty_cs[family]);
1068
1069 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1070 switch (family) {
1071 case RADV_QUEUE_GENERAL:
1072 case RADV_QUEUE_COMPUTE:
1073 si_cs_emit_cache_flush(device->flush_cs[family],
1074 device->physical_device->rad_info.chip_class,
1075 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1076 RADV_CMD_FLAG_INV_ICACHE |
1077 RADV_CMD_FLAG_INV_SMEM_L1 |
1078 RADV_CMD_FLAG_INV_VMEM_L1 |
1079 RADV_CMD_FLAG_INV_GLOBAL_L2);
1080 break;
1081 }
1082 device->ws->cs_finalize(device->flush_cs[family]);
1083
1084 device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
1085 switch (family) {
1086 case RADV_QUEUE_GENERAL:
1087 case RADV_QUEUE_COMPUTE:
1088 si_cs_emit_cache_flush(device->flush_shader_cs[family],
1089 device->physical_device->rad_info.chip_class,
1090 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1091 (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1092 RADV_CMD_FLAG_INV_ICACHE |
1093 RADV_CMD_FLAG_INV_SMEM_L1 |
1094 RADV_CMD_FLAG_INV_VMEM_L1 |
1095 RADV_CMD_FLAG_INV_GLOBAL_L2);
1096 break;
1097 }
1098 device->ws->cs_finalize(device->flush_shader_cs[family]);
1099 }
1100
1101 if (getenv("RADV_TRACE_FILE")) {
1102 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1103 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1104 if (!device->trace_bo)
1105 goto fail;
1106
1107 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1108 if (!device->trace_id_ptr)
1109 goto fail;
1110 }
1111
1112 if (device->physical_device->rad_info.chip_class >= CIK)
1113 cik_create_gfx_config(device);
1114
1115 VkPipelineCacheCreateInfo ci;
1116 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1117 ci.pNext = NULL;
1118 ci.flags = 0;
1119 ci.pInitialData = NULL;
1120 ci.initialDataSize = 0;
1121 VkPipelineCache pc;
1122 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1123 &ci, NULL, &pc);
1124 if (result != VK_SUCCESS)
1125 goto fail;
1126
1127 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1128
1129 *pDevice = radv_device_to_handle(device);
1130 return VK_SUCCESS;
1131
1132 fail:
1133 if (device->trace_bo)
1134 device->ws->buffer_destroy(device->trace_bo);
1135
1136 if (device->gfx_init)
1137 device->ws->buffer_destroy(device->gfx_init);
1138
1139 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1140 for (unsigned q = 0; q < device->queue_count[i]; q++)
1141 radv_queue_finish(&device->queues[i][q]);
1142 if (device->queue_count[i])
1143 vk_free(&device->alloc, device->queues[i]);
1144 }
1145
1146 vk_free(&device->alloc, device);
1147 return result;
1148 }
1149
1150 void radv_DestroyDevice(
1151 VkDevice _device,
1152 const VkAllocationCallbacks* pAllocator)
1153 {
1154 RADV_FROM_HANDLE(radv_device, device, _device);
1155
1156 if (!device)
1157 return;
1158
1159 if (device->trace_bo)
1160 device->ws->buffer_destroy(device->trace_bo);
1161
1162 if (device->gfx_init)
1163 device->ws->buffer_destroy(device->gfx_init);
1164
1165 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1166 for (unsigned q = 0; q < device->queue_count[i]; q++)
1167 radv_queue_finish(&device->queues[i][q]);
1168 if (device->queue_count[i])
1169 vk_free(&device->alloc, device->queues[i]);
1170 if (device->empty_cs[i])
1171 device->ws->cs_destroy(device->empty_cs[i]);
1172 if (device->flush_cs[i])
1173 device->ws->cs_destroy(device->flush_cs[i]);
1174 if (device->flush_shader_cs[i])
1175 device->ws->cs_destroy(device->flush_shader_cs[i]);
1176 }
1177 radv_device_finish_meta(device);
1178
1179 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1180 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1181
1182 vk_free(&device->alloc, device);
1183 }
1184
1185 VkResult radv_EnumerateInstanceExtensionProperties(
1186 const char* pLayerName,
1187 uint32_t* pPropertyCount,
1188 VkExtensionProperties* pProperties)
1189 {
1190 if (pProperties == NULL) {
1191 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1192 return VK_SUCCESS;
1193 }
1194
1195 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1196 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1197
1198 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1199 return VK_INCOMPLETE;
1200
1201 return VK_SUCCESS;
1202 }
1203
1204 VkResult radv_EnumerateDeviceExtensionProperties(
1205 VkPhysicalDevice physicalDevice,
1206 const char* pLayerName,
1207 uint32_t* pPropertyCount,
1208 VkExtensionProperties* pProperties)
1209 {
1210 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1211
1212 if (pProperties == NULL) {
1213 *pPropertyCount = pdevice->extensions.num_ext;
1214 return VK_SUCCESS;
1215 }
1216
1217 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1218 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1219
1220 if (*pPropertyCount < pdevice->extensions.num_ext)
1221 return VK_INCOMPLETE;
1222
1223 return VK_SUCCESS;
1224 }
1225
1226 VkResult radv_EnumerateInstanceLayerProperties(
1227 uint32_t* pPropertyCount,
1228 VkLayerProperties* pProperties)
1229 {
1230 if (pProperties == NULL) {
1231 *pPropertyCount = 0;
1232 return VK_SUCCESS;
1233 }
1234
1235 /* None supported at this time */
1236 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1237 }
1238
1239 VkResult radv_EnumerateDeviceLayerProperties(
1240 VkPhysicalDevice physicalDevice,
1241 uint32_t* pPropertyCount,
1242 VkLayerProperties* pProperties)
1243 {
1244 if (pProperties == NULL) {
1245 *pPropertyCount = 0;
1246 return VK_SUCCESS;
1247 }
1248
1249 /* None supported at this time */
1250 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1251 }
1252
1253 void radv_GetDeviceQueue(
1254 VkDevice _device,
1255 uint32_t queueFamilyIndex,
1256 uint32_t queueIndex,
1257 VkQueue* pQueue)
1258 {
1259 RADV_FROM_HANDLE(radv_device, device, _device);
1260
1261 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1262 }
1263
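/* Write the current trace id and a dump of the given CS to
 * RADV_TRACE_FILE, for post-mortem debugging of GPU hangs. */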
1264 static void radv_dump_trace(struct radv_device *device,
1265 struct radeon_winsys_cs *cs)
1266 {
1267 const char *filename = getenv("RADV_TRACE_FILE");
1268 FILE *f = fopen(filename, "w");
1269 if (!f) {
1270 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1271 return;
1272 }
1273
1274 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1275 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1276 fclose(f);
1277 }
1278
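/* Write the 4-dword buffer descriptors for the geometry and tessellation
 * rings into the queue's descriptor BO. map[0..1] hold the scratch rsrc
 * words (filled in by the caller) and map[2..3] are padding, so the six
 * ring descriptors start at map[4]; sample positions follow the rings. */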
1279 static void
1280 fill_geom_tess_rings(struct radv_queue *queue,
1281 uint32_t *map,
1282 bool add_sample_positions,
1283 uint32_t esgs_ring_size,
1284 struct radeon_winsys_bo *esgs_ring_bo,
1285 uint32_t gsvs_ring_size,
1286 struct radeon_winsys_bo *gsvs_ring_bo,
1287 uint32_t tess_factor_ring_size,
1288 struct radeon_winsys_bo *tess_factor_ring_bo,
1289 uint32_t tess_offchip_ring_size,
1290 struct radeon_winsys_bo *tess_offchip_ring_bo)
1291 {
1292 uint64_t esgs_va = 0, gsvs_va = 0;
1293 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1294 uint32_t *desc = &map[4];
1295
1296 if (esgs_ring_bo)
1297 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1298 if (gsvs_ring_bo)
1299 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1300 if (tess_factor_ring_bo)
1301 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1302 if (tess_offchip_ring_bo)
1303 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1304
1305 /* stride 0, num records - size, add tid, swizzle, elsize4,
1306 index stride 64 */
1307 desc[0] = esgs_va;
1308 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1309 S_008F04_STRIDE(0) |
1310 S_008F04_SWIZZLE_ENABLE(true);
1311 desc[2] = esgs_ring_size;
1312 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1313 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1314 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1315 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1316 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1317 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1318 S_008F0C_ELEMENT_SIZE(1) |
1319 S_008F0C_INDEX_STRIDE(3) |
1320 S_008F0C_ADD_TID_ENABLE(true);
1321
1322 desc += 4;
1323 /* GS entry for ES->GS ring */
1324 /* stride 0, num records - size, elsize0,
1325 index stride 0 */
1326 desc[0] = esgs_va;
1327 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1328 S_008F04_STRIDE(0) |
1329 S_008F04_SWIZZLE_ENABLE(false);
1330 desc[2] = esgs_ring_size;
1331 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1332 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1333 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1334 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1335 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1336 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1337 S_008F0C_ELEMENT_SIZE(0) |
1338 S_008F0C_INDEX_STRIDE(0) |
1339 S_008F0C_ADD_TID_ENABLE(false);
1340
1341 desc += 4;
1342 /* VS entry for GS->VS ring */
1343 /* stride 0, num records - size, elsize0,
1344 index stride 0 */
1345 desc[0] = gsvs_va;
1346 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1347 S_008F04_STRIDE(0) |
1348 S_008F04_SWIZZLE_ENABLE(false);
1349 desc[2] = gsvs_ring_size;
1350 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1351 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1352 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1353 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1354 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1355 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1356 S_008F0C_ELEMENT_SIZE(0) |
1357 S_008F0C_INDEX_STRIDE(0) |
1358 S_008F0C_ADD_TID_ENABLE(false);
1359 desc += 4;
1360
1361 /* stride gsvs_itemsize, num records 64
1362 elsize 4, index stride 16 */
1363 /* shader will patch stride and desc[2] */
1364 desc[0] = gsvs_va;
1365 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1366 S_008F04_STRIDE(0) |
1367 S_008F04_SWIZZLE_ENABLE(true);
1368 desc[2] = 0;
1369 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1370 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1371 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1372 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1373 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1374 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1375 S_008F0C_ELEMENT_SIZE(1) |
1376 S_008F0C_INDEX_STRIDE(1) |
1377 S_008F0C_ADD_TID_ENABLE(true);
1378 desc += 4;
1379
1380 desc[0] = tess_factor_va;
1381 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1382 S_008F04_STRIDE(0) |
1383 S_008F04_SWIZZLE_ENABLE(false);
1384 desc[2] = tess_factor_ring_size;
1385 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1386 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1387 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1388 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1389 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1390 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1391 S_008F0C_ELEMENT_SIZE(0) |
1392 S_008F0C_INDEX_STRIDE(0) |
1393 S_008F0C_ADD_TID_ENABLE(false);
1394 desc += 4;
1395
1396 desc[0] = tess_offchip_va;
1397 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1398 S_008F04_STRIDE(0) |
1399 S_008F04_SWIZZLE_ENABLE(false);
1400 desc[2] = tess_offchip_ring_size;
1401 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1402 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1403 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1404 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1405 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1406 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1407 S_008F0C_ELEMENT_SIZE(0) |
1408 S_008F0C_INDEX_STRIDE(0) |
1409 S_008F0C_ADD_TID_ENABLE(false);
1410 desc += 4;
1411
1412 /* add sample positions after all rings; space for them is only
 * reserved in the descriptor BO when add_sample_positions is set */
if (add_sample_positions) {
1413 memcpy(desc, queue->device->sample_locations_1x, 8);
1414 desc += 2;
1415 memcpy(desc, queue->device->sample_locations_2x, 16);
1416 desc += 4;
1417 memcpy(desc, queue->device->sample_locations_4x, 32);
1418 desc += 8;
1419 memcpy(desc, queue->device->sample_locations_8x, 64);
1420 desc += 16;
1421 memcpy(desc, queue->device->sample_locations_16x, 128);
}
1422 }
1423
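/* Compute the VGT_HS_OFFCHIP_PARAM register value: the number of offchip
 * tessellation buffers (doubled on CIK+ except on Carrizo/Stoney, then
 * clamped per chip class) combined with the offchip granularity. The
 * decrement on VI is presumably because that field encodes count - 1
 * there. The usable count is returned through max_offchip_buffers_p. */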
1424 static unsigned
1425 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1426 {
1427 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1428 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1429 device->physical_device->rad_info.family != CHIP_STONEY;
1430 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1431 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1432 device->physical_device->rad_info.max_se;
1433 unsigned offchip_granularity;
1434 unsigned hs_offchip_param;
1435 switch (device->tess_offchip_block_dw_size) {
1436 default:
1437 assert(0);
1438 /* fall through */
1439 case 8192:
1440 offchip_granularity = V_03093C_X_8K_DWORDS;
1441 break;
1442 case 4096:
1443 offchip_granularity = V_03093C_X_4K_DWORDS;
1444 break;
1445 }
1446
1447 switch (device->physical_device->rad_info.chip_class) {
1448 case SI:
1449 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1450 break;
1451 case CIK:
1452 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1453 break;
1454 case VI:
1455 default:
1456 max_offchip_buffers = MIN2(max_offchip_buffers, 512);
1457 break;
1458 }
1459
1460 *max_offchip_buffers_p = max_offchip_buffers;
1461 if (device->physical_device->rad_info.chip_class >= CIK) {
1462 if (device->physical_device->rad_info.chip_class >= VI)
1463 --max_offchip_buffers;
1464 hs_offchip_param =
1465 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1466 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1467 } else {
1468 hs_offchip_param =
1469 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1470 }
1471 return hs_offchip_param;
1472 }
1473
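/* Build, or reuse, the queue's two preamble command streams: dest_cs[0]
 * becomes the initial preamble, dest_cs[1] the continue preamble. Both
 * bind the scratch, ring and descriptor BOs. The allocations only ever
 * grow; if the requested sizes already fit and no new rings or sample
 * positions are needed, the cached preambles are returned as-is. */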
1474 static VkResult
1475 radv_get_preamble_cs(struct radv_queue *queue,
1476 uint32_t scratch_size,
1477 uint32_t compute_scratch_size,
1478 uint32_t esgs_ring_size,
1479 uint32_t gsvs_ring_size,
1480 bool needs_tess_rings,
1481 bool needs_sample_positions,
1482 struct radeon_winsys_cs **initial_preamble_cs,
1483 struct radeon_winsys_cs **continue_preamble_cs)
1484 {
1485 struct radeon_winsys_bo *scratch_bo = NULL;
1486 struct radeon_winsys_bo *descriptor_bo = NULL;
1487 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1488 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1489 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1490 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1491 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1492 struct radeon_winsys_cs *dest_cs[2] = {0};
1493 bool add_tess_rings = false, add_sample_positions = false;
1494 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1495 unsigned max_offchip_buffers;
1496 unsigned hs_offchip_param = 0;
1497 if (!queue->has_tess_rings) {
1498 if (needs_tess_rings)
1499 add_tess_rings = true;
1500 }
1501 if (!queue->has_sample_positions) {
1502 if (needs_sample_positions)
1503 add_sample_positions = true;
1504 }
1505 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1506 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1507 &max_offchip_buffers);
1508 tess_offchip_ring_size = max_offchip_buffers *
1509 queue->device->tess_offchip_block_dw_size * 4;
1510
1511 if (scratch_size <= queue->scratch_size &&
1512 compute_scratch_size <= queue->compute_scratch_size &&
1513 esgs_ring_size <= queue->esgs_ring_size &&
1514 gsvs_ring_size <= queue->gsvs_ring_size &&
1515 !add_tess_rings && !add_sample_positions &&
1516 queue->initial_preamble_cs) {
1517 *initial_preamble_cs = queue->initial_preamble_cs;
1518 *continue_preamble_cs = queue->continue_preamble_cs;
1519 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1520 *continue_preamble_cs = NULL;
1521 return VK_SUCCESS;
1522 }
1523
1524 if (scratch_size > queue->scratch_size) {
1525 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1526 scratch_size,
1527 4096,
1528 RADEON_DOMAIN_VRAM,
1529 RADEON_FLAG_NO_CPU_ACCESS);
1530 if (!scratch_bo)
1531 goto fail;
1532 } else
1533 scratch_bo = queue->scratch_bo;
1534
1535 if (compute_scratch_size > queue->compute_scratch_size) {
1536 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1537 compute_scratch_size,
1538 4096,
1539 RADEON_DOMAIN_VRAM,
1540 RADEON_FLAG_NO_CPU_ACCESS);
1541 if (!compute_scratch_bo)
1542 goto fail;
1543
1544 } else
1545 compute_scratch_bo = queue->compute_scratch_bo;
1546
1547 if (esgs_ring_size > queue->esgs_ring_size) {
1548 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1549 esgs_ring_size,
1550 4096,
1551 RADEON_DOMAIN_VRAM,
1552 RADEON_FLAG_NO_CPU_ACCESS);
1553 if (!esgs_ring_bo)
1554 goto fail;
1555 } else {
1556 esgs_ring_bo = queue->esgs_ring_bo;
1557 esgs_ring_size = queue->esgs_ring_size;
1558 }
1559
1560 if (gsvs_ring_size > queue->gsvs_ring_size) {
1561 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1562 gsvs_ring_size,
1563 4096,
1564 RADEON_DOMAIN_VRAM,
1565 RADEON_FLAG_NO_CPU_ACCESS);
1566 if (!gsvs_ring_bo)
1567 goto fail;
1568 } else {
1569 gsvs_ring_bo = queue->gsvs_ring_bo;
1570 gsvs_ring_size = queue->gsvs_ring_size;
1571 }
1572
1573 if (add_tess_rings) {
1574 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1575 tess_factor_ring_size,
1576 256,
1577 RADEON_DOMAIN_VRAM,
1578 RADEON_FLAG_NO_CPU_ACCESS);
1579 if (!tess_factor_ring_bo)
1580 goto fail;
1581 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1582 tess_offchip_ring_size,
1583 256,
1584 RADEON_DOMAIN_VRAM,
1585 RADEON_FLAG_NO_CPU_ACCESS);
1586 if (!tess_offchip_ring_bo)
1587 goto fail;
1588 } else {
1589 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1590 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1591 }
1592
1593 if (scratch_bo != queue->scratch_bo ||
1594 esgs_ring_bo != queue->esgs_ring_bo ||
1595 gsvs_ring_bo != queue->gsvs_ring_bo ||
1596 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1597 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1598 uint32_t size = 0;
1599 if (gsvs_ring_bo || esgs_ring_bo ||
1600 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1601 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1602 if (add_sample_positions)
1603 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes, padded to 256. */
1604 }
1605 else if (scratch_bo)
1606 size = 8; /* 2 dword */
1607
1608 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1609 size,
1610 4096,
1611 RADEON_DOMAIN_VRAM,
1612 RADEON_FLAG_CPU_ACCESS);
1613 if (!descriptor_bo)
1614 goto fail;
1615 } else
1616 descriptor_bo = queue->descriptor_bo;
1617
1618 for(int i = 0; i < 2; ++i) {
1619 struct radeon_winsys_cs *cs = NULL;
1620 cs = queue->device->ws->cs_create(queue->device->ws,
1621 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1622 if (!cs)
1623 goto fail;
1624
1625 dest_cs[i] = cs;
1626
1627 if (scratch_bo)
1628 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1629
1630 if (esgs_ring_bo)
1631 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1632
1633 if (gsvs_ring_bo)
1634 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1635
1636 if (tess_factor_ring_bo)
1637 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1638
1639 if (tess_offchip_ring_bo)
1640 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1641
1642 if (descriptor_bo)
1643 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1644
1645 if (descriptor_bo != queue->descriptor_bo) {
1646 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1647
1648 if (scratch_bo) {
1649 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1650 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1651 S_008F04_SWIZZLE_ENABLE(1);
1652 map[0] = scratch_va;
1653 map[1] = rsrc1;
1654 }
1655
1656 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1657 add_sample_positions)
1658 fill_geom_tess_rings(queue, map, add_sample_positions,
1659 esgs_ring_size, esgs_ring_bo,
1660 gsvs_ring_size, gsvs_ring_bo,
1661 tess_factor_ring_size, tess_factor_ring_bo,
1662 tess_offchip_ring_size, tess_offchip_ring_bo);
1663
1664 queue->device->ws->buffer_unmap(descriptor_bo);
1665 }
1666
1667 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1668 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1669 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1670 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1671 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1672 }
1673
1674 if (esgs_ring_bo || gsvs_ring_bo) {
1675 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1676 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1677 radeon_emit(cs, esgs_ring_size >> 8);
1678 radeon_emit(cs, gsvs_ring_size >> 8);
1679 } else {
1680 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1681 radeon_emit(cs, esgs_ring_size >> 8);
1682 radeon_emit(cs, gsvs_ring_size >> 8);
1683 }
1684 }
1685
1686 if (tess_factor_ring_bo) {
1687 uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1688 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1689 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1690 S_030938_SIZE(tess_factor_ring_size / 4));
1691 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1692 tf_va >> 8);
1693 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1694 } else {
1695 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1696 S_008988_SIZE(tess_factor_ring_size / 4));
1697 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1698 tf_va >> 8);
1699 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1700 hs_offchip_param);
1701 }
1702 }
1703
1704 if (descriptor_bo) {
1705 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1706 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1707 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1708 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1709 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1710 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1711
1712 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1713
1714 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1715 radeon_set_sh_reg_seq(cs, regs[i], 2);
1716 radeon_emit(cs, va);
1717 radeon_emit(cs, va >> 32);
1718 }
1719 }
1720
1721 if (compute_scratch_bo) {
1722 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1723 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1724 S_008F04_SWIZZLE_ENABLE(1);
1725
1726 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1727
1728 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1729 radeon_emit(cs, scratch_va);
1730 radeon_emit(cs, rsrc1);
1731 }
1732
1733 if (!i) { /* flush caches only in the initial preamble; the continue preamble runs between chunks of a single submission */
1734 si_cs_emit_cache_flush(cs,
1735 queue->device->physical_device->rad_info.chip_class,
1736 queue->queue_family_index == RING_COMPUTE &&
1737 queue->device->physical_device->rad_info.chip_class >= CIK,
1738 RADV_CMD_FLAG_INV_ICACHE |
1739 RADV_CMD_FLAG_INV_SMEM_L1 |
1740 RADV_CMD_FLAG_INV_VMEM_L1 |
1741 RADV_CMD_FLAG_INV_GLOBAL_L2);
1742 }
1743
1744 if (!queue->device->ws->cs_finalize(cs))
1745 goto fail;
1746 }
1747
1748 if (queue->initial_preamble_cs)
1749 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1750
1751 if (queue->continue_preamble_cs)
1752 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1753
1754 queue->initial_preamble_cs = dest_cs[0];
1755 queue->continue_preamble_cs = dest_cs[1];
1756
1757 if (scratch_bo != queue->scratch_bo) {
1758 if (queue->scratch_bo)
1759 queue->device->ws->buffer_destroy(queue->scratch_bo);
1760 queue->scratch_bo = scratch_bo;
1761 queue->scratch_size = scratch_size;
1762 }
1763
1764 if (compute_scratch_bo != queue->compute_scratch_bo) {
1765 if (queue->compute_scratch_bo)
1766 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1767 queue->compute_scratch_bo = compute_scratch_bo;
1768 queue->compute_scratch_size = compute_scratch_size;
1769 }
1770
1771 if (esgs_ring_bo != queue->esgs_ring_bo) {
1772 if (queue->esgs_ring_bo)
1773 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1774 queue->esgs_ring_bo = esgs_ring_bo;
1775 queue->esgs_ring_size = esgs_ring_size;
1776 }
1777
1778 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1779 if (queue->gsvs_ring_bo)
1780 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1781 queue->gsvs_ring_bo = gsvs_ring_bo;
1782 queue->gsvs_ring_size = gsvs_ring_size;
1783 }
1784
1785 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1786 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1787 }
1788
1789 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1790 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1791 queue->has_tess_rings = true;
1792 }
1793
1794 if (descriptor_bo != queue->descriptor_bo) {
1795 if (queue->descriptor_bo)
1796 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1797
1798 queue->descriptor_bo = descriptor_bo;
1799 }
1800
1801 if (add_sample_positions)
1802 queue->has_sample_positions = true;
1803
1804 *initial_preamble_cs = queue->initial_preamble_cs;
1805 *continue_preamble_cs = queue->continue_preamble_cs;
1806 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size && !tess_factor_ring_size && !tess_offchip_ring_size && !add_sample_positions)
1807 *continue_preamble_cs = NULL;
1808 return VK_SUCCESS;
1809 fail:
1810 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1811 if (dest_cs[i])
1812 queue->device->ws->cs_destroy(dest_cs[i]);
1813 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1814 queue->device->ws->buffer_destroy(descriptor_bo);
1815 if (scratch_bo && scratch_bo != queue->scratch_bo)
1816 queue->device->ws->buffer_destroy(scratch_bo);
1817 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1818 queue->device->ws->buffer_destroy(compute_scratch_bo);
1819 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1820 queue->device->ws->buffer_destroy(esgs_ring_bo);
1821 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1822 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1823 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1824 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1825 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1826 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1827 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1828 }
1829
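/* Illustrative sketch (not driver code): the 8-byte scratch entry written to
 * map[0..1] in radv_get_preamble_cs above is the low half of an AMD buffer
 * descriptor. Assuming a hypothetical scratch buffer at GPU VA 0x1234567800,
 * the two dwords decompose as follows; the S_008F04_* macros are the same
 * ones from sid.h that the code above uses.
 */
#if 0
uint64_t scratch_va = 0x1234567800ull;     /* hypothetical GPU address */
uint32_t dw0 = scratch_va;                 /* low 32 bits of the base address */
uint32_t dw1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | /* high address bits */
               S_008F04_SWIZZLE_ENABLE(1); /* element swizzling for per-lane scratch */
#endif
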
1830 VkResult radv_QueueSubmit(
1831 VkQueue _queue,
1832 uint32_t submitCount,
1833 const VkSubmitInfo* pSubmits,
1834 VkFence _fence)
1835 {
1836 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1837 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1838 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1839 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1840 int ret;
1841 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1842 uint32_t scratch_size = 0;
1843 uint32_t compute_scratch_size = 0;
1844 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1845 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1846 VkResult result;
1847 bool fence_emitted = false;
1848 bool tess_rings_needed = false;
1849 bool sample_positions_needed = false;
1850
1851 /* Do this first so failing to allocate scratch buffers can't result in
1852 * partially executed submissions. */
1853 for (uint32_t i = 0; i < submitCount; i++) {
1854 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1855 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1856 pSubmits[i].pCommandBuffers[j]);
1857
1858 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1859 compute_scratch_size = MAX2(compute_scratch_size,
1860 cmd_buffer->compute_scratch_size_needed);
1861 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1862 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1863 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1864 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1865 }
1866 }
1867
1868 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1869 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1870 sample_positions_needed,
1871 &initial_preamble_cs, &continue_preamble_cs);
1872 if (result != VK_SUCCESS)
1873 return result;
1874
1875 for (uint32_t i = 0; i < submitCount; i++) {
1876 struct radeon_winsys_cs **cs_array;
1877 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1878 bool can_patch = !do_flush;
1879 uint32_t advance;
1880
1881 if (!pSubmits[i].commandBufferCount) {
1882 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1883 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1884 &queue->device->empty_cs[queue->queue_family_index],
1885 1, NULL, NULL,
1886 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1887 pSubmits[i].waitSemaphoreCount,
1888 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1889 pSubmits[i].signalSemaphoreCount,
1890 false, base_fence);
1891 if (ret) {
1892 radv_loge("failed to submit CS %d\n", i);
1893 abort();
1894 }
1895 fence_emitted = true;
1896 }
1897 continue;
1898 }
1899
1900 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1901 (pSubmits[i].commandBufferCount + do_flush));
if (!cs_array)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1902
1903 if (do_flush)
1904 cs_array[0] = pSubmits[i].waitSemaphoreCount ?
1905 queue->device->flush_shader_cs[queue->queue_family_index] :
1906 queue->device->flush_cs[queue->queue_family_index];
1907
1908 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1909 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1910 pSubmits[i].pCommandBuffers[j]);
1911 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1912
1913 cs_array[j + do_flush] = cmd_buffer->cs;
1914 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1915 can_patch = false;
1916 }
1917
1918 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1919 advance = MIN2(max_cs_submission,
1920 pSubmits[i].commandBufferCount + do_flush - j);
1921 bool first = j == 0;
1922 bool last = j + advance == pSubmits[i].commandBufferCount + do_flush;
1923
1924 if (queue->device->trace_bo)
1925 *queue->device->trace_id_ptr = 0;
1926
1927 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1928 advance, initial_preamble_cs, continue_preamble_cs,
1929 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1930 first ? pSubmits[i].waitSemaphoreCount : 0,
1931 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1932 last ? pSubmits[i].signalSemaphoreCount : 0,
1933 can_patch, base_fence);
1934
1935 if (ret) {
1936 radv_loge("failed to submit CS %d\n", i);
1937 abort();
1938 }
1939 fence_emitted = true;
1940 if (queue->device->trace_bo) {
1941 bool success = queue->device->ws->ctx_wait_idle(
1942 queue->hw_ctx,
1943 radv_queue_family_to_ring(
1944 queue->queue_family_index),
1945 queue->queue_idx);
1946
1947 if (!success) { /* Hang */
1948 radv_dump_trace(queue->device, cs_array[j]);
1949 abort();
1950 }
1951 }
1952 }
1953 free(cs_array);
1954 }
1955
1956 if (fence) {
1957 if (!fence_emitted)
1958 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1959 &queue->device->empty_cs[queue->queue_family_index],
1960 1, NULL, NULL, NULL, 0, NULL, 0,
1961 false, base_fence);
1962
1963 fence->submitted = true;
1964 }
1965
1966 return VK_SUCCESS;
1967 }
1968
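/* Worked example (hypothetical numbers): with trace_bo set, max_cs_submission
 * is 1. A VkSubmitInfo with 3 command buffers and do_flush true yields a
 * 4-entry cs_array, so the loop above issues 4 winsys submissions with
 * advance == 1. Wait semaphores are attached only to the first chunk
 * (j == 0) and signal semaphores only to the last, preserving
 * vkQueueSubmit's whole-batch semaphore semantics across the split.
 */
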
1969 VkResult radv_QueueWaitIdle(
1970 VkQueue _queue)
1971 {
1972 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1973
1974 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1975 radv_queue_family_to_ring(queue->queue_family_index),
1976 queue->queue_idx);
1977 return VK_SUCCESS;
1978 }
1979
1980 VkResult radv_DeviceWaitIdle(
1981 VkDevice _device)
1982 {
1983 RADV_FROM_HANDLE(radv_device, device, _device);
1984
1985 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1986 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1987 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1988 }
1989 }
1990 return VK_SUCCESS;
1991 }
1992
1993 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1994 VkInstance instance,
1995 const char* pName)
1996 {
1997 return radv_lookup_entrypoint(pName);
1998 }
1999
2000 /* The loader wants us to expose a second GetInstanceProcAddr function
2001 * to work around certain LD_PRELOAD issues seen in apps.
2002 */
2003 PUBLIC
2004 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2005 VkInstance instance,
2006 const char* pName);
2007
2008 PUBLIC
2009 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2010 VkInstance instance,
2011 const char* pName)
2012 {
2013 return radv_GetInstanceProcAddr(instance, pName);
2014 }
2015
2016 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2017 VkDevice device,
2018 const char* pName)
2019 {
2020 return radv_lookup_entrypoint(pName);
2021 }
2022
2023 bool radv_get_memory_fd(struct radv_device *device,
2024 struct radv_device_memory *memory,
2025 int *pFD)
2026 {
2027 struct radeon_bo_metadata metadata;
2028
2029 if (memory->image) {
2030 radv_init_metadata(device, memory->image, &metadata);
2031 device->ws->buffer_set_metadata(memory->bo, &metadata);
2032 }
2033
2034 return device->ws->buffer_get_fd(device->ws, memory->bo,
2035 pFD);
2036 }
2037
2038 VkResult radv_AllocateMemory(
2039 VkDevice _device,
2040 const VkMemoryAllocateInfo* pAllocateInfo,
2041 const VkAllocationCallbacks* pAllocator,
2042 VkDeviceMemory* pMem)
2043 {
2044 RADV_FROM_HANDLE(radv_device, device, _device);
2045 struct radv_device_memory *mem;
2046 VkResult result;
2047 enum radeon_bo_domain domain;
2048 uint32_t flags = 0;
2049
2050 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2051
2052 if (pAllocateInfo->allocationSize == 0) {
2053 /* Apparently, this is allowed */
2054 *pMem = VK_NULL_HANDLE;
2055 return VK_SUCCESS;
2056 }
2057
2058 const VkImportMemoryFdInfoKHX *import_info =
2059 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
2060 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
2061 vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);
2062
2063 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2064 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2065 if (mem == NULL)
2066 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2067
2068 if (dedicate_info) {
2069 mem->image = radv_image_from_handle(dedicate_info->image);
2070 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2071 } else {
2072 mem->image = NULL;
2073 mem->buffer = NULL;
2074 }
2075
2076 if (import_info) {
2077 assert(import_info->handleType ==
2078 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
2079 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2080 NULL, NULL);
2081 if (!mem->bo) {
2082 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
2083 goto fail;
2084 }
goto out_success;
2085 }
2086
2087 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2088 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2089 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2090 domain = RADEON_DOMAIN_GTT;
2091 else
2092 domain = RADEON_DOMAIN_VRAM;
2093
2094 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2095 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2096 else
2097 flags |= RADEON_FLAG_CPU_ACCESS;
2098
2099 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2100 flags |= RADEON_FLAG_GTT_WC;
2101
2102 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
2103 domain, flags);
2104
2105 if (!mem->bo) {
2106 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2107 goto fail;
2108 }
2109 mem->type_index = pAllocateInfo->memoryTypeIndex;
2110 out_success:
2111 *pMem = radv_device_memory_to_handle(mem);
2112
2113 return VK_SUCCESS;
2114
2115 fail:
2116 vk_free2(&device->alloc, pAllocator, mem);
2117
2118 return result;
2119 }
2120
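/* Usage sketch (application side, illustrative only): requesting the
 * write-combined GTT type makes the allocation above pick RADEON_DOMAIN_GTT
 * with RADEON_FLAG_GTT_WC. Using RADV_MEM_TYPE_GTT_WRITE_COMBINE directly as
 * memoryTypeIndex assumes the driver-internal enum matches the advertised
 * type order; a real application must query
 * vkGetPhysicalDeviceMemoryProperties instead. "device" is an assumed
 * VkDevice handle.
 */
#if 0
VkMemoryAllocateInfo info = {
	.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
	.allocationSize = 4096,
	.memoryTypeIndex = RADV_MEM_TYPE_GTT_WRITE_COMBINE,
};
VkDeviceMemory mem;
VkResult res = radv_AllocateMemory(device, &info, NULL, &mem);
#endif
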
2121 void radv_FreeMemory(
2122 VkDevice _device,
2123 VkDeviceMemory _mem,
2124 const VkAllocationCallbacks* pAllocator)
2125 {
2126 RADV_FROM_HANDLE(radv_device, device, _device);
2127 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2128
2129 if (mem == NULL)
2130 return;
2131
2132 device->ws->buffer_destroy(mem->bo);
2133 mem->bo = NULL;
2134
2135 vk_free2(&device->alloc, pAllocator, mem);
2136 }
2137
2138 VkResult radv_MapMemory(
2139 VkDevice _device,
2140 VkDeviceMemory _memory,
2141 VkDeviceSize offset,
2142 VkDeviceSize size,
2143 VkMemoryMapFlags flags,
2144 void** ppData)
2145 {
2146 RADV_FROM_HANDLE(radv_device, device, _device);
2147 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2148
2149 if (mem == NULL) {
2150 *ppData = NULL;
2151 return VK_SUCCESS;
2152 }
2153
2154 *ppData = device->ws->buffer_map(mem->bo);
2155 if (*ppData) {
2156 *ppData = (char *)*ppData + offset;
2157 return VK_SUCCESS;
2158 }
2159
2160 return VK_ERROR_MEMORY_MAP_FAILED;
2161 }
2162
2163 void radv_UnmapMemory(
2164 VkDevice _device,
2165 VkDeviceMemory _memory)
2166 {
2167 RADV_FROM_HANDLE(radv_device, device, _device);
2168 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2169
2170 if (mem == NULL)
2171 return;
2172
2173 device->ws->buffer_unmap(mem->bo);
2174 }
2175
2176 VkResult radv_FlushMappedMemoryRanges(
2177 VkDevice _device,
2178 uint32_t memoryRangeCount,
2179 const VkMappedMemoryRange* pMemoryRanges)
2180 {
2181 return VK_SUCCESS;
2182 }
2183
2184 VkResult radv_InvalidateMappedMemoryRanges(
2185 VkDevice _device,
2186 uint32_t memoryRangeCount,
2187 const VkMappedMemoryRange* pMemoryRanges)
2188 {
2189 return VK_SUCCESS;
2190 }
2191
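/* Flush/Invalidate are intentionally no-ops: every host-visible memory type
 * radv exposes is advertised as HOST_COHERENT, so host writes become visible
 * to the GPU without explicit range flushes. A minimal map/write sequence
 * therefore looks like this (sketch; handles and error handling assumed):
 */
#if 0
void *ptr;
radv_MapMemory(device, mem, 0, size, 0, &ptr);
memcpy(ptr, data, size);        /* no vkFlushMappedMemoryRanges needed */
radv_UnmapMemory(device, mem);
#endif
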
2192 void radv_GetBufferMemoryRequirements(
2193 VkDevice device,
2194 VkBuffer _buffer,
2195 VkMemoryRequirements* pMemoryRequirements)
2196 {
2197 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2198
2199 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2200
2201 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2202 pMemoryRequirements->alignment = 4096;
2203 else
2204 pMemoryRequirements->alignment = 16;
2205
2206 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2207 }
2208
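/* Worked example: a 100-byte buffer without SPARSE_BINDING gets alignment 16
 * and size align64(100, 16) == 112; with SPARSE_BINDING the alignment (and
 * hence the reported size granularity) is the 4096-byte page used by the
 * virtual-address binding path below. memoryTypeBits always allows every
 * exposed memory type.
 */
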
2209 void radv_GetImageMemoryRequirements(
2210 VkDevice device,
2211 VkImage _image,
2212 VkMemoryRequirements* pMemoryRequirements)
2213 {
2214 RADV_FROM_HANDLE(radv_image, image, _image);
2215
2216 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2217
2218 pMemoryRequirements->size = image->size;
2219 pMemoryRequirements->alignment = image->alignment;
2220 }
2221
2222 void radv_GetImageSparseMemoryRequirements(
2223 VkDevice device,
2224 VkImage image,
2225 uint32_t* pSparseMemoryRequirementCount,
2226 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2227 {
2228 stub();
2229 }
2230
2231 void radv_GetDeviceMemoryCommitment(
2232 VkDevice device,
2233 VkDeviceMemory memory,
2234 VkDeviceSize* pCommittedMemoryInBytes)
2235 {
2236 *pCommittedMemoryInBytes = 0;
2237 }
2238
2239 VkResult radv_BindBufferMemory(
2240 VkDevice device,
2241 VkBuffer _buffer,
2242 VkDeviceMemory _memory,
2243 VkDeviceSize memoryOffset)
2244 {
2245 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2246 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2247
2248 if (mem) {
2249 buffer->bo = mem->bo;
2250 buffer->offset = memoryOffset;
2251 } else {
2252 buffer->bo = NULL;
2253 buffer->offset = 0;
2254 }
2255
2256 return VK_SUCCESS;
2257 }
2258
2259 VkResult radv_BindImageMemory(
2260 VkDevice device,
2261 VkImage _image,
2262 VkDeviceMemory _memory,
2263 VkDeviceSize memoryOffset)
2264 {
2265 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2266 RADV_FROM_HANDLE(radv_image, image, _image);
2267
2268 if (mem) {
2269 image->bo = mem->bo;
2270 image->offset = memoryOffset;
2271 } else {
2272 image->bo = NULL;
2273 image->offset = 0;
2274 }
2275
2276 return VK_SUCCESS;
2277 }
2278
2279
2280 static void
2281 radv_sparse_buffer_bind_memory(struct radv_device *device,
2282 const VkSparseBufferMemoryBindInfo *bind)
2283 {
2284 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2285
2286 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2287 struct radv_device_memory *mem = NULL;
2288
2289 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2290 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2291
2292 device->ws->buffer_virtual_bind(buffer->bo,
2293 bind->pBinds[i].resourceOffset,
2294 bind->pBinds[i].size,
2295 mem ? mem->bo : NULL,
2296 bind->pBinds[i].memoryOffset);
2297 }
2298 }
2299
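/* Illustrative bind (hypothetical values): one VkSparseMemoryBind mapping
 * the second 64 KiB page of a sparse buffer to offset 0 of "mem". Each
 * element of pBinds becomes exactly one buffer_virtual_bind() call above;
 * a VK_NULL_HANDLE memory unbinds the range.
 */
#if 0
VkSparseMemoryBind bind = {
	.resourceOffset = 65536,
	.size = 65536,
	.memory = mem,          /* VkDeviceMemory, assumed allocated elsewhere */
	.memoryOffset = 0,
};
VkSparseBufferMemoryBindInfo info = {
	.buffer = buffer,       /* sparse VkBuffer, assumed created elsewhere */
	.bindCount = 1,
	.pBinds = &bind,
};
#endif
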
2300 static void
2301 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2302 const VkSparseImageOpaqueMemoryBindInfo *bind)
2303 {
2304 RADV_FROM_HANDLE(radv_image, image, bind->image);
2305
2306 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2307 struct radv_device_memory *mem = NULL;
2308
2309 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2310 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2311
2312 device->ws->buffer_virtual_bind(image->bo,
2313 bind->pBinds[i].resourceOffset,
2314 bind->pBinds[i].size,
2315 mem ? mem->bo : NULL,
2316 bind->pBinds[i].memoryOffset);
2317 }
2318 }
2319
2320 VkResult radv_QueueBindSparse(
2321 VkQueue _queue,
2322 uint32_t bindInfoCount,
2323 const VkBindSparseInfo* pBindInfo,
2324 VkFence _fence)
2325 {
2326 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2327 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2328 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2329 bool fence_emitted = false;
2330
2331 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2332 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2333 radv_sparse_buffer_bind_memory(queue->device,
2334 pBindInfo[i].pBufferBinds + j);
2335 }
2336
2337 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2338 radv_sparse_image_opaque_bind_memory(queue->device,
2339 pBindInfo[i].pImageOpaqueBinds + j);
2340 }
2341
2342 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2343 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2344 &queue->device->empty_cs[queue->queue_family_index],
2345 1, NULL, NULL,
2346 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2347 pBindInfo[i].waitSemaphoreCount,
2348 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2349 pBindInfo[i].signalSemaphoreCount,
2350 false, base_fence);
2351 fence_emitted = true;
2352 if (fence)
2353 fence->submitted = true;
2354 }
2355 }
2356
2357 if (fence && !fence_emitted) {
2358 fence->signalled = true;
2359 }
2360
2361 return VK_SUCCESS;
2362 }
2363
2364 VkResult radv_CreateFence(
2365 VkDevice _device,
2366 const VkFenceCreateInfo* pCreateInfo,
2367 const VkAllocationCallbacks* pAllocator,
2368 VkFence* pFence)
2369 {
2370 RADV_FROM_HANDLE(radv_device, device, _device);
2371 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2372 sizeof(*fence), 8,
2373 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2374
2375 if (!fence)
2376 return VK_ERROR_OUT_OF_HOST_MEMORY;
2377
2378 memset(fence, 0, sizeof(*fence));
2379 fence->submitted = false;
2380 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2381 fence->fence = device->ws->create_fence();
2382 if (!fence->fence) {
2383 vk_free2(&device->alloc, pAllocator, fence);
2384 return VK_ERROR_OUT_OF_HOST_MEMORY;
2385 }
2386
2387 *pFence = radv_fence_to_handle(fence);
2388
2389 return VK_SUCCESS;
2390 }
2391
2392 void radv_DestroyFence(
2393 VkDevice _device,
2394 VkFence _fence,
2395 const VkAllocationCallbacks* pAllocator)
2396 {
2397 RADV_FROM_HANDLE(radv_device, device, _device);
2398 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2399
2400 if (!fence)
2401 return;
2402 device->ws->destroy_fence(fence->fence);
2403 vk_free2(&device->alloc, pAllocator, fence);
2404 }
2405
2406 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2407 {
2408 uint64_t current_time;
2409 struct timespec tv;
2410
2411 clock_gettime(CLOCK_MONOTONIC, &tv);
2412 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2413
2414 timeout = MIN2(UINT64_MAX - current_time, timeout);
2415
2416 return current_time + timeout;
2417 }
2418
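/* The MIN2 clamp above prevents unsigned overflow: with a relative timeout
 * of UINT64_MAX (Vulkan's "wait forever"), current_time + timeout would
 * wrap, so the timeout is first reduced to UINT64_MAX - current_time and
 * the returned absolute deadline saturates at UINT64_MAX instead of
 * wrapping to a tiny value.
 */
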
2419 VkResult radv_WaitForFences(
2420 VkDevice _device,
2421 uint32_t fenceCount,
2422 const VkFence* pFences,
2423 VkBool32 waitAll,
2424 uint64_t timeout)
2425 {
2426 RADV_FROM_HANDLE(radv_device, device, _device);
2427 timeout = radv_get_absolute_timeout(timeout);
2428
2429 if (!waitAll && fenceCount > 1) {
2430 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2431 }
2432
2433 for (uint32_t i = 0; i < fenceCount; ++i) {
2434 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2435 bool success = false;
2436
2437 if (fence->signalled)
2438 continue;
2439
2440 if (!fence->submitted)
2441 return VK_TIMEOUT;
2442
2443 success = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2444 if (!success)
2445 return VK_TIMEOUT;
2446
2447 fence->signalled = true;
2448 }
2449
2450 return VK_SUCCESS;
2451 }
2452
2453 VkResult radv_ResetFences(VkDevice device,
2454 uint32_t fenceCount,
2455 const VkFence *pFences)
2456 {
2457 for (unsigned i = 0; i < fenceCount; ++i) {
2458 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2459 fence->submitted = fence->signalled = false;
2460 }
2461
2462 return VK_SUCCESS;
2463 }
2464
2465 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2466 {
2467 RADV_FROM_HANDLE(radv_device, device, _device);
2468 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2469
2470 if (fence->signalled)
2471 return VK_SUCCESS;
2472 if (!fence->submitted)
2473 return VK_NOT_READY;
2474
2475 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2476 return VK_NOT_READY;
2477
2478 return VK_SUCCESS;
2479 }
2480
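/* Fence state summary for the software flags used above:
 *
 *   submitted  signalled  GetFenceStatus
 *   false      false      VK_NOT_READY (never submitted)
 *   true       false      poll the winsys fence; VK_NOT_READY until it retires
 *   any        true       VK_SUCCESS without touching the winsys
 */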
2481
2482 // Queue semaphore functions
2483
2484 VkResult radv_CreateSemaphore(
2485 VkDevice _device,
2486 const VkSemaphoreCreateInfo* pCreateInfo,
2487 const VkAllocationCallbacks* pAllocator,
2488 VkSemaphore* pSemaphore)
2489 {
2490 RADV_FROM_HANDLE(radv_device, device, _device);
2491 struct radeon_winsys_sem *sem;
2492
2493 sem = device->ws->create_sem(device->ws);
2494 if (!sem)
2495 return VK_ERROR_OUT_OF_HOST_MEMORY;
2496
2497 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2498 return VK_SUCCESS;
2499 }
2500
2501 void radv_DestroySemaphore(
2502 VkDevice _device,
2503 VkSemaphore _semaphore,
2504 const VkAllocationCallbacks* pAllocator)
2505 {
2506 RADV_FROM_HANDLE(radv_device, device, _device);
2507 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2508 if (!_semaphore)
2509 return;
2510
2511 device->ws->destroy_sem(sem);
2512 }
2513
2514 VkResult radv_CreateEvent(
2515 VkDevice _device,
2516 const VkEventCreateInfo* pCreateInfo,
2517 const VkAllocationCallbacks* pAllocator,
2518 VkEvent* pEvent)
2519 {
2520 RADV_FROM_HANDLE(radv_device, device, _device);
2521 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2522 sizeof(*event), 8,
2523 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2524
2525 if (!event)
2526 return VK_ERROR_OUT_OF_HOST_MEMORY;
2527
2528 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2529 RADEON_DOMAIN_GTT,
2530 RADEON_FLAG_CPU_ACCESS);
2531 if (!event->bo) {
2532 vk_free2(&device->alloc, pAllocator, event);
2533 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2534 }
2535
2536 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2537
2538 *pEvent = radv_event_to_handle(event);
2539
2540 return VK_SUCCESS;
2541 }
2542
2543 void radv_DestroyEvent(
2544 VkDevice _device,
2545 VkEvent _event,
2546 const VkAllocationCallbacks* pAllocator)
2547 {
2548 RADV_FROM_HANDLE(radv_device, device, _device);
2549 RADV_FROM_HANDLE(radv_event, event, _event);
2550
2551 if (!event)
2552 return;
2553 device->ws->buffer_destroy(event->bo);
2554 vk_free2(&device->alloc, pAllocator, event);
2555 }
2556
2557 VkResult radv_GetEventStatus(
2558 VkDevice _device,
2559 VkEvent _event)
2560 {
2561 RADV_FROM_HANDLE(radv_event, event, _event);
2562
2563 if (*event->map == 1)
2564 return VK_EVENT_SET;
2565 return VK_EVENT_RESET;
2566 }
2567
2568 VkResult radv_SetEvent(
2569 VkDevice _device,
2570 VkEvent _event)
2571 {
2572 RADV_FROM_HANDLE(radv_event, event, _event);
2573 *event->map = 1;
2574
2575 return VK_SUCCESS;
2576 }
2577
2578 VkResult radv_ResetEvent(
2579 VkDevice _device,
2580 VkEvent _event)
2581 {
2582 RADV_FROM_HANDLE(radv_event, event, _event);
2583 *event->map = 0;
2584
2585 return VK_SUCCESS;
2586 }
2587
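/* Events are a single mapped GTT dword: the host-side functions above write
 * event->map directly, while vkCmdSetEvent/vkCmdResetEvent make the GPU
 * write the same dword from the command stream. Host-side sketch, assuming
 * "device" is a valid VkDevice:
 */
#if 0
VkEvent event;
radv_CreateEvent(device, &(VkEventCreateInfo) {
	.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO }, NULL, &event);
radv_SetEvent(device, event);   /* *event->map = 1 */
assert(radv_GetEventStatus(device, event) == VK_EVENT_SET);
#endif
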
2588 VkResult radv_CreateBuffer(
2589 VkDevice _device,
2590 const VkBufferCreateInfo* pCreateInfo,
2591 const VkAllocationCallbacks* pAllocator,
2592 VkBuffer* pBuffer)
2593 {
2594 RADV_FROM_HANDLE(radv_device, device, _device);
2595 struct radv_buffer *buffer;
2596
2597 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2598
2599 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2600 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2601 if (buffer == NULL)
2602 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2603
2604 buffer->size = pCreateInfo->size;
2605 buffer->usage = pCreateInfo->usage;
2606 buffer->bo = NULL;
2607 buffer->offset = 0;
2608 buffer->flags = pCreateInfo->flags;
2609
2610 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2611 buffer->bo = device->ws->buffer_create(device->ws,
2612 align64(buffer->size, 4096),
2613 4096, 0, RADEON_FLAG_VIRTUAL);
2614 if (!buffer->bo) {
2615 vk_free2(&device->alloc, pAllocator, buffer);
2616 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2617 }
2618 }
2619
2620 *pBuffer = radv_buffer_to_handle(buffer);
2621
2622 return VK_SUCCESS;
2623 }
2624
2625 void radv_DestroyBuffer(
2626 VkDevice _device,
2627 VkBuffer _buffer,
2628 const VkAllocationCallbacks* pAllocator)
2629 {
2630 RADV_FROM_HANDLE(radv_device, device, _device);
2631 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2632
2633 if (!buffer)
2634 return;
2635
2636 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2637 device->ws->buffer_destroy(buffer->bo);
2638
2639 vk_free2(&device->alloc, pAllocator, buffer);
2640 }
2641
2642 static inline unsigned
2643 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2644 {
2645 if (stencil)
2646 return image->surface.u.legacy.stencil_tiling_index[level];
2647 else
2648 return image->surface.u.legacy.tiling_index[level];
2649 }
2650
2651 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2652 {
2653 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2654 }
2655
2656 static void
2657 radv_initialise_color_surface(struct radv_device *device,
2658 struct radv_color_buffer_info *cb,
2659 struct radv_image_view *iview)
2660 {
2661 const struct vk_format_description *desc;
2662 unsigned ntype, format, swap, endian;
2663 unsigned blend_clamp = 0, blend_bypass = 0;
2664 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2665 uint64_t va;
2666 const struct radeon_surf *surf = &iview->image->surface;
2667 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2668
2669 desc = vk_format_description(iview->vk_format);
2670
2671 memset(cb, 0, sizeof(*cb));
2672
2673 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2674 va += level_info->offset;
2675 cb->cb_color_base = va >> 8;
2676
2677 /* CMASK variables */
2678 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2679 va += iview->image->cmask.offset;
2680 cb->cb_color_cmask = va >> 8;
2681 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2682
2683 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2684 va += iview->image->dcc_offset;
2685 cb->cb_dcc_base = va >> 8;
2686
2687 uint32_t max_slice = radv_surface_layer_count(iview);
2688 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2689 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2690
2691 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2692 pitch_tile_max = level_info->nblk_x / 8 - 1;
2693 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2694 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2695
2696 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2697 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2698
2699 /* Intensity is implemented as Red, so treat it that way. */
2700 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2701 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2702
2703 if (iview->image->info.samples > 1) {
2704 unsigned log_samples = util_logbase2(iview->image->info.samples);
2705
2706 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2707 S_028C74_NUM_FRAGMENTS(log_samples);
2708 }
2709
2710 if (iview->image->fmask.size) {
2711 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2712 if (device->physical_device->rad_info.chip_class >= CIK)
2713 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2714 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2715 cb->cb_color_fmask = va >> 8;
2716 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2717 } else {
2718 /* This must be set for fast clear to work without FMASK. */
2719 if (device->physical_device->rad_info.chip_class >= CIK)
2720 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2721 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2722 cb->cb_color_fmask = cb->cb_color_base;
2723 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2724 }
2725
2726 ntype = radv_translate_color_numformat(iview->vk_format,
2727 desc,
2728 vk_format_get_first_non_void_channel(iview->vk_format));
2729 format = radv_translate_colorformat(iview->vk_format);
2730 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2731 radv_finishme("Illegal color\n");
2732 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2733 endian = radv_colorformat_endian_swap(format);
2734
2735 /* blend clamp should be set for all NORM/SRGB types */
2736 if (ntype == V_028C70_NUMBER_UNORM ||
2737 ntype == V_028C70_NUMBER_SNORM ||
2738 ntype == V_028C70_NUMBER_SRGB)
2739 blend_clamp = 1;
2740
2741 /* set blend bypass according to docs if SINT/UINT or
2742  * 8/24 COLOR variants */
2743 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2744 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2745 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2746 blend_clamp = 0;
2747 blend_bypass = 1;
2748 }
2749 #if 0
2750 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2751 (format == V_028C70_COLOR_8 ||
2752 format == V_028C70_COLOR_8_8 ||
2753 format == V_028C70_COLOR_8_8_8_8))
2754 ->color_is_int8 = true;
2755 #endif
2756 cb->cb_color_info = S_028C70_FORMAT(format) |
2757 S_028C70_COMP_SWAP(swap) |
2758 S_028C70_BLEND_CLAMP(blend_clamp) |
2759 S_028C70_BLEND_BYPASS(blend_bypass) |
2760 S_028C70_SIMPLE_FLOAT(1) |
2761 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2762 ntype != V_028C70_NUMBER_SNORM &&
2763 ntype != V_028C70_NUMBER_SRGB &&
2764 format != V_028C70_COLOR_8_24 &&
2765 format != V_028C70_COLOR_24_8) |
2766 S_028C70_NUMBER_TYPE(ntype) |
2767 S_028C70_ENDIAN(endian);
2768 if (iview->image->info.samples > 1 &&
2769 iview->image->fmask.size)
2770 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2771
2772 if (iview->image->cmask.size &&
2773 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2774 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2775
2776 if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
2777 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2778
2779 if (device->physical_device->rad_info.chip_class >= VI) {
2780 unsigned max_uncompressed_block_size = 2;
2781 if (iview->image->info.samples > 1) {
2782 if (iview->image->surface.bpe == 1)
2783 max_uncompressed_block_size = 0;
2784 else if (iview->image->surface.bpe == 2)
2785 max_uncompressed_block_size = 1;
2786 }
2787
2788 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2789 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2790 }
2791
2792 /* This must be set for fast clear to work without FMASK. */
2793 if (!iview->image->fmask.size &&
2794 device->physical_device->rad_info.chip_class == SI) {
2795 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
2796 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2797 }
2798 }
2799
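/* Worked example for the pitch/slice fields above (assuming one element per
 * pixel): a 512x512 mip level has nblk_x == nblk_y == 512, so
 * pitch_tile_max == 512 / 8 - 1 == 63 and
 * slice_tile_max == (512 * 512) / 64 - 1 == 4095, both expressed in 8x8
 * tile units as the CB_COLOR*_PITCH/SLICE registers expect.
 */
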
2800 static void
2801 radv_initialise_ds_surface(struct radv_device *device,
2802 struct radv_ds_buffer_info *ds,
2803 struct radv_image_view *iview)
2804 {
2805 unsigned level = iview->base_mip;
2806 unsigned format;
2807 uint64_t va, s_offs, z_offs;
2808 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
2809 bool stencil_only = false;
2810 memset(ds, 0, sizeof(*ds));
2811 switch (iview->vk_format) {
2812 case VK_FORMAT_D24_UNORM_S8_UINT:
2813 case VK_FORMAT_X8_D24_UNORM_PACK32:
2814 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2815 ds->offset_scale = 2.0f;
2816 break;
2817 case VK_FORMAT_D16_UNORM:
2818 case VK_FORMAT_D16_UNORM_S8_UINT:
2819 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2820 ds->offset_scale = 4.0f;
2821 break;
2822 case VK_FORMAT_D32_SFLOAT:
2823 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2824 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2825 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2826 ds->offset_scale = 1.0f;
2827 break;
2828 case VK_FORMAT_S8_UINT:
2829 stencil_only = true;
2830 level_info = &iview->image->surface.u.legacy.stencil_level[level];
2831 break;
2832 default:
2833 break;
2834 }
2835
2836 format = radv_translate_dbformat(iview->vk_format);
2837
2838 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2839 s_offs = z_offs = va;
2840 z_offs += iview->image->surface.u.legacy.level[level].offset;
2841 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
2842
2843 uint32_t max_slice = radv_surface_layer_count(iview);
2844 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2845 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2846 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2847 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2848
2849 if (iview->image->info.samples > 1)
2850 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
2851
2852 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2853 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2854 else
2855 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2856
2857 if (device->physical_device->rad_info.chip_class >= CIK) {
2858 struct radeon_info *info = &device->physical_device->rad_info;
2859 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
2860 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
2861 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
2862 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2863 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2864 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2865
2866 if (stencil_only)
2867 tile_mode = stencil_tile_mode;
2868
2869 ds->db_depth_info |=
2870 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2871 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2872 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2873 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2874 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2875 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2876 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2877 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2878 } else {
2879 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2880 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2881 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2882 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2883 }
2884
2885 if (iview->image->surface.htile_size && !level) {
2886 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
2887
2888 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
2889 /* Use all of the htile_buffer for depth if there's no stencil. */
2890 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2891
2892 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2893 iview->image->htile_offset;
2894 ds->db_htile_data_base = va >> 8;
2895 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2896 } else {
2897 ds->db_htile_data_base = 0;
2898 ds->db_htile_surface = 0;
2899 }
2900
2901 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2902 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2903
2904 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2905 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2906 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2907 }
2908
2909 VkResult radv_CreateFramebuffer(
2910 VkDevice _device,
2911 const VkFramebufferCreateInfo* pCreateInfo,
2912 const VkAllocationCallbacks* pAllocator,
2913 VkFramebuffer* pFramebuffer)
2914 {
2915 RADV_FROM_HANDLE(radv_device, device, _device);
2916 struct radv_framebuffer *framebuffer;
2917
2918 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2919
2920 size_t size = sizeof(*framebuffer) +
2921 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2922 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2923 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2924 if (framebuffer == NULL)
2925 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2926
2927 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2928 framebuffer->width = pCreateInfo->width;
2929 framebuffer->height = pCreateInfo->height;
2930 framebuffer->layers = pCreateInfo->layers;
2931 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2932 VkImageView _iview = pCreateInfo->pAttachments[i];
2933 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2934 framebuffer->attachments[i].attachment = iview;
2935 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2936 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2937 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2938 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2939 }
2940 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2941 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2942 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2943 }
2944
2945 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2946 return VK_SUCCESS;
2947 }
2948
2949 void radv_DestroyFramebuffer(
2950 VkDevice _device,
2951 VkFramebuffer _fb,
2952 const VkAllocationCallbacks* pAllocator)
2953 {
2954 RADV_FROM_HANDLE(radv_device, device, _device);
2955 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2956
2957 if (!fb)
2958 return;
2959 vk_free2(&device->alloc, pAllocator, fb);
2960 }
2961
2962 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2963 {
2964 switch (address_mode) {
2965 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2966 return V_008F30_SQ_TEX_WRAP;
2967 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2968 return V_008F30_SQ_TEX_MIRROR;
2969 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2970 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2971 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2972 return V_008F30_SQ_TEX_CLAMP_BORDER;
2973 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2974 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2975 default:
2976 unreachable("illegal tex wrap mode");
2977 break;
2978 }
2979 }
2980
2981 static unsigned
2982 radv_tex_compare(VkCompareOp op)
2983 {
2984 switch (op) {
2985 case VK_COMPARE_OP_NEVER:
2986 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2987 case VK_COMPARE_OP_LESS:
2988 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2989 case VK_COMPARE_OP_EQUAL:
2990 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2991 case VK_COMPARE_OP_LESS_OR_EQUAL:
2992 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2993 case VK_COMPARE_OP_GREATER:
2994 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2995 case VK_COMPARE_OP_NOT_EQUAL:
2996 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2997 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2998 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2999 case VK_COMPARE_OP_ALWAYS:
3000 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3001 default:
3002 unreachable("illegal compare mode");
3003 break;
3004 }
3005 }
3006
3007 static unsigned
3008 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3009 {
3010 switch (filter) {
3011 case VK_FILTER_NEAREST:
3012 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3013 V_008F38_SQ_TEX_XY_FILTER_POINT);
3014 case VK_FILTER_LINEAR:
3015 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3016 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3017 case VK_FILTER_CUBIC_IMG:
3018 default:
3019 fprintf(stderr, "illegal texture filter\n");
3020 return 0;
3021 }
3022 }
3023
3024 static unsigned
3025 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3026 {
3027 switch (mode) {
3028 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3029 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3030 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3031 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3032 default:
3033 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3034 }
3035 }
3036
3037 static unsigned
3038 radv_tex_bordercolor(VkBorderColor bcolor)
3039 {
3040 switch (bcolor) {
3041 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3042 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3043 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3044 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3045 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3046 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3047 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3048 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3049 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3050 default:
3051 break;
3052 }
3053 return 0;
3054 }
3055
3056 static unsigned
3057 radv_tex_aniso_filter(unsigned filter)
3058 {
3059 if (filter < 2)
3060 return 0;
3061 if (filter < 4)
3062 return 1;
3063 if (filter < 8)
3064 return 2;
3065 if (filter < 16)
3066 return 3;
3067 return 4;
3068 }
3069
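/* The filter argument is VkSamplerCreateInfo::maxAnisotropy truncated to an
 * integer, and the result is the hardware MAX_ANISO_RATIO code:
 *
 *   maxAnisotropy  0-1  2-3  4-7  8-15  16+
 *   ratio code     0    1    2    3     4    (1x, 2x, 4x, 8x, 16x)
 */
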
3070 static void
3071 radv_init_sampler(struct radv_device *device,
3072 struct radv_sampler *sampler,
3073 const VkSamplerCreateInfo *pCreateInfo)
3074 {
3075 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3076 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3077 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3078 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3079
3080 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3081 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3082 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3083 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3084 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3085 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3086 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3087 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3088 S_008F30_DISABLE_CUBE_WRAP(0) |
3089 S_008F30_COMPAT_MODE(is_vi));
3090 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3091 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3092 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3093 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3094 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3095 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3096 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3097 S_008F38_MIP_POINT_PRECLAMP(0) |
3098 S_008F38_DISABLE_LSB_CEIL(1) |
3099 S_008F38_FILTER_PREC_FIX(1) |
3100 S_008F38_ANISO_OVERRIDE(is_vi));
3101 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3102 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3103 }
3104
3105 VkResult radv_CreateSampler(
3106 VkDevice _device,
3107 const VkSamplerCreateInfo* pCreateInfo,
3108 const VkAllocationCallbacks* pAllocator,
3109 VkSampler* pSampler)
3110 {
3111 RADV_FROM_HANDLE(radv_device, device, _device);
3112 struct radv_sampler *sampler;
3113
3114 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3115
3116 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3117 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3118 if (!sampler)
3119 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3120
3121 radv_init_sampler(device, sampler, pCreateInfo);
3122 *pSampler = radv_sampler_to_handle(sampler);
3123
3124 return VK_SUCCESS;
3125 }
3126
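/* Usage sketch (illustrative): a trilinear repeat sampler. With
 * anisotropyEnable false, max_aniso is 0, so radv_init_sampler selects the
 * plain POINT/BILINEAR XY filters and a zero ANISO_RATIO. "device" is an
 * assumed VkDevice handle.
 */
#if 0
VkSamplerCreateInfo info = {
	.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
	.magFilter = VK_FILTER_LINEAR,
	.minFilter = VK_FILTER_LINEAR,
	.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
	.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
	.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
	.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
	.maxLod = 15.0f,        /* matches the CLAMP(maxLod, 0, 15) above */
};
VkSampler sampler;
radv_CreateSampler(device, &info, NULL, &sampler);
#endif
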
3127 void radv_DestroySampler(
3128 VkDevice _device,
3129 VkSampler _sampler,
3130 const VkAllocationCallbacks* pAllocator)
3131 {
3132 RADV_FROM_HANDLE(radv_device, device, _device);
3133 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3134
3135 if (!sampler)
3136 return;
3137 vk_free2(&device->alloc, pAllocator, sampler);
3138 }
3139
3140 /* vk_icd.h does not declare this function, so we declare it here to
3141 * suppress Wmissing-prototypes.
3142 */
3143 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3144 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3145
3146 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3147 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3148 {
3149 /* For the full details on loader interface versioning, see
3150 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3151 * What follows is a condensed summary, to help you navigate the large and
3152 * confusing official doc.
3153 *
3154 * - Loader interface v0 is incompatible with later versions. We don't
3155 * support it.
3156 *
3157 * - In loader interface v1:
3158 * - The first ICD entrypoint called by the loader is
3159 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3160 * entrypoint.
3161 * - The ICD must statically expose no other Vulkan symbol unless it is
3162 * linked with -Bsymbolic.
3163 * - Each dispatchable Vulkan handle created by the ICD must be
3164 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3165 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3166 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3167 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3168 * such loader-managed surfaces.
3169 *
3170 * - Loader interface v2 differs from v1 in:
3171 * - The first ICD entrypoint called by the loader is
3172 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3173 * statically expose this entrypoint.
3174 *
3175 * - Loader interface v3 differs from v2 in:
3176 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3177 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
3178 * because the loader no longer does so.
3179 */
3180 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3181 return VK_SUCCESS;
3182 }
3183
3184 VkResult radv_GetMemoryFdKHX(VkDevice _device,
3185 VkDeviceMemory _memory,
3186 VkExternalMemoryHandleTypeFlagsKHX handleType,
3187 int *pFD)
3188 {
3189 RADV_FROM_HANDLE(radv_device, device, _device);
3190 RADV_FROM_HANDLE(radv_device_memory, memory, _memory);
3191
3192 /* We support only one handle type. */
3193 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
3194
3195 bool ret = radv_get_memory_fd(device, memory, pFD);
3196 if (!ret)
3197 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3198 return VK_SUCCESS;
3199 }
3200
3201 VkResult radv_GetMemoryFdPropertiesKHX(VkDevice _device,
3202 VkExternalMemoryHandleTypeFlagBitsKHX handleType,
3203 int fd,
3204 VkMemoryFdPropertiesKHX *pMemoryFdProperties)
3205 {
3206 /* The valid usage section for this function says:
3207 *
3208 * "handleType must not be one of the handle types defined as opaque."
3209 *
3210 * Since we only handle opaque handles for now, there are no FD properties.
3211 */
3212 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
3213 }