radv: Add support for VK_KHR_variable_pointers.
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "gfx9d.h"
46 #include "util/debug.h"
47
48 static int
49 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
50 {
51 uint32_t mesa_timestamp, llvm_timestamp;
52 uint16_t f = family;
53 memset(uuid, 0, VK_UUID_SIZE);
54 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
55 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
56 return -1;
57
58 memcpy(uuid, &mesa_timestamp, 4);
59 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
60 memcpy((char*)uuid + 8, &f, 2);
61 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
62 return 0;
63 }
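/* Resulting pipeline-cache UUID layout (descriptive note): bytes 0-3 hold the
 * Mesa build timestamp, bytes 4-7 the LLVM build timestamp, bytes 8-9 the
 * radeon_family value, and the remaining bytes the literal "radv", zero
 * padded. */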
64
65 static void
radv_get_device_uuid(drmDevicePtr device, void *uuid)
{
67 memset(uuid, 0, VK_UUID_SIZE);
68 memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
69 memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
70 memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
71 memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
72 }
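/* The device UUID is thus just the PCI BDF: a 2-byte domain followed by
 * 1-byte bus, device and function numbers; bytes 5-15 stay zero
 * (descriptive note). */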
73
74 static const VkExtensionProperties instance_extensions[] = {
75 {
76 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
77 .specVersion = 25,
78 },
79 #ifdef VK_USE_PLATFORM_XCB_KHR
80 {
81 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
82 .specVersion = 6,
83 },
84 #endif
85 #ifdef VK_USE_PLATFORM_XLIB_KHR
86 {
87 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
88 .specVersion = 6,
89 },
90 #endif
91 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
92 {
93 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
94 .specVersion = 6,
95 },
96 #endif
97 {
98 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
99 .specVersion = 1,
100 },
101 {
102 .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
103 .specVersion = 1,
104 },
105 };
106
107 static const VkExtensionProperties common_device_extensions[] = {
108 {
109 .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
110 .specVersion = 1,
111 },
112 {
113 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
114 .specVersion = 1,
115 },
116 {
117 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
118 .specVersion = 1,
119 },
120 {
121 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
122 .specVersion = 1,
123 },
124 {
125 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
126 .specVersion = 1,
127 },
128 {
129 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
130 .specVersion = 68,
131 },
132 {
133 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
134 .specVersion = 1,
135 },
136 {
137 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
138 .specVersion = 1,
139 },
140 {
141 .extensionName = VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
142 .specVersion = 1,
143 },
144 {
145 .extensionName = VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,
146 .specVersion = 1,
147 },
148 {
149 .extensionName = VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
150 .specVersion = 1,
151 },
152 {
153 .extensionName = VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
154 .specVersion = 1,
155 },
156 {
157 .extensionName = VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME,
158 .specVersion = 1,
159 },
160 {
161 .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
162 .specVersion = 1,
163 },
164 };
165
166 static VkResult
167 radv_extensions_register(struct radv_instance *instance,
168 struct radv_extensions *extensions,
169 const VkExtensionProperties *new_ext,
170 uint32_t num_ext)
171 {
172 size_t new_size;
173 VkExtensionProperties *new_ptr;
174
175 assert(new_ext && num_ext > 0);
176
177 if (!new_ext)
178 return VK_ERROR_INITIALIZATION_FAILED;
179
180 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
181 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
182 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
183
184 /* Old array continues to be valid, update nothing */
185 if (!new_ptr)
186 return VK_ERROR_OUT_OF_HOST_MEMORY;
187
188 memcpy(&new_ptr[extensions->num_ext], new_ext,
189 num_ext * sizeof(VkExtensionProperties));
190 extensions->ext_array = new_ptr;
191 extensions->num_ext += num_ext;
192
193 return VK_SUCCESS;
194 }
195
196 static void
197 radv_extensions_finish(struct radv_instance *instance,
198 struct radv_extensions *extensions)
199 {
200 assert(extensions);
201
202 if (!extensions)
		radv_loge("Attempted to free invalid extension struct\n");
204
205 if (extensions->ext_array)
206 vk_free(&instance->alloc, extensions->ext_array);
207 }
208
209 static bool
210 is_extension_enabled(const VkExtensionProperties *extensions,
211 size_t num_ext,
212 const char *name)
213 {
214 assert(extensions && name);
215
216 for (uint32_t i = 0; i < num_ext; i++) {
217 if (strcmp(name, extensions[i].extensionName) == 0)
218 return true;
219 }
220
221 return false;
222 }
223
224 static const char *
225 get_chip_name(enum radeon_family family)
226 {
227 switch (family) {
228 case CHIP_TAHITI: return "AMD RADV TAHITI";
229 case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
230 case CHIP_VERDE: return "AMD RADV CAPE VERDE";
231 case CHIP_OLAND: return "AMD RADV OLAND";
232 case CHIP_HAINAN: return "AMD RADV HAINAN";
233 case CHIP_BONAIRE: return "AMD RADV BONAIRE";
234 case CHIP_KAVERI: return "AMD RADV KAVERI";
235 case CHIP_KABINI: return "AMD RADV KABINI";
236 case CHIP_HAWAII: return "AMD RADV HAWAII";
237 case CHIP_MULLINS: return "AMD RADV MULLINS";
238 case CHIP_TONGA: return "AMD RADV TONGA";
239 case CHIP_ICELAND: return "AMD RADV ICELAND";
240 case CHIP_CARRIZO: return "AMD RADV CARRIZO";
241 case CHIP_FIJI: return "AMD RADV FIJI";
242 case CHIP_POLARIS10: return "AMD RADV POLARIS10";
243 case CHIP_POLARIS11: return "AMD RADV POLARIS11";
244 case CHIP_POLARIS12: return "AMD RADV POLARIS12";
245 case CHIP_STONEY: return "AMD RADV STONEY";
246 case CHIP_VEGA10: return "AMD RADV VEGA";
247 case CHIP_RAVEN: return "AMD RADV RAVEN";
248 default: return "AMD RADV unknown";
249 }
250 }
251
252 static VkResult
253 radv_physical_device_init(struct radv_physical_device *device,
254 struct radv_instance *instance,
255 drmDevicePtr drm_device)
256 {
257 const char *path = drm_device->nodes[DRM_NODE_RENDER];
258 VkResult result;
259 drmVersionPtr version;
260 int fd;
261
262 fd = open(path, O_RDWR | O_CLOEXEC);
263 if (fd < 0)
264 return VK_ERROR_INCOMPATIBLE_DRIVER;
265
266 version = drmGetVersion(fd);
267 if (!version) {
268 close(fd);
269 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
270 "failed to get version %s: %m", path);
271 }
272
273 if (strcmp(version->name, "amdgpu")) {
274 drmFreeVersion(version);
275 close(fd);
276 return VK_ERROR_INCOMPATIBLE_DRIVER;
277 }
278 drmFreeVersion(version);
279
280 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
281 device->instance = instance;
282 assert(strlen(path) < ARRAY_SIZE(device->path));
283 strncpy(device->path, path, ARRAY_SIZE(device->path));
284
285 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
286 instance->perftest_flags);
287 if (!device->ws) {
288 result = VK_ERROR_INCOMPATIBLE_DRIVER;
289 goto fail;
290 }
291
292 device->local_fd = fd;
293 device->ws->query_info(device->ws, &device->rad_info);
294 result = radv_init_wsi(device);
295 if (result != VK_SUCCESS) {
296 device->ws->destroy(device->ws);
297 goto fail;
298 }
299
300 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
301 radv_finish_wsi(device);
302 device->ws->destroy(device->ws);
303 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
304 "cannot generate UUID");
305 goto fail;
306 }
307
308 result = radv_extensions_register(instance,
309 &device->extensions,
310 common_device_extensions,
311 ARRAY_SIZE(common_device_extensions));
312 if (result != VK_SUCCESS)
313 goto fail;
314
	fprintf(stderr, "WARNING: radv is not a conformant Vulkan implementation, testing use only.\n");
316 device->name = get_chip_name(device->rad_info.family);
317
318 radv_get_device_uuid(drm_device, device->device_uuid);
319
320 if (device->rad_info.family == CHIP_STONEY ||
321 device->rad_info.chip_class >= GFX9) {
322 device->has_rbplus = true;
323 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
324 }
325
326 return VK_SUCCESS;
327
328 fail:
329 close(fd);
330 return result;
331 }
332
333 static void
334 radv_physical_device_finish(struct radv_physical_device *device)
335 {
336 radv_extensions_finish(device->instance, &device->extensions);
337 radv_finish_wsi(device);
338 device->ws->destroy(device->ws);
339 close(device->local_fd);
340 }
341
342 static void *
343 default_alloc_func(void *pUserData, size_t size, size_t align,
344 VkSystemAllocationScope allocationScope)
345 {
346 return malloc(size);
347 }
348
349 static void *
350 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
351 size_t align, VkSystemAllocationScope allocationScope)
352 {
353 return realloc(pOriginal, size);
354 }
355
356 static void
357 default_free_func(void *pUserData, void *pMemory)
358 {
359 free(pMemory);
360 }
361
362 static const VkAllocationCallbacks default_alloc = {
363 .pUserData = NULL,
364 .pfnAllocation = default_alloc_func,
365 .pfnReallocation = default_realloc_func,
366 .pfnFree = default_free_func,
367 };
368
369 static const struct debug_control radv_debug_options[] = {
370 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
371 {"nodcc", RADV_DEBUG_NO_DCC},
372 {"shaders", RADV_DEBUG_DUMP_SHADERS},
373 {"nocache", RADV_DEBUG_NO_CACHE},
374 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
375 {"nohiz", RADV_DEBUG_NO_HIZ},
376 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
377 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
378 {"allbos", RADV_DEBUG_ALL_BOS},
379 {"noibs", RADV_DEBUG_NO_IBS},
380 {NULL, 0}
381 };
382
383 static const struct debug_control radv_perftest_options[] = {
384 {"batchchain", RADV_PERFTEST_BATCHCHAIN},
385 {"sisched", RADV_PERFTEST_SISCHED},
386 {NULL, 0}
387 };
388
389 VkResult radv_CreateInstance(
390 const VkInstanceCreateInfo* pCreateInfo,
391 const VkAllocationCallbacks* pAllocator,
392 VkInstance* pInstance)
393 {
394 struct radv_instance *instance;
395
396 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
397
398 uint32_t client_version;
399 if (pCreateInfo->pApplicationInfo &&
400 pCreateInfo->pApplicationInfo->apiVersion != 0) {
401 client_version = pCreateInfo->pApplicationInfo->apiVersion;
402 } else {
403 client_version = VK_MAKE_VERSION(1, 0, 0);
404 }
405
406 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
407 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
408 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
409 "Client requested version %d.%d.%d",
410 VK_VERSION_MAJOR(client_version),
411 VK_VERSION_MINOR(client_version),
412 VK_VERSION_PATCH(client_version));
413 }
414
415 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
416 if (!is_extension_enabled(instance_extensions,
417 ARRAY_SIZE(instance_extensions),
418 pCreateInfo->ppEnabledExtensionNames[i]))
419 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
420 }
421
422 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
423 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
424 if (!instance)
425 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
426
427 memset(instance, 0, sizeof(*instance));
428
429 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
430
431 if (pAllocator)
432 instance->alloc = *pAllocator;
433 else
434 instance->alloc = default_alloc;
435
436 instance->apiVersion = client_version;
437 instance->physicalDeviceCount = -1;
438
439 _mesa_locale_init();
440
441 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
442
443 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
444 radv_debug_options);
445
446 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
447 radv_perftest_options);
448
449 *pInstance = radv_instance_to_handle(instance);
450
451 return VK_SUCCESS;
452 }
453
454 void radv_DestroyInstance(
455 VkInstance _instance,
456 const VkAllocationCallbacks* pAllocator)
457 {
458 RADV_FROM_HANDLE(radv_instance, instance, _instance);
459
460 if (!instance)
461 return;
462
463 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
464 radv_physical_device_finish(instance->physicalDevices + i);
465 }
466
467 VG(VALGRIND_DESTROY_MEMPOOL(instance));
468
469 _mesa_locale_fini();
470
471 vk_free(&instance->alloc, instance);
472 }
473
474 static VkResult
475 radv_enumerate_devices(struct radv_instance *instance)
476 {
	/* TODO: Check for more devices? */
478 drmDevicePtr devices[8];
479 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
480 int max_devices;
481
482 instance->physicalDeviceCount = 0;
483
484 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
485 if (max_devices < 1)
486 return VK_ERROR_INCOMPATIBLE_DRIVER;
487
488 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
489 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
490 devices[i]->bustype == DRM_BUS_PCI &&
491 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
492
493 result = radv_physical_device_init(instance->physicalDevices +
494 instance->physicalDeviceCount,
495 instance,
496 devices[i]);
497 if (result == VK_SUCCESS)
498 ++instance->physicalDeviceCount;
499 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
500 break;
501 }
502 }
503 drmFreeDevices(devices, max_devices);
504
505 return result;
506 }
507
508 VkResult radv_EnumeratePhysicalDevices(
509 VkInstance _instance,
510 uint32_t* pPhysicalDeviceCount,
511 VkPhysicalDevice* pPhysicalDevices)
512 {
513 RADV_FROM_HANDLE(radv_instance, instance, _instance);
514 VkResult result;
515
516 if (instance->physicalDeviceCount < 0) {
517 result = radv_enumerate_devices(instance);
518 if (result != VK_SUCCESS &&
519 result != VK_ERROR_INCOMPATIBLE_DRIVER)
520 return result;
521 }
522
523 if (!pPhysicalDevices) {
524 *pPhysicalDeviceCount = instance->physicalDeviceCount;
525 } else {
526 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
527 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
528 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
529 }
530
531 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
532 : VK_SUCCESS;
533 }
534
535 void radv_GetPhysicalDeviceFeatures(
536 VkPhysicalDevice physicalDevice,
537 VkPhysicalDeviceFeatures* pFeatures)
538 {
539 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
540 bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
541 memset(pFeatures, 0, sizeof(*pFeatures));
542
543 *pFeatures = (VkPhysicalDeviceFeatures) {
544 .robustBufferAccess = true,
545 .fullDrawIndexUint32 = true,
546 .imageCubeArray = true,
547 .independentBlend = true,
548 .geometryShader = !is_gfx9,
549 .tessellationShader = !is_gfx9,
550 .sampleRateShading = false,
551 .dualSrcBlend = true,
552 .logicOp = true,
553 .multiDrawIndirect = true,
554 .drawIndirectFirstInstance = true,
555 .depthClamp = true,
556 .depthBiasClamp = true,
557 .fillModeNonSolid = true,
558 .depthBounds = true,
559 .wideLines = true,
560 .largePoints = true,
561 .alphaToOne = true,
562 .multiViewport = true,
563 .samplerAnisotropy = true,
564 .textureCompressionETC2 = false,
565 .textureCompressionASTC_LDR = false,
566 .textureCompressionBC = true,
567 .occlusionQueryPrecise = true,
568 .pipelineStatisticsQuery = true,
569 .vertexPipelineStoresAndAtomics = true,
570 .fragmentStoresAndAtomics = true,
571 .shaderTessellationAndGeometryPointSize = true,
572 .shaderImageGatherExtended = true,
573 .shaderStorageImageExtendedFormats = true,
574 .shaderStorageImageMultisample = false,
575 .shaderUniformBufferArrayDynamicIndexing = true,
576 .shaderSampledImageArrayDynamicIndexing = true,
577 .shaderStorageBufferArrayDynamicIndexing = true,
578 .shaderStorageImageArrayDynamicIndexing = true,
579 .shaderStorageImageReadWithoutFormat = true,
580 .shaderStorageImageWriteWithoutFormat = true,
581 .shaderClipDistance = true,
582 .shaderCullDistance = true,
583 .shaderFloat64 = true,
584 .shaderInt64 = true,
585 .shaderInt16 = false,
586 .sparseBinding = true,
587 .variableMultisampleRate = true,
588 .inheritedQueries = true,
589 };
590 }
591
592 void radv_GetPhysicalDeviceFeatures2KHR(
593 VkPhysicalDevice physicalDevice,
594 VkPhysicalDeviceFeatures2KHR *pFeatures)
595 {
596 vk_foreach_struct(ext, pFeatures->pNext) {
597 switch (ext->sType) {
598 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
599 VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
600 features->variablePointersStorageBuffer = true;
601 features->variablePointers = false;
602 break;
603 }
604 default:
605 break;
606 }
607 }
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
609 }
610
611 void radv_GetPhysicalDeviceProperties(
612 VkPhysicalDevice physicalDevice,
613 VkPhysicalDeviceProperties* pProperties)
614 {
615 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
616 VkSampleCountFlags sample_counts = 0xf;
617
	/* Make sure the entire descriptor set is addressable with a signed
	 * 32-bit int, i.e. the sum of all limits scaled by descriptor size must
	 * be at most 2 GiB. A combined image & sampler counts against both
	 * limits. This limit is for the pipeline layout, not for the set
	 * layout, but there is no set limit, so we just set a pipeline limit.
	 * No app is likely to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);
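	/* Illustrative arithmetic, assuming MAX_DYNAMIC_BUFFERS is 16: the
	 * denominator sums to 224 bytes, so this yields roughly
	 * (2^31 - 256) / 224 ≈ 9.58 million descriptors. */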
630
631 VkPhysicalDeviceLimits limits = {
632 .maxImageDimension1D = (1 << 14),
633 .maxImageDimension2D = (1 << 14),
634 .maxImageDimension3D = (1 << 11),
635 .maxImageDimensionCube = (1 << 14),
636 .maxImageArrayLayers = (1 << 11),
637 .maxTexelBufferElements = 128 * 1024 * 1024,
638 .maxUniformBufferRange = UINT32_MAX,
639 .maxStorageBufferRange = UINT32_MAX,
640 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
641 .maxMemoryAllocationCount = UINT32_MAX,
642 .maxSamplerAllocationCount = 64 * 1024,
643 .bufferImageGranularity = 64, /* A cache line */
644 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
645 .maxBoundDescriptorSets = MAX_SETS,
646 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
647 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
648 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
649 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
650 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
651 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
652 .maxPerStageResources = max_descriptor_set_size,
653 .maxDescriptorSetSamplers = max_descriptor_set_size,
654 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
655 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
656 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
657 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
658 .maxDescriptorSetSampledImages = max_descriptor_set_size,
659 .maxDescriptorSetStorageImages = max_descriptor_set_size,
660 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
661 .maxVertexInputAttributes = 32,
662 .maxVertexInputBindings = 32,
663 .maxVertexInputAttributeOffset = 2047,
664 .maxVertexInputBindingStride = 2048,
665 .maxVertexOutputComponents = 128,
666 .maxTessellationGenerationLevel = 64,
667 .maxTessellationPatchSize = 32,
668 .maxTessellationControlPerVertexInputComponents = 128,
669 .maxTessellationControlPerVertexOutputComponents = 128,
670 .maxTessellationControlPerPatchOutputComponents = 120,
671 .maxTessellationControlTotalOutputComponents = 4096,
672 .maxTessellationEvaluationInputComponents = 128,
673 .maxTessellationEvaluationOutputComponents = 128,
674 .maxGeometryShaderInvocations = 127,
675 .maxGeometryInputComponents = 64,
676 .maxGeometryOutputComponents = 128,
677 .maxGeometryOutputVertices = 256,
678 .maxGeometryTotalOutputComponents = 1024,
679 .maxFragmentInputComponents = 128,
680 .maxFragmentOutputAttachments = 8,
681 .maxFragmentDualSrcAttachments = 1,
682 .maxFragmentCombinedOutputResources = 8,
683 .maxComputeSharedMemorySize = 32768,
684 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
685 .maxComputeWorkGroupInvocations = 2048,
686 .maxComputeWorkGroupSize = {
687 2048,
688 2048,
689 2048
690 },
691 .subPixelPrecisionBits = 4 /* FIXME */,
692 .subTexelPrecisionBits = 4 /* FIXME */,
693 .mipmapPrecisionBits = 4 /* FIXME */,
694 .maxDrawIndexedIndexValue = UINT32_MAX,
695 .maxDrawIndirectCount = UINT32_MAX,
696 .maxSamplerLodBias = 16,
697 .maxSamplerAnisotropy = 16,
698 .maxViewports = MAX_VIEWPORTS,
699 .maxViewportDimensions = { (1 << 14), (1 << 14) },
700 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
701 .viewportSubPixelBits = 13, /* We take a float? */
702 .minMemoryMapAlignment = 4096, /* A page */
703 .minTexelBufferOffsetAlignment = 1,
704 .minUniformBufferOffsetAlignment = 4,
705 .minStorageBufferOffsetAlignment = 4,
706 .minTexelOffset = -32,
707 .maxTexelOffset = 31,
708 .minTexelGatherOffset = -32,
709 .maxTexelGatherOffset = 31,
710 .minInterpolationOffset = -2,
711 .maxInterpolationOffset = 2,
712 .subPixelInterpolationOffsetBits = 8,
713 .maxFramebufferWidth = (1 << 14),
714 .maxFramebufferHeight = (1 << 14),
715 .maxFramebufferLayers = (1 << 10),
716 .framebufferColorSampleCounts = sample_counts,
717 .framebufferDepthSampleCounts = sample_counts,
718 .framebufferStencilSampleCounts = sample_counts,
719 .framebufferNoAttachmentsSampleCounts = sample_counts,
720 .maxColorAttachments = MAX_RTS,
721 .sampledImageColorSampleCounts = sample_counts,
722 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
723 .sampledImageDepthSampleCounts = sample_counts,
724 .sampledImageStencilSampleCounts = sample_counts,
725 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
726 .maxSampleMaskWords = 1,
727 .timestampComputeAndGraphics = true,
728 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
729 .maxClipDistances = 8,
730 .maxCullDistances = 8,
731 .maxCombinedClipAndCullDistances = 8,
732 .discreteQueuePriorities = 1,
733 .pointSizeRange = { 0.125, 255.875 },
734 .lineWidthRange = { 0.0, 7.9921875 },
735 .pointSizeGranularity = (1.0 / 8.0),
736 .lineWidthGranularity = (1.0 / 128.0),
737 .strictLines = false, /* FINISHME */
738 .standardSampleLocations = true,
739 .optimalBufferCopyOffsetAlignment = 128,
740 .optimalBufferCopyRowPitchAlignment = 128,
741 .nonCoherentAtomSize = 64,
742 };
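	/* Note on timestampPeriod above: assuming the winsys reports
	 * clock_crystal_freq in kHz (as amdgpu does), 1e6 / freq gives the
	 * tick period in nanoseconds, which is what Vulkan expects. */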
743
744 *pProperties = (VkPhysicalDeviceProperties) {
745 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
746 .driverVersion = vk_get_driver_version(),
747 .vendorID = 0x1002,
748 .deviceID = pdevice->rad_info.pci_id,
749 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
750 .limits = limits,
751 .sparseProperties = {0},
752 };
753
754 strcpy(pProperties->deviceName, pdevice->name);
755 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
756 }
757
758 void radv_GetPhysicalDeviceProperties2KHR(
759 VkPhysicalDevice physicalDevice,
760 VkPhysicalDeviceProperties2KHR *pProperties)
761 {
762 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
763 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
764
765 vk_foreach_struct(ext, pProperties->pNext) {
766 switch (ext->sType) {
767 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
768 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
769 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
770 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
771 break;
772 }
773 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
774 VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
775 radv_device_get_cache_uuid(0, properties->driverUUID);
776 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
777 properties->deviceLUIDValid = false;
778 break;
779 }
780 default:
781 break;
782 }
783 }
784 }
785
786 static void radv_get_physical_device_queue_family_properties(
787 struct radv_physical_device* pdevice,
788 uint32_t* pCount,
789 VkQueueFamilyProperties** pQueueFamilyProperties)
790 {
791 int num_queue_families = 1;
792 int idx;
793 if (pdevice->rad_info.num_compute_rings > 0 &&
794 pdevice->rad_info.chip_class >= CIK &&
795 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
796 num_queue_families++;
797
798 if (pQueueFamilyProperties == NULL) {
799 *pCount = num_queue_families;
800 return;
801 }
802
803 if (!*pCount)
804 return;
805
806 idx = 0;
807 if (*pCount >= 1) {
808 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
809 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
810 VK_QUEUE_COMPUTE_BIT |
811 VK_QUEUE_TRANSFER_BIT |
812 VK_QUEUE_SPARSE_BINDING_BIT,
813 .queueCount = 1,
814 .timestampValidBits = 64,
815 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
816 };
817 idx++;
818 }
819
820 if (pdevice->rad_info.num_compute_rings > 0 &&
821 pdevice->rad_info.chip_class >= CIK &&
822 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
823 if (*pCount > idx) {
824 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
825 .queueFlags = VK_QUEUE_COMPUTE_BIT |
826 VK_QUEUE_TRANSFER_BIT |
827 VK_QUEUE_SPARSE_BINDING_BIT,
828 .queueCount = pdevice->rad_info.num_compute_rings,
829 .timestampValidBits = 64,
830 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
831 };
832 idx++;
833 }
834 }
835 *pCount = idx;
836 }
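/* The callers below follow the standard Vulkan two-call idiom: a first call
 * with a NULL properties pointer just reports the family count (1, or 2 when
 * the async compute queue is exposed), and a second call fills the array. */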
837
838 void radv_GetPhysicalDeviceQueueFamilyProperties(
839 VkPhysicalDevice physicalDevice,
840 uint32_t* pCount,
841 VkQueueFamilyProperties* pQueueFamilyProperties)
842 {
843 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
844 if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
847 }
848 VkQueueFamilyProperties *properties[] = {
849 pQueueFamilyProperties + 0,
850 pQueueFamilyProperties + 1,
851 pQueueFamilyProperties + 2,
852 };
853 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
854 assert(*pCount <= 3);
855 }
856
857 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
858 VkPhysicalDevice physicalDevice,
859 uint32_t* pCount,
860 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
861 {
862 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
863 if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
866 }
867 VkQueueFamilyProperties *properties[] = {
868 &pQueueFamilyProperties[0].queueFamilyProperties,
869 &pQueueFamilyProperties[1].queueFamilyProperties,
870 &pQueueFamilyProperties[2].queueFamilyProperties,
871 };
872 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
873 assert(*pCount <= 3);
874 }
875
876 void radv_GetPhysicalDeviceMemoryProperties(
877 VkPhysicalDevice physicalDevice,
878 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
879 {
880 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
881
882 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
883
884 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
885 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
886 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
887 .heapIndex = RADV_MEM_HEAP_VRAM,
888 };
889 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
890 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
891 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
892 .heapIndex = RADV_MEM_HEAP_GTT,
893 };
894 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
895 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
896 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
897 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
898 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
899 };
900 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
901 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
902 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
903 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
904 .heapIndex = RADV_MEM_HEAP_GTT,
905 };
906
907 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
908
909 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
910 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
911 .size = physical_device->rad_info.vram_size -
912 physical_device->rad_info.vram_vis_size,
913 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
914 };
915 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
916 .size = physical_device->rad_info.vram_vis_size,
917 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
918 };
919 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
920 .size = physical_device->rad_info.gart_size,
921 .flags = 0,
922 };
923 }
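/* Note: the first VRAM heap size above subtracts the CPU-visible slice,
 * which is exposed as its own heap, so the two VRAM heaps together cover
 * vram_size exactly. */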
924
925 void radv_GetPhysicalDeviceMemoryProperties2KHR(
926 VkPhysicalDevice physicalDevice,
927 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
928 {
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);
931 }
932
static VkResult
934 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
935 int queue_family_index, int idx)
936 {
937 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
938 queue->device = device;
939 queue->queue_family_index = queue_family_index;
940 queue->queue_idx = idx;
941
942 queue->hw_ctx = device->ws->ctx_create(device->ws);
943 if (!queue->hw_ctx)
944 return VK_ERROR_OUT_OF_HOST_MEMORY;
945
946 return VK_SUCCESS;
947 }
948
949 static void
950 radv_queue_finish(struct radv_queue *queue)
951 {
952 if (queue->hw_ctx)
953 queue->device->ws->ctx_destroy(queue->hw_ctx);
954
955 if (queue->initial_preamble_cs)
956 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
957 if (queue->continue_preamble_cs)
958 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
959 if (queue->descriptor_bo)
960 queue->device->ws->buffer_destroy(queue->descriptor_bo);
961 if (queue->scratch_bo)
962 queue->device->ws->buffer_destroy(queue->scratch_bo);
963 if (queue->esgs_ring_bo)
964 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
965 if (queue->gsvs_ring_bo)
966 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
967 if (queue->tess_factor_ring_bo)
968 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
969 if (queue->tess_offchip_ring_bo)
970 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
971 if (queue->compute_scratch_bo)
972 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
973 }
974
975 static void
976 radv_device_init_gs_info(struct radv_device *device)
977 {
978 switch (device->physical_device->rad_info.family) {
979 case CHIP_OLAND:
980 case CHIP_HAINAN:
981 case CHIP_KAVERI:
982 case CHIP_KABINI:
983 case CHIP_MULLINS:
984 case CHIP_ICELAND:
985 case CHIP_CARRIZO:
986 case CHIP_STONEY:
987 device->gs_table_depth = 16;
988 return;
989 case CHIP_TAHITI:
990 case CHIP_PITCAIRN:
991 case CHIP_VERDE:
992 case CHIP_BONAIRE:
993 case CHIP_HAWAII:
994 case CHIP_TONGA:
995 case CHIP_FIJI:
996 case CHIP_POLARIS10:
997 case CHIP_POLARIS11:
998 case CHIP_POLARIS12:
999 case CHIP_VEGA10:
1000 case CHIP_RAVEN:
1001 device->gs_table_depth = 32;
1002 return;
1003 default:
1004 unreachable("unknown GPU");
1005 }
1006 }
1007
1008 VkResult radv_CreateDevice(
1009 VkPhysicalDevice physicalDevice,
1010 const VkDeviceCreateInfo* pCreateInfo,
1011 const VkAllocationCallbacks* pAllocator,
1012 VkDevice* pDevice)
1013 {
1014 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1015 VkResult result;
1016 struct radv_device *device;
1017
1018 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1019 if (!is_extension_enabled(physical_device->extensions.ext_array,
1020 physical_device->extensions.num_ext,
1021 pCreateInfo->ppEnabledExtensionNames[i]))
1022 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1023 }
1024
1025 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1026 sizeof(*device), 8,
1027 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1028 if (!device)
1029 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1030
1031 memset(device, 0, sizeof(*device));
1032
1033 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1034 device->instance = physical_device->instance;
1035 device->physical_device = physical_device;
1036
1037 device->debug_flags = device->instance->debug_flags;
1038
1039 device->ws = physical_device->ws;
1040 if (pAllocator)
1041 device->alloc = *pAllocator;
1042 else
1043 device->alloc = physical_device->instance->alloc;
1044
1045 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1046 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1047 uint32_t qfi = queue_create->queueFamilyIndex;
1048
1049 device->queues[qfi] = vk_alloc(&device->alloc,
1050 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1051 if (!device->queues[qfi]) {
1052 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1053 goto fail;
1054 }
1055
1056 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1057
1058 device->queue_count[qfi] = queue_create->queueCount;
1059
1060 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1061 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1062 if (result != VK_SUCCESS)
1063 goto fail;
1064 }
1065 }
1066
1067 #if HAVE_LLVM < 0x0400
1068 device->llvm_supports_spill = false;
1069 #else
1070 device->llvm_supports_spill = true;
1071 #endif
1072
1073 /* The maximum number of scratch waves. Scratch space isn't divided
1074 * evenly between CUs. The number is only a function of the number of CUs.
1075 * We can decrease the constant to decrease the scratch buffer size.
1076 *
	 * device->scratch_waves must be >= the maximum possible size of
1078 * 1 threadgroup, so that the hw doesn't hang from being unable
1079 * to start any.
1080 *
1081 * The recommended value is 4 per CU at most. Higher numbers don't
1082 * bring much benefit, but they still occupy chip resources (think
1083 * async compute). I've seen ~2% performance difference between 4 and 32.
1084 */
1085 uint32_t max_threads_per_block = 2048;
1086 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1087 max_threads_per_block / 64);
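	/* Illustrative arithmetic: a 16-CU part gets 32 * 16 = 512 scratch
	 * waves, while the MAX2() keeps at least 2048 / 64 = 32 waves so one
	 * maximally sized threadgroup can always launch. */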
1088
1089 radv_device_init_gs_info(device);
1090
1091 device->tess_offchip_block_dw_size =
1092 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1093 device->has_distributed_tess =
1094 device->physical_device->rad_info.chip_class >= VI &&
1095 device->physical_device->rad_info.max_se >= 2;
1096
1097 result = radv_device_init_meta(device);
1098 if (result != VK_SUCCESS)
1099 goto fail;
1100
1101 radv_device_init_msaa(device);
1102
1103 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1104 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1105 switch (family) {
1106 case RADV_QUEUE_GENERAL:
1107 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1108 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1109 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1110 break;
1111 case RADV_QUEUE_COMPUTE:
1112 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1113 radeon_emit(device->empty_cs[family], 0);
1114 break;
1115 }
1116 device->ws->cs_finalize(device->empty_cs[family]);
1117
1118 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1119 switch (family) {
1120 case RADV_QUEUE_GENERAL:
1121 case RADV_QUEUE_COMPUTE:
1122 si_cs_emit_cache_flush(device->flush_cs[family],
1123 false,
1124 device->physical_device->rad_info.chip_class,
1125 NULL, 0,
1126 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1127 RADV_CMD_FLAG_INV_ICACHE |
1128 RADV_CMD_FLAG_INV_SMEM_L1 |
1129 RADV_CMD_FLAG_INV_VMEM_L1 |
1130 RADV_CMD_FLAG_INV_GLOBAL_L2);
1131 break;
1132 }
1133 device->ws->cs_finalize(device->flush_cs[family]);
1134
1135 device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
1136 switch (family) {
1137 case RADV_QUEUE_GENERAL:
1138 case RADV_QUEUE_COMPUTE:
1139 si_cs_emit_cache_flush(device->flush_shader_cs[family],
1140 false,
1141 device->physical_device->rad_info.chip_class,
1142 NULL, 0,
1143 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
					       (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1145 RADV_CMD_FLAG_INV_ICACHE |
1146 RADV_CMD_FLAG_INV_SMEM_L1 |
1147 RADV_CMD_FLAG_INV_VMEM_L1 |
1148 RADV_CMD_FLAG_INV_GLOBAL_L2);
1149 break;
1150 }
1151 device->ws->cs_finalize(device->flush_shader_cs[family]);
1152 }
1153
1154 if (getenv("RADV_TRACE_FILE")) {
1155 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1156 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
		if (!device->trace_bo) {
			/* result still holds VK_SUCCESS here; don't leak it
			 * through the fail path. */
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
		if (!device->trace_id_ptr) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}
1163 }
1164
1165 if (device->physical_device->rad_info.chip_class >= CIK)
1166 cik_create_gfx_config(device);
1167
1168 VkPipelineCacheCreateInfo ci;
1169 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1170 ci.pNext = NULL;
1171 ci.flags = 0;
1172 ci.pInitialData = NULL;
1173 ci.initialDataSize = 0;
1174 VkPipelineCache pc;
1175 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1176 &ci, NULL, &pc);
1177 if (result != VK_SUCCESS)
1178 goto fail;
1179
1180 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1181
1182 *pDevice = radv_device_to_handle(device);
1183 return VK_SUCCESS;
1184
1185 fail:
1186 if (device->trace_bo)
1187 device->ws->buffer_destroy(device->trace_bo);
1188
1189 if (device->gfx_init)
1190 device->ws->buffer_destroy(device->gfx_init);
1191
1192 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1193 for (unsigned q = 0; q < device->queue_count[i]; q++)
1194 radv_queue_finish(&device->queues[i][q]);
1195 if (device->queue_count[i])
1196 vk_free(&device->alloc, device->queues[i]);
1197 }
1198
1199 vk_free(&device->alloc, device);
1200 return result;
1201 }
1202
1203 void radv_DestroyDevice(
1204 VkDevice _device,
1205 const VkAllocationCallbacks* pAllocator)
1206 {
1207 RADV_FROM_HANDLE(radv_device, device, _device);
1208
1209 if (!device)
1210 return;
1211
1212 if (device->trace_bo)
1213 device->ws->buffer_destroy(device->trace_bo);
1214
1215 if (device->gfx_init)
1216 device->ws->buffer_destroy(device->gfx_init);
1217
1218 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1219 for (unsigned q = 0; q < device->queue_count[i]; q++)
1220 radv_queue_finish(&device->queues[i][q]);
1221 if (device->queue_count[i])
1222 vk_free(&device->alloc, device->queues[i]);
1223 if (device->empty_cs[i])
1224 device->ws->cs_destroy(device->empty_cs[i]);
1225 if (device->flush_cs[i])
1226 device->ws->cs_destroy(device->flush_cs[i]);
1227 if (device->flush_shader_cs[i])
1228 device->ws->cs_destroy(device->flush_shader_cs[i]);
1229 }
1230 radv_device_finish_meta(device);
1231
1232 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1233 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1234
1235 vk_free(&device->alloc, device);
1236 }
1237
1238 VkResult radv_EnumerateInstanceExtensionProperties(
1239 const char* pLayerName,
1240 uint32_t* pPropertyCount,
1241 VkExtensionProperties* pProperties)
1242 {
1243 if (pProperties == NULL) {
1244 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1245 return VK_SUCCESS;
1246 }
1247
1248 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1249 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1250
1251 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1252 return VK_INCOMPLETE;
1253
1254 return VK_SUCCESS;
1255 }
1256
1257 VkResult radv_EnumerateDeviceExtensionProperties(
1258 VkPhysicalDevice physicalDevice,
1259 const char* pLayerName,
1260 uint32_t* pPropertyCount,
1261 VkExtensionProperties* pProperties)
1262 {
1263 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1264
1265 if (pProperties == NULL) {
1266 *pPropertyCount = pdevice->extensions.num_ext;
1267 return VK_SUCCESS;
1268 }
1269
1270 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1271 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1272
1273 if (*pPropertyCount < pdevice->extensions.num_ext)
1274 return VK_INCOMPLETE;
1275
1276 return VK_SUCCESS;
1277 }
1278
1279 VkResult radv_EnumerateInstanceLayerProperties(
1280 uint32_t* pPropertyCount,
1281 VkLayerProperties* pProperties)
1282 {
1283 if (pProperties == NULL) {
1284 *pPropertyCount = 0;
1285 return VK_SUCCESS;
1286 }
1287
1288 /* None supported at this time */
1289 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1290 }
1291
1292 VkResult radv_EnumerateDeviceLayerProperties(
1293 VkPhysicalDevice physicalDevice,
1294 uint32_t* pPropertyCount,
1295 VkLayerProperties* pProperties)
1296 {
1297 if (pProperties == NULL) {
1298 *pPropertyCount = 0;
1299 return VK_SUCCESS;
1300 }
1301
1302 /* None supported at this time */
1303 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1304 }
1305
1306 void radv_GetDeviceQueue(
1307 VkDevice _device,
1308 uint32_t queueFamilyIndex,
1309 uint32_t queueIndex,
1310 VkQueue* pQueue)
1311 {
1312 RADV_FROM_HANDLE(radv_device, device, _device);
1313
1314 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1315 }
1316
1317 static void radv_dump_trace(struct radv_device *device,
1318 struct radeon_winsys_cs *cs)
1319 {
1320 const char *filename = getenv("RADV_TRACE_FILE");
1321 FILE *f = fopen(filename, "w");
1322 if (!f) {
1323 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1324 return;
1325 }
1326
1327 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1328 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1329 fclose(f);
1330 }
1331
1332 static void
1333 fill_geom_tess_rings(struct radv_queue *queue,
1334 uint32_t *map,
1335 bool add_sample_positions,
1336 uint32_t esgs_ring_size,
1337 struct radeon_winsys_bo *esgs_ring_bo,
1338 uint32_t gsvs_ring_size,
1339 struct radeon_winsys_bo *gsvs_ring_bo,
1340 uint32_t tess_factor_ring_size,
1341 struct radeon_winsys_bo *tess_factor_ring_bo,
1342 uint32_t tess_offchip_ring_size,
1343 struct radeon_winsys_bo *tess_offchip_ring_bo)
1344 {
1345 uint64_t esgs_va = 0, gsvs_va = 0;
1346 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1347 uint32_t *desc = &map[4];
1348
1349 if (esgs_ring_bo)
1350 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1351 if (gsvs_ring_bo)
1352 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1353 if (tess_factor_ring_bo)
1354 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1355 if (tess_offchip_ring_bo)
1356 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1357
1358 /* stride 0, num records - size, add tid, swizzle, elsize4,
1359 index stride 64 */
1360 desc[0] = esgs_va;
1361 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1362 S_008F04_STRIDE(0) |
1363 S_008F04_SWIZZLE_ENABLE(true);
1364 desc[2] = esgs_ring_size;
1365 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1366 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1367 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1368 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1369 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1370 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1371 S_008F0C_ELEMENT_SIZE(1) |
1372 S_008F0C_INDEX_STRIDE(3) |
1373 S_008F0C_ADD_TID_ENABLE(true);
1374
1375 desc += 4;
1376 /* GS entry for ES->GS ring */
1377 /* stride 0, num records - size, elsize0,
1378 index stride 0 */
1379 desc[0] = esgs_va;
1380 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1381 S_008F04_STRIDE(0) |
1382 S_008F04_SWIZZLE_ENABLE(false);
1383 desc[2] = esgs_ring_size;
1384 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1385 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1386 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1387 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1388 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1389 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1390 S_008F0C_ELEMENT_SIZE(0) |
1391 S_008F0C_INDEX_STRIDE(0) |
1392 S_008F0C_ADD_TID_ENABLE(false);
1393
1394 desc += 4;
1395 /* VS entry for GS->VS ring */
1396 /* stride 0, num records - size, elsize0,
1397 index stride 0 */
1398 desc[0] = gsvs_va;
1399 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1400 S_008F04_STRIDE(0) |
1401 S_008F04_SWIZZLE_ENABLE(false);
1402 desc[2] = gsvs_ring_size;
1403 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1404 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1405 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1406 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1407 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1408 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1409 S_008F0C_ELEMENT_SIZE(0) |
1410 S_008F0C_INDEX_STRIDE(0) |
1411 S_008F0C_ADD_TID_ENABLE(false);
1412 desc += 4;
1413
1414 /* stride gsvs_itemsize, num records 64
1415 elsize 4, index stride 16 */
1416 /* shader will patch stride and desc[2] */
1417 desc[0] = gsvs_va;
1418 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1419 S_008F04_STRIDE(0) |
1420 S_008F04_SWIZZLE_ENABLE(true);
1421 desc[2] = 0;
1422 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1423 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1424 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1425 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1426 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1427 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1428 S_008F0C_ELEMENT_SIZE(1) |
1429 S_008F0C_INDEX_STRIDE(1) |
1430 S_008F0C_ADD_TID_ENABLE(true);
1431 desc += 4;
1432
1433 desc[0] = tess_factor_va;
1434 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1435 S_008F04_STRIDE(0) |
1436 S_008F04_SWIZZLE_ENABLE(false);
1437 desc[2] = tess_factor_ring_size;
1438 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1439 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1440 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1441 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1442 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1443 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1444 S_008F0C_ELEMENT_SIZE(0) |
1445 S_008F0C_INDEX_STRIDE(0) |
1446 S_008F0C_ADD_TID_ENABLE(false);
1447 desc += 4;
1448
1449 desc[0] = tess_offchip_va;
1450 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1451 S_008F04_STRIDE(0) |
1452 S_008F04_SWIZZLE_ENABLE(false);
1453 desc[2] = tess_offchip_ring_size;
1454 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1455 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1456 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1457 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1458 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1459 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1460 S_008F0C_ELEMENT_SIZE(0) |
1461 S_008F0C_INDEX_STRIDE(0) |
1462 S_008F0C_ADD_TID_ENABLE(false);
1463 desc += 4;
1464
1465 /* add sample positions after all rings */
1466 memcpy(desc, queue->device->sample_locations_1x, 8);
1467 desc += 2;
1468 memcpy(desc, queue->device->sample_locations_2x, 16);
1469 desc += 4;
1470 memcpy(desc, queue->device->sample_locations_4x, 32);
1471 desc += 8;
1472 memcpy(desc, queue->device->sample_locations_8x, 64);
1473 desc += 16;
1474 memcpy(desc, queue->device->sample_locations_16x, 128);
1475 }
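/* Summary of the layout written above (descriptive note): map[0..3] is left
 * for the scratch V# filled in by the caller, map[4..27] holds six 4-dword
 * buffer descriptors (two ES->GS ring views, two GS->VS ring views, the tess
 * factor ring and the tess off-chip ring), followed by the 1x/2x/4x/8x/16x
 * sample-position tables when requested. */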
1476
1477 static unsigned
1478 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1479 {
1480 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1481 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1482 device->physical_device->rad_info.family != CHIP_STONEY;
1483 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1484 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1485 device->physical_device->rad_info.max_se;
1486 unsigned offchip_granularity;
1487 unsigned hs_offchip_param;
1488 switch (device->tess_offchip_block_dw_size) {
1489 default:
1490 assert(0);
1491 /* fall through */
1492 case 8192:
1493 offchip_granularity = V_03093C_X_8K_DWORDS;
1494 break;
1495 case 4096:
1496 offchip_granularity = V_03093C_X_4K_DWORDS;
1497 break;
1498 }
1499
1500 switch (device->physical_device->rad_info.chip_class) {
1501 case SI:
1502 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1503 break;
1504 case CIK:
1505 case VI:
1506 case GFX9:
1507 default:
1508 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1509 break;
1510 }
1511
1512 *max_offchip_buffers_p = max_offchip_buffers;
1513 if (device->physical_device->rad_info.chip_class >= CIK) {
1514 if (device->physical_device->rad_info.chip_class >= VI)
1515 --max_offchip_buffers;
1516 hs_offchip_param =
1517 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1518 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1519 } else {
1520 hs_offchip_param =
1521 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1522 }
1523 return hs_offchip_param;
1524 }
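/* Illustrative example: a VI part with 4 SEs allows 128 * 4 = 512 off-chip
 * buffers, clamped to 508; the encoded OFFCHIP_BUFFERING value is then 507,
 * since VI and newer store the count minus one. */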
1525
1526 static VkResult
1527 radv_get_preamble_cs(struct radv_queue *queue,
1528 uint32_t scratch_size,
1529 uint32_t compute_scratch_size,
1530 uint32_t esgs_ring_size,
1531 uint32_t gsvs_ring_size,
1532 bool needs_tess_rings,
1533 bool needs_sample_positions,
1534 struct radeon_winsys_cs **initial_preamble_cs,
1535 struct radeon_winsys_cs **continue_preamble_cs)
1536 {
1537 struct radeon_winsys_bo *scratch_bo = NULL;
1538 struct radeon_winsys_bo *descriptor_bo = NULL;
1539 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1540 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1541 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1542 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1543 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1544 struct radeon_winsys_cs *dest_cs[2] = {0};
1545 bool add_tess_rings = false, add_sample_positions = false;
1546 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1547 unsigned max_offchip_buffers;
1548 unsigned hs_offchip_param = 0;
1549 if (!queue->has_tess_rings) {
1550 if (needs_tess_rings)
1551 add_tess_rings = true;
1552 }
1553 if (!queue->has_sample_positions) {
1554 if (needs_sample_positions)
1555 add_sample_positions = true;
1556 }
1557 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1558 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1559 &max_offchip_buffers);
1560 tess_offchip_ring_size = max_offchip_buffers *
1561 queue->device->tess_offchip_block_dw_size * 4;
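	/* Illustrative arithmetic: with 4 SEs the tess factor ring is
	 * 32768 * 4 = 128 KiB; with 508 off-chip buffers of 8192 dwords the
	 * off-chip ring is 508 * 8192 * 4 bytes, a bit under 16 MiB. */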
1562
1563 if (scratch_size <= queue->scratch_size &&
1564 compute_scratch_size <= queue->compute_scratch_size &&
1565 esgs_ring_size <= queue->esgs_ring_size &&
1566 gsvs_ring_size <= queue->gsvs_ring_size &&
1567 !add_tess_rings && !add_sample_positions &&
1568 queue->initial_preamble_cs) {
1569 *initial_preamble_cs = queue->initial_preamble_cs;
1570 *continue_preamble_cs = queue->continue_preamble_cs;
1571 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1572 *continue_preamble_cs = NULL;
1573 return VK_SUCCESS;
1574 }
1575
1576 if (scratch_size > queue->scratch_size) {
1577 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1578 scratch_size,
1579 4096,
1580 RADEON_DOMAIN_VRAM,
1581 RADEON_FLAG_NO_CPU_ACCESS);
1582 if (!scratch_bo)
1583 goto fail;
1584 } else
1585 scratch_bo = queue->scratch_bo;
1586
1587 if (compute_scratch_size > queue->compute_scratch_size) {
1588 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1589 compute_scratch_size,
1590 4096,
1591 RADEON_DOMAIN_VRAM,
1592 RADEON_FLAG_NO_CPU_ACCESS);
1593 if (!compute_scratch_bo)
1594 goto fail;
1595
1596 } else
1597 compute_scratch_bo = queue->compute_scratch_bo;
1598
1599 if (esgs_ring_size > queue->esgs_ring_size) {
1600 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1601 esgs_ring_size,
1602 4096,
1603 RADEON_DOMAIN_VRAM,
1604 RADEON_FLAG_NO_CPU_ACCESS);
1605 if (!esgs_ring_bo)
1606 goto fail;
1607 } else {
1608 esgs_ring_bo = queue->esgs_ring_bo;
1609 esgs_ring_size = queue->esgs_ring_size;
1610 }
1611
1612 if (gsvs_ring_size > queue->gsvs_ring_size) {
1613 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1614 gsvs_ring_size,
1615 4096,
1616 RADEON_DOMAIN_VRAM,
1617 RADEON_FLAG_NO_CPU_ACCESS);
1618 if (!gsvs_ring_bo)
1619 goto fail;
1620 } else {
1621 gsvs_ring_bo = queue->gsvs_ring_bo;
1622 gsvs_ring_size = queue->gsvs_ring_size;
1623 }
1624
1625 if (add_tess_rings) {
1626 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1627 tess_factor_ring_size,
1628 256,
1629 RADEON_DOMAIN_VRAM,
1630 RADEON_FLAG_NO_CPU_ACCESS);
1631 if (!tess_factor_ring_bo)
1632 goto fail;
1633 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1634 tess_offchip_ring_size,
1635 256,
1636 RADEON_DOMAIN_VRAM,
1637 RADEON_FLAG_NO_CPU_ACCESS);
1638 if (!tess_offchip_ring_bo)
1639 goto fail;
1640 } else {
1641 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1642 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1643 }
1644
1645 if (scratch_bo != queue->scratch_bo ||
1646 esgs_ring_bo != queue->esgs_ring_bo ||
1647 gsvs_ring_bo != queue->gsvs_ring_bo ||
1648 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1649 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1650 uint32_t size = 0;
1651 if (gsvs_ring_bo || esgs_ring_bo ||
1652 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
			size = 112; /* scratch V# (2 dwords) + 2 dwords padding + 6 ring descriptors * 4 dwords = 112 bytes */
			if (add_sample_positions)
				size += 256; /* (1+2+4+8+16) samples * 2 floats * 4 bytes = 248 bytes, padded to 256. */
1656 }
1657 else if (scratch_bo)
1658 size = 8; /* 2 dword */
1659
1660 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1661 size,
1662 4096,
1663 RADEON_DOMAIN_VRAM,
1664 RADEON_FLAG_CPU_ACCESS);
1665 if (!descriptor_bo)
1666 goto fail;
1667 } else
1668 descriptor_bo = queue->descriptor_bo;
1669
1670 for(int i = 0; i < 2; ++i) {
1671 struct radeon_winsys_cs *cs = NULL;
1672 cs = queue->device->ws->cs_create(queue->device->ws,
1673 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1674 if (!cs)
1675 goto fail;
1676
1677 dest_cs[i] = cs;
1678
1679 if (scratch_bo)
1680 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1681
1682 if (esgs_ring_bo)
1683 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1684
1685 if (gsvs_ring_bo)
1686 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1687
1688 if (tess_factor_ring_bo)
1689 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1690
1691 if (tess_offchip_ring_bo)
1692 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1693
1694 if (descriptor_bo)
1695 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1696
1697 if (descriptor_bo != queue->descriptor_bo) {
1698 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1699
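			/* The first two dwords form a minimal scratch buffer
			 * descriptor: VA bits [31:0] in map[0], VA bits
			 * [47:32] plus the swizzle-enable bit in map[1].
			 * The remaining descriptor fields are presumably
			 * supplied by the shader-side setup. */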
1700 if (scratch_bo) {
1701 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1702 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1703 S_008F04_SWIZZLE_ENABLE(1);
1704 map[0] = scratch_va;
1705 map[1] = rsrc1;
1706 }
1707
1708 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1709 add_sample_positions)
1710 fill_geom_tess_rings(queue, map, add_sample_positions,
1711 esgs_ring_size, esgs_ring_bo,
1712 gsvs_ring_size, gsvs_ring_bo,
1713 tess_factor_ring_size, tess_factor_ring_bo,
1714 tess_offchip_ring_size, tess_offchip_ring_bo);
1715
1716 queue->device->ws->buffer_unmap(descriptor_bo);
1717 }
1718
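		/* Before repointing the VGT at new ring buffers, make sure no
		 * in-flight vertex/geometry work still references the old
		 * ones: VS_PARTIAL_FLUSH waits for outstanding VS waves and
		 * VGT_FLUSH flushes the VGT's internal ring state. */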
1719 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1720 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1721 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1722 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1723 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1724 }
1725
1726 if (esgs_ring_bo || gsvs_ring_bo) {
1727 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1728 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1729 radeon_emit(cs, esgs_ring_size >> 8);
1730 radeon_emit(cs, gsvs_ring_size >> 8);
1731 } else {
1732 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1733 radeon_emit(cs, esgs_ring_size >> 8);
1734 radeon_emit(cs, gsvs_ring_size >> 8);
1735 }
1736 }
1737
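		/* Program the tess-factor ring: the size register is in
		 * dwords (hence / 4) and the base address is in 256-byte
		 * units (hence >> 8); GFX9 adds a register for the address
		 * bits above bit 39 (hence >> 40). */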
1738 if (tess_factor_ring_bo) {
1739 uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1740 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1741 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1742 S_030938_SIZE(tess_factor_ring_size / 4));
1743 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1744 tf_va >> 8);
1745 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1746 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1747 tf_va >> 40);
1748 }
1749 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1750 } else {
1751 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1752 S_008988_SIZE(tess_factor_ring_size / 4));
1753 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1754 tf_va >> 8);
1755 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1756 hs_offchip_param);
1757 }
1758 }
1759
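		/* Publish the ring-descriptor BO to every hardware shader
		 * stage by writing its 64-bit GPU address into the first two
		 * USER_DATA SGPRs of each stage. */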
1760 if (descriptor_bo) {
1761 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1762 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1763 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1764 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1765 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1766 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1767
1768 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1769
1770 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1771 radeon_set_sh_reg_seq(cs, regs[i], 2);
1772 radeon_emit(cs, va);
1773 radeon_emit(cs, va >> 32);
1774 }
1775 }
1776
1777 if (compute_scratch_bo) {
1778 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1779 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1780 S_008F04_SWIZZLE_ENABLE(1);
1781
1782 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1783
1784 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1785 radeon_emit(cs, scratch_va);
1786 radeon_emit(cs, rsrc1);
1787 }
1788
1789 if (!i) {
1790 si_cs_emit_cache_flush(cs,
1791 false,
1792 queue->device->physical_device->rad_info.chip_class,
1793 NULL, 0,
1794 queue->queue_family_index == RING_COMPUTE &&
1795 queue->device->physical_device->rad_info.chip_class >= CIK,
1796 RADV_CMD_FLAG_INV_ICACHE |
1797 RADV_CMD_FLAG_INV_SMEM_L1 |
1798 RADV_CMD_FLAG_INV_VMEM_L1 |
1799 RADV_CMD_FLAG_INV_GLOBAL_L2);
1800 }
1801
1802 if (!queue->device->ws->cs_finalize(cs))
1803 goto fail;
1804 }
1805
1806 if (queue->initial_preamble_cs)
1807 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1808
1809 if (queue->continue_preamble_cs)
1810 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1811
1812 queue->initial_preamble_cs = dest_cs[0];
1813 queue->continue_preamble_cs = dest_cs[1];
1814
1815 if (scratch_bo != queue->scratch_bo) {
1816 if (queue->scratch_bo)
1817 queue->device->ws->buffer_destroy(queue->scratch_bo);
1818 queue->scratch_bo = scratch_bo;
1819 queue->scratch_size = scratch_size;
1820 }
1821
1822 if (compute_scratch_bo != queue->compute_scratch_bo) {
1823 if (queue->compute_scratch_bo)
1824 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1825 queue->compute_scratch_bo = compute_scratch_bo;
1826 queue->compute_scratch_size = compute_scratch_size;
1827 }
1828
1829 if (esgs_ring_bo != queue->esgs_ring_bo) {
1830 if (queue->esgs_ring_bo)
1831 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1832 queue->esgs_ring_bo = esgs_ring_bo;
1833 queue->esgs_ring_size = esgs_ring_size;
1834 }
1835
1836 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1837 if (queue->gsvs_ring_bo)
1838 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1839 queue->gsvs_ring_bo = gsvs_ring_bo;
1840 queue->gsvs_ring_size = gsvs_ring_size;
1841 }
1842
1843 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1844 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1845 }
1846
1847 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1848 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1849 queue->has_tess_rings = true;
1850 }
1851
1852 if (descriptor_bo != queue->descriptor_bo) {
1853 if (queue->descriptor_bo)
1854 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1855
1856 queue->descriptor_bo = descriptor_bo;
1857 }
1858
1859 if (add_sample_positions)
1860 queue->has_sample_positions = true;
1861
1862 *initial_preamble_cs = queue->initial_preamble_cs;
1863 *continue_preamble_cs = queue->continue_preamble_cs;
1864 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1865 *continue_preamble_cs = NULL;
1866 return VK_SUCCESS;
1867 fail:
1868 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1869 if (dest_cs[i])
1870 queue->device->ws->cs_destroy(dest_cs[i]);
1871 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1872 queue->device->ws->buffer_destroy(descriptor_bo);
1873 if (scratch_bo && scratch_bo != queue->scratch_bo)
1874 queue->device->ws->buffer_destroy(scratch_bo);
1875 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1876 queue->device->ws->buffer_destroy(compute_scratch_bo);
1877 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1878 queue->device->ws->buffer_destroy(esgs_ring_bo);
1879 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1880 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1881 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1882 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1883 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1884 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1885 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1886 }
1887
1888 VkResult radv_QueueSubmit(
1889 VkQueue _queue,
1890 uint32_t submitCount,
1891 const VkSubmitInfo* pSubmits,
1892 VkFence _fence)
1893 {
1894 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1895 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1896 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1897 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1898 int ret;
1899 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1900 uint32_t scratch_size = 0;
1901 uint32_t compute_scratch_size = 0;
1902 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1903 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1904 VkResult result;
1905 bool fence_emitted = false;
1906 bool tess_rings_needed = false;
1907 bool sample_positions_needed = false;
1908
1909 /* Do this first so failing to allocate scratch buffers can't result in
1910 * partially executed submissions. */
1911 for (uint32_t i = 0; i < submitCount; i++) {
1912 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1913 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1914 pSubmits[i].pCommandBuffers[j]);
1915
1916 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1917 compute_scratch_size = MAX2(compute_scratch_size,
1918 cmd_buffer->compute_scratch_size_needed);
1919 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1920 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1921 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1922 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1923 }
1924 }
1925
1926 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1927 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1928 sample_positions_needed,
1929 &initial_preamble_cs, &continue_preamble_cs);
1930 if (result != VK_SUCCESS)
1931 return result;
1932
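	/* Large batches are split into chunks of at most max_cs_submission
	 * command streams (1 when tracing, so a hang can be attributed to a
	 * single CS).  Wait semaphores are attached only to the first chunk
	 * and signal semaphores only to the last. */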
1933 for (uint32_t i = 0; i < submitCount; i++) {
1934 struct radeon_winsys_cs **cs_array;
1935 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1936 bool can_patch = !do_flush;
1937 uint32_t advance;
1938
1939 if (!pSubmits[i].commandBufferCount) {
1940 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1941 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1942 &queue->device->empty_cs[queue->queue_family_index],
1943 1, NULL, NULL,
1944 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1945 pSubmits[i].waitSemaphoreCount,
1946 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1947 pSubmits[i].signalSemaphoreCount,
1948 false, base_fence);
1949 if (ret) {
1950 radv_loge("failed to submit CS %d\n", i);
1951 abort();
1952 }
1953 fence_emitted = true;
1954 }
1955 continue;
1956 }
1957
		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
		                  (pSubmits[i].commandBufferCount + do_flush));
		if (!cs_array)
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

		if (do_flush)
			cs_array[0] = pSubmits[i].waitSemaphoreCount ?
				queue->device->flush_shader_cs[queue->queue_family_index] :
				queue->device->flush_cs[queue->queue_family_index];
1965
1966 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1967 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1968 pSubmits[i].pCommandBuffers[j]);
1969 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1970
1971 cs_array[j + do_flush] = cmd_buffer->cs;
1972 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1973 can_patch = false;
1974 }
1975
1976 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1977 advance = MIN2(max_cs_submission,
1978 pSubmits[i].commandBufferCount + do_flush - j);
1979 bool b = j == 0;
1980 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1981
1982 if (queue->device->trace_bo)
1983 *queue->device->trace_id_ptr = 0;
1984
1985 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1986 advance, initial_preamble_cs, continue_preamble_cs,
1987 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1988 b ? pSubmits[i].waitSemaphoreCount : 0,
1989 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1990 e ? pSubmits[i].signalSemaphoreCount : 0,
1991 can_patch, base_fence);
1992
1993 if (ret) {
1994 radv_loge("failed to submit CS %d\n", i);
1995 abort();
1996 }
1997 fence_emitted = true;
1998 if (queue->device->trace_bo) {
1999 bool success = queue->device->ws->ctx_wait_idle(
2000 queue->hw_ctx,
2001 radv_queue_family_to_ring(
2002 queue->queue_family_index),
2003 queue->queue_idx);
2004
2005 if (!success) { /* Hang */
2006 radv_dump_trace(queue->device, cs_array[j]);
2007 abort();
2008 }
2009 }
2010 }
2011 free(cs_array);
2012 }
2013
2014 if (fence) {
		if (!fence_emitted) {
			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
							   &queue->device->empty_cs[queue->queue_family_index],
							   1, NULL, NULL, NULL, 0, NULL, 0,
							   false, base_fence);
			if (ret) {
				radv_loge("failed to submit fence-only CS\n");
				abort();
			}
		}
2020
2021 fence->submitted = true;
2022 }
2023
2024 return VK_SUCCESS;
2025 }
2026
2027 VkResult radv_QueueWaitIdle(
2028 VkQueue _queue)
2029 {
2030 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2031
2032 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2033 radv_queue_family_to_ring(queue->queue_family_index),
2034 queue->queue_idx);
2035 return VK_SUCCESS;
2036 }
2037
2038 VkResult radv_DeviceWaitIdle(
2039 VkDevice _device)
2040 {
2041 RADV_FROM_HANDLE(radv_device, device, _device);
2042
2043 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2044 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2045 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2046 }
2047 }
2048 return VK_SUCCESS;
2049 }
2050
2051 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2052 VkInstance instance,
2053 const char* pName)
2054 {
2055 return radv_lookup_entrypoint(pName);
2056 }
2057
2058 /* The loader wants us to expose a second GetInstanceProcAddr function
2059 * to work around certain LD_PRELOAD issues seen in apps.
2060 */
2061 PUBLIC
2062 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2063 VkInstance instance,
2064 const char* pName);
2065
2066 PUBLIC
2067 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2068 VkInstance instance,
2069 const char* pName)
2070 {
2071 return radv_GetInstanceProcAddr(instance, pName);
2072 }
2073
2074 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2075 VkDevice device,
2076 const char* pName)
2077 {
2078 return radv_lookup_entrypoint(pName);
2079 }
2080
2081 bool radv_get_memory_fd(struct radv_device *device,
2082 struct radv_device_memory *memory,
2083 int *pFD)
2084 {
2085 struct radeon_bo_metadata metadata;
2086
2087 if (memory->image) {
2088 radv_init_metadata(device, memory->image, &metadata);
2089 device->ws->buffer_set_metadata(memory->bo, &metadata);
2090 }
2091
2092 return device->ws->buffer_get_fd(device->ws, memory->bo,
2093 pFD);
2094 }
2095
2096 VkResult radv_AllocateMemory(
2097 VkDevice _device,
2098 const VkMemoryAllocateInfo* pAllocateInfo,
2099 const VkAllocationCallbacks* pAllocator,
2100 VkDeviceMemory* pMem)
2101 {
2102 RADV_FROM_HANDLE(radv_device, device, _device);
2103 struct radv_device_memory *mem;
2104 VkResult result;
2105 enum radeon_bo_domain domain;
2106 uint32_t flags = 0;
2107
2108 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2109
2110 if (pAllocateInfo->allocationSize == 0) {
2111 /* Apparently, this is allowed */
2112 *pMem = VK_NULL_HANDLE;
2113 return VK_SUCCESS;
2114 }
2115
2116 const VkImportMemoryFdInfoKHR *import_info =
2117 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2118 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2119 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2120
2121 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2122 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2123 if (mem == NULL)
2124 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2125
2126 if (dedicate_info) {
2127 mem->image = radv_image_from_handle(dedicate_info->image);
2128 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2129 } else {
2130 mem->image = NULL;
2131 mem->buffer = NULL;
2132 }
2133
2134 if (import_info) {
2135 assert(import_info->handleType ==
2136 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2137 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2138 NULL, NULL);
2139 if (!mem->bo) {
2140 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2141 goto fail;
2142 } else
2143 goto out_success;
2144 }
2145
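	/* Map the Vulkan memory type onto a winsys domain + flags.  Roughly:
	 *
	 *   RADV_MEM_TYPE_VRAM              -> VRAM, no CPU access
	 *   RADV_MEM_TYPE_VRAM_CPU_ACCESS   -> VRAM, CPU-visible
	 *   RADV_MEM_TYPE_GTT_WRITE_COMBINE -> GTT, CPU-visible, write-combined
	 *   RADV_MEM_TYPE_GTT_CACHED        -> GTT, CPU-visible, cached
	 */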
2146 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2147 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2148 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2149 domain = RADEON_DOMAIN_GTT;
2150 else
2151 domain = RADEON_DOMAIN_VRAM;
2152
2153 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2154 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2155 else
2156 flags |= RADEON_FLAG_CPU_ACCESS;
2157
2158 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2159 flags |= RADEON_FLAG_GTT_WC;
2160
	mem->bo = device->ws->buffer_create(device->ws, alloc_size,
	                                    device->physical_device->rad_info.max_alignment,
	                                    domain, flags);
2163
2164 if (!mem->bo) {
2165 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2166 goto fail;
2167 }
2168 mem->type_index = pAllocateInfo->memoryTypeIndex;
2169 out_success:
2170 *pMem = radv_device_memory_to_handle(mem);
2171
2172 return VK_SUCCESS;
2173
2174 fail:
2175 vk_free2(&device->alloc, pAllocator, mem);
2176
2177 return result;
2178 }
2179
2180 void radv_FreeMemory(
2181 VkDevice _device,
2182 VkDeviceMemory _mem,
2183 const VkAllocationCallbacks* pAllocator)
2184 {
2185 RADV_FROM_HANDLE(radv_device, device, _device);
2186 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2187
2188 if (mem == NULL)
2189 return;
2190
2191 device->ws->buffer_destroy(mem->bo);
2192 mem->bo = NULL;
2193
2194 vk_free2(&device->alloc, pAllocator, mem);
2195 }
2196
2197 VkResult radv_MapMemory(
2198 VkDevice _device,
2199 VkDeviceMemory _memory,
2200 VkDeviceSize offset,
2201 VkDeviceSize size,
2202 VkMemoryMapFlags flags,
2203 void** ppData)
2204 {
2205 RADV_FROM_HANDLE(radv_device, device, _device);
2206 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2207
2208 if (mem == NULL) {
2209 *ppData = NULL;
2210 return VK_SUCCESS;
2211 }
2212
	*ppData = device->ws->buffer_map(mem->bo);
	if (*ppData) {
		/* Arithmetic on void * is a GNU extension; apply the offset
		 * through a char * for portability. */
		*ppData = (char *)*ppData + offset;
		return VK_SUCCESS;
	}
2218
2219 return VK_ERROR_MEMORY_MAP_FAILED;
2220 }
2221
2222 void radv_UnmapMemory(
2223 VkDevice _device,
2224 VkDeviceMemory _memory)
2225 {
2226 RADV_FROM_HANDLE(radv_device, device, _device);
2227 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2228
2229 if (mem == NULL)
2230 return;
2231
2232 device->ws->buffer_unmap(mem->bo);
2233 }
2234
2235 VkResult radv_FlushMappedMemoryRanges(
2236 VkDevice _device,
2237 uint32_t memoryRangeCount,
2238 const VkMappedMemoryRange* pMemoryRanges)
2239 {
2240 return VK_SUCCESS;
2241 }
2242
2243 VkResult radv_InvalidateMappedMemoryRanges(
2244 VkDevice _device,
2245 uint32_t memoryRangeCount,
2246 const VkMappedMemoryRange* pMemoryRanges)
2247 {
2248 return VK_SUCCESS;
2249 }
2250
2251 void radv_GetBufferMemoryRequirements(
2252 VkDevice device,
2253 VkBuffer _buffer,
2254 VkMemoryRequirements* pMemoryRequirements)
2255 {
2256 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2257
2258 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2259
2260 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2261 pMemoryRequirements->alignment = 4096;
2262 else
2263 pMemoryRequirements->alignment = 16;
2264
2265 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2266 }
2267
2268 void radv_GetBufferMemoryRequirements2KHR(
2269 VkDevice device,
2270 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2271 VkMemoryRequirements2KHR* pMemoryRequirements)
2272 {
2273 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2274 &pMemoryRequirements->memoryRequirements);
2275
2276 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2277 switch (ext->sType) {
2278 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2279 VkMemoryDedicatedRequirementsKHR *req =
2280 (VkMemoryDedicatedRequirementsKHR *) ext;
2281 req->requiresDedicatedAllocation = false;
2282 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2283 break;
2284 }
2285 default:
2286 break;
2287 }
2288 }
2289 }
2290
2291 void radv_GetImageMemoryRequirements(
2292 VkDevice device,
2293 VkImage _image,
2294 VkMemoryRequirements* pMemoryRequirements)
2295 {
2296 RADV_FROM_HANDLE(radv_image, image, _image);
2297
2298 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2299
2300 pMemoryRequirements->size = image->size;
2301 pMemoryRequirements->alignment = image->alignment;
2302 }
2303
2304 void radv_GetImageMemoryRequirements2KHR(
2305 VkDevice device,
2306 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2307 VkMemoryRequirements2KHR* pMemoryRequirements)
2308 {
2309 radv_GetImageMemoryRequirements(device, pInfo->image,
2310 &pMemoryRequirements->memoryRequirements);
2311
2312 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2313
2314 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2315 switch (ext->sType) {
2316 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2317 VkMemoryDedicatedRequirementsKHR *req =
2318 (VkMemoryDedicatedRequirementsKHR *) ext;
2319 req->requiresDedicatedAllocation = image->shareable;
2320 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2321 break;
2322 }
2323 default:
2324 break;
2325 }
2326 }
2327 }
2328
2329 void radv_GetImageSparseMemoryRequirements(
2330 VkDevice device,
2331 VkImage image,
2332 uint32_t* pSparseMemoryRequirementCount,
2333 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2334 {
2335 stub();
2336 }
2337
2338 void radv_GetImageSparseMemoryRequirements2KHR(
2339 VkDevice device,
2340 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2341 uint32_t* pSparseMemoryRequirementCount,
2342 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2343 {
2344 stub();
2345 }
2346
2347 void radv_GetDeviceMemoryCommitment(
2348 VkDevice device,
2349 VkDeviceMemory memory,
2350 VkDeviceSize* pCommittedMemoryInBytes)
2351 {
2352 *pCommittedMemoryInBytes = 0;
2353 }
2354
2355 VkResult radv_BindBufferMemory(
2356 VkDevice device,
2357 VkBuffer _buffer,
2358 VkDeviceMemory _memory,
2359 VkDeviceSize memoryOffset)
2360 {
2361 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2362 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2363
2364 if (mem) {
2365 buffer->bo = mem->bo;
2366 buffer->offset = memoryOffset;
2367 } else {
2368 buffer->bo = NULL;
2369 buffer->offset = 0;
2370 }
2371
2372 return VK_SUCCESS;
2373 }
2374
2375 VkResult radv_BindImageMemory(
2376 VkDevice device,
2377 VkImage _image,
2378 VkDeviceMemory _memory,
2379 VkDeviceSize memoryOffset)
2380 {
2381 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2382 RADV_FROM_HANDLE(radv_image, image, _image);
2383
2384 if (mem) {
2385 image->bo = mem->bo;
2386 image->offset = memoryOffset;
2387 } else {
2388 image->bo = NULL;
2389 image->offset = 0;
2390 }
2391
2392 return VK_SUCCESS;
2393 }
2394
2395
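/* Sparse binding: the buffer/image BO was created with
 * RADEON_FLAG_VIRTUAL, i.e. it only reserves a GPU virtual address range.
 * Each bind below points a [resourceOffset, resourceOffset + size) slice
 * of that range at a slice of real memory, or unmaps it when memory is
 * VK_NULL_HANDLE. */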
2396 static void
2397 radv_sparse_buffer_bind_memory(struct radv_device *device,
2398 const VkSparseBufferMemoryBindInfo *bind)
2399 {
2400 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2401
2402 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2403 struct radv_device_memory *mem = NULL;
2404
2405 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2406 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2407
2408 device->ws->buffer_virtual_bind(buffer->bo,
2409 bind->pBinds[i].resourceOffset,
2410 bind->pBinds[i].size,
2411 mem ? mem->bo : NULL,
2412 bind->pBinds[i].memoryOffset);
2413 }
2414 }
2415
2416 static void
2417 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2418 const VkSparseImageOpaqueMemoryBindInfo *bind)
2419 {
2420 RADV_FROM_HANDLE(radv_image, image, bind->image);
2421
2422 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2423 struct radv_device_memory *mem = NULL;
2424
2425 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2426 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2427
2428 device->ws->buffer_virtual_bind(image->bo,
2429 bind->pBinds[i].resourceOffset,
2430 bind->pBinds[i].size,
2431 mem ? mem->bo : NULL,
2432 bind->pBinds[i].memoryOffset);
2433 }
2434 }
2435
2436 VkResult radv_QueueBindSparse(
2437 VkQueue _queue,
2438 uint32_t bindInfoCount,
2439 const VkBindSparseInfo* pBindInfo,
2440 VkFence _fence)
2441 {
2442 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2443 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2444 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2445 bool fence_emitted = false;
2446
2447 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2448 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2449 radv_sparse_buffer_bind_memory(queue->device,
2450 pBindInfo[i].pBufferBinds + j);
2451 }
2452
2453 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2454 radv_sparse_image_opaque_bind_memory(queue->device,
2455 pBindInfo[i].pImageOpaqueBinds + j);
2456 }
2457
2458 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2459 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2460 &queue->device->empty_cs[queue->queue_family_index],
2461 1, NULL, NULL,
2462 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2463 pBindInfo[i].waitSemaphoreCount,
2464 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2465 pBindInfo[i].signalSemaphoreCount,
2466 false, base_fence);
2467 fence_emitted = true;
2468 if (fence)
2469 fence->submitted = true;
2470 }
2471 }
2472
2473 if (fence && !fence_emitted) {
2474 fence->signalled = true;
2475 }
2476
2477 return VK_SUCCESS;
2478 }
2479
2480 VkResult radv_CreateFence(
2481 VkDevice _device,
2482 const VkFenceCreateInfo* pCreateInfo,
2483 const VkAllocationCallbacks* pAllocator,
2484 VkFence* pFence)
2485 {
2486 RADV_FROM_HANDLE(radv_device, device, _device);
2487 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2488 sizeof(*fence), 8,
2489 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2490
2491 if (!fence)
2492 return VK_ERROR_OUT_OF_HOST_MEMORY;
2493
2494 memset(fence, 0, sizeof(*fence));
2495 fence->submitted = false;
2496 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2497 fence->fence = device->ws->create_fence();
2498 if (!fence->fence) {
2499 vk_free2(&device->alloc, pAllocator, fence);
2500 return VK_ERROR_OUT_OF_HOST_MEMORY;
2501 }
2502
2503 *pFence = radv_fence_to_handle(fence);
2504
2505 return VK_SUCCESS;
2506 }
2507
2508 void radv_DestroyFence(
2509 VkDevice _device,
2510 VkFence _fence,
2511 const VkAllocationCallbacks* pAllocator)
2512 {
2513 RADV_FROM_HANDLE(radv_device, device, _device);
2514 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2515
2516 if (!fence)
2517 return;
2518 device->ws->destroy_fence(fence->fence);
2519 vk_free2(&device->alloc, pAllocator, fence);
2520 }
2521
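/* Vulkan passes a relative timeout in nanoseconds, but the winsys wait
 * takes an absolute CLOCK_MONOTONIC deadline; the MIN2 clamp keeps the
 * addition from overflowing when timeout is UINT64_MAX.  For example,
 * assuming a hypothetical current time of 100 ns,
 * radv_get_absolute_timeout(50) yields 150, while UINT64_MAX stays
 * UINT64_MAX. */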
2522 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2523 {
2524 uint64_t current_time;
2525 struct timespec tv;
2526
2527 clock_gettime(CLOCK_MONOTONIC, &tv);
	current_time = tv.tv_nsec + tv.tv_sec * 1000000000ull;
2529
2530 timeout = MIN2(UINT64_MAX - current_time, timeout);
2531
2532 return current_time + timeout;
2533 }
2534
2535 VkResult radv_WaitForFences(
2536 VkDevice _device,
2537 uint32_t fenceCount,
2538 const VkFence* pFences,
2539 VkBool32 waitAll,
2540 uint64_t timeout)
2541 {
2542 RADV_FROM_HANDLE(radv_device, device, _device);
2543 timeout = radv_get_absolute_timeout(timeout);
2544
2545 if (!waitAll && fenceCount > 1) {
2546 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2547 }
2548
2549 for (uint32_t i = 0; i < fenceCount; ++i) {
2550 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2551 bool expired = false;
2552
2553 if (fence->signalled)
2554 continue;
2555
2556 if (!fence->submitted)
2557 return VK_TIMEOUT;
2558
2559 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2560 if (!expired)
2561 return VK_TIMEOUT;
2562
2563 fence->signalled = true;
2564 }
2565
2566 return VK_SUCCESS;
2567 }
2568
2569 VkResult radv_ResetFences(VkDevice device,
2570 uint32_t fenceCount,
2571 const VkFence *pFences)
2572 {
2573 for (unsigned i = 0; i < fenceCount; ++i) {
2574 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2575 fence->submitted = fence->signalled = false;
2576 }
2577
2578 return VK_SUCCESS;
2579 }
2580
2581 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2582 {
2583 RADV_FROM_HANDLE(radv_device, device, _device);
2584 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2585
2586 if (fence->signalled)
2587 return VK_SUCCESS;
2588 if (!fence->submitted)
2589 return VK_NOT_READY;
2590
2591 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2592 return VK_NOT_READY;
2593
2594 return VK_SUCCESS;
2595 }
2596
2597
/* Queue semaphore functions */
2599
2600 VkResult radv_CreateSemaphore(
2601 VkDevice _device,
2602 const VkSemaphoreCreateInfo* pCreateInfo,
2603 const VkAllocationCallbacks* pAllocator,
2604 VkSemaphore* pSemaphore)
2605 {
2606 RADV_FROM_HANDLE(radv_device, device, _device);
2607 struct radeon_winsys_sem *sem;
2608
2609 sem = device->ws->create_sem(device->ws);
2610 if (!sem)
2611 return VK_ERROR_OUT_OF_HOST_MEMORY;
2612
2613 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2614 return VK_SUCCESS;
2615 }
2616
2617 void radv_DestroySemaphore(
2618 VkDevice _device,
2619 VkSemaphore _semaphore,
2620 const VkAllocationCallbacks* pAllocator)
2621 {
2622 RADV_FROM_HANDLE(radv_device, device, _device);
2623 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2624 if (!_semaphore)
2625 return;
2626
2627 device->ws->destroy_sem(sem);
2628 }
2629
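/* Events are backed by a single 8-byte, CPU-visible GTT buffer: the GPU
 * writes 0 or 1 into it at pipeline points, and vkGetEventStatus simply
 * reads the mapped value back. */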
2630 VkResult radv_CreateEvent(
2631 VkDevice _device,
2632 const VkEventCreateInfo* pCreateInfo,
2633 const VkAllocationCallbacks* pAllocator,
2634 VkEvent* pEvent)
2635 {
2636 RADV_FROM_HANDLE(radv_device, device, _device);
2637 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2638 sizeof(*event), 8,
2639 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2640
2641 if (!event)
2642 return VK_ERROR_OUT_OF_HOST_MEMORY;
2643
2644 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2645 RADEON_DOMAIN_GTT,
2646 RADEON_FLAG_CPU_ACCESS);
2647 if (!event->bo) {
2648 vk_free2(&device->alloc, pAllocator, event);
2649 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2650 }
2651
	event->map = (uint64_t*)device->ws->buffer_map(event->bo);
	if (!event->map) {
		device->ws->buffer_destroy(event->bo);
		vk_free2(&device->alloc, pAllocator, event);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}
2653
2654 *pEvent = radv_event_to_handle(event);
2655
2656 return VK_SUCCESS;
2657 }
2658
2659 void radv_DestroyEvent(
2660 VkDevice _device,
2661 VkEvent _event,
2662 const VkAllocationCallbacks* pAllocator)
2663 {
2664 RADV_FROM_HANDLE(radv_device, device, _device);
2665 RADV_FROM_HANDLE(radv_event, event, _event);
2666
2667 if (!event)
2668 return;
2669 device->ws->buffer_destroy(event->bo);
2670 vk_free2(&device->alloc, pAllocator, event);
2671 }
2672
2673 VkResult radv_GetEventStatus(
2674 VkDevice _device,
2675 VkEvent _event)
2676 {
2677 RADV_FROM_HANDLE(radv_event, event, _event);
2678
2679 if (*event->map == 1)
2680 return VK_EVENT_SET;
2681 return VK_EVENT_RESET;
2682 }
2683
2684 VkResult radv_SetEvent(
2685 VkDevice _device,
2686 VkEvent _event)
2687 {
2688 RADV_FROM_HANDLE(radv_event, event, _event);
2689 *event->map = 1;
2690
2691 return VK_SUCCESS;
2692 }
2693
2694 VkResult radv_ResetEvent(
2695 VkDevice _device,
2696 VkEvent _event)
2697 {
2698 RADV_FROM_HANDLE(radv_event, event, _event);
2699 *event->map = 0;
2700
2701 return VK_SUCCESS;
2702 }
2703
2704 VkResult radv_CreateBuffer(
2705 VkDevice _device,
2706 const VkBufferCreateInfo* pCreateInfo,
2707 const VkAllocationCallbacks* pAllocator,
2708 VkBuffer* pBuffer)
2709 {
2710 RADV_FROM_HANDLE(radv_device, device, _device);
2711 struct radv_buffer *buffer;
2712
2713 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2714
2715 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2716 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2717 if (buffer == NULL)
2718 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2719
2720 buffer->size = pCreateInfo->size;
2721 buffer->usage = pCreateInfo->usage;
2722 buffer->bo = NULL;
2723 buffer->offset = 0;
2724 buffer->flags = pCreateInfo->flags;
2725
2726 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2727 buffer->bo = device->ws->buffer_create(device->ws,
2728 align64(buffer->size, 4096),
2729 4096, 0, RADEON_FLAG_VIRTUAL);
2730 if (!buffer->bo) {
2731 vk_free2(&device->alloc, pAllocator, buffer);
2732 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2733 }
2734 }
2735
2736 *pBuffer = radv_buffer_to_handle(buffer);
2737
2738 return VK_SUCCESS;
2739 }
2740
2741 void radv_DestroyBuffer(
2742 VkDevice _device,
2743 VkBuffer _buffer,
2744 const VkAllocationCallbacks* pAllocator)
2745 {
2746 RADV_FROM_HANDLE(radv_device, device, _device);
2747 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2748
2749 if (!buffer)
2750 return;
2751
2752 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2753 device->ws->buffer_destroy(buffer->bo);
2754
2755 vk_free2(&device->alloc, pAllocator, buffer);
2756 }
2757
2758 static inline unsigned
2759 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2760 {
2761 if (stencil)
2762 return image->surface.u.legacy.stencil_tiling_index[level];
2763 else
2764 return image->surface.u.legacy.tiling_index[level];
2765 }
2766
2767 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2768 {
2769 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2770 }
2771
2772 static void
2773 radv_initialise_color_surface(struct radv_device *device,
2774 struct radv_color_buffer_info *cb,
2775 struct radv_image_view *iview)
2776 {
2777 const struct vk_format_description *desc;
2778 unsigned ntype, format, swap, endian;
2779 unsigned blend_clamp = 0, blend_bypass = 0;
2780 uint64_t va;
2781 const struct radeon_surf *surf = &iview->image->surface;
2782
2783 desc = vk_format_description(iview->vk_format);
2784
2785 memset(cb, 0, sizeof(*cb));
2786
2787 /* Intensity is implemented as Red, so treat it that way. */
2788 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2789
2790 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2791
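	/* On GFX9 the metadata alignment bits come from whichever meta
	 * surface is actually used: DCC when present, CMASK otherwise. */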
2792 if (device->physical_device->rad_info.chip_class >= GFX9) {
2793 struct gfx9_surf_meta_flags meta;
2794 if (iview->image->dcc_offset)
2795 meta = iview->image->surface.u.gfx9.dcc;
2796 else
2797 meta = iview->image->surface.u.gfx9.cmask;
2798
2799 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2800 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2801 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2802 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2803
2804 va += iview->image->surface.u.gfx9.surf_offset >> 8;
2805 } else {
2806 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2807 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2808
2809 va += level_info->offset;
2810
2811 pitch_tile_max = level_info->nblk_x / 8 - 1;
2812 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2813 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2814
2815 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2816 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2817 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2818
2819 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2820 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2821
2822 if (iview->image->fmask.size) {
2823 if (device->physical_device->rad_info.chip_class >= CIK)
2824 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2825 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2826 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2827 } else {
2828 /* This must be set for fast clear to work without FMASK. */
2829 if (device->physical_device->rad_info.chip_class >= CIK)
2830 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2831 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2832 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2833 }
2834 }
2835
2836 cb->cb_color_base = va >> 8;
2837 if (device->physical_device->rad_info.chip_class < GFX9)
2838 cb->cb_color_base |= iview->image->surface.u.legacy.tile_swizzle;
2839 /* CMASK variables */
2840 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2841 va += iview->image->cmask.offset;
2842 cb->cb_color_cmask = va >> 8;
2843
2844 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2845 va += iview->image->dcc_offset;
2846 cb->cb_dcc_base = va >> 8;
2847 if (device->physical_device->rad_info.chip_class < GFX9)
2848 cb->cb_dcc_base |= iview->image->surface.u.legacy.tile_swizzle;
2849
2850 uint32_t max_slice = radv_surface_layer_count(iview);
2851 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2852 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2853
2854 if (iview->image->info.samples > 1) {
2855 unsigned log_samples = util_logbase2(iview->image->info.samples);
2856
2857 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2858 S_028C74_NUM_FRAGMENTS(log_samples);
2859 }
2860
2861 if (iview->image->fmask.size) {
2862 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2863 cb->cb_color_fmask = va >> 8;
2864 if (device->physical_device->rad_info.chip_class < GFX9)
2865 cb->cb_color_fmask |= iview->image->surface.u.legacy.tile_swizzle;
2866 } else {
2867 cb->cb_color_fmask = cb->cb_color_base;
2868 }
2869
2870 ntype = radv_translate_color_numformat(iview->vk_format,
2871 desc,
2872 vk_format_get_first_non_void_channel(iview->vk_format));
2873 format = radv_translate_colorformat(iview->vk_format);
2874 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2875 radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, false);
2877 endian = radv_colorformat_endian_swap(format);
2878
2879 /* blend clamp should be set for all NORM/SRGB types */
2880 if (ntype == V_028C70_NUMBER_UNORM ||
2881 ntype == V_028C70_NUMBER_SNORM ||
2882 ntype == V_028C70_NUMBER_SRGB)
2883 blend_clamp = 1;
2884
	/* set blend bypass according to docs if SINT/UINT or
	 * 8/24 COLOR variants */
2887 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2888 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2889 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2890 blend_clamp = 0;
2891 blend_bypass = 1;
2892 }
2893 #if 0
2894 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2895 (format == V_028C70_COLOR_8 ||
2896 format == V_028C70_COLOR_8_8 ||
2897 format == V_028C70_COLOR_8_8_8_8))
2898 ->color_is_int8 = true;
2899 #endif
2900 cb->cb_color_info = S_028C70_FORMAT(format) |
2901 S_028C70_COMP_SWAP(swap) |
2902 S_028C70_BLEND_CLAMP(blend_clamp) |
2903 S_028C70_BLEND_BYPASS(blend_bypass) |
2904 S_028C70_SIMPLE_FLOAT(1) |
2905 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2906 ntype != V_028C70_NUMBER_SNORM &&
2907 ntype != V_028C70_NUMBER_SRGB &&
2908 format != V_028C70_COLOR_8_24 &&
2909 format != V_028C70_COLOR_24_8) |
2910 S_028C70_NUMBER_TYPE(ntype) |
2911 S_028C70_ENDIAN(endian);
	if (iview->image->info.samples > 1 && iview->image->fmask.size)
		cb->cb_color_info |= S_028C70_COMPRESSION(1);
2915
2916 if (iview->image->cmask.size &&
2917 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2918 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2919
2920 if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
2921 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2922
2923 if (device->physical_device->rad_info.chip_class >= VI) {
2924 unsigned max_uncompressed_block_size = 2;
2925 if (iview->image->info.samples > 1) {
2926 if (iview->image->surface.bpe == 1)
2927 max_uncompressed_block_size = 0;
2928 else if (iview->image->surface.bpe == 2)
2929 max_uncompressed_block_size = 1;
2930 }
2931
2932 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2933 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2934 }
2935
2936 /* This must be set for fast clear to work without FMASK. */
2937 if (!iview->image->fmask.size &&
2938 device->physical_device->rad_info.chip_class == SI) {
2939 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
2940 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2941 }
2942
2943 if (device->physical_device->rad_info.chip_class >= GFX9) {
2944 uint32_t max_slice = radv_surface_layer_count(iview);
2945 unsigned mip0_depth = iview->base_layer + max_slice - 1;
2946
2947 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
2948 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
2949 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
2950 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
2951 S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
2952 S_028C68_MAX_MIP(iview->image->info.levels);
2953
		cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
	}
2957 }
2958
2959 static void
2960 radv_initialise_ds_surface(struct radv_device *device,
2961 struct radv_ds_buffer_info *ds,
2962 struct radv_image_view *iview)
2963 {
2964 unsigned level = iview->base_mip;
2965 unsigned format, stencil_format;
2966 uint64_t va, s_offs, z_offs;
2967 bool stencil_only = false;
2968 memset(ds, 0, sizeof(*ds));
2969 switch (iview->image->vk_format) {
2970 case VK_FORMAT_D24_UNORM_S8_UINT:
2971 case VK_FORMAT_X8_D24_UNORM_PACK32:
2972 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2973 ds->offset_scale = 2.0f;
2974 break;
2975 case VK_FORMAT_D16_UNORM:
2976 case VK_FORMAT_D16_UNORM_S8_UINT:
2977 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2978 ds->offset_scale = 4.0f;
2979 break;
2980 case VK_FORMAT_D32_SFLOAT:
2981 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2982 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2983 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2984 ds->offset_scale = 1.0f;
2985 break;
2986 case VK_FORMAT_S8_UINT:
2987 stencil_only = true;
2988 break;
2989 default:
2990 break;
2991 }
2992
2993 format = radv_translate_dbformat(iview->image->vk_format);
2994 stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
2995 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
2996
2997 uint32_t max_slice = radv_surface_layer_count(iview);
2998 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2999 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
3000
3001 ds->db_htile_data_base = 0;
3002 ds->db_htile_surface = 0;
3003
3004 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
3005 s_offs = z_offs = va;
3006
3007 if (device->physical_device->rad_info.chip_class >= GFX9) {
3008 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3009 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3010
3011 ds->db_z_info = S_028038_FORMAT(format) |
3012 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3013 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3014 S_028038_MAXMIP(iview->image->info.levels - 1);
3015 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3016 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3017
3018 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3019 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3020 ds->db_depth_view |= S_028008_MIPID(level);
3021
3022 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3023 S_02801C_Y_MAX(iview->image->info.height - 1);
3024
3025 /* Only use HTILE for the first level. */
3026 if (iview->image->surface.htile_size && !level) {
3027 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3028
3029 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
3030 /* Use all of the htile_buffer for depth if there's no stencil. */
3031 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3032 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
3033 iview->image->htile_offset;
3034 ds->db_htile_data_base = va >> 8;
3035 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3036 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3037 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3038 }
3039 } else {
3040 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3041
3042 if (stencil_only)
3043 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3044
3045 z_offs += iview->image->surface.u.legacy.level[level].offset;
3046 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3047
3048 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
3049 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3050 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3051
3052 if (iview->image->info.samples > 1)
3053 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3054
3055 if (device->physical_device->rad_info.chip_class >= CIK) {
3056 struct radeon_info *info = &device->physical_device->rad_info;
3057 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3058 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3059 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3060 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3061 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3062 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3063
3064 if (stencil_only)
3065 tile_mode = stencil_tile_mode;
3066
3067 ds->db_depth_info |=
3068 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3069 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3070 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3071 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3072 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3073 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3074 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3075 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3076 } else {
3077 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3078 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3079 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3080 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3081 }
3082
3083 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3084 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3085 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3086
3087 if (iview->image->surface.htile_size && !level) {
3088 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3089
3090 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
3091 /* Use all of the htile_buffer for depth if there's no stencil. */
3092 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3093
3094 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
3095 iview->image->htile_offset;
3096 ds->db_htile_data_base = va >> 8;
3097 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3098 }
3099 }
3100
3101 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3102 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3103 }
3104
3105 VkResult radv_CreateFramebuffer(
3106 VkDevice _device,
3107 const VkFramebufferCreateInfo* pCreateInfo,
3108 const VkAllocationCallbacks* pAllocator,
3109 VkFramebuffer* pFramebuffer)
3110 {
3111 RADV_FROM_HANDLE(radv_device, device, _device);
3112 struct radv_framebuffer *framebuffer;
3113
3114 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3115
3116 size_t size = sizeof(*framebuffer) +
3117 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3118 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3119 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3120 if (framebuffer == NULL)
3121 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3122
3123 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3124 framebuffer->width = pCreateInfo->width;
3125 framebuffer->height = pCreateInfo->height;
3126 framebuffer->layers = pCreateInfo->layers;
3127 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3128 VkImageView _iview = pCreateInfo->pAttachments[i];
3129 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3130 framebuffer->attachments[i].attachment = iview;
3131 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3132 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3133 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3134 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3135 }
3136 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3137 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3138 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3139 }
3140
3141 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3142 return VK_SUCCESS;
3143 }
3144
3145 void radv_DestroyFramebuffer(
3146 VkDevice _device,
3147 VkFramebuffer _fb,
3148 const VkAllocationCallbacks* pAllocator)
3149 {
3150 RADV_FROM_HANDLE(radv_device, device, _device);
3151 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3152
3153 if (!fb)
3154 return;
3155 vk_free2(&device->alloc, pAllocator, fb);
3156 }
3157
3158 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3159 {
3160 switch (address_mode) {
3161 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3162 return V_008F30_SQ_TEX_WRAP;
3163 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3164 return V_008F30_SQ_TEX_MIRROR;
3165 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3166 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3167 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3168 return V_008F30_SQ_TEX_CLAMP_BORDER;
3169 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3170 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3171 default:
3172 unreachable("illegal tex wrap mode");
3173 break;
3174 }
3175 }
3176
3177 static unsigned
3178 radv_tex_compare(VkCompareOp op)
3179 {
3180 switch (op) {
3181 case VK_COMPARE_OP_NEVER:
3182 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3183 case VK_COMPARE_OP_LESS:
3184 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3185 case VK_COMPARE_OP_EQUAL:
3186 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3187 case VK_COMPARE_OP_LESS_OR_EQUAL:
3188 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3189 case VK_COMPARE_OP_GREATER:
3190 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3191 case VK_COMPARE_OP_NOT_EQUAL:
3192 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3193 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3194 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3195 case VK_COMPARE_OP_ALWAYS:
3196 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3197 default:
3198 unreachable("illegal compare mode");
3199 break;
3200 }
3201 }
3202
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
					V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
					V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter\n");
		return 0;
	}
}
3219
3220 static unsigned
3221 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3222 {
3223 switch (mode) {
3224 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3225 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3226 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3227 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3228 default:
3229 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3230 }
3231 }
3232
3233 static unsigned
3234 radv_tex_bordercolor(VkBorderColor bcolor)
3235 {
3236 switch (bcolor) {
3237 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3238 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3239 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3240 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3241 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3242 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3243 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3244 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3245 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3246 default:
3247 break;
3248 }
3249 return 0;
3250 }
3251
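/* Map maxAnisotropy to the hardware's log2 ratio encoding:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4. */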
3252 static unsigned
3253 radv_tex_aniso_filter(unsigned filter)
3254 {
3255 if (filter < 2)
3256 return 0;
3257 if (filter < 4)
3258 return 1;
3259 if (filter < 8)
3260 return 2;
3261 if (filter < 16)
3262 return 3;
3263 return 4;
3264 }
3265
3266 static void
3267 radv_init_sampler(struct radv_device *device,
3268 struct radv_sampler *sampler,
3269 const VkSamplerCreateInfo *pCreateInfo)
3270 {
3271 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3272 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3273 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3274 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3275
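	/* Pack the sampler into the four 32-bit words of an SQ image
	 * sampler resource: word 0 holds addressing/compare state, word 1
	 * the LOD range, word 2 the LOD bias and filters, word 3 the
	 * border color. */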
3276 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3277 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3278 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3279 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3280 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3281 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3282 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3283 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3284 S_008F30_DISABLE_CUBE_WRAP(0) |
3285 S_008F30_COMPAT_MODE(is_vi));
3286 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3287 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3288 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3289 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3290 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3291 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3292 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3293 S_008F38_MIP_POINT_PRECLAMP(0) |
3294 S_008F38_DISABLE_LSB_CEIL(1) |
3295 S_008F38_FILTER_PREC_FIX(1) |
3296 S_008F38_ANISO_OVERRIDE(is_vi));
3297 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3298 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3299 }
3300
3301 VkResult radv_CreateSampler(
3302 VkDevice _device,
3303 const VkSamplerCreateInfo* pCreateInfo,
3304 const VkAllocationCallbacks* pAllocator,
3305 VkSampler* pSampler)
3306 {
3307 RADV_FROM_HANDLE(radv_device, device, _device);
3308 struct radv_sampler *sampler;
3309
3310 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3311
3312 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3313 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3314 if (!sampler)
3315 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3316
3317 radv_init_sampler(device, sampler, pCreateInfo);
3318 *pSampler = radv_sampler_to_handle(sampler);
3319
3320 return VK_SUCCESS;
3321 }
3322
3323 void radv_DestroySampler(
3324 VkDevice _device,
3325 VkSampler _sampler,
3326 const VkAllocationCallbacks* pAllocator)
3327 {
3328 RADV_FROM_HANDLE(radv_device, device, _device);
3329 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3330
3331 if (!sampler)
3332 return;
3333 vk_free2(&device->alloc, pAllocator, sampler);
3334 }
3335
3336 /* vk_icd.h does not declare this function, so we declare it here to
3337 * suppress Wmissing-prototypes.
3338 */
3339 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3340 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3341
3342 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3343 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3344 {
3345 /* For the full details on loader interface versioning, see
3346 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3347 * What follows is a condensed summary, to help you navigate the large and
3348 * confusing official doc.
3349 *
3350 * - Loader interface v0 is incompatible with later versions. We don't
3351 * support it.
3352 *
3353 * - In loader interface v1:
3354 * - The first ICD entrypoint called by the loader is
3355 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3356 * entrypoint.
3357 * - The ICD must statically expose no other Vulkan symbol unless it is
3358 * linked with -Bsymbolic.
3359 * - Each dispatchable Vulkan handle created by the ICD must be
3360 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3361 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3362 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3363 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3364 * such loader-managed surfaces.
3365 *
3366 * - Loader interface v2 differs from v1 in:
3367 * - The first ICD entrypoint called by the loader is
3368 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3369 * statically expose this entrypoint.
3370 *
3371 * - Loader interface v3 differs from v2 in:
3372 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 *      vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
3374 * because the loader no longer does so.
3375 */
3376 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3377 return VK_SUCCESS;
3378 }
3379
3380 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3381 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3382 int *pFD)
3383 {
3384 RADV_FROM_HANDLE(radv_device, device, _device);
3385 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3386
3387 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3388
3389 /* We support only one handle type. */
3390 assert(pGetFdInfo->handleType ==
3391 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3392
3393 bool ret = radv_get_memory_fd(device, memory, pFD);
	if (!ret)
3395 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3396 return VK_SUCCESS;
3397 }
3398
3399 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3400 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3401 int fd,
3402 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3403 {
3404 /* The valid usage section for this function says:
3405 *
3406 * "handleType must not be one of the handle types defined as opaque."
3407 *
3408 * Since we only handle opaque handles for now, there are no FD properties.
3409 */
3410 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
3411 }