radv: add rb+ support for GFX9
[mesa.git] src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "gfx9d.h"
46 #include "util/debug.h"
47
48 static int
49 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
50 {
51 uint32_t mesa_timestamp, llvm_timestamp;
52 uint16_t f = family;
53 memset(uuid, 0, VK_UUID_SIZE);
54 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
55 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
56 return -1;
57
58 memcpy(uuid, &mesa_timestamp, 4);
59 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
60 memcpy((char*)uuid + 8, &f, 2);
61 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
62 return 0;
63 }
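/* Resulting cache UUID layout (VK_UUID_SIZE = 16 bytes):
 *   bytes  0-3   Mesa build timestamp
 *   bytes  4-7   LLVM build timestamp
 *   bytes  8-9   radeon_family, truncated to 16 bits
 *   bytes 10-15  "radv", zero-padded by the memset above
 * Any change of Mesa, LLVM or GPU family therefore invalidates the
 * on-disk pipeline cache.
 */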
64
65 static void
66 radv_get_device_uuid(drmDevicePtr device, void *uuid)
{
67 memset(uuid, 0, VK_UUID_SIZE);
68 memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
69 memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
70 memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
71 memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
72 }
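/* The device UUID is just the PCI address (domain, bus, device,
 * function) zero-padded to 16 bytes, so it is stable across driver
 * builds and lets external-memory users match up the same physical
 * GPU between processes.
 */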
73
74 static const VkExtensionProperties instance_extensions[] = {
75 {
76 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
77 .specVersion = 25,
78 },
79 #ifdef VK_USE_PLATFORM_XCB_KHR
80 {
81 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
82 .specVersion = 6,
83 },
84 #endif
85 #ifdef VK_USE_PLATFORM_XLIB_KHR
86 {
87 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
88 .specVersion = 6,
89 },
90 #endif
91 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
92 {
93 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
94 .specVersion = 5,
95 },
96 #endif
97 {
98 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
99 .specVersion = 1,
100 },
101 {
102 .extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
103 .specVersion = 1,
104 },
105 };
106
107 static const VkExtensionProperties common_device_extensions[] = {
108 {
109 .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
110 .specVersion = 1,
111 },
112 {
113 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
114 .specVersion = 1,
115 },
116 {
117 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
118 .specVersion = 1,
119 },
120 {
121 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
122 .specVersion = 1,
123 },
124 {
125 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
126 .specVersion = 1,
127 },
128 {
129 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
130 .specVersion = 68,
131 },
132 {
133 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
134 .specVersion = 1,
135 },
136 {
137 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
138 .specVersion = 1,
139 },
140 {
141 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
142 .specVersion = 1,
143 },
144 {
145 .extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
146 .specVersion = 1,
147 },
148 {
149 .extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
150 .specVersion = 1,
151 },
152 };
153
154 static VkResult
155 radv_extensions_register(struct radv_instance *instance,
156 struct radv_extensions *extensions,
157 const VkExtensionProperties *new_ext,
158 uint32_t num_ext)
159 {
160 size_t new_size;
161 VkExtensionProperties *new_ptr;
162
163 assert(new_ext && num_ext > 0);
164
165 if (!new_ext)
166 return VK_ERROR_INITIALIZATION_FAILED;
167
168 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
169 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
170 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
171
172 /* Old array continues to be valid, update nothing */
173 if (!new_ptr)
174 return VK_ERROR_OUT_OF_HOST_MEMORY;
175
176 memcpy(&new_ptr[extensions->num_ext], new_ext,
177 num_ext * sizeof(VkExtensionProperties));
178 extensions->ext_array = new_ptr;
179 extensions->num_ext += num_ext;
180
181 return VK_SUCCESS;
182 }
183
184 static void
185 radv_extensions_finish(struct radv_instance *instance,
186 struct radv_extensions *extensions)
187 {
188 assert(extensions);
189
190 if (!extensions)
191 radv_loge("Attemted to free invalid extension struct\n");
192
193 if (extensions->ext_array)
194 vk_free(&instance->alloc, extensions->ext_array);
195 }
196
197 static bool
198 is_extension_enabled(const VkExtensionProperties *extensions,
199 size_t num_ext,
200 const char *name)
201 {
202 assert(extensions && name);
203
204 for (uint32_t i = 0; i < num_ext; i++) {
205 if (strcmp(name, extensions[i].extensionName) == 0)
206 return true;
207 }
208
209 return false;
210 }
211
212 static const char *
213 get_chip_name(enum radeon_family family)
214 {
215 switch (family) {
216 case CHIP_TAHITI: return "AMD RADV TAHITI";
217 case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
218 case CHIP_VERDE: return "AMD RADV CAPE VERDE";
219 case CHIP_OLAND: return "AMD RADV OLAND";
220 case CHIP_HAINAN: return "AMD RADV HAINAN";
221 case CHIP_BONAIRE: return "AMD RADV BONAIRE";
222 case CHIP_KAVERI: return "AMD RADV KAVERI";
223 case CHIP_KABINI: return "AMD RADV KABINI";
224 case CHIP_HAWAII: return "AMD RADV HAWAII";
225 case CHIP_MULLINS: return "AMD RADV MULLINS";
226 case CHIP_TONGA: return "AMD RADV TONGA";
227 case CHIP_ICELAND: return "AMD RADV ICELAND";
228 case CHIP_CARRIZO: return "AMD RADV CARRIZO";
229 case CHIP_FIJI: return "AMD RADV FIJI";
230 case CHIP_POLARIS10: return "AMD RADV POLARIS10";
231 case CHIP_POLARIS11: return "AMD RADV POLARIS11";
232 case CHIP_POLARIS12: return "AMD RADV POLARIS12";
233 case CHIP_STONEY: return "AMD RADV STONEY";
234 case CHIP_VEGA10: return "AMD RADV VEGA";
235 case CHIP_RAVEN: return "AMD RADV RAVEN";
236 default: return "AMD RADV unknown";
237 }
238 }
239
240 static VkResult
241 radv_physical_device_init(struct radv_physical_device *device,
242 struct radv_instance *instance,
243 drmDevicePtr drm_device)
244 {
245 const char *path = drm_device->nodes[DRM_NODE_RENDER];
246 VkResult result;
247 drmVersionPtr version;
248 int fd;
249
250 fd = open(path, O_RDWR | O_CLOEXEC);
251 if (fd < 0)
252 return VK_ERROR_INCOMPATIBLE_DRIVER;
253
254 version = drmGetVersion(fd);
255 if (!version) {
256 close(fd);
257 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
258 "failed to get version %s: %m", path);
259 }
260
261 if (strcmp(version->name, "amdgpu")) {
262 drmFreeVersion(version);
263 close(fd);
264 return VK_ERROR_INCOMPATIBLE_DRIVER;
265 }
266 drmFreeVersion(version);
267
268 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
269 device->instance = instance;
270 assert(strlen(path) < ARRAY_SIZE(device->path));
271 strncpy(device->path, path, ARRAY_SIZE(device->path));
272
273 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
274 if (!device->ws) {
275 result = VK_ERROR_INCOMPATIBLE_DRIVER;
276 goto fail;
277 }
278
279 device->local_fd = fd;
280 device->ws->query_info(device->ws, &device->rad_info);
281 result = radv_init_wsi(device);
282 if (result != VK_SUCCESS) {
283 device->ws->destroy(device->ws);
284 goto fail;
285 }
286
287 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
288 radv_finish_wsi(device);
289 device->ws->destroy(device->ws);
290 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
291 "cannot generate UUID");
292 goto fail;
293 }
294
295 result = radv_extensions_register(instance,
296 &device->extensions,
297 common_device_extensions,
298 ARRAY_SIZE(common_device_extensions));
299 if (result != VK_SUCCESS)
300 goto fail;
301
302 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
303 device->name = get_chip_name(device->rad_info.family);
304
305 radv_get_device_uuid(drm_device, device->device_uuid);
306
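/* RB+ (render backend plus): the registers exist on Stoney and on
 * everything GFX9 or newer, so those chips must program them
 * (has_rbplus), but the optimization itself is only turned on for
 * Stoney for now (rbplus_allowed).
 */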
307 if (device->rad_info.family == CHIP_STONEY ||
308 device->rad_info.chip_class >= GFX9) {
309 device->has_rbplus = true;
310 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
311 }
312
313 return VK_SUCCESS;
314
315 fail:
316 close(fd);
317 return result;
318 }
319
320 static void
321 radv_physical_device_finish(struct radv_physical_device *device)
322 {
323 radv_extensions_finish(device->instance, &device->extensions);
324 radv_finish_wsi(device);
325 device->ws->destroy(device->ws);
326 close(device->local_fd);
327 }
328
329 static void *
330 default_alloc_func(void *pUserData, size_t size, size_t align,
331 VkSystemAllocationScope allocationScope)
332 {
333 return malloc(size);
334 }
335
336 static void *
337 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
338 size_t align, VkSystemAllocationScope allocationScope)
339 {
340 return realloc(pOriginal, size);
341 }
342
343 static void
344 default_free_func(void *pUserData, void *pMemory)
345 {
346 free(pMemory);
347 }
348
349 static const VkAllocationCallbacks default_alloc = {
350 .pUserData = NULL,
351 .pfnAllocation = default_alloc_func,
352 .pfnReallocation = default_realloc_func,
353 .pfnFree = default_free_func,
354 };
355
356 static const struct debug_control radv_debug_options[] = {
357 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
358 {"nodcc", RADV_DEBUG_NO_DCC},
359 {"shaders", RADV_DEBUG_DUMP_SHADERS},
360 {"nocache", RADV_DEBUG_NO_CACHE},
361 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
362 {"nohiz", RADV_DEBUG_NO_HIZ},
363 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
364 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
365 {"allbos", RADV_DEBUG_ALL_BOS},
366 {"noibs", RADV_DEBUG_NO_IBS},
367 {NULL, 0}
368 };
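/* These flags are parsed from the RADV_DEBUG environment variable
 * below; options are given as a comma-separated list, e.g.
 *
 *     RADV_DEBUG=shaders,nocache vkcube
 *
 * to dump shaders while disabling the pipeline cache.
 */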
369
370 VkResult radv_CreateInstance(
371 const VkInstanceCreateInfo* pCreateInfo,
372 const VkAllocationCallbacks* pAllocator,
373 VkInstance* pInstance)
374 {
375 struct radv_instance *instance;
376
377 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
378
379 uint32_t client_version;
380 if (pCreateInfo->pApplicationInfo &&
381 pCreateInfo->pApplicationInfo->apiVersion != 0) {
382 client_version = pCreateInfo->pApplicationInfo->apiVersion;
383 } else {
384 client_version = VK_MAKE_VERSION(1, 0, 0);
385 }
386
387 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
388 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
389 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
390 "Client requested version %d.%d.%d",
391 VK_VERSION_MAJOR(client_version),
392 VK_VERSION_MINOR(client_version),
393 VK_VERSION_PATCH(client_version));
394 }
395
396 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
397 if (!is_extension_enabled(instance_extensions,
398 ARRAY_SIZE(instance_extensions),
399 pCreateInfo->ppEnabledExtensionNames[i]))
400 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
401 }
402
403 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
404 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
405 if (!instance)
406 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
407
408 memset(instance, 0, sizeof(*instance));
409
410 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
411
412 if (pAllocator)
413 instance->alloc = *pAllocator;
414 else
415 instance->alloc = default_alloc;
416
417 instance->apiVersion = client_version;
418 instance->physicalDeviceCount = -1;
419
420 _mesa_locale_init();
421
422 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
423
424 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
425 radv_debug_options);
426
427 *pInstance = radv_instance_to_handle(instance);
428
429 return VK_SUCCESS;
430 }
431
432 void radv_DestroyInstance(
433 VkInstance _instance,
434 const VkAllocationCallbacks* pAllocator)
435 {
436 RADV_FROM_HANDLE(radv_instance, instance, _instance);
437
438 if (!instance)
439 return;
440
441 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
442 radv_physical_device_finish(instance->physicalDevices + i);
443 }
444
445 VG(VALGRIND_DESTROY_MEMPOOL(instance));
446
447 _mesa_locale_fini();
448
449 vk_free(&instance->alloc, instance);
450 }
451
452 static VkResult
453 radv_enumerate_devices(struct radv_instance *instance)
454 {
455 /* TODO: Check for more devices? */
456 drmDevicePtr devices[8];
457 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
458 int max_devices;
459
460 instance->physicalDeviceCount = 0;
461
462 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
463 if (max_devices < 1)
464 return VK_ERROR_INCOMPATIBLE_DRIVER;
465
466 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
467 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
468 devices[i]->bustype == DRM_BUS_PCI &&
469 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
470
471 result = radv_physical_device_init(instance->physicalDevices +
472 instance->physicalDeviceCount,
473 instance,
474 devices[i]);
475 if (result == VK_SUCCESS)
476 ++instance->physicalDeviceCount;
477 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
478 break;
479 }
480 }
481 drmFreeDevices(devices, max_devices);
482
483 return result;
484 }
485
486 VkResult radv_EnumeratePhysicalDevices(
487 VkInstance _instance,
488 uint32_t* pPhysicalDeviceCount,
489 VkPhysicalDevice* pPhysicalDevices)
490 {
491 RADV_FROM_HANDLE(radv_instance, instance, _instance);
492 VkResult result;
493
494 if (instance->physicalDeviceCount < 0) {
495 result = radv_enumerate_devices(instance);
496 if (result != VK_SUCCESS &&
497 result != VK_ERROR_INCOMPATIBLE_DRIVER)
498 return result;
499 }
500
501 if (!pPhysicalDevices) {
502 *pPhysicalDeviceCount = instance->physicalDeviceCount;
503 } else {
504 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
505 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
506 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
507 }
508
509 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
510 : VK_SUCCESS;
511 }
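/* This follows the standard Vulkan two-call idiom; a sketch of the
 * caller side (through the loader entry point):
 *
 *     uint32_t count = 0;
 *     vkEnumeratePhysicalDevices(instance, &count, NULL);
 *     VkPhysicalDevice *devs = malloc(count * sizeof(*devs));
 *     vkEnumeratePhysicalDevices(instance, &count, devs);
 *
 * VK_INCOMPLETE is only returned when the caller's array turns out to
 * be smaller than the number of devices found.
 */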
512
513 void radv_GetPhysicalDeviceFeatures(
514 VkPhysicalDevice physicalDevice,
515 VkPhysicalDeviceFeatures* pFeatures)
516 {
517 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
518
519 memset(pFeatures, 0, sizeof(*pFeatures));
520
521 *pFeatures = (VkPhysicalDeviceFeatures) {
522 .robustBufferAccess = true,
523 .fullDrawIndexUint32 = true,
524 .imageCubeArray = true,
525 .independentBlend = true,
526 .geometryShader = true,
527 .tessellationShader = true,
528 .sampleRateShading = false,
529 .dualSrcBlend = true,
530 .logicOp = true,
531 .multiDrawIndirect = true,
532 .drawIndirectFirstInstance = true,
533 .depthClamp = true,
534 .depthBiasClamp = true,
535 .fillModeNonSolid = true,
536 .depthBounds = true,
537 .wideLines = true,
538 .largePoints = true,
539 .alphaToOne = true,
540 .multiViewport = true,
541 .samplerAnisotropy = true,
542 .textureCompressionETC2 = false,
543 .textureCompressionASTC_LDR = false,
544 .textureCompressionBC = true,
545 .occlusionQueryPrecise = true,
546 .pipelineStatisticsQuery = true,
547 .vertexPipelineStoresAndAtomics = true,
548 .fragmentStoresAndAtomics = true,
549 .shaderTessellationAndGeometryPointSize = true,
550 .shaderImageGatherExtended = true,
551 .shaderStorageImageExtendedFormats = true,
552 .shaderStorageImageMultisample = false,
553 .shaderUniformBufferArrayDynamicIndexing = true,
554 .shaderSampledImageArrayDynamicIndexing = true,
555 .shaderStorageBufferArrayDynamicIndexing = true,
556 .shaderStorageImageArrayDynamicIndexing = true,
557 .shaderStorageImageReadWithoutFormat = true,
558 .shaderStorageImageWriteWithoutFormat = true,
559 .shaderClipDistance = true,
560 .shaderCullDistance = true,
561 .shaderFloat64 = true,
562 .shaderInt64 = false,
563 .shaderInt16 = false,
564 .sparseBinding = true,
565 .variableMultisampleRate = true,
566 .inheritedQueries = true,
567 };
568 }
569
570 void radv_GetPhysicalDeviceFeatures2KHR(
571 VkPhysicalDevice physicalDevice,
572 VkPhysicalDeviceFeatures2KHR *pFeatures)
573 {
574 	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
575 }
576
577 static uint32_t radv_get_driver_version(void)
578 {
579 const char *minor_string = strchr(VERSION, '.');
580 	const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
581 int major = atoi(VERSION);
582 int minor = minor_string ? atoi(minor_string + 1) : 0;
583 int patch = patch_string ? atoi(patch_string + 1) : 0;
584 if (strstr(VERSION, "devel")) {
585 if (patch == 0) {
586 patch = 99;
587 if (minor == 0) {
588 minor = 99;
589 --major;
590 } else
591 --minor;
592 } else
593 --patch;
594 }
595 uint32_t version = VK_MAKE_VERSION(major, minor, patch);
596 return version;
597 }
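/* Example of the "-devel" handling above: a development build reports
 * the previous release so the driver never claims an unreleased Mesa
 * version, e.g. VERSION "17.2.0-devel" encodes as 17.1.99 and
 * "17.0.0-devel" as 16.99.99.
 */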
598
599 void radv_GetPhysicalDeviceProperties(
600 VkPhysicalDevice physicalDevice,
601 VkPhysicalDeviceProperties* pProperties)
602 {
603 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
604 VkSampleCountFlags sample_counts = 0xf;
605
606 	/* Make sure that the entire descriptor set is addressable with a signed
607 	 * 32-bit int, so the sum of all limits scaled by descriptor size has to
608 	 * be at most 2 GiB. A combined image/sampler descriptor counts as both a
609 	 * sampler and a sampled image. This limit applies to the pipeline layout,
610 	 * not to the set layout, but there is no set limit, so we just impose a
611 	 * pipeline limit. No app is likely to hit this soon. */
612 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
613 	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
614 	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
615 32 /* sampler, largest when combined with image */ +
616 64 /* sampled image */ +
617 64 /* storage image */);
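	/* The divisor sums to 224 bytes per descriptor slot, so this works
	 * out to roughly (2^31 - 16 * MAX_DYNAMIC_BUFFERS) / 224, i.e. on
	 * the order of 9.5 million descriptors per stage.
	 */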
618
619 VkPhysicalDeviceLimits limits = {
620 .maxImageDimension1D = (1 << 14),
621 .maxImageDimension2D = (1 << 14),
622 .maxImageDimension3D = (1 << 11),
623 .maxImageDimensionCube = (1 << 14),
624 .maxImageArrayLayers = (1 << 11),
625 .maxTexelBufferElements = 128 * 1024 * 1024,
626 .maxUniformBufferRange = UINT32_MAX,
627 .maxStorageBufferRange = UINT32_MAX,
628 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
629 .maxMemoryAllocationCount = UINT32_MAX,
630 .maxSamplerAllocationCount = 64 * 1024,
631 .bufferImageGranularity = 64, /* A cache line */
632 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
633 .maxBoundDescriptorSets = MAX_SETS,
634 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
635 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
636 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
637 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
638 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
639 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
640 .maxPerStageResources = max_descriptor_set_size,
641 .maxDescriptorSetSamplers = max_descriptor_set_size,
642 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
643 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
644 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
645 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
646 .maxDescriptorSetSampledImages = max_descriptor_set_size,
647 .maxDescriptorSetStorageImages = max_descriptor_set_size,
648 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
649 .maxVertexInputAttributes = 32,
650 .maxVertexInputBindings = 32,
651 .maxVertexInputAttributeOffset = 2047,
652 .maxVertexInputBindingStride = 2048,
653 .maxVertexOutputComponents = 128,
654 .maxTessellationGenerationLevel = 64,
655 .maxTessellationPatchSize = 32,
656 .maxTessellationControlPerVertexInputComponents = 128,
657 .maxTessellationControlPerVertexOutputComponents = 128,
658 .maxTessellationControlPerPatchOutputComponents = 120,
659 .maxTessellationControlTotalOutputComponents = 4096,
660 .maxTessellationEvaluationInputComponents = 128,
661 .maxTessellationEvaluationOutputComponents = 128,
662 .maxGeometryShaderInvocations = 127,
663 .maxGeometryInputComponents = 64,
664 .maxGeometryOutputComponents = 128,
665 .maxGeometryOutputVertices = 256,
666 .maxGeometryTotalOutputComponents = 1024,
667 .maxFragmentInputComponents = 128,
668 .maxFragmentOutputAttachments = 8,
669 .maxFragmentDualSrcAttachments = 1,
670 .maxFragmentCombinedOutputResources = 8,
671 .maxComputeSharedMemorySize = 32768,
672 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
673 .maxComputeWorkGroupInvocations = 2048,
674 .maxComputeWorkGroupSize = {
675 2048,
676 2048,
677 2048
678 },
679 .subPixelPrecisionBits = 4 /* FIXME */,
680 .subTexelPrecisionBits = 4 /* FIXME */,
681 .mipmapPrecisionBits = 4 /* FIXME */,
682 .maxDrawIndexedIndexValue = UINT32_MAX,
683 .maxDrawIndirectCount = UINT32_MAX,
684 .maxSamplerLodBias = 16,
685 .maxSamplerAnisotropy = 16,
686 .maxViewports = MAX_VIEWPORTS,
687 .maxViewportDimensions = { (1 << 14), (1 << 14) },
688 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
689 .viewportSubPixelBits = 13, /* We take a float? */
690 .minMemoryMapAlignment = 4096, /* A page */
691 .minTexelBufferOffsetAlignment = 1,
692 .minUniformBufferOffsetAlignment = 4,
693 .minStorageBufferOffsetAlignment = 4,
694 .minTexelOffset = -32,
695 .maxTexelOffset = 31,
696 .minTexelGatherOffset = -32,
697 .maxTexelGatherOffset = 31,
698 .minInterpolationOffset = -2,
699 .maxInterpolationOffset = 2,
700 .subPixelInterpolationOffsetBits = 8,
701 .maxFramebufferWidth = (1 << 14),
702 .maxFramebufferHeight = (1 << 14),
703 .maxFramebufferLayers = (1 << 10),
704 .framebufferColorSampleCounts = sample_counts,
705 .framebufferDepthSampleCounts = sample_counts,
706 .framebufferStencilSampleCounts = sample_counts,
707 .framebufferNoAttachmentsSampleCounts = sample_counts,
708 .maxColorAttachments = MAX_RTS,
709 .sampledImageColorSampleCounts = sample_counts,
710 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
711 .sampledImageDepthSampleCounts = sample_counts,
712 .sampledImageStencilSampleCounts = sample_counts,
713 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
714 .maxSampleMaskWords = 1,
715 .timestampComputeAndGraphics = true,
716 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
717 .maxClipDistances = 8,
718 .maxCullDistances = 8,
719 .maxCombinedClipAndCullDistances = 8,
720 .discreteQueuePriorities = 1,
721 .pointSizeRange = { 0.125, 255.875 },
722 .lineWidthRange = { 0.0, 7.9921875 },
723 .pointSizeGranularity = (1.0 / 8.0),
724 .lineWidthGranularity = (1.0 / 128.0),
725 .strictLines = false, /* FINISHME */
726 .standardSampleLocations = true,
727 .optimalBufferCopyOffsetAlignment = 128,
728 .optimalBufferCopyRowPitchAlignment = 128,
729 .nonCoherentAtomSize = 64,
730 };
731
732 *pProperties = (VkPhysicalDeviceProperties) {
733 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
734 .driverVersion = radv_get_driver_version(),
735 .vendorID = 0x1002,
736 .deviceID = pdevice->rad_info.pci_id,
737 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
738 .limits = limits,
739 .sparseProperties = {0},
740 };
741
742 strcpy(pProperties->deviceName, pdevice->name);
743 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
744 }
745
746 void radv_GetPhysicalDeviceProperties2KHR(
747 VkPhysicalDevice physicalDevice,
748 VkPhysicalDeviceProperties2KHR *pProperties)
749 {
750 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
751 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
752
753 vk_foreach_struct(ext, pProperties->pNext) {
754 switch (ext->sType) {
755 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
756 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
757 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
758 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
759 break;
760 }
761 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
762 VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
763 radv_device_get_cache_uuid(0, properties->driverUUID);
764 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
765 properties->deviceLUIDValid = false;
766 break;
767 }
768 default:
769 break;
770 }
771 }
772 }
773
774 static void radv_get_physical_device_queue_family_properties(
775 struct radv_physical_device* pdevice,
776 uint32_t* pCount,
777 VkQueueFamilyProperties** pQueueFamilyProperties)
778 {
779 int num_queue_families = 1;
780 int idx;
781 if (pdevice->rad_info.num_compute_rings > 0 &&
782 pdevice->rad_info.chip_class >= CIK &&
783 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
784 num_queue_families++;
785
786 if (pQueueFamilyProperties == NULL) {
787 *pCount = num_queue_families;
788 return;
789 }
790
791 if (!*pCount)
792 return;
793
794 idx = 0;
795 if (*pCount >= 1) {
796 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
797 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
798 VK_QUEUE_COMPUTE_BIT |
799 VK_QUEUE_TRANSFER_BIT |
800 VK_QUEUE_SPARSE_BINDING_BIT,
801 .queueCount = 1,
802 .timestampValidBits = 64,
803 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
804 };
805 idx++;
806 }
807
808 if (pdevice->rad_info.num_compute_rings > 0 &&
809 pdevice->rad_info.chip_class >= CIK &&
810 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
811 if (*pCount > idx) {
812 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
813 .queueFlags = VK_QUEUE_COMPUTE_BIT |
814 VK_QUEUE_TRANSFER_BIT |
815 VK_QUEUE_SPARSE_BINDING_BIT,
816 .queueCount = pdevice->rad_info.num_compute_rings,
817 .timestampValidBits = 64,
818 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
819 };
820 idx++;
821 }
822 }
823 *pCount = idx;
824 }
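/* Resulting layout: queue family 0 is the universal GFX queue
 * (graphics/compute/transfer/sparse, queueCount 1); on CIK+ parts with
 * compute rings, family 1 exposes num_compute_rings pure compute
 * queues unless RADV_DEBUG=nocompute disabled it.
 */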
825
826 void radv_GetPhysicalDeviceQueueFamilyProperties(
827 VkPhysicalDevice physicalDevice,
828 uint32_t* pCount,
829 VkQueueFamilyProperties* pQueueFamilyProperties)
830 {
831 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
832 if (!pQueueFamilyProperties) {
833 		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
834 		return;
835 }
836 VkQueueFamilyProperties *properties[] = {
837 pQueueFamilyProperties + 0,
838 pQueueFamilyProperties + 1,
839 pQueueFamilyProperties + 2,
840 };
841 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
842 assert(*pCount <= 3);
843 }
844
845 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
846 VkPhysicalDevice physicalDevice,
847 uint32_t* pCount,
848 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
849 {
850 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
851 if (!pQueueFamilyProperties) {
852 		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
853 		return;
854 }
855 VkQueueFamilyProperties *properties[] = {
856 &pQueueFamilyProperties[0].queueFamilyProperties,
857 &pQueueFamilyProperties[1].queueFamilyProperties,
858 &pQueueFamilyProperties[2].queueFamilyProperties,
859 };
860 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
861 assert(*pCount <= 3);
862 }
863
864 void radv_GetPhysicalDeviceMemoryProperties(
865 VkPhysicalDevice physicalDevice,
866 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
867 {
868 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
869
870 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
871
872 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
873 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
874 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
875 .heapIndex = RADV_MEM_HEAP_VRAM,
876 };
877 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
878 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
879 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
880 .heapIndex = RADV_MEM_HEAP_GTT,
881 };
882 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
883 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
884 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
885 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
886 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
887 };
888 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
889 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
890 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
891 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
892 .heapIndex = RADV_MEM_HEAP_GTT,
893 };
894
895 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
896
897 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
898 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
899 .size = physical_device->rad_info.vram_size -
900 physical_device->rad_info.vram_vis_size,
901 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
902 };
903 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
904 .size = physical_device->rad_info.vram_vis_size,
905 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
906 };
907 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
908 .size = physical_device->rad_info.gart_size,
909 .flags = 0,
910 };
911 }
912
913 void radv_GetPhysicalDeviceMemoryProperties2KHR(
914 VkPhysicalDevice physicalDevice,
915 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
916 {
917 	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
918 	                                       &pMemoryProperties->memoryProperties);
919 }
920
921 static VkResult
922 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
923 int queue_family_index, int idx)
924 {
925 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
926 queue->device = device;
927 queue->queue_family_index = queue_family_index;
928 queue->queue_idx = idx;
929
930 queue->hw_ctx = device->ws->ctx_create(device->ws);
931 if (!queue->hw_ctx)
932 return VK_ERROR_OUT_OF_HOST_MEMORY;
933
934 return VK_SUCCESS;
935 }
936
937 static void
938 radv_queue_finish(struct radv_queue *queue)
939 {
940 if (queue->hw_ctx)
941 queue->device->ws->ctx_destroy(queue->hw_ctx);
942
943 if (queue->initial_preamble_cs)
944 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
945 if (queue->continue_preamble_cs)
946 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
947 if (queue->descriptor_bo)
948 queue->device->ws->buffer_destroy(queue->descriptor_bo);
949 if (queue->scratch_bo)
950 queue->device->ws->buffer_destroy(queue->scratch_bo);
951 if (queue->esgs_ring_bo)
952 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
953 if (queue->gsvs_ring_bo)
954 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
955 if (queue->tess_factor_ring_bo)
956 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
957 if (queue->tess_offchip_ring_bo)
958 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
959 if (queue->compute_scratch_bo)
960 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
961 }
962
963 static void
964 radv_device_init_gs_info(struct radv_device *device)
965 {
966 switch (device->physical_device->rad_info.family) {
967 case CHIP_OLAND:
968 case CHIP_HAINAN:
969 case CHIP_KAVERI:
970 case CHIP_KABINI:
971 case CHIP_MULLINS:
972 case CHIP_ICELAND:
973 case CHIP_CARRIZO:
974 case CHIP_STONEY:
975 device->gs_table_depth = 16;
976 return;
977 case CHIP_TAHITI:
978 case CHIP_PITCAIRN:
979 case CHIP_VERDE:
980 case CHIP_BONAIRE:
981 case CHIP_HAWAII:
982 case CHIP_TONGA:
983 case CHIP_FIJI:
984 case CHIP_POLARIS10:
985 case CHIP_POLARIS11:
986 case CHIP_POLARIS12:
987 case CHIP_VEGA10:
988 case CHIP_RAVEN:
989 device->gs_table_depth = 32;
990 return;
991 default:
992 unreachable("unknown GPU");
993 }
994 }
995
996 VkResult radv_CreateDevice(
997 VkPhysicalDevice physicalDevice,
998 const VkDeviceCreateInfo* pCreateInfo,
999 const VkAllocationCallbacks* pAllocator,
1000 VkDevice* pDevice)
1001 {
1002 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1003 VkResult result;
1004 struct radv_device *device;
1005
1006 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1007 if (!is_extension_enabled(physical_device->extensions.ext_array,
1008 physical_device->extensions.num_ext,
1009 pCreateInfo->ppEnabledExtensionNames[i]))
1010 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1011 }
1012
1013 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1014 sizeof(*device), 8,
1015 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1016 if (!device)
1017 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1018
1019 memset(device, 0, sizeof(*device));
1020
1021 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1022 device->instance = physical_device->instance;
1023 device->physical_device = physical_device;
1024
1025 device->debug_flags = device->instance->debug_flags;
1026
1027 device->ws = physical_device->ws;
1028 if (pAllocator)
1029 device->alloc = *pAllocator;
1030 else
1031 device->alloc = physical_device->instance->alloc;
1032
1033 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1034 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1035 uint32_t qfi = queue_create->queueFamilyIndex;
1036
1037 device->queues[qfi] = vk_alloc(&device->alloc,
1038 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1039 if (!device->queues[qfi]) {
1040 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1041 goto fail;
1042 }
1043
1044 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1045
1046 device->queue_count[qfi] = queue_create->queueCount;
1047
1048 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1049 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1050 if (result != VK_SUCCESS)
1051 goto fail;
1052 }
1053 }
1054
1055 #if HAVE_LLVM < 0x0400
1056 device->llvm_supports_spill = false;
1057 #else
1058 device->llvm_supports_spill = true;
1059 #endif
1060
1061 /* The maximum number of scratch waves. Scratch space isn't divided
1062 * evenly between CUs. The number is only a function of the number of CUs.
1063 * We can decrease the constant to decrease the scratch buffer size.
1064 *
1065 * sctx->scratch_waves must be >= the maximum posible size of
1066 * 1 threadgroup, so that the hw doesn't hang from being unable
1067 * to start any.
1068 *
1069 * The recommended value is 4 per CU at most. Higher numbers don't
1070 * bring much benefit, but they still occupy chip resources (think
1071 * async compute). I've seen ~2% performance difference between 4 and 32.
1072 */
1073 uint32_t max_threads_per_block = 2048;
1074 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1075 max_threads_per_block / 64);
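	/* E.g. a 16-CU part gets 32 * 16 = 512 scratch waves; the second
	 * MAX2 argument (2048 / 64 = 32 waves) is just a floor guaranteeing
	 * room for one maximally sized threadgroup.
	 */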
1076
1077 radv_device_init_gs_info(device);
1078
1079 device->tess_offchip_block_dw_size =
1080 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1081 device->has_distributed_tess =
1082 device->physical_device->rad_info.chip_class >= VI &&
1083 device->physical_device->rad_info.max_se >= 2;
1084
1085 result = radv_device_init_meta(device);
1086 if (result != VK_SUCCESS)
1087 goto fail;
1088
1089 radv_device_init_msaa(device);
1090
1091 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1092 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1093 switch (family) {
1094 case RADV_QUEUE_GENERAL:
1095 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1096 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1097 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1098 break;
1099 case RADV_QUEUE_COMPUTE:
1100 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1101 radeon_emit(device->empty_cs[family], 0);
1102 break;
1103 }
1104 device->ws->cs_finalize(device->empty_cs[family]);
1105
1106 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1107 switch (family) {
1108 case RADV_QUEUE_GENERAL:
1109 case RADV_QUEUE_COMPUTE:
1110 si_cs_emit_cache_flush(device->flush_cs[family],
1111 device->physical_device->rad_info.chip_class,
1112 NULL, 0,
1113 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1114 RADV_CMD_FLAG_INV_ICACHE |
1115 RADV_CMD_FLAG_INV_SMEM_L1 |
1116 RADV_CMD_FLAG_INV_VMEM_L1 |
1117 RADV_CMD_FLAG_INV_GLOBAL_L2);
1118 break;
1119 }
1120 device->ws->cs_finalize(device->flush_cs[family]);
1121
1122 device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
1123 switch (family) {
1124 case RADV_QUEUE_GENERAL:
1125 case RADV_QUEUE_COMPUTE:
1126 si_cs_emit_cache_flush(device->flush_shader_cs[family],
1127 device->physical_device->rad_info.chip_class,
1128 NULL, 0,
1129 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1130 		                       (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1131 		                       RADV_CMD_FLAG_INV_ICACHE |
1132 		                       RADV_CMD_FLAG_INV_SMEM_L1 |
1133 		                       RADV_CMD_FLAG_INV_VMEM_L1 |
1134 		                       RADV_CMD_FLAG_INV_GLOBAL_L2);
1135 break;
1136 }
1137 device->ws->cs_finalize(device->flush_shader_cs[family]);
1138 }
1139
1140 if (getenv("RADV_TRACE_FILE")) {
1141 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1142 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1143 if (!device->trace_bo)
1144 goto fail;
1145
1146 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1147 if (!device->trace_id_ptr)
1148 goto fail;
1149 }
1150
1151 if (device->physical_device->rad_info.chip_class >= CIK)
1152 cik_create_gfx_config(device);
1153
1154 VkPipelineCacheCreateInfo ci;
1155 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1156 ci.pNext = NULL;
1157 ci.flags = 0;
1158 ci.pInitialData = NULL;
1159 ci.initialDataSize = 0;
1160 VkPipelineCache pc;
1161 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1162 &ci, NULL, &pc);
1163 if (result != VK_SUCCESS)
1164 goto fail;
1165
1166 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1167
1168 *pDevice = radv_device_to_handle(device);
1169 return VK_SUCCESS;
1170
1171 fail:
1172 if (device->trace_bo)
1173 device->ws->buffer_destroy(device->trace_bo);
1174
1175 if (device->gfx_init)
1176 device->ws->buffer_destroy(device->gfx_init);
1177
1178 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1179 for (unsigned q = 0; q < device->queue_count[i]; q++)
1180 radv_queue_finish(&device->queues[i][q]);
1181 if (device->queue_count[i])
1182 vk_free(&device->alloc, device->queues[i]);
1183 }
1184
1185 vk_free(&device->alloc, device);
1186 return result;
1187 }
1188
1189 void radv_DestroyDevice(
1190 VkDevice _device,
1191 const VkAllocationCallbacks* pAllocator)
1192 {
1193 RADV_FROM_HANDLE(radv_device, device, _device);
1194
1195 if (!device)
1196 return;
1197
1198 if (device->trace_bo)
1199 device->ws->buffer_destroy(device->trace_bo);
1200
1201 if (device->gfx_init)
1202 device->ws->buffer_destroy(device->gfx_init);
1203
1204 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1205 for (unsigned q = 0; q < device->queue_count[i]; q++)
1206 radv_queue_finish(&device->queues[i][q]);
1207 if (device->queue_count[i])
1208 vk_free(&device->alloc, device->queues[i]);
1209 if (device->empty_cs[i])
1210 device->ws->cs_destroy(device->empty_cs[i]);
1211 if (device->flush_cs[i])
1212 device->ws->cs_destroy(device->flush_cs[i]);
1213 if (device->flush_shader_cs[i])
1214 device->ws->cs_destroy(device->flush_shader_cs[i]);
1215 }
1216 radv_device_finish_meta(device);
1217
1218 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1219 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1220
1221 vk_free(&device->alloc, device);
1222 }
1223
1224 VkResult radv_EnumerateInstanceExtensionProperties(
1225 const char* pLayerName,
1226 uint32_t* pPropertyCount,
1227 VkExtensionProperties* pProperties)
1228 {
1229 if (pProperties == NULL) {
1230 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1231 return VK_SUCCESS;
1232 }
1233
1234 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1235 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1236
1237 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1238 return VK_INCOMPLETE;
1239
1240 return VK_SUCCESS;
1241 }
1242
1243 VkResult radv_EnumerateDeviceExtensionProperties(
1244 VkPhysicalDevice physicalDevice,
1245 const char* pLayerName,
1246 uint32_t* pPropertyCount,
1247 VkExtensionProperties* pProperties)
1248 {
1249 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1250
1251 if (pProperties == NULL) {
1252 *pPropertyCount = pdevice->extensions.num_ext;
1253 return VK_SUCCESS;
1254 }
1255
1256 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1257 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1258
1259 if (*pPropertyCount < pdevice->extensions.num_ext)
1260 return VK_INCOMPLETE;
1261
1262 return VK_SUCCESS;
1263 }
1264
1265 VkResult radv_EnumerateInstanceLayerProperties(
1266 uint32_t* pPropertyCount,
1267 VkLayerProperties* pProperties)
1268 {
1269 if (pProperties == NULL) {
1270 *pPropertyCount = 0;
1271 return VK_SUCCESS;
1272 }
1273
1274 /* None supported at this time */
1275 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1276 }
1277
1278 VkResult radv_EnumerateDeviceLayerProperties(
1279 VkPhysicalDevice physicalDevice,
1280 uint32_t* pPropertyCount,
1281 VkLayerProperties* pProperties)
1282 {
1283 if (pProperties == NULL) {
1284 *pPropertyCount = 0;
1285 return VK_SUCCESS;
1286 }
1287
1288 /* None supported at this time */
1289 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1290 }
1291
1292 void radv_GetDeviceQueue(
1293 VkDevice _device,
1294 uint32_t queueFamilyIndex,
1295 uint32_t queueIndex,
1296 VkQueue* pQueue)
1297 {
1298 RADV_FROM_HANDLE(radv_device, device, _device);
1299
1300 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1301 }
1302
1303 static void radv_dump_trace(struct radv_device *device,
1304 struct radeon_winsys_cs *cs)
1305 {
1306 const char *filename = getenv("RADV_TRACE_FILE");
1307 FILE *f = fopen(filename, "w");
1308 if (!f) {
1309 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1310 return;
1311 }
1312
1313 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1314 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1315 fclose(f);
1316 }
1317
1318 static void
1319 fill_geom_tess_rings(struct radv_queue *queue,
1320 uint32_t *map,
1321 bool add_sample_positions,
1322 uint32_t esgs_ring_size,
1323 struct radeon_winsys_bo *esgs_ring_bo,
1324 uint32_t gsvs_ring_size,
1325 struct radeon_winsys_bo *gsvs_ring_bo,
1326 uint32_t tess_factor_ring_size,
1327 struct radeon_winsys_bo *tess_factor_ring_bo,
1328 uint32_t tess_offchip_ring_size,
1329 struct radeon_winsys_bo *tess_offchip_ring_bo)
1330 {
1331 uint64_t esgs_va = 0, gsvs_va = 0;
1332 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1333 uint32_t *desc = &map[4];
1334
1335 if (esgs_ring_bo)
1336 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1337 if (gsvs_ring_bo)
1338 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1339 if (tess_factor_ring_bo)
1340 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1341 if (tess_offchip_ring_bo)
1342 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1343
1344 	/* ES entry for ES->GS ring: stride 0, num records = size,
1345 	   add tid, swizzle enabled, element size 4, index stride 64 */
1346 desc[0] = esgs_va;
1347 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1348 S_008F04_STRIDE(0) |
1349 S_008F04_SWIZZLE_ENABLE(true);
1350 desc[2] = esgs_ring_size;
1351 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1352 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1353 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1354 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1355 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1356 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1357 S_008F0C_ELEMENT_SIZE(1) |
1358 S_008F0C_INDEX_STRIDE(3) |
1359 S_008F0C_ADD_TID_ENABLE(true);
1360
1361 desc += 4;
1362 /* GS entry for ES->GS ring */
1363 	/* stride 0, num records = size, element size 0,
1364 	   index stride 0 */
1365 desc[0] = esgs_va;
1366 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1367 S_008F04_STRIDE(0) |
1368 S_008F04_SWIZZLE_ENABLE(false);
1369 desc[2] = esgs_ring_size;
1370 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1371 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1372 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1373 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1374 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1375 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1376 S_008F0C_ELEMENT_SIZE(0) |
1377 S_008F0C_INDEX_STRIDE(0) |
1378 S_008F0C_ADD_TID_ENABLE(false);
1379
1380 desc += 4;
1381 /* VS entry for GS->VS ring */
1382 	/* stride 0, num records = size, element size 0,
1383 	   index stride 0 */
1384 desc[0] = gsvs_va;
1385 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1386 S_008F04_STRIDE(0) |
1387 S_008F04_SWIZZLE_ENABLE(false);
1388 desc[2] = gsvs_ring_size;
1389 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1390 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1391 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1392 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1393 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1394 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1395 S_008F0C_ELEMENT_SIZE(0) |
1396 S_008F0C_INDEX_STRIDE(0) |
1397 S_008F0C_ADD_TID_ENABLE(false);
1398 desc += 4;
1399
1400 	/* stride gsvs_itemsize, num records 64,
1401 	   element size 4, index stride 16 */
1402 /* shader will patch stride and desc[2] */
1403 desc[0] = gsvs_va;
1404 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1405 S_008F04_STRIDE(0) |
1406 S_008F04_SWIZZLE_ENABLE(true);
1407 desc[2] = 0;
1408 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1409 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1410 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1411 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1412 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1413 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1414 S_008F0C_ELEMENT_SIZE(1) |
1415 S_008F0C_INDEX_STRIDE(1) |
1416 S_008F0C_ADD_TID_ENABLE(true);
1417 desc += 4;
1418
1419 desc[0] = tess_factor_va;
1420 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1421 S_008F04_STRIDE(0) |
1422 S_008F04_SWIZZLE_ENABLE(false);
1423 desc[2] = tess_factor_ring_size;
1424 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1425 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1426 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1427 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1428 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1429 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1430 S_008F0C_ELEMENT_SIZE(0) |
1431 S_008F0C_INDEX_STRIDE(0) |
1432 S_008F0C_ADD_TID_ENABLE(false);
1433 desc += 4;
1434
1435 desc[0] = tess_offchip_va;
1436 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1437 S_008F04_STRIDE(0) |
1438 S_008F04_SWIZZLE_ENABLE(false);
1439 desc[2] = tess_offchip_ring_size;
1440 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1441 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1442 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1443 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1444 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1445 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1446 S_008F0C_ELEMENT_SIZE(0) |
1447 S_008F0C_INDEX_STRIDE(0) |
1448 S_008F0C_ADD_TID_ENABLE(false);
1449 desc += 4;
1450
1451 	/* add sample positions after all rings, but only when they were
	 * requested; otherwise the descriptor BO was sized without them */
	if (add_sample_positions) {
1452 		memcpy(desc, queue->device->sample_locations_1x, 8);
1453 		desc += 2;
1454 		memcpy(desc, queue->device->sample_locations_2x, 16);
1455 		desc += 4;
1456 		memcpy(desc, queue->device->sample_locations_4x, 32);
1457 		desc += 8;
1458 		memcpy(desc, queue->device->sample_locations_8x, 64);
1459 		desc += 16;
1460 		memcpy(desc, queue->device->sample_locations_16x, 128);
	}
1461 }
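/* Layout written above, starting at &map[4] (map[0..1] hold the
 * scratch rsrc, map[2..3] are padding): six 4-dword buffer descriptors
 * (the ES and GS views of the ES->GS ring, the VS and GS views of the
 * GS->VS ring, then the tess factor and tess offchip rings), followed,
 * when requested, by the 1x..16x sample position tables
 * (8+16+32+64+128 = 248 bytes).
 */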
1462
1463 static unsigned
1464 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1465 {
1466 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1467 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1468 device->physical_device->rad_info.family != CHIP_STONEY;
1469 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1470 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1471 device->physical_device->rad_info.max_se;
1472 unsigned offchip_granularity;
1473 unsigned hs_offchip_param;
1474 switch (device->tess_offchip_block_dw_size) {
1475 default:
1476 assert(0);
1477 /* fall through */
1478 case 8192:
1479 offchip_granularity = V_03093C_X_8K_DWORDS;
1480 break;
1481 case 4096:
1482 offchip_granularity = V_03093C_X_4K_DWORDS;
1483 break;
1484 }
1485
1486 switch (device->physical_device->rad_info.chip_class) {
1487 case SI:
1488 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1489 break;
1490 case CIK:
1491 case VI:
1492 case GFX9:
1493 default:
1494 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1495 break;
1496 }
1497
1498 *max_offchip_buffers_p = max_offchip_buffers;
1499 if (device->physical_device->rad_info.chip_class >= CIK) {
1500 if (device->physical_device->rad_info.chip_class >= VI)
1501 --max_offchip_buffers;
1502 hs_offchip_param =
1503 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1504 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1505 } else {
1506 hs_offchip_param =
1507 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1508 }
1509 return hs_offchip_param;
1510 }
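/* Example: a 4-SE VI part supports double buffering, giving
 * 4 * 128 = 512 buffers, clamped to 508; the caller sees 508 through
 * max_offchip_buffers_p while the VI+ register field stores 507, since
 * OFFCHIP_BUFFERING is biased by one there.
 */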
1511
1512 static VkResult
1513 radv_get_preamble_cs(struct radv_queue *queue,
1514 uint32_t scratch_size,
1515 uint32_t compute_scratch_size,
1516 uint32_t esgs_ring_size,
1517 uint32_t gsvs_ring_size,
1518 bool needs_tess_rings,
1519 bool needs_sample_positions,
1520 struct radeon_winsys_cs **initial_preamble_cs,
1521 struct radeon_winsys_cs **continue_preamble_cs)
1522 {
1523 struct radeon_winsys_bo *scratch_bo = NULL;
1524 struct radeon_winsys_bo *descriptor_bo = NULL;
1525 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1526 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1527 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1528 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1529 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1530 struct radeon_winsys_cs *dest_cs[2] = {0};
1531 bool add_tess_rings = false, add_sample_positions = false;
1532 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1533 unsigned max_offchip_buffers;
1534 unsigned hs_offchip_param = 0;
1535 if (!queue->has_tess_rings) {
1536 if (needs_tess_rings)
1537 add_tess_rings = true;
1538 }
1539 if (!queue->has_sample_positions) {
1540 if (needs_sample_positions)
1541 add_sample_positions = true;
1542 }
1543 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1544 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1545 &max_offchip_buffers);
1546 tess_offchip_ring_size = max_offchip_buffers *
1547 queue->device->tess_offchip_block_dw_size * 4;
1548
1549 if (scratch_size <= queue->scratch_size &&
1550 compute_scratch_size <= queue->compute_scratch_size &&
1551 esgs_ring_size <= queue->esgs_ring_size &&
1552 gsvs_ring_size <= queue->gsvs_ring_size &&
1553 !add_tess_rings && !add_sample_positions &&
1554 queue->initial_preamble_cs) {
1555 *initial_preamble_cs = queue->initial_preamble_cs;
1556 *continue_preamble_cs = queue->continue_preamble_cs;
1557 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1558 *continue_preamble_cs = NULL;
1559 return VK_SUCCESS;
1560 }
1561
1562 if (scratch_size > queue->scratch_size) {
1563 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1564 scratch_size,
1565 4096,
1566 RADEON_DOMAIN_VRAM,
1567 RADEON_FLAG_NO_CPU_ACCESS);
1568 if (!scratch_bo)
1569 goto fail;
1570 } else
1571 scratch_bo = queue->scratch_bo;
1572
1573 if (compute_scratch_size > queue->compute_scratch_size) {
1574 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1575 compute_scratch_size,
1576 4096,
1577 RADEON_DOMAIN_VRAM,
1578 RADEON_FLAG_NO_CPU_ACCESS);
1579 if (!compute_scratch_bo)
1580 goto fail;
1581
1582 } else
1583 compute_scratch_bo = queue->compute_scratch_bo;
1584
1585 if (esgs_ring_size > queue->esgs_ring_size) {
1586 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1587 esgs_ring_size,
1588 4096,
1589 RADEON_DOMAIN_VRAM,
1590 RADEON_FLAG_NO_CPU_ACCESS);
1591 if (!esgs_ring_bo)
1592 goto fail;
1593 } else {
1594 esgs_ring_bo = queue->esgs_ring_bo;
1595 esgs_ring_size = queue->esgs_ring_size;
1596 }
1597
1598 if (gsvs_ring_size > queue->gsvs_ring_size) {
1599 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1600 gsvs_ring_size,
1601 4096,
1602 RADEON_DOMAIN_VRAM,
1603 RADEON_FLAG_NO_CPU_ACCESS);
1604 if (!gsvs_ring_bo)
1605 goto fail;
1606 } else {
1607 gsvs_ring_bo = queue->gsvs_ring_bo;
1608 gsvs_ring_size = queue->gsvs_ring_size;
1609 }
1610
1611 if (add_tess_rings) {
1612 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1613 tess_factor_ring_size,
1614 256,
1615 RADEON_DOMAIN_VRAM,
1616 RADEON_FLAG_NO_CPU_ACCESS);
1617 if (!tess_factor_ring_bo)
1618 goto fail;
1619 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1620 tess_offchip_ring_size,
1621 256,
1622 RADEON_DOMAIN_VRAM,
1623 RADEON_FLAG_NO_CPU_ACCESS);
1624 if (!tess_offchip_ring_bo)
1625 goto fail;
1626 } else {
1627 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1628 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1629 }
1630
1631 if (scratch_bo != queue->scratch_bo ||
1632 esgs_ring_bo != queue->esgs_ring_bo ||
1633 gsvs_ring_bo != queue->gsvs_ring_bo ||
1634 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1635 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1636 uint32_t size = 0;
1637 if (gsvs_ring_bo || esgs_ring_bo ||
1638 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1639 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1640 if (add_sample_positions)
1641 				size += 256; /* (1+2+4+8+16) sample positions * 8 bytes = 248 bytes. */
1642 }
1643 else if (scratch_bo)
1644 size = 8; /* 2 dword */
1645
1646 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1647 size,
1648 4096,
1649 RADEON_DOMAIN_VRAM,
1650 RADEON_FLAG_CPU_ACCESS);
1651 if (!descriptor_bo)
1652 goto fail;
1653 } else
1654 descriptor_bo = queue->descriptor_bo;
1655
1656 for(int i = 0; i < 2; ++i) {
1657 struct radeon_winsys_cs *cs = NULL;
1658 cs = queue->device->ws->cs_create(queue->device->ws,
1659 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1660 if (!cs)
1661 goto fail;
1662
1663 dest_cs[i] = cs;
1664
1665 if (scratch_bo)
1666 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1667
1668 if (esgs_ring_bo)
1669 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1670
1671 if (gsvs_ring_bo)
1672 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1673
1674 if (tess_factor_ring_bo)
1675 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1676
1677 if (tess_offchip_ring_bo)
1678 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1679
1680 if (descriptor_bo)
1681 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1682
1683 if (descriptor_bo != queue->descriptor_bo) {
1684 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1685
1686 if (scratch_bo) {
1687 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1688 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1689 S_008F04_SWIZZLE_ENABLE(1);
1690 map[0] = scratch_va;
1691 map[1] = rsrc1;
1692 }
1693
1694 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1695 add_sample_positions)
1696 fill_geom_tess_rings(queue, map, add_sample_positions,
1697 esgs_ring_size, esgs_ring_bo,
1698 gsvs_ring_size, gsvs_ring_bo,
1699 tess_factor_ring_size, tess_factor_ring_bo,
1700 tess_offchip_ring_size, tess_offchip_ring_bo);
1701
1702 queue->device->ws->buffer_unmap(descriptor_bo);
1703 }
1704
1705 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1706 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1707 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1708 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1709 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1710 }
1711
1712 if (esgs_ring_bo || gsvs_ring_bo) {
1713 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1714 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1715 radeon_emit(cs, esgs_ring_size >> 8);
1716 radeon_emit(cs, gsvs_ring_size >> 8);
1717 } else {
1718 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1719 radeon_emit(cs, esgs_ring_size >> 8);
1720 radeon_emit(cs, gsvs_ring_size >> 8);
1721 }
1722 }
1723
1724 if (tess_factor_ring_bo) {
1725 uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1726 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1727 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1728 S_030938_SIZE(tess_factor_ring_size / 4));
1729 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1730 tf_va >> 8);
1731 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1732 } else {
1733 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1734 S_008988_SIZE(tess_factor_ring_size / 4));
1735 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1736 tf_va >> 8);
1737 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1738 hs_offchip_param);
1739 }
1740 }
1741
1742 if (descriptor_bo) {
1743 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1744 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1745 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1746 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1747 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1748 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1749
1750 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1751
1752 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1753 radeon_set_sh_reg_seq(cs, regs[i], 2);
1754 radeon_emit(cs, va);
1755 radeon_emit(cs, va >> 32);
1756 }
1757 }
1758
1759 if (compute_scratch_bo) {
1760 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1761 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1762 S_008F04_SWIZZLE_ENABLE(1);
1763
1764 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1765
1766 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1767 radeon_emit(cs, scratch_va);
1768 radeon_emit(cs, rsrc1);
1769 }
1770
1771 if (!i) { /* emit the cache flushes only in the initial preamble */
1772 si_cs_emit_cache_flush(cs,
1773 queue->device->physical_device->rad_info.chip_class,
1774 NULL, 0,
1775 queue->queue_family_index == RING_COMPUTE &&
1776 queue->device->physical_device->rad_info.chip_class >= CIK,
1777 RADV_CMD_FLAG_INV_ICACHE |
1778 RADV_CMD_FLAG_INV_SMEM_L1 |
1779 RADV_CMD_FLAG_INV_VMEM_L1 |
1780 RADV_CMD_FLAG_INV_GLOBAL_L2);
1781 }
1782
1783 if (!queue->device->ws->cs_finalize(cs))
1784 goto fail;
1785 }
1786
1787 if (queue->initial_preamble_cs)
1788 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1789
1790 if (queue->continue_preamble_cs)
1791 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1792
1793 queue->initial_preamble_cs = dest_cs[0];
1794 queue->continue_preamble_cs = dest_cs[1];
1795
1796 if (scratch_bo != queue->scratch_bo) {
1797 if (queue->scratch_bo)
1798 queue->device->ws->buffer_destroy(queue->scratch_bo);
1799 queue->scratch_bo = scratch_bo;
1800 queue->scratch_size = scratch_size;
1801 }
1802
1803 if (compute_scratch_bo != queue->compute_scratch_bo) {
1804 if (queue->compute_scratch_bo)
1805 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1806 queue->compute_scratch_bo = compute_scratch_bo;
1807 queue->compute_scratch_size = compute_scratch_size;
1808 }
1809
1810 if (esgs_ring_bo != queue->esgs_ring_bo) {
1811 if (queue->esgs_ring_bo)
1812 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1813 queue->esgs_ring_bo = esgs_ring_bo;
1814 queue->esgs_ring_size = esgs_ring_size;
1815 }
1816
1817 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1818 if (queue->gsvs_ring_bo)
1819 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1820 queue->gsvs_ring_bo = gsvs_ring_bo;
1821 queue->gsvs_ring_size = gsvs_ring_size;
1822 }
1823
1824 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1825 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1826 }
1827
1828 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1829 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1830 queue->has_tess_rings = true;
1831 }
1832
1833 if (descriptor_bo != queue->descriptor_bo) {
1834 if (queue->descriptor_bo)
1835 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1836
1837 queue->descriptor_bo = descriptor_bo;
1838 }
1839
1840 if (add_sample_positions)
1841 queue->has_sample_positions = true;
1842
1843 *initial_preamble_cs = queue->initial_preamble_cs;
1844 *continue_preamble_cs = queue->continue_preamble_cs;
1845 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1846 *continue_preamble_cs = NULL;
1847 return VK_SUCCESS;
1848 fail:
1849 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1850 if (dest_cs[i])
1851 queue->device->ws->cs_destroy(dest_cs[i]);
1852 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1853 queue->device->ws->buffer_destroy(descriptor_bo);
1854 if (scratch_bo && scratch_bo != queue->scratch_bo)
1855 queue->device->ws->buffer_destroy(scratch_bo);
1856 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1857 queue->device->ws->buffer_destroy(compute_scratch_bo);
1858 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1859 queue->device->ws->buffer_destroy(esgs_ring_bo);
1860 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1861 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1862 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1863 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1864 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1865 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1866 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1867 }
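
/* Note: radv_get_preamble_cs() above acts as a grow-only cache: scratch and
 * ring BOs are only ever replaced by larger ones, the two preambles are
 * rebuilt only when some BO actually changed, and the queue owns everything
 * handed back, so callers must not destroy the returned command streams.
 *
 * Rough layout of the descriptor BO filled in above (see the size
 * computation and fill_geom_tess_rings()):
 *
 *   dwords 0-1   graphics scratch rsrc (VA low bits + hi/swizzle word)
 *   dwords 2-3   padding
 *   dwords 4-27  six 4-dword ring rsrcs for the ESGS/GSVS/tess rings
 *   +256 bytes   optional sample-positions table
 */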
1868
1869 VkResult radv_QueueSubmit(
1870 VkQueue _queue,
1871 uint32_t submitCount,
1872 const VkSubmitInfo* pSubmits,
1873 VkFence _fence)
1874 {
1875 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1876 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1877 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1878 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1879 int ret;
1880 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1881 uint32_t scratch_size = 0;
1882 uint32_t compute_scratch_size = 0;
1883 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1884 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1885 VkResult result;
1886 bool fence_emitted = false;
1887 bool tess_rings_needed = false;
1888 bool sample_positions_needed = false;
1889
1890 /* Do this first so failing to allocate scratch buffers can't result in
1891 * partially executed submissions. */
1892 for (uint32_t i = 0; i < submitCount; i++) {
1893 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1894 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1895 pSubmits[i].pCommandBuffers[j]);
1896
1897 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1898 compute_scratch_size = MAX2(compute_scratch_size,
1899 cmd_buffer->compute_scratch_size_needed);
1900 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1901 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1902 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1903 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1904 }
1905 }
1906
1907 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1908 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1909 sample_positions_needed,
1910 &initial_preamble_cs, &continue_preamble_cs);
1911 if (result != VK_SUCCESS)
1912 return result;
1913
1914 for (uint32_t i = 0; i < submitCount; i++) {
1915 struct radeon_winsys_cs **cs_array;
1916 bool do_flush = !i || pSubmits[i].pWaitDstStageMask; /* flush for the first submit and whenever semaphore waits are present */
1917 bool can_patch = !do_flush;
1918 uint32_t advance;
1919
1920 if (!pSubmits[i].commandBufferCount) {
1921 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1922 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1923 &queue->device->empty_cs[queue->queue_family_index],
1924 1, NULL, NULL,
1925 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1926 pSubmits[i].waitSemaphoreCount,
1927 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1928 pSubmits[i].signalSemaphoreCount,
1929 false, base_fence);
1930 if (ret) {
1931 radv_loge("failed to submit CS %d\n", i);
1932 abort();
1933 }
1934 fence_emitted = true;
1935 }
1936 continue;
1937 }
1938
1939 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1940 (pSubmits[i].commandBufferCount + do_flush));
1941 if (!cs_array) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1942 if (do_flush)
1943 cs_array[0] = pSubmits[i].waitSemaphoreCount ?
1944 queue->device->flush_shader_cs[queue->queue_family_index] :
1945 queue->device->flush_cs[queue->queue_family_index];
1946
1947 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1948 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1949 pSubmits[i].pCommandBuffers[j]);
1950 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1951
1952 cs_array[j + do_flush] = cmd_buffer->cs;
1953 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1954 can_patch = false;
1955 }
1956
1957 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1958 advance = MIN2(max_cs_submission,
1959 pSubmits[i].commandBufferCount + do_flush - j);
1960 bool first = j == 0;
1961 bool last = j + advance == pSubmits[i].commandBufferCount + do_flush;
1962 
1963 if (queue->device->trace_bo)
1964 *queue->device->trace_id_ptr = 0;
1965 
1966 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1967 advance, initial_preamble_cs, continue_preamble_cs,
1968 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1969 first ? pSubmits[i].waitSemaphoreCount : 0,
1970 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1971 last ? pSubmits[i].signalSemaphoreCount : 0,
1972 can_patch, base_fence);
1973
1974 if (ret) {
1975 radv_loge("failed to submit CS %d\n", i);
1976 abort();
1977 }
1978 fence_emitted = true;
1979 if (queue->device->trace_bo) {
1980 bool success = queue->device->ws->ctx_wait_idle(
1981 queue->hw_ctx,
1982 radv_queue_family_to_ring(
1983 queue->queue_family_index),
1984 queue->queue_idx);
1985
1986 if (!success) { /* Hang */
1987 radv_dump_trace(queue->device, cs_array[j]);
1988 abort();
1989 }
1990 }
1991 }
1992 free(cs_array);
1993 }
1994
1995 if (fence) {
1996 if (!fence_emitted)
1997 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1998 &queue->device->empty_cs[queue->queue_family_index],
1999 1, NULL, NULL, NULL, 0, NULL, 0,
2000 false, base_fence);
2001
2002 fence->submitted = true;
2003 }
2004
2005 return VK_SUCCESS;
2006 }
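
/* Hypothetical app-side sketch (not driver code) of what the splitting above
 * preserves: with trace_bo set, max_cs_submission is 1, so one VkSubmitInfo
 * may become several winsys submissions, yet the semaphores still behave as
 * the spec requires. Names like acquire_sem/render_sem/cmd_bufs below are
 * placeholders, not identifiers from this driver.
 */
#if 0
VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo submit = {
   .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
   .waitSemaphoreCount = 1,          /* waited on by the first chunk only */
   .pWaitSemaphores = &acquire_sem,
   .pWaitDstStageMask = &wait_stage, /* non-NULL, so a flush CS is prepended */
   .commandBufferCount = n,          /* may be split across cs_submit() calls */
   .pCommandBuffers = cmd_bufs,
   .signalSemaphoreCount = 1,        /* signaled by the last chunk only */
   .pSignalSemaphores = &render_sem,
};
vkQueueSubmit(queue, 1, &submit, fence);
#endif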
2007
2008 VkResult radv_QueueWaitIdle(
2009 VkQueue _queue)
2010 {
2011 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2012
2013 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2014 radv_queue_family_to_ring(queue->queue_family_index),
2015 queue->queue_idx);
2016 return VK_SUCCESS;
2017 }
2018
2019 VkResult radv_DeviceWaitIdle(
2020 VkDevice _device)
2021 {
2022 RADV_FROM_HANDLE(radv_device, device, _device);
2023
2024 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2025 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2026 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2027 }
2028 }
2029 return VK_SUCCESS;
2030 }
2031
2032 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2033 VkInstance instance,
2034 const char* pName)
2035 {
2036 return radv_lookup_entrypoint(pName);
2037 }
2038
2039 /* The loader wants us to expose a second GetInstanceProcAddr function
2040 * to work around certain LD_PRELOAD issues seen in apps.
2041 */
2042 PUBLIC
2043 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2044 VkInstance instance,
2045 const char* pName);
2046
2047 PUBLIC
2048 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2049 VkInstance instance,
2050 const char* pName)
2051 {
2052 return radv_GetInstanceProcAddr(instance, pName);
2053 }
2054
2055 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2056 VkDevice device,
2057 const char* pName)
2058 {
2059 return radv_lookup_entrypoint(pName);
2060 }
2061
2062 bool radv_get_memory_fd(struct radv_device *device,
2063 struct radv_device_memory *memory,
2064 int *pFD)
2065 {
2066 struct radeon_bo_metadata metadata;
2067
2068 if (memory->image) {
2069 radv_init_metadata(device, memory->image, &metadata);
2070 device->ws->buffer_set_metadata(memory->bo, &metadata);
2071 }
2072
2073 return device->ws->buffer_get_fd(device->ws, memory->bo,
2074 pFD);
2075 }
2076
2077 VkResult radv_AllocateMemory(
2078 VkDevice _device,
2079 const VkMemoryAllocateInfo* pAllocateInfo,
2080 const VkAllocationCallbacks* pAllocator,
2081 VkDeviceMemory* pMem)
2082 {
2083 RADV_FROM_HANDLE(radv_device, device, _device);
2084 struct radv_device_memory *mem;
2085 VkResult result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX; /* default for the import path's goto fail */
2086 enum radeon_bo_domain domain;
2087 uint32_t flags = 0;
2088
2089 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2090
2091 if (pAllocateInfo->allocationSize == 0) {
2092 /* Apparently a zero-sized allocation is allowed; return a NULL handle. */
2093 *pMem = VK_NULL_HANDLE;
2094 return VK_SUCCESS;
2095 }
2096
2097 const VkImportMemoryFdInfoKHX *import_info =
2098 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
2099 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
2100 vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);
2101
2102 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2103 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2104 if (mem == NULL)
2105 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2106
2107 if (dedicate_info) {
2108 mem->image = radv_image_from_handle(dedicate_info->image);
2109 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2110 } else {
2111 mem->image = NULL;
2112 mem->buffer = NULL;
2113 }
2114
2115 if (import_info) {
2116 assert(import_info->handleType ==
2117 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
2118 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2119 NULL, NULL);
2120 if (!mem->bo)
2121 goto fail; /* "result" was preset to the import error above */
2122 mem->type_index = pAllocateInfo->memoryTypeIndex;
2123 goto out_success;
2124 }
2125
2126 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2127 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2128 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2129 domain = RADEON_DOMAIN_GTT;
2130 else
2131 domain = RADEON_DOMAIN_VRAM;
2132
2133 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2134 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2135 else
2136 flags |= RADEON_FLAG_CPU_ACCESS;
2137
2138 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2139 flags |= RADEON_FLAG_GTT_WC;
2140
2141 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
2142 domain, flags);
2143
2144 if (!mem->bo) {
2145 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2146 goto fail;
2147 }
2148 mem->type_index = pAllocateInfo->memoryTypeIndex;
2149 out_success:
2150 *pMem = radv_device_memory_to_handle(mem);
2151
2152 return VK_SUCCESS;
2153
2154 fail:
2155 vk_free2(&device->alloc, pAllocator, mem);
2156
2157 return result;
2158 }
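
/* Recap of the memoryTypeIndex mapping implemented above:
 *
 *   RADV_MEM_TYPE_VRAM              -> RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_CPU_ACCESS
 *   RADV_MEM_TYPE_GTT_WRITE_COMBINE -> RADEON_DOMAIN_GTT,  RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_GTT_WC
 *   RADV_MEM_TYPE_GTT_CACHED        -> RADEON_DOMAIN_GTT,  RADEON_FLAG_CPU_ACCESS
 *   any other type                  -> RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS
 */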
2159
2160 void radv_FreeMemory(
2161 VkDevice _device,
2162 VkDeviceMemory _mem,
2163 const VkAllocationCallbacks* pAllocator)
2164 {
2165 RADV_FROM_HANDLE(radv_device, device, _device);
2166 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2167
2168 if (mem == NULL)
2169 return;
2170
2171 device->ws->buffer_destroy(mem->bo);
2172 mem->bo = NULL;
2173
2174 vk_free2(&device->alloc, pAllocator, mem);
2175 }
2176
2177 VkResult radv_MapMemory(
2178 VkDevice _device,
2179 VkDeviceMemory _memory,
2180 VkDeviceSize offset,
2181 VkDeviceSize size,
2182 VkMemoryMapFlags flags,
2183 void** ppData)
2184 {
2185 RADV_FROM_HANDLE(radv_device, device, _device);
2186 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2187
2188 if (mem == NULL) {
2189 *ppData = NULL;
2190 return VK_SUCCESS;
2191 }
2192
2193 *ppData = device->ws->buffer_map(mem->bo);
2194 if (*ppData) {
2195 *ppData = (char *)*ppData + offset; /* avoid void* arithmetic, a GCC extension */
2196 return VK_SUCCESS;
2197 }
2198
2199 return VK_ERROR_MEMORY_MAP_FAILED;
2200 }
2201
2202 void radv_UnmapMemory(
2203 VkDevice _device,
2204 VkDeviceMemory _memory)
2205 {
2206 RADV_FROM_HANDLE(radv_device, device, _device);
2207 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2208
2209 if (mem == NULL)
2210 return;
2211
2212 device->ws->buffer_unmap(mem->bo);
2213 }
2214
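/* The two entry points below are deliberate no-ops: every host-visible
 * memory type this driver exposes is also HOST_COHERENT (CPU mappings of
 * GTT/VRAM are coherent on this hardware), so there is nothing to flush or
 * invalidate.
 */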
2215 VkResult radv_FlushMappedMemoryRanges(
2216 VkDevice _device,
2217 uint32_t memoryRangeCount,
2218 const VkMappedMemoryRange* pMemoryRanges)
2219 {
2220 return VK_SUCCESS;
2221 }
2222
2223 VkResult radv_InvalidateMappedMemoryRanges(
2224 VkDevice _device,
2225 uint32_t memoryRangeCount,
2226 const VkMappedMemoryRange* pMemoryRanges)
2227 {
2228 return VK_SUCCESS;
2229 }
2230
2231 void radv_GetBufferMemoryRequirements(
2232 VkDevice device,
2233 VkBuffer _buffer,
2234 VkMemoryRequirements* pMemoryRequirements)
2235 {
2236 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2237
2238 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2239
2240 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2241 pMemoryRequirements->alignment = 4096;
2242 else
2243 pMemoryRequirements->alignment = 16;
2244
2245 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2246 }
2247
2248 void radv_GetImageMemoryRequirements(
2249 VkDevice device,
2250 VkImage _image,
2251 VkMemoryRequirements* pMemoryRequirements)
2252 {
2253 RADV_FROM_HANDLE(radv_image, image, _image);
2254
2255 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2256
2257 pMemoryRequirements->size = image->size;
2258 pMemoryRequirements->alignment = image->alignment;
2259 }
2260
2261 void radv_GetImageSparseMemoryRequirements(
2262 VkDevice device,
2263 VkImage image,
2264 uint32_t* pSparseMemoryRequirementCount,
2265 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2266 {
2267 stub();
2268 }
2269
2270 void radv_GetDeviceMemoryCommitment(
2271 VkDevice device,
2272 VkDeviceMemory memory,
2273 VkDeviceSize* pCommittedMemoryInBytes)
2274 {
2275 *pCommittedMemoryInBytes = 0;
2276 }
2277
2278 VkResult radv_BindBufferMemory(
2279 VkDevice device,
2280 VkBuffer _buffer,
2281 VkDeviceMemory _memory,
2282 VkDeviceSize memoryOffset)
2283 {
2284 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2285 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2286
2287 if (mem) {
2288 buffer->bo = mem->bo;
2289 buffer->offset = memoryOffset;
2290 } else {
2291 buffer->bo = NULL;
2292 buffer->offset = 0;
2293 }
2294
2295 return VK_SUCCESS;
2296 }
2297
2298 VkResult radv_BindImageMemory(
2299 VkDevice device,
2300 VkImage _image,
2301 VkDeviceMemory _memory,
2302 VkDeviceSize memoryOffset)
2303 {
2304 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2305 RADV_FROM_HANDLE(radv_image, image, _image);
2306
2307 if (mem) {
2308 image->bo = mem->bo;
2309 image->offset = memoryOffset;
2310 } else {
2311 image->bo = NULL;
2312 image->offset = 0;
2313 }
2314
2315 return VK_SUCCESS;
2316 }
2317
2318
2319 static void
2320 radv_sparse_buffer_bind_memory(struct radv_device *device,
2321 const VkSparseBufferMemoryBindInfo *bind)
2322 {
2323 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2324
2325 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2326 struct radv_device_memory *mem = NULL;
2327
2328 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2329 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2330
2331 device->ws->buffer_virtual_bind(buffer->bo,
2332 bind->pBinds[i].resourceOffset,
2333 bind->pBinds[i].size,
2334 mem ? mem->bo : NULL,
2335 bind->pBinds[i].memoryOffset);
2336 }
2337 }
2338
2339 static void
2340 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2341 const VkSparseImageOpaqueMemoryBindInfo *bind)
2342 {
2343 RADV_FROM_HANDLE(radv_image, image, bind->image);
2344
2345 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2346 struct radv_device_memory *mem = NULL;
2347
2348 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2349 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2350
2351 device->ws->buffer_virtual_bind(image->bo,
2352 bind->pBinds[i].resourceOffset,
2353 bind->pBinds[i].size,
2354 mem ? mem->bo : NULL,
2355 bind->pBinds[i].memoryOffset);
2356 }
2357 }
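
/* Both sparse helpers above reduce to ws->buffer_virtual_bind(); a bind
 * entry whose memory is VK_NULL_HANDLE passes a NULL bo and thereby unbinds
 * the range again.
 */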
2358
2359 VkResult radv_QueueBindSparse(
2360 VkQueue _queue,
2361 uint32_t bindInfoCount,
2362 const VkBindSparseInfo* pBindInfo,
2363 VkFence _fence)
2364 {
2365 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2366 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2367 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2368 bool fence_emitted = false;
2369
2370 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2371 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2372 radv_sparse_buffer_bind_memory(queue->device,
2373 pBindInfo[i].pBufferBinds + j);
2374 }
2375
2376 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2377 radv_sparse_image_opaque_bind_memory(queue->device,
2378 pBindInfo[i].pImageOpaqueBinds + j);
2379 }
2380
2381 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2382 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2383 &queue->device->empty_cs[queue->queue_family_index],
2384 1, NULL, NULL,
2385 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2386 pBindInfo[i].waitSemaphoreCount,
2387 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2388 pBindInfo[i].signalSemaphoreCount,
2389 false, base_fence);
2390 fence_emitted = true;
2391 if (fence)
2392 fence->submitted = true;
2393 }
2394 }
2395
2396 if (fence && !fence_emitted) {
2397 fence->signalled = true;
2398 }
2399
2400 return VK_SUCCESS;
2401 }
2402
2403 VkResult radv_CreateFence(
2404 VkDevice _device,
2405 const VkFenceCreateInfo* pCreateInfo,
2406 const VkAllocationCallbacks* pAllocator,
2407 VkFence* pFence)
2408 {
2409 RADV_FROM_HANDLE(radv_device, device, _device);
2410 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2411 sizeof(*fence), 8,
2412 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2413
2414 if (!fence)
2415 return VK_ERROR_OUT_OF_HOST_MEMORY;
2416
2417 memset(fence, 0, sizeof(*fence));
2418 fence->submitted = false;
2419 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2420 fence->fence = device->ws->create_fence();
2421 if (!fence->fence) {
2422 vk_free2(&device->alloc, pAllocator, fence);
2423 return VK_ERROR_OUT_OF_HOST_MEMORY;
2424 }
2425
2426 *pFence = radv_fence_to_handle(fence);
2427
2428 return VK_SUCCESS;
2429 }
2430
2431 void radv_DestroyFence(
2432 VkDevice _device,
2433 VkFence _fence,
2434 const VkAllocationCallbacks* pAllocator)
2435 {
2436 RADV_FROM_HANDLE(radv_device, device, _device);
2437 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2438
2439 if (!fence)
2440 return;
2441 device->ws->destroy_fence(fence->fence);
2442 vk_free2(&device->alloc, pAllocator, fence);
2443 }
2444
2445 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2446 {
2447 uint64_t current_time;
2448 struct timespec tv;
2449
2450 clock_gettime(CLOCK_MONOTONIC, &tv);
2451 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2452
2453 timeout = MIN2(UINT64_MAX - current_time, timeout); /* avoid overflow in the addition below */
2454
2455 return current_time + timeout;
2456 }
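
/* Worked example of the clamp above: for timeout = UINT64_MAX ("wait
 * forever"), MIN2() yields UINT64_MAX - current_time, so the sum saturates
 * at UINT64_MAX instead of wrapping around to a tiny absolute deadline.
 */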
2457
2458 VkResult radv_WaitForFences(
2459 VkDevice _device,
2460 uint32_t fenceCount,
2461 const VkFence* pFences,
2462 VkBool32 waitAll,
2463 uint64_t timeout)
2464 {
2465 RADV_FROM_HANDLE(radv_device, device, _device);
2466 timeout = radv_get_absolute_timeout(timeout);
2467
2468 if (!waitAll && fenceCount > 1) {
2469 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2470 }
2471
2472 for (uint32_t i = 0; i < fenceCount; ++i) {
2473 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2474 bool success = false;
2475 
2476 if (fence->signalled)
2477 continue;
2478 
2479 if (!fence->submitted)
2480 return VK_TIMEOUT;
2481 
2482 success = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2483 if (!success) /* fence_wait returns true once the fence has signaled */
2484 return VK_TIMEOUT;
2485
2486 fence->signalled = true;
2487 }
2488
2489 return VK_SUCCESS;
2490 }
2491
2492 VkResult radv_ResetFences(VkDevice device,
2493 uint32_t fenceCount,
2494 const VkFence *pFences)
2495 {
2496 for (unsigned i = 0; i < fenceCount; ++i) {
2497 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2498 fence->submitted = fence->signalled = false;
2499 }
2500
2501 return VK_SUCCESS;
2502 }
2503
2504 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2505 {
2506 RADV_FROM_HANDLE(radv_device, device, _device);
2507 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2508
2509 if (fence->signalled)
2510 return VK_SUCCESS;
2511 if (!fence->submitted)
2512 return VK_NOT_READY;
2513
2514 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2515 return VK_NOT_READY;
2516
2517 return VK_SUCCESS;
2518 }
2519
2520
2521 // Queue semaphore functions
2522 /* Queue semaphore functions */
2523 VkResult radv_CreateSemaphore(
2524 VkDevice _device,
2525 const VkSemaphoreCreateInfo* pCreateInfo,
2526 const VkAllocationCallbacks* pAllocator,
2527 VkSemaphore* pSemaphore)
2528 {
2529 RADV_FROM_HANDLE(radv_device, device, _device);
2530 struct radeon_winsys_sem *sem;
2531
2532 sem = device->ws->create_sem(device->ws);
2533 if (!sem)
2534 return VK_ERROR_OUT_OF_HOST_MEMORY;
2535
2536 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2537 return VK_SUCCESS;
2538 }
2539
2540 void radv_DestroySemaphore(
2541 VkDevice _device,
2542 VkSemaphore _semaphore,
2543 const VkAllocationCallbacks* pAllocator)
2544 {
2545 RADV_FROM_HANDLE(radv_device, device, _device);
2546 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2547 if (!_semaphore)
2548 return;
2549
2550 device->ws->destroy_sem(sem);
2551 }
2552
2553 VkResult radv_CreateEvent(
2554 VkDevice _device,
2555 const VkEventCreateInfo* pCreateInfo,
2556 const VkAllocationCallbacks* pAllocator,
2557 VkEvent* pEvent)
2558 {
2559 RADV_FROM_HANDLE(radv_device, device, _device);
2560 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2561 sizeof(*event), 8,
2562 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2563
2564 if (!event)
2565 return VK_ERROR_OUT_OF_HOST_MEMORY;
2566
2567 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2568 RADEON_DOMAIN_GTT,
2569 RADEON_FLAG_CPU_ACCESS);
2570 if (!event->bo) {
2571 vk_free2(&device->alloc, pAllocator, event);
2572 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2573 }
2574
2575 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2576
2577 *pEvent = radv_event_to_handle(event);
2578
2579 return VK_SUCCESS;
2580 }
2581
2582 void radv_DestroyEvent(
2583 VkDevice _device,
2584 VkEvent _event,
2585 const VkAllocationCallbacks* pAllocator)
2586 {
2587 RADV_FROM_HANDLE(radv_device, device, _device);
2588 RADV_FROM_HANDLE(radv_event, event, _event);
2589
2590 if (!event)
2591 return;
2592 device->ws->buffer_destroy(event->bo);
2593 vk_free2(&device->alloc, pAllocator, event);
2594 }
2595
2596 VkResult radv_GetEventStatus(
2597 VkDevice _device,
2598 VkEvent _event)
2599 {
2600 RADV_FROM_HANDLE(radv_event, event, _event);
2601
2602 if (*event->map == 1)
2603 return VK_EVENT_SET;
2604 return VK_EVENT_RESET;
2605 }
2606
2607 VkResult radv_SetEvent(
2608 VkDevice _device,
2609 VkEvent _event)
2610 {
2611 RADV_FROM_HANDLE(radv_event, event, _event);
2612 *event->map = 1;
2613
2614 return VK_SUCCESS;
2615 }
2616
2617 VkResult radv_ResetEvent(
2618 VkDevice _device,
2619 VkEvent _event)
2620 {
2621 RADV_FROM_HANDLE(radv_event, event, _event);
2622 *event->map = 0;
2623
2624 return VK_SUCCESS;
2625 }
2626
2627 VkResult radv_CreateBuffer(
2628 VkDevice _device,
2629 const VkBufferCreateInfo* pCreateInfo,
2630 const VkAllocationCallbacks* pAllocator,
2631 VkBuffer* pBuffer)
2632 {
2633 RADV_FROM_HANDLE(radv_device, device, _device);
2634 struct radv_buffer *buffer;
2635
2636 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2637
2638 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2639 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2640 if (buffer == NULL)
2641 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2642
2643 buffer->size = pCreateInfo->size;
2644 buffer->usage = pCreateInfo->usage;
2645 buffer->bo = NULL;
2646 buffer->offset = 0;
2647 buffer->flags = pCreateInfo->flags;
2648
2649 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2650 buffer->bo = device->ws->buffer_create(device->ws,
2651 align64(buffer->size, 4096),
2652 4096, 0, RADEON_FLAG_VIRTUAL);
2653 if (!buffer->bo) {
2654 vk_free2(&device->alloc, pAllocator, buffer);
2655 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2656 }
2657 }
2658
2659 *pBuffer = radv_buffer_to_handle(buffer);
2660
2661 return VK_SUCCESS;
2662 }
2663
2664 void radv_DestroyBuffer(
2665 VkDevice _device,
2666 VkBuffer _buffer,
2667 const VkAllocationCallbacks* pAllocator)
2668 {
2669 RADV_FROM_HANDLE(radv_device, device, _device);
2670 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2671
2672 if (!buffer)
2673 return;
2674
2675 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2676 device->ws->buffer_destroy(buffer->bo);
2677
2678 vk_free2(&device->alloc, pAllocator, buffer);
2679 }
2680
2681 static inline unsigned
2682 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2683 {
2684 if (stencil)
2685 return image->surface.u.legacy.stencil_tiling_index[level];
2686 else
2687 return image->surface.u.legacy.tiling_index[level];
2688 }
2689
2690 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2691 {
2692 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2693 }
2694
2695 static void
2696 radv_initialise_color_surface(struct radv_device *device,
2697 struct radv_color_buffer_info *cb,
2698 struct radv_image_view *iview)
2699 {
2700 const struct vk_format_description *desc;
2701 unsigned ntype, format, swap, endian;
2702 unsigned blend_clamp = 0, blend_bypass = 0;
2703 uint64_t va;
2704 const struct radeon_surf *surf = &iview->image->surface;
2705
2706 desc = vk_format_description(iview->vk_format);
2707
2708 memset(cb, 0, sizeof(*cb));
2709
2710 /* Intensity is implemented as Red, so treat it that way. */
2711 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2712
2713 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2714
2715 if (device->physical_device->rad_info.chip_class >= GFX9) {
2716 struct gfx9_surf_meta_flags meta;
2717 if (iview->image->dcc_offset)
2718 meta = iview->image->surface.u.gfx9.dcc;
2719 else
2720 meta = iview->image->surface.u.gfx9.cmask;
2721
2722 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2723 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2724 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2725 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2726
2727 va += iview->image->surface.u.gfx9.surf_offset >> 8;
2728 } else {
2729 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2730 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2731
2732 va += level_info->offset;
2733
2734 pitch_tile_max = level_info->nblk_x / 8 - 1;
2735 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2736 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2737
2738 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2739 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2740 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2741
2742 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2743 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2744
2745 if (iview->image->fmask.size) {
2746 if (device->physical_device->rad_info.chip_class >= CIK)
2747 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2748 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2749 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2750 } else {
2751 /* This must be set for fast clear to work without FMASK. */
2752 if (device->physical_device->rad_info.chip_class >= CIK)
2753 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2754 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2755 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2756 }
2757 }
2758
2759 cb->cb_color_base = va >> 8;
2760
2761 /* CMASK variables */
2762 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2763 va += iview->image->cmask.offset;
2764 cb->cb_color_cmask = va >> 8;
2765
2766 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2767 va += iview->image->dcc_offset;
2768 cb->cb_dcc_base = va >> 8;
2769
2770 uint32_t max_slice = radv_surface_layer_count(iview);
2771 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2772 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2773
2774 if (iview->image->info.samples > 1) {
2775 unsigned log_samples = util_logbase2(iview->image->info.samples);
2776
2777 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2778 S_028C74_NUM_FRAGMENTS(log_samples);
2779 }
2780
2781 if (iview->image->fmask.size) {
2782 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2783 cb->cb_color_fmask = va >> 8;
2784 } else {
2785 cb->cb_color_fmask = cb->cb_color_base;
2786 }
2787
2788 ntype = radv_translate_color_numformat(iview->vk_format,
2789 desc,
2790 vk_format_get_first_non_void_channel(iview->vk_format));
2791 format = radv_translate_colorformat(iview->vk_format);
2792 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2793 radv_finishme("Illegal color\n");
2794 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2795 endian = radv_colorformat_endian_swap(format);
2796
2797 /* blend clamp should be set for all NORM/SRGB types */
2798 if (ntype == V_028C70_NUMBER_UNORM ||
2799 ntype == V_028C70_NUMBER_SNORM ||
2800 ntype == V_028C70_NUMBER_SRGB)
2801 blend_clamp = 1;
2802
2803 /* set blend bypass according to docs if SINT/UINT or
2804 8/24 COLOR variants */
2805 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2806 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2807 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2808 blend_clamp = 0;
2809 blend_bypass = 1;
2810 }
2811 #if 0
2812 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2813 (format == V_028C70_COLOR_8 ||
2814 format == V_028C70_COLOR_8_8 ||
2815 format == V_028C70_COLOR_8_8_8_8))
2816 ->color_is_int8 = true;
2817 #endif
2818 cb->cb_color_info = S_028C70_FORMAT(format) |
2819 S_028C70_COMP_SWAP(swap) |
2820 S_028C70_BLEND_CLAMP(blend_clamp) |
2821 S_028C70_BLEND_BYPASS(blend_bypass) |
2822 S_028C70_SIMPLE_FLOAT(1) |
2823 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2824 ntype != V_028C70_NUMBER_SNORM &&
2825 ntype != V_028C70_NUMBER_SRGB &&
2826 format != V_028C70_COLOR_8_24 &&
2827 format != V_028C70_COLOR_24_8) |
2828 S_028C70_NUMBER_TYPE(ntype) |
2829 S_028C70_ENDIAN(endian);
2830 if (iview->image->info.samples > 1 &&
2831 iview->image->fmask.size)
2832 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2833
2834 if (iview->image->cmask.size &&
2835 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2836 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2837
2838 if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
2839 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2840
2841 if (device->physical_device->rad_info.chip_class >= VI) {
2842 unsigned max_uncompressed_block_size = 2;
2843 if (iview->image->info.samples > 1) {
2844 if (iview->image->surface.bpe == 1)
2845 max_uncompressed_block_size = 0;
2846 else if (iview->image->surface.bpe == 2)
2847 max_uncompressed_block_size = 1;
2848 }
2849
2850 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2851 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2852 }
2853
2854 /* This must be set for fast clear to work without FMASK. */
2855 if (!iview->image->fmask.size &&
2856 device->physical_device->rad_info.chip_class == SI) {
2857 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
2858 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2859 }
2860
2861 if (device->physical_device->rad_info.chip_class >= GFX9) {
2862 uint32_t max_slice = radv_surface_layer_count(iview);
2863 unsigned mip0_depth = iview->base_layer + max_slice - 1;
2864
2865 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
2866 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
2867 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
2868 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
2869 S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
2870 S_028C68_MAX_MIP(iview->image->info.levels);
2871
2872 cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
2873
2874 }
2875 }
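
/* Example of the CB_COLOR_VIEW packing done above: a view with
 * base_layer = 2 and a layer count of 4 yields SLICE_START = 2 and
 * SLICE_MAX = 5, i.e. an inclusive slice range.
 */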
2876
2877 static void
2878 radv_initialise_ds_surface(struct radv_device *device,
2879 struct radv_ds_buffer_info *ds,
2880 struct radv_image_view *iview)
2881 {
2882 unsigned level = iview->base_mip;
2883 unsigned format, stencil_format;
2884 uint64_t va, s_offs, z_offs;
2885 bool stencil_only = false;
2886 memset(ds, 0, sizeof(*ds));
2887 switch (iview->vk_format) {
2888 case VK_FORMAT_D24_UNORM_S8_UINT:
2889 case VK_FORMAT_X8_D24_UNORM_PACK32:
2890 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2891 ds->offset_scale = 2.0f;
2892 break;
2893 case VK_FORMAT_D16_UNORM:
2894 case VK_FORMAT_D16_UNORM_S8_UINT:
2895 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2896 ds->offset_scale = 4.0f;
2897 break;
2898 case VK_FORMAT_D32_SFLOAT:
2899 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2900 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2901 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2902 ds->offset_scale = 1.0f;
2903 break;
2904 case VK_FORMAT_S8_UINT:
2905 stencil_only = true;
2906 break;
2907 default:
2908 break;
2909 }
2910
2911 format = radv_translate_dbformat(iview->vk_format);
2912 stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
2913 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
2914
2915 uint32_t max_slice = radv_surface_layer_count(iview);
2916 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2917 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2918
2919 ds->db_htile_data_base = 0;
2920 ds->db_htile_surface = 0;
2921
2922 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2923 s_offs = z_offs = va;
2924
2925 if (device->physical_device->rad_info.chip_class >= GFX9) {
2926 assert(iview->image->surface.u.gfx9.surf_offset == 0);
2927 s_offs += iview->image->surface.u.gfx9.stencil_offset;
2928
2929 ds->db_z_info = S_028038_FORMAT(format) |
2930 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
2931 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2932 S_028038_MAXMIP(iview->image->info.levels - 1);
2933 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
2934 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
2935
2936 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
2937 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
2938 ds->db_depth_view |= S_028008_MIPID(level);
2939
2940 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
2941 S_02801C_Y_MAX(iview->image->info.height - 1);
2942
2943 /* Only use HTILE for the first level. */
2944 if (iview->image->surface.htile_size && !level) {
2945 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
2946
2947 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
2948 /* Use all of the htile_buffer for depth if there's no stencil. */
2949 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
2950 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2951 iview->image->htile_offset;
2952 ds->db_htile_data_base = va >> 8;
2953 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
2954 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
2955 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
2956 }
2957 } else {
2958 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
2959
2960 if (stencil_only)
2961 level_info = &iview->image->surface.u.legacy.stencil_level[level];
2962
2963 z_offs += iview->image->surface.u.legacy.level[level].offset;
2964 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
2965
2966 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2967 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2968 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
2969
2970 if (iview->image->info.samples > 1)
2971 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
2972
2973 if (device->physical_device->rad_info.chip_class >= CIK) {
2974 struct radeon_info *info = &device->physical_device->rad_info;
2975 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
2976 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
2977 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
2978 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2979 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2980 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2981
2982 if (stencil_only)
2983 tile_mode = stencil_tile_mode;
2984
2985 ds->db_depth_info |=
2986 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2987 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2988 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2989 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2990 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2991 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2992 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2993 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2994 } else {
2995 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2996 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2997 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2998 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2999 }
3000
3001 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3002 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3003 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3004
3005 if (iview->image->surface.htile_size && !level) {
3006 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3007
3008 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
3009 /* Use all of the htile_buffer for depth if there's no stencil. */
3010 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3011
3012 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
3013 iview->image->htile_offset;
3014 ds->db_htile_data_base = va >> 8;
3015 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3016 }
3017 }
3018
3019 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3020 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3021 }
3022
3023 VkResult radv_CreateFramebuffer(
3024 VkDevice _device,
3025 const VkFramebufferCreateInfo* pCreateInfo,
3026 const VkAllocationCallbacks* pAllocator,
3027 VkFramebuffer* pFramebuffer)
3028 {
3029 RADV_FROM_HANDLE(radv_device, device, _device);
3030 struct radv_framebuffer *framebuffer;
3031
3032 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3033
3034 size_t size = sizeof(*framebuffer) +
3035 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3036 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3037 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3038 if (framebuffer == NULL)
3039 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3040
3041 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3042 framebuffer->width = pCreateInfo->width;
3043 framebuffer->height = pCreateInfo->height;
3044 framebuffer->layers = pCreateInfo->layers;
3045 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3046 VkImageView _iview = pCreateInfo->pAttachments[i];
3047 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3048 framebuffer->attachments[i].attachment = iview;
3049 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3050 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3051 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3052 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3053 }
3054 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3055 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3056 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3057 }
3058
3059 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3060 return VK_SUCCESS;
3061 }
3062
3063 void radv_DestroyFramebuffer(
3064 VkDevice _device,
3065 VkFramebuffer _fb,
3066 const VkAllocationCallbacks* pAllocator)
3067 {
3068 RADV_FROM_HANDLE(radv_device, device, _device);
3069 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3070
3071 if (!fb)
3072 return;
3073 vk_free2(&device->alloc, pAllocator, fb);
3074 }
3075
3076 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3077 {
3078 switch (address_mode) {
3079 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3080 return V_008F30_SQ_TEX_WRAP;
3081 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3082 return V_008F30_SQ_TEX_MIRROR;
3083 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3084 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3085 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3086 return V_008F30_SQ_TEX_CLAMP_BORDER;
3087 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3088 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3089 default:
3090 unreachable("illegal tex wrap mode");
3091 break;
3092 }
3093 }
3094
3095 static unsigned
3096 radv_tex_compare(VkCompareOp op)
3097 {
3098 switch (op) {
3099 case VK_COMPARE_OP_NEVER:
3100 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3101 case VK_COMPARE_OP_LESS:
3102 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3103 case VK_COMPARE_OP_EQUAL:
3104 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3105 case VK_COMPARE_OP_LESS_OR_EQUAL:
3106 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3107 case VK_COMPARE_OP_GREATER:
3108 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3109 case VK_COMPARE_OP_NOT_EQUAL:
3110 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3111 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3112 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3113 case VK_COMPARE_OP_ALWAYS:
3114 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3115 default:
3116 unreachable("illegal compare mode");
3117 break;
3118 }
3119 }
3120
3121 static unsigned
3122 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3123 {
3124 switch (filter) {
3125 case VK_FILTER_NEAREST:
3126 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3127 V_008F38_SQ_TEX_XY_FILTER_POINT);
3128 case VK_FILTER_LINEAR:
3129 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3130 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3131 case VK_FILTER_CUBIC_IMG:
3132 default:
3133 fprintf(stderr, "illegal texture filter\n");
3134 return 0;
3135 }
3136 }
3137
3138 static unsigned
3139 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3140 {
3141 switch (mode) {
3142 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3143 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3144 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3145 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3146 default:
3147 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3148 }
3149 }
3150
3151 static unsigned
3152 radv_tex_bordercolor(VkBorderColor bcolor)
3153 {
3154 switch (bcolor) {
3155 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3156 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3157 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3158 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3159 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3160 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3161 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3162 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3163 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3164 default:
3165 break;
3166 }
3167 return 0;
3168 }
3169
3170 static unsigned
3171 radv_tex_aniso_filter(unsigned filter)
3172 {
3173 if (filter < 2)
3174 return 0;
3175 if (filter < 4)
3176 return 1;
3177 if (filter < 8)
3178 return 2;
3179 if (filter < 16)
3180 return 3;
3181 return 4;
3182 }
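
/* The helper above encodes maxAnisotropy as the hardware's log2 ratio:
 *   1 -> 0 (off), 2-3 -> 1 (2x), 4-7 -> 2 (4x), 8-15 -> 3 (8x), 16+ -> 4 (16x).
 */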
3183
3184 static void
3185 radv_init_sampler(struct radv_device *device,
3186 struct radv_sampler *sampler,
3187 const VkSamplerCreateInfo *pCreateInfo)
3188 {
3189 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3190 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3191 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3192 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3193
3194 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3195 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3196 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3197 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3198 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3199 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3200 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3201 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3202 S_008F30_DISABLE_CUBE_WRAP(0) |
3203 S_008F30_COMPAT_MODE(is_vi));
3204 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3205 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3206 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3207 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3208 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3209 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3210 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3211 S_008F38_MIP_POINT_PRECLAMP(0) |
3212 S_008F38_DISABLE_LSB_CEIL(1) |
3213 S_008F38_FILTER_PREC_FIX(1) |
3214 S_008F38_ANISO_OVERRIDE(is_vi));
3215 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3216 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3217 }
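
/* The LOD fields above are unsigned fixed point with 8 fractional bits:
 * S_FIXED(x, 8) is just x * 256, so e.g. minLod = 2.5f encodes as 0x280,
 * and the CLAMP to 15 keeps the value within the field's integer bits.
 */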
3218
3219 VkResult radv_CreateSampler(
3220 VkDevice _device,
3221 const VkSamplerCreateInfo* pCreateInfo,
3222 const VkAllocationCallbacks* pAllocator,
3223 VkSampler* pSampler)
3224 {
3225 RADV_FROM_HANDLE(radv_device, device, _device);
3226 struct radv_sampler *sampler;
3227
3228 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3229
3230 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3231 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3232 if (!sampler)
3233 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3234
3235 radv_init_sampler(device, sampler, pCreateInfo);
3236 *pSampler = radv_sampler_to_handle(sampler);
3237
3238 return VK_SUCCESS;
3239 }
3240
3241 void radv_DestroySampler(
3242 VkDevice _device,
3243 VkSampler _sampler,
3244 const VkAllocationCallbacks* pAllocator)
3245 {
3246 RADV_FROM_HANDLE(radv_device, device, _device);
3247 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3248
3249 if (!sampler)
3250 return;
3251 vk_free2(&device->alloc, pAllocator, sampler);
3252 }
3253
3254 /* vk_icd.h does not declare this function, so we declare it here to
3255 * suppress Wmissing-prototypes.
3256 */
3257 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3258 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3259
3260 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3261 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3262 {
3263 /* For the full details on loader interface versioning, see
3264 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3265 * What follows is a condensed summary, to help you navigate the large and
3266 * confusing official doc.
3267 *
3268 * - Loader interface v0 is incompatible with later versions. We don't
3269 * support it.
3270 *
3271 * - In loader interface v1:
3272 * - The first ICD entrypoint called by the loader is
3273 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3274 * entrypoint.
3275 * - The ICD must statically expose no other Vulkan symbol unless it is
3276 * linked with -Bsymbolic.
3277 * - Each dispatchable Vulkan handle created by the ICD must be
3278 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3279 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3280 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3281 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3282 * such loader-managed surfaces.
3283 *
3284 * - Loader interface v2 differs from v1 in:
3285 * - The first ICD entrypoint called by the loader is
3286 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3287 * statically expose this entrypoint.
3288 *
3289 * - Loader interface v3 differs from v2 in:
3290 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3291 * vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
3292 * because the loader no longer does so.
3293 */
3294 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3295 return VK_SUCCESS;
3296 }
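
/* Example: a loader offering interface version 5 gets MIN2(5, 3) = 3 back;
 * one offering 2 keeps 2; v1 loaders never call this entrypoint at all and
 * start with vk_icdGetInstanceProcAddr() instead.
 */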
3297
3298 VkResult radv_GetMemoryFdKHX(VkDevice _device,
3299 VkDeviceMemory _memory,
3300 VkExternalMemoryHandleTypeFlagsKHX handleType,
3301 int *pFD)
3302 {
3303 RADV_FROM_HANDLE(radv_device, device, _device);
3304 RADV_FROM_HANDLE(radv_device_memory, memory, _memory);
3305
3306 /* We support only one handle type. */
3307 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
3308
3309 bool ret = radv_get_memory_fd(device, memory, pFD);
3310 if (!ret)
3311 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3312 return VK_SUCCESS;
3313 }
3314
3315 VkResult radv_GetMemoryFdPropertiesKHX(VkDevice _device,
3316 VkExternalMemoryHandleTypeFlagBitsKHX handleType,
3317 int fd,
3318 VkMemoryFdPropertiesKHX *pMemoryFdProperties)
3319 {
3320 /* The valid usage section for this function says:
3321 *
3322 * "handleType must not be one of the handle types defined as opaque."
3323 *
3324 * Since opaque handles are the only kind we support, any handleType that reaches this function is invalid.
3325 */
3326 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
3327 }