util/vulkan: Move Vulkan utilities to src/vulkan/util
mesa.git: src/amd/vulkan/radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"

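/* Build the pipeline-cache UUID from the Mesa and LLVM build timestamps plus
 * the chip family: bytes 0-3 hold the Mesa timestamp, bytes 4-7 the LLVM
 * timestamp, bytes 8-9 the radeon_family, and the remainder the literal
 * string "radv". Fails if either timestamp is unavailable.
 */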
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
    uint32_t mesa_timestamp, llvm_timestamp;
    uint16_t f = family;
    memset(uuid, 0, VK_UUID_SIZE);
    if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
        !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
        return -1;

    memcpy(uuid, &mesa_timestamp, 4);
    memcpy((char*)uuid + 4, &llvm_timestamp, 4);
    memcpy((char*)uuid + 8, &f, 2);
    snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
    return 0;
}

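/* The device UUID is derived from the PCI bus address: two bytes of domain
 * followed by one byte each of bus, device and function number.
 */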
static void
radv_get_device_uuid(drmDevicePtr device, void *uuid) {
    memset(uuid, 0, VK_UUID_SIZE);
    memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
    memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
    memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
    memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
}

static const VkExtensionProperties instance_extensions[] = {
    {
        .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
        .specVersion = 25,
    },
#ifdef VK_USE_PLATFORM_XCB_KHR
    {
        .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
        .specVersion = 6,
    },
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
    {
        .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
        .specVersion = 6,
    },
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
    {
        .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
        .specVersion = 5,
    },
#endif
    {
        .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
        .specVersion = 1,
    },
};

static const VkExtensionProperties common_device_extensions[] = {
    {
        .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
        .specVersion = 68,
    },
    {
        .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
        .specVersion = 1,
    },
};

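/* Append num_ext entries to the given extension list, growing the backing
 * array with vk_realloc(). On allocation failure the old array stays valid
 * and VK_ERROR_OUT_OF_HOST_MEMORY is returned.
 */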
static VkResult
radv_extensions_register(struct radv_instance *instance,
                         struct radv_extensions *extensions,
                         const VkExtensionProperties *new_ext,
                         uint32_t num_ext)
{
    size_t new_size;
    VkExtensionProperties *new_ptr;

    assert(new_ext && num_ext > 0);

    if (!new_ext)
        return VK_ERROR_INITIALIZATION_FAILED;

    new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
    new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
                         new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

    /* Old array continues to be valid, update nothing */
    if (!new_ptr)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    memcpy(&new_ptr[extensions->num_ext], new_ext,
           num_ext * sizeof(VkExtensionProperties));
    extensions->ext_array = new_ptr;
    extensions->num_ext += num_ext;

    return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
                       struct radv_extensions *extensions)
{
    assert(extensions);

    if (!extensions)
        radv_loge("Attempted to free invalid extension struct\n");

    if (extensions->ext_array)
        vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
                     size_t num_ext,
                     const char *name)
{
    assert(extensions && name);

    for (uint32_t i = 0; i < num_ext; i++) {
        if (strcmp(name, extensions[i].extensionName) == 0)
            return true;
    }

    return false;
}

static const char *
get_chip_name(enum radeon_family family)
{
    switch (family) {
    case CHIP_TAHITI: return "AMD RADV TAHITI";
    case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
    case CHIP_VERDE: return "AMD RADV CAPE VERDE";
    case CHIP_OLAND: return "AMD RADV OLAND";
    case CHIP_HAINAN: return "AMD RADV HAINAN";
    case CHIP_BONAIRE: return "AMD RADV BONAIRE";
    case CHIP_KAVERI: return "AMD RADV KAVERI";
    case CHIP_KABINI: return "AMD RADV KABINI";
    case CHIP_HAWAII: return "AMD RADV HAWAII";
    case CHIP_MULLINS: return "AMD RADV MULLINS";
    case CHIP_TONGA: return "AMD RADV TONGA";
    case CHIP_ICELAND: return "AMD RADV ICELAND";
    case CHIP_CARRIZO: return "AMD RADV CARRIZO";
    case CHIP_FIJI: return "AMD RADV FIJI";
    case CHIP_POLARIS10: return "AMD RADV POLARIS10";
    case CHIP_POLARIS11: return "AMD RADV POLARIS11";
    case CHIP_POLARIS12: return "AMD RADV POLARIS12";
    case CHIP_STONEY: return "AMD RADV STONEY";
    case CHIP_VEGA10: return "AMD RADV VEGA";
    case CHIP_RAVEN: return "AMD RADV RAVEN";
    default: return "AMD RADV unknown";
    }
}

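/* Open the DRM render node, verify it is driven by the amdgpu kernel
 * driver, then create the winsys, query GPU info, initialize WSI and
 * compute the cache and device UUIDs. Any failure closes the fd and
 * returns an incompatible-driver or initialization error.
 */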
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          drmDevicePtr drm_device)
{
    const char *path = drm_device->nodes[DRM_NODE_RENDER];
    VkResult result;
    drmVersionPtr version;
    int fd;

    fd = open(path, O_RDWR | O_CLOEXEC);
    if (fd < 0)
        return VK_ERROR_INCOMPATIBLE_DRIVER;

    version = drmGetVersion(fd);
    if (!version) {
        close(fd);
        return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                         "failed to get version %s: %m", path);
    }

    if (strcmp(version->name, "amdgpu")) {
        drmFreeVersion(version);
        close(fd);
        return VK_ERROR_INCOMPATIBLE_DRIVER;
    }
    drmFreeVersion(version);

    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    device->instance = instance;
    assert(strlen(path) < ARRAY_SIZE(device->path));
    strncpy(device->path, path, ARRAY_SIZE(device->path));

    device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
    if (!device->ws) {
        result = VK_ERROR_INCOMPATIBLE_DRIVER;
        goto fail;
    }

    device->local_fd = fd;
    device->ws->query_info(device->ws, &device->rad_info);
    result = radv_init_wsi(device);
    if (result != VK_SUCCESS) {
        device->ws->destroy(device->ws);
        goto fail;
    }

    if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
        radv_finish_wsi(device);
        device->ws->destroy(device->ws);
        result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                           "cannot generate UUID");
        goto fail;
    }

    result = radv_extensions_register(instance,
                                      &device->extensions,
                                      common_device_extensions,
                                      ARRAY_SIZE(common_device_extensions));
    if (result != VK_SUCCESS)
        goto fail;

    fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
    device->name = get_chip_name(device->rad_info.family);

    radv_get_device_uuid(drm_device, device->device_uuid);

    if (device->rad_info.family == CHIP_STONEY ||
        device->rad_info.chip_class >= GFX9) {
        device->has_rbplus = true;
        device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
    }

    return VK_SUCCESS;

fail:
    close(fd);
    return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
    radv_extensions_finish(device->instance, &device->extensions);
    radv_finish_wsi(device);
    device->ws->destroy(device->ws);
    close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
    return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
    return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
    free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
    .pUserData = NULL,
    .pfnAllocation = default_alloc_func,
    .pfnReallocation = default_realloc_func,
    .pfnFree = default_free_func,
};

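/* Options recognized in the RADV_DEBUG environment variable, parsed with
 * parse_debug_string() at instance creation. A hypothetical invocation such
 * as
 *
 *     RADV_DEBUG=nocache,shaders ./app
 *
 * would disable the pipeline cache and dump shaders.
 */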
static const struct debug_control radv_debug_options[] = {
    {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
    {"nodcc", RADV_DEBUG_NO_DCC},
    {"shaders", RADV_DEBUG_DUMP_SHADERS},
    {"nocache", RADV_DEBUG_NO_CACHE},
    {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
    {"nohiz", RADV_DEBUG_NO_HIZ},
    {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
    {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
    {"allbos", RADV_DEBUG_ALL_BOS},
    {"noibs", RADV_DEBUG_NO_IBS},
    {NULL, 0}
};

VkResult radv_CreateInstance(
    const VkInstanceCreateInfo*                 pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkInstance*                                 pInstance)
{
    struct radv_instance *instance;

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

    uint32_t client_version;
    if (pCreateInfo->pApplicationInfo &&
        pCreateInfo->pApplicationInfo->apiVersion != 0) {
        client_version = pCreateInfo->pApplicationInfo->apiVersion;
    } else {
        client_version = VK_MAKE_VERSION(1, 0, 0);
    }

    if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
        client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
        return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Client requested version %d.%d.%d",
                         VK_VERSION_MAJOR(client_version),
                         VK_VERSION_MINOR(client_version),
                         VK_VERSION_PATCH(client_version));
    }

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        if (!is_extension_enabled(instance_extensions,
                                  ARRAY_SIZE(instance_extensions),
                                  pCreateInfo->ppEnabledExtensionNames[i]))
            return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
    }

    instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!instance)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    memset(instance, 0, sizeof(*instance));

    instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

    if (pAllocator)
        instance->alloc = *pAllocator;
    else
        instance->alloc = default_alloc;

    instance->apiVersion = client_version;
    instance->physicalDeviceCount = -1;

    _mesa_locale_init();

    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

    instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                               radv_debug_options);

    *pInstance = radv_instance_to_handle(instance);

    return VK_SUCCESS;
}

void radv_DestroyInstance(
    VkInstance                                  _instance,
    const VkAllocationCallbacks*                pAllocator)
{
    RADV_FROM_HANDLE(radv_instance, instance, _instance);

    if (!instance)
        return;

    for (int i = 0; i < instance->physicalDeviceCount; ++i) {
        radv_physical_device_finish(instance->physicalDevices + i);
    }

    VG(VALGRIND_DESTROY_MEMPOOL(instance));

    _mesa_locale_fini();

    vk_free(&instance->alloc, instance);
}

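/* Walk up to 8 DRM devices and initialize a physical device for every AMD
 * (vendor id 0x1002) PCI render node; non-AMD nodes are skipped, and
 * incompatible ones simply leave the count unchanged.
 */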
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
    /* TODO: Check for more devices? */
    drmDevicePtr devices[8];
    VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
    int max_devices;

    instance->physicalDeviceCount = 0;

    max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
    if (max_devices < 1)
        return VK_ERROR_INCOMPATIBLE_DRIVER;

    for (unsigned i = 0; i < (unsigned)max_devices; i++) {
        if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
            devices[i]->bustype == DRM_BUS_PCI &&
            devices[i]->deviceinfo.pci->vendor_id == 0x1002) {

            result = radv_physical_device_init(instance->physicalDevices +
                                               instance->physicalDeviceCount,
                                               instance,
                                               devices[i]);
            if (result == VK_SUCCESS)
                ++instance->physicalDeviceCount;
            else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
                break;
        }
    }
    drmFreeDevices(devices, max_devices);

    return result;
}

VkResult radv_EnumeratePhysicalDevices(
    VkInstance                                  _instance,
    uint32_t*                                   pPhysicalDeviceCount,
    VkPhysicalDevice*                           pPhysicalDevices)
{
    RADV_FROM_HANDLE(radv_instance, instance, _instance);
    VkResult result;

    if (instance->physicalDeviceCount < 0) {
        result = radv_enumerate_devices(instance);
        if (result != VK_SUCCESS &&
            result != VK_ERROR_INCOMPATIBLE_DRIVER)
            return result;
    }

    if (!pPhysicalDevices) {
        *pPhysicalDeviceCount = instance->physicalDeviceCount;
    } else {
        *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
        for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
            pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
    }

    return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
                                                                 : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures*                   pFeatures)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
    memset(pFeatures, 0, sizeof(*pFeatures));

    *pFeatures = (VkPhysicalDeviceFeatures) {
        .robustBufferAccess = true,
        .fullDrawIndexUint32 = true,
        .imageCubeArray = true,
        .independentBlend = true,
        .geometryShader = !is_gfx9,
        .tessellationShader = !is_gfx9,
        .sampleRateShading = false,
        .dualSrcBlend = true,
        .logicOp = true,
        .multiDrawIndirect = true,
        .drawIndirectFirstInstance = true,
        .depthClamp = true,
        .depthBiasClamp = true,
        .fillModeNonSolid = true,
        .depthBounds = true,
        .wideLines = true,
        .largePoints = true,
        .alphaToOne = true,
        .multiViewport = true,
        .samplerAnisotropy = true,
        .textureCompressionETC2 = false,
        .textureCompressionASTC_LDR = false,
        .textureCompressionBC = true,
        .occlusionQueryPrecise = true,
        .pipelineStatisticsQuery = true,
        .vertexPipelineStoresAndAtomics = true,
        .fragmentStoresAndAtomics = true,
        .shaderTessellationAndGeometryPointSize = true,
        .shaderImageGatherExtended = true,
        .shaderStorageImageExtendedFormats = true,
        .shaderStorageImageMultisample = false,
        .shaderUniformBufferArrayDynamicIndexing = true,
        .shaderSampledImageArrayDynamicIndexing = true,
        .shaderStorageBufferArrayDynamicIndexing = true,
        .shaderStorageImageArrayDynamicIndexing = true,
        .shaderStorageImageReadWithoutFormat = true,
        .shaderStorageImageWriteWithoutFormat = true,
        .shaderClipDistance = true,
        .shaderCullDistance = true,
        .shaderFloat64 = true,
        .shaderInt64 = false,
        .shaderInt16 = false,
        .sparseBinding = true,
        .variableMultisampleRate = true,
        .inheritedQueries = true,
    };
}

void radv_GetPhysicalDeviceFeatures2KHR(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
    return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

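/* Derive a VK_MAKE_VERSION driver version from the Mesa VERSION string.
 * Development builds ("devel") report the release that preceded them, e.g.
 * a hypothetical "17.2.0-devel" becomes 17.1.99, so released drivers always
 * compare greater than the snapshots leading up to them.
 */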
static uint32_t radv_get_driver_version(void)
{
    const char *minor_string = strchr(VERSION, '.');
    const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
    int major = atoi(VERSION);
    int minor = minor_string ? atoi(minor_string + 1) : 0;
    int patch = patch_string ? atoi(patch_string + 1) : 0;
    if (strstr(VERSION, "devel")) {
        if (patch == 0) {
            patch = 99;
            if (minor == 0) {
                minor = 99;
                --major;
            } else
                --minor;
        } else
            --patch;
    }
    uint32_t version = VK_MAKE_VERSION(major, minor, patch);
    return version;
}

void radv_GetPhysicalDeviceProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties*                 pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    VkSampleCountFlags sample_counts = 0xf;

    /* Make sure that the entire descriptor set is addressable with a signed
     * 32-bit int, so the sum of all limits scaled by descriptor size has to
     * be at most 2 GiB. A combined image & sampler counts as one of each.
     * This limit is for the pipeline layout, not for the set layout, but
     * there is no set limit, so we just set a pipeline limit. No app is
     * likely to hit this soon. */
    size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
              (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
               32 /* storage buffer, 32 due to potential space wasted on alignment */ +
               32 /* sampler, largest when combined with image */ +
               64 /* sampled image */ +
               64 /* storage image */);

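    /* The divisor above sums to 224 bytes per descriptor, so this works out
     * to roughly 9.6 million descriptors per stage (illustrative arithmetic:
     * (2^31 - 16 * MAX_DYNAMIC_BUFFERS) / 224). */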
    VkPhysicalDeviceLimits limits = {
        .maxImageDimension1D = (1 << 14),
        .maxImageDimension2D = (1 << 14),
        .maxImageDimension3D = (1 << 11),
        .maxImageDimensionCube = (1 << 14),
        .maxImageArrayLayers = (1 << 11),
        .maxTexelBufferElements = 128 * 1024 * 1024,
        .maxUniformBufferRange = UINT32_MAX,
        .maxStorageBufferRange = UINT32_MAX,
        .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
        .maxMemoryAllocationCount = UINT32_MAX,
        .maxSamplerAllocationCount = 64 * 1024,
        .bufferImageGranularity = 64, /* A cache line */
        .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
        .maxBoundDescriptorSets = MAX_SETS,
        .maxPerStageDescriptorSamplers = max_descriptor_set_size,
        .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
        .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
        .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
        .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
        .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
        .maxPerStageResources = max_descriptor_set_size,
        .maxDescriptorSetSamplers = max_descriptor_set_size,
        .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
        .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
        .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
        .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
        .maxDescriptorSetSampledImages = max_descriptor_set_size,
        .maxDescriptorSetStorageImages = max_descriptor_set_size,
        .maxDescriptorSetInputAttachments = max_descriptor_set_size,
        .maxVertexInputAttributes = 32,
        .maxVertexInputBindings = 32,
        .maxVertexInputAttributeOffset = 2047,
        .maxVertexInputBindingStride = 2048,
        .maxVertexOutputComponents = 128,
        .maxTessellationGenerationLevel = 64,
        .maxTessellationPatchSize = 32,
        .maxTessellationControlPerVertexInputComponents = 128,
        .maxTessellationControlPerVertexOutputComponents = 128,
        .maxTessellationControlPerPatchOutputComponents = 120,
        .maxTessellationControlTotalOutputComponents = 4096,
        .maxTessellationEvaluationInputComponents = 128,
        .maxTessellationEvaluationOutputComponents = 128,
        .maxGeometryShaderInvocations = 127,
        .maxGeometryInputComponents = 64,
        .maxGeometryOutputComponents = 128,
        .maxGeometryOutputVertices = 256,
        .maxGeometryTotalOutputComponents = 1024,
        .maxFragmentInputComponents = 128,
        .maxFragmentOutputAttachments = 8,
        .maxFragmentDualSrcAttachments = 1,
        .maxFragmentCombinedOutputResources = 8,
        .maxComputeSharedMemorySize = 32768,
        .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
        .maxComputeWorkGroupInvocations = 2048,
        .maxComputeWorkGroupSize = {
            2048,
            2048,
            2048
        },
        .subPixelPrecisionBits = 4 /* FIXME */,
        .subTexelPrecisionBits = 4 /* FIXME */,
        .mipmapPrecisionBits = 4 /* FIXME */,
        .maxDrawIndexedIndexValue = UINT32_MAX,
        .maxDrawIndirectCount = UINT32_MAX,
        .maxSamplerLodBias = 16,
        .maxSamplerAnisotropy = 16,
        .maxViewports = MAX_VIEWPORTS,
        .maxViewportDimensions = { (1 << 14), (1 << 14) },
        .viewportBoundsRange = { INT16_MIN, INT16_MAX },
        .viewportSubPixelBits = 13, /* We take a float? */
        .minMemoryMapAlignment = 4096, /* A page */
        .minTexelBufferOffsetAlignment = 1,
        .minUniformBufferOffsetAlignment = 4,
        .minStorageBufferOffsetAlignment = 4,
        .minTexelOffset = -32,
        .maxTexelOffset = 31,
        .minTexelGatherOffset = -32,
        .maxTexelGatherOffset = 31,
        .minInterpolationOffset = -2,
        .maxInterpolationOffset = 2,
        .subPixelInterpolationOffsetBits = 8,
        .maxFramebufferWidth = (1 << 14),
        .maxFramebufferHeight = (1 << 14),
        .maxFramebufferLayers = (1 << 10),
        .framebufferColorSampleCounts = sample_counts,
        .framebufferDepthSampleCounts = sample_counts,
        .framebufferStencilSampleCounts = sample_counts,
        .framebufferNoAttachmentsSampleCounts = sample_counts,
        .maxColorAttachments = MAX_RTS,
        .sampledImageColorSampleCounts = sample_counts,
        .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
        .sampledImageDepthSampleCounts = sample_counts,
        .sampledImageStencilSampleCounts = sample_counts,
        .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
        .maxSampleMaskWords = 1,
        .timestampComputeAndGraphics = true,
        .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
        .maxClipDistances = 8,
        .maxCullDistances = 8,
        .maxCombinedClipAndCullDistances = 8,
        .discreteQueuePriorities = 1,
        .pointSizeRange = { 0.125, 255.875 },
        .lineWidthRange = { 0.0, 7.9921875 },
        .pointSizeGranularity = (1.0 / 8.0),
        .lineWidthGranularity = (1.0 / 128.0),
        .strictLines = false, /* FINISHME */
        .standardSampleLocations = true,
        .optimalBufferCopyOffsetAlignment = 128,
        .optimalBufferCopyRowPitchAlignment = 128,
        .nonCoherentAtomSize = 64,
    };

    *pProperties = (VkPhysicalDeviceProperties) {
        .apiVersion = VK_MAKE_VERSION(1, 0, 42),
        .driverVersion = radv_get_driver_version(),
        .vendorID = 0x1002,
        .deviceID = pdevice->rad_info.pci_id,
        .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
        .limits = limits,
        .sparseProperties = {0},
    };

    strcpy(pProperties->deviceName, pdevice->name);
    memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties2KHR             *pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

    vk_foreach_struct(ext, pProperties->pNext) {
        switch (ext->sType) {
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
            VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
                (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
            properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
            VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
            radv_device_get_cache_uuid(0, properties->driverUUID);
            memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
            properties->deviceLUIDValid = false;
            break;
        }
        default:
            break;
        }
    }
}

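/* Report one general queue family (graphics|compute|transfer|sparse) and,
 * on CIK+ parts with compute rings and the compute queue not disabled via
 * RADV_DEBUG, a second compute-only family. The caller passes an array of
 * pointers so the KHR2 and legacy entry points can share this helper.
 */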
static void radv_get_physical_device_queue_family_properties(
    struct radv_physical_device*                pdevice,
    uint32_t*                                   pCount,
    VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
    int num_queue_families = 1;
    int idx;
    if (pdevice->rad_info.num_compute_rings > 0 &&
        pdevice->rad_info.chip_class >= CIK &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
        num_queue_families++;

    if (pQueueFamilyProperties == NULL) {
        *pCount = num_queue_families;
        return;
    }

    if (!*pCount)
        return;

    idx = 0;
    if (*pCount >= 1) {
        *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = 1,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
        };
        idx++;
    }

    if (pdevice->rad_info.num_compute_rings > 0 &&
        pdevice->rad_info.chip_class >= CIK &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
        if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                .queueFlags = VK_QUEUE_COMPUTE_BIT |
                              VK_QUEUE_TRANSFER_BIT |
                              VK_QUEUE_SPARSE_BINDING_BIT,
                .queueCount = pdevice->rad_info.num_compute_rings,
                .timestampValidBits = 64,
                .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
            };
            idx++;
        }
    }
    *pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pCount,
    VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    if (!pQueueFamilyProperties) {
        radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
        return;
    }
    VkQueueFamilyProperties *properties[] = {
        pQueueFamilyProperties + 0,
        pQueueFamilyProperties + 1,
        pQueueFamilyProperties + 2,
    };
    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
    assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pCount,
    VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    if (!pQueueFamilyProperties) {
        radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
        return;
    }
    VkQueueFamilyProperties *properties[] = {
        &pQueueFamilyProperties[0].queueFamilyProperties,
        &pQueueFamilyProperties[1].queueFamilyProperties,
        &pQueueFamilyProperties[2].queueFamilyProperties,
    };
    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
    assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

    STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

    pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
        .heapIndex = RADV_MEM_HEAP_VRAM,
    };
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = RADV_MEM_HEAP_GTT,
    };
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
    };
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
        .heapIndex = RADV_MEM_HEAP_GTT,
    };

    STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

    pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
    pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
        .size = physical_device->rad_info.vram_size -
                physical_device->rad_info.vram_vis_size,
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
    pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
        .size = physical_device->rad_info.vram_vis_size,
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
    pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
        .size = physical_device->rad_info.gart_size,
        .flags = 0,
    };
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
    return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                                  &pMemoryProperties->memoryProperties);
}

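/* A queue owns a winsys context plus lazily created scratch, ring and
 * preamble objects; radv_queue_finish() releases whichever of those were
 * actually allocated.
 */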
static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                int queue_family_index, int idx)
{
    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    queue->device = device;
    queue->queue_family_index = queue_family_index;
    queue->queue_idx = idx;

    queue->hw_ctx = device->ws->ctx_create(device->ws);
    if (!queue->hw_ctx)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
    if (queue->hw_ctx)
        queue->device->ws->ctx_destroy(queue->hw_ctx);

    if (queue->initial_preamble_cs)
        queue->device->ws->cs_destroy(queue->initial_preamble_cs);
    if (queue->continue_preamble_cs)
        queue->device->ws->cs_destroy(queue->continue_preamble_cs);
    if (queue->descriptor_bo)
        queue->device->ws->buffer_destroy(queue->descriptor_bo);
    if (queue->scratch_bo)
        queue->device->ws->buffer_destroy(queue->scratch_bo);
    if (queue->esgs_ring_bo)
        queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
    if (queue->gsvs_ring_bo)
        queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
    if (queue->tess_factor_ring_bo)
        queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
    if (queue->tess_offchip_ring_bo)
        queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
    if (queue->compute_scratch_bo)
        queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

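/* Per-family GS table depth: 16 for the smaller chips listed in the first
 * case block, 32 for the rest; an unknown family is a driver bug
 * (unreachable).
 */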
static void
radv_device_init_gs_info(struct radv_device *device)
{
    switch (device->physical_device->rad_info.family) {
    case CHIP_OLAND:
    case CHIP_HAINAN:
    case CHIP_KAVERI:
    case CHIP_KABINI:
    case CHIP_MULLINS:
    case CHIP_ICELAND:
    case CHIP_CARRIZO:
    case CHIP_STONEY:
        device->gs_table_depth = 16;
        return;
    case CHIP_TAHITI:
    case CHIP_PITCAIRN:
    case CHIP_VERDE:
    case CHIP_BONAIRE:
    case CHIP_HAWAII:
    case CHIP_TONGA:
    case CHIP_FIJI:
    case CHIP_POLARIS10:
    case CHIP_POLARIS11:
    case CHIP_POLARIS12:
    case CHIP_VEGA10:
    case CHIP_RAVEN:
        device->gs_table_depth = 32;
        return;
    default:
        unreachable("unknown GPU");
    }
}

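/* Device creation: validate the requested extensions, allocate the device,
 * spin up the requested queues, size the scratch rings, record the empty
 * and cache-flush command streams per queue family, and create the
 * in-memory pipeline cache.
 */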
VkResult radv_CreateDevice(
    VkPhysicalDevice                            physicalDevice,
    const VkDeviceCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDevice*                                   pDevice)
{
    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
    VkResult result;
    struct radv_device *device;

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        if (!is_extension_enabled(physical_device->extensions.ext_array,
                                  physical_device->extensions.num_ext,
                                  pCreateInfo->ppEnabledExtensionNames[i]))
            return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
    }

    device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
                       sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
    if (!device)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    memset(device, 0, sizeof(*device));

    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    device->instance = physical_device->instance;
    device->physical_device = physical_device;

    device->debug_flags = device->instance->debug_flags;

    device->ws = physical_device->ws;
    if (pAllocator)
        device->alloc = *pAllocator;
    else
        device->alloc = physical_device->instance->alloc;

    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
        const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
        uint32_t qfi = queue_create->queueFamilyIndex;

        device->queues[qfi] = vk_alloc(&device->alloc,
                                       queue_create->queueCount * sizeof(struct radv_queue),
                                       8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
        if (!device->queues[qfi]) {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            goto fail;
        }

        memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

        device->queue_count[qfi] = queue_create->queueCount;

        for (unsigned q = 0; q < queue_create->queueCount; q++) {
            result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
            if (result != VK_SUCCESS)
                goto fail;
        }
    }

#if HAVE_LLVM < 0x0400
    device->llvm_supports_spill = false;
#else
    device->llvm_supports_spill = true;
#endif

    /* The maximum number of scratch waves. Scratch space isn't divided
     * evenly between CUs. The number is only a function of the number of CUs.
     * We can decrease the constant to decrease the scratch buffer size.
     *
     * sctx->scratch_waves must be >= the maximum possible size of
     * 1 threadgroup, so that the hw doesn't hang from being unable
     * to start any.
     *
     * The recommended value is 4 per CU at most. Higher numbers don't
     * bring much benefit, but they still occupy chip resources (think
     * async compute). I've seen ~2% performance difference between 4 and 32.
     */
    uint32_t max_threads_per_block = 2048;
    device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                 max_threads_per_block / 64);

    radv_device_init_gs_info(device);

    device->tess_offchip_block_dw_size =
        device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
    device->has_distributed_tess =
        device->physical_device->rad_info.chip_class >= VI &&
        device->physical_device->rad_info.max_se >= 2;

    result = radv_device_init_meta(device);
    if (result != VK_SUCCESS)
        goto fail;

    radv_device_init_msaa(device);

    for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
        device->empty_cs[family] = device->ws->cs_create(device->ws, family);
        switch (family) {
        case RADV_QUEUE_GENERAL:
            radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
            radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
            radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
            break;
        case RADV_QUEUE_COMPUTE:
            radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
            radeon_emit(device->empty_cs[family], 0);
            break;
        }
        device->ws->cs_finalize(device->empty_cs[family]);

        device->flush_cs[family] = device->ws->cs_create(device->ws, family);
        switch (family) {
        case RADV_QUEUE_GENERAL:
        case RADV_QUEUE_COMPUTE:
            si_cs_emit_cache_flush(device->flush_cs[family],
                                   device->physical_device->rad_info.chip_class,
                                   NULL, 0,
                                   family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
                                   RADV_CMD_FLAG_INV_ICACHE |
                                   RADV_CMD_FLAG_INV_SMEM_L1 |
                                   RADV_CMD_FLAG_INV_VMEM_L1 |
                                   RADV_CMD_FLAG_INV_GLOBAL_L2);
            break;
        }
        device->ws->cs_finalize(device->flush_cs[family]);

        device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
        switch (family) {
        case RADV_QUEUE_GENERAL:
        case RADV_QUEUE_COMPUTE:
            /* Parenthesize the partial-flush choice so the cache
             * invalidations apply on both queue types. */
            si_cs_emit_cache_flush(device->flush_shader_cs[family],
                                   device->physical_device->rad_info.chip_class,
                                   NULL, 0,
                                   family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
                                   (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
                                                                 : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
                                   RADV_CMD_FLAG_INV_ICACHE |
                                   RADV_CMD_FLAG_INV_SMEM_L1 |
                                   RADV_CMD_FLAG_INV_VMEM_L1 |
                                   RADV_CMD_FLAG_INV_GLOBAL_L2);
            break;
        }
        device->ws->cs_finalize(device->flush_shader_cs[family]);
    }

    if (getenv("RADV_TRACE_FILE")) {
        device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
                                                     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
        if (!device->trace_bo) {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            goto fail;
        }

        device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
        if (!device->trace_id_ptr) {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            goto fail;
        }
    }

    if (device->physical_device->rad_info.chip_class >= CIK)
        cik_create_gfx_config(device);

    VkPipelineCacheCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
    ci.pNext = NULL;
    ci.flags = 0;
    ci.pInitialData = NULL;
    ci.initialDataSize = 0;
    VkPipelineCache pc;
    result = radv_CreatePipelineCache(radv_device_to_handle(device),
                                      &ci, NULL, &pc);
    if (result != VK_SUCCESS)
        goto fail;

    device->mem_cache = radv_pipeline_cache_from_handle(pc);

    *pDevice = radv_device_to_handle(device);
    return VK_SUCCESS;

fail:
    if (device->trace_bo)
        device->ws->buffer_destroy(device->trace_bo);

    if (device->gfx_init)
        device->ws->buffer_destroy(device->gfx_init);

    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
        for (unsigned q = 0; q < device->queue_count[i]; q++)
            radv_queue_finish(&device->queues[i][q]);
        if (device->queue_count[i])
            vk_free(&device->alloc, device->queues[i]);
    }

    vk_free(&device->alloc, device);
    return result;
}

void radv_DestroyDevice(
    VkDevice                                    _device,
    const VkAllocationCallbacks*                pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    if (!device)
        return;

    if (device->trace_bo)
        device->ws->buffer_destroy(device->trace_bo);

    if (device->gfx_init)
        device->ws->buffer_destroy(device->gfx_init);

    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
        for (unsigned q = 0; q < device->queue_count[i]; q++)
            radv_queue_finish(&device->queues[i][q]);
        if (device->queue_count[i])
            vk_free(&device->alloc, device->queues[i]);
        if (device->empty_cs[i])
            device->ws->cs_destroy(device->empty_cs[i]);
        if (device->flush_cs[i])
            device->ws->cs_destroy(device->flush_cs[i]);
        if (device->flush_shader_cs[i])
            device->ws->cs_destroy(device->flush_shader_cs[i]);
    }
    radv_device_finish_meta(device);

    VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

    vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
    if (pProperties == NULL) {
        *pPropertyCount = ARRAY_SIZE(instance_extensions);
        return VK_SUCCESS;
    }

    *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
    typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

    if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
        return VK_INCOMPLETE;

    return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
    VkPhysicalDevice                            physicalDevice,
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

    if (pProperties == NULL) {
        *pPropertyCount = pdevice->extensions.num_ext;
        return VK_SUCCESS;
    }

    *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
    typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

    if (*pPropertyCount < pdevice->extensions.num_ext)
        return VK_INCOMPLETE;

    return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
    uint32_t*                                   pPropertyCount,
    VkLayerProperties*                          pProperties)
{
    if (pProperties == NULL) {
        *pPropertyCount = 0;
        return VK_SUCCESS;
    }

    /* None supported at this time */
    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pPropertyCount,
    VkLayerProperties*                          pProperties)
{
    if (pProperties == NULL) {
        *pPropertyCount = 0;
        return VK_SUCCESS;
    }

    /* None supported at this time */
    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
    VkDevice                                    _device,
    uint32_t                                    queueFamilyIndex,
    uint32_t                                    queueIndex,
    VkQueue*                                    pQueue)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

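/* Dump the given command stream together with the last trace id that made
 * it to memory; only reachable when RADV_TRACE_FILE is set.
 */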
static void radv_dump_trace(struct radv_device *device,
                            struct radeon_winsys_cs *cs)
{
    const char *filename = getenv("RADV_TRACE_FILE");
    FILE *f = fopen(filename, "w");
    if (!f) {
        fprintf(stderr, "Failed to write trace dump to %s\n", filename);
        return;
    }

    fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
    device->ws->cs_dump(cs, f, *device->trace_id_ptr);
    fclose(f);
}

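/* Fill the per-queue ring descriptor buffer: starting at map[4] it holds
 * six 4-dword buffer descriptors (the ES and GS views of the ES->GS ring,
 * the VS view and the GS write view of the GS->VS ring, then the tess
 * factor and off-chip rings), optionally followed by the 1x/2x/4x/8x/16x
 * sample position tables.
 */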
static void
fill_geom_tess_rings(struct radv_queue *queue,
                     uint32_t *map,
                     bool add_sample_positions,
                     uint32_t esgs_ring_size,
                     struct radeon_winsys_bo *esgs_ring_bo,
                     uint32_t gsvs_ring_size,
                     struct radeon_winsys_bo *gsvs_ring_bo,
                     uint32_t tess_factor_ring_size,
                     struct radeon_winsys_bo *tess_factor_ring_bo,
                     uint32_t tess_offchip_ring_size,
                     struct radeon_winsys_bo *tess_offchip_ring_bo)
{
    uint64_t esgs_va = 0, gsvs_va = 0;
    uint64_t tess_factor_va = 0, tess_offchip_va = 0;
    uint32_t *desc = &map[4];

    if (esgs_ring_bo)
        esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
    if (gsvs_ring_bo)
        gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
    if (tess_factor_ring_bo)
        tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
    if (tess_offchip_ring_bo)
        tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);

    /* stride 0, num records - size, add tid, swizzle, elsize 4,
       index stride 64 */
    desc[0] = esgs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(true);
    desc[2] = esgs_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(1) |
              S_008F0C_INDEX_STRIDE(3) |
              S_008F0C_ADD_TID_ENABLE(true);

    desc += 4;
    /* GS entry for ES->GS ring */
    /* stride 0, num records - size, elsize 0,
       index stride 0 */
    desc[0] = esgs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(false);
    desc[2] = esgs_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(0) |
              S_008F0C_INDEX_STRIDE(0) |
              S_008F0C_ADD_TID_ENABLE(false);

    desc += 4;
    /* VS entry for GS->VS ring */
    /* stride 0, num records - size, elsize 0,
       index stride 0 */
    desc[0] = gsvs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(false);
    desc[2] = gsvs_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(0) |
              S_008F0C_INDEX_STRIDE(0) |
              S_008F0C_ADD_TID_ENABLE(false);
    desc += 4;

    /* stride gsvs_itemsize, num records 64,
       elsize 4, index stride 16 */
    /* shader will patch stride and desc[2] */
    desc[0] = gsvs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(true);
    desc[2] = 0;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(1) |
              S_008F0C_INDEX_STRIDE(1) |
              S_008F0C_ADD_TID_ENABLE(true);
    desc += 4;

    desc[0] = tess_factor_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(false);
    desc[2] = tess_factor_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(0) |
              S_008F0C_INDEX_STRIDE(0) |
              S_008F0C_ADD_TID_ENABLE(false);
    desc += 4;

    desc[0] = tess_offchip_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(false);
    desc[2] = tess_offchip_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(0) |
              S_008F0C_INDEX_STRIDE(0) |
              S_008F0C_ADD_TID_ENABLE(false);
    desc += 4;

    /* Add sample positions after all rings; the descriptor BO is only
     * sized for them when add_sample_positions is set. */
    if (add_sample_positions) {
        memcpy(desc, queue->device->sample_locations_1x, 8);
        desc += 2;
        memcpy(desc, queue->device->sample_locations_2x, 16);
        desc += 4;
        memcpy(desc, queue->device->sample_locations_4x, 32);
        desc += 8;
        memcpy(desc, queue->device->sample_locations_8x, 64);
        desc += 16;
        memcpy(desc, queue->device->sample_locations_16x, 128);
    }
}

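/* Compute VGT_HS_OFFCHIP_PARAM: CIK+ parts (except Carrizo and Stoney) can
 * use double off-chip buffers, SI caps the count at 126 and later chips at
 * 508, and VI and newer encode the count minus one.
 */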
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
    bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
        device->physical_device->rad_info.family != CHIP_CARRIZO &&
        device->physical_device->rad_info.family != CHIP_STONEY;
    unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
    unsigned max_offchip_buffers = max_offchip_buffers_per_se *
        device->physical_device->rad_info.max_se;
    unsigned offchip_granularity;
    unsigned hs_offchip_param;
    switch (device->tess_offchip_block_dw_size) {
    default:
        assert(0);
        /* fall through */
    case 8192:
        offchip_granularity = V_03093C_X_8K_DWORDS;
        break;
    case 4096:
        offchip_granularity = V_03093C_X_4K_DWORDS;
        break;
    }

    switch (device->physical_device->rad_info.chip_class) {
    case SI:
        max_offchip_buffers = MIN2(max_offchip_buffers, 126);
        break;
    case CIK:
    case VI:
    case GFX9:
    default:
        max_offchip_buffers = MIN2(max_offchip_buffers, 508);
        break;
    }

    *max_offchip_buffers_p = max_offchip_buffers;
    if (device->physical_device->rad_info.chip_class >= CIK) {
        if (device->physical_device->rad_info.chip_class >= VI)
            --max_offchip_buffers;
        hs_offchip_param =
            S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
            S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
    } else {
        hs_offchip_param =
            S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
    }
    return hs_offchip_param;
}

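/* (Re)build the queue preambles: grow scratch and ring buffers as needed,
 * write the ring descriptors, and record two command streams - one that
 * performs the full initial setup including a cache flush, and a lighter
 * one used when continuing a submission. Existing preambles are reused
 * whenever nothing grew.
 */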
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
                     uint32_t compute_scratch_size,
                     uint32_t esgs_ring_size,
                     uint32_t gsvs_ring_size,
                     bool needs_tess_rings,
                     bool needs_sample_positions,
                     struct radeon_winsys_cs **initial_preamble_cs,
                     struct radeon_winsys_cs **continue_preamble_cs)
{
    struct radeon_winsys_bo *scratch_bo = NULL;
    struct radeon_winsys_bo *descriptor_bo = NULL;
    struct radeon_winsys_bo *compute_scratch_bo = NULL;
    struct radeon_winsys_bo *esgs_ring_bo = NULL;
    struct radeon_winsys_bo *gsvs_ring_bo = NULL;
    struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
    struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
    struct radeon_winsys_cs *dest_cs[2] = {0};
    bool add_tess_rings = false, add_sample_positions = false;
    unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
    unsigned max_offchip_buffers;
    unsigned hs_offchip_param = 0;
    if (!queue->has_tess_rings) {
        if (needs_tess_rings)
            add_tess_rings = true;
    }
    if (!queue->has_sample_positions) {
        if (needs_sample_positions)
            add_sample_positions = true;
    }
    tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
    hs_offchip_param = radv_get_hs_offchip_param(queue->device,
                                                 &max_offchip_buffers);
    tess_offchip_ring_size = max_offchip_buffers *
        queue->device->tess_offchip_block_dw_size * 4;

    if (scratch_size <= queue->scratch_size &&
        compute_scratch_size <= queue->compute_scratch_size &&
        esgs_ring_size <= queue->esgs_ring_size &&
        gsvs_ring_size <= queue->gsvs_ring_size &&
        !add_tess_rings && !add_sample_positions &&
        queue->initial_preamble_cs) {
        *initial_preamble_cs = queue->initial_preamble_cs;
        *continue_preamble_cs = queue->continue_preamble_cs;
        if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
            *continue_preamble_cs = NULL;
        return VK_SUCCESS;
    }

    if (scratch_size > queue->scratch_size) {
        scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                      scratch_size,
                                                      4096,
                                                      RADEON_DOMAIN_VRAM,
                                                      RADEON_FLAG_NO_CPU_ACCESS);
        if (!scratch_bo)
            goto fail;
    } else
        scratch_bo = queue->scratch_bo;

    if (compute_scratch_size > queue->compute_scratch_size) {
        compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                              compute_scratch_size,
                                                              4096,
                                                              RADEON_DOMAIN_VRAM,
                                                              RADEON_FLAG_NO_CPU_ACCESS);
        if (!compute_scratch_bo)
            goto fail;
    } else
        compute_scratch_bo = queue->compute_scratch_bo;

    if (esgs_ring_size > queue->esgs_ring_size) {
        esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                        esgs_ring_size,
                                                        4096,
                                                        RADEON_DOMAIN_VRAM,
                                                        RADEON_FLAG_NO_CPU_ACCESS);
        if (!esgs_ring_bo)
            goto fail;
    } else {
        esgs_ring_bo = queue->esgs_ring_bo;
        esgs_ring_size = queue->esgs_ring_size;
    }

    if (gsvs_ring_size > queue->gsvs_ring_size) {
        gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                        gsvs_ring_size,
                                                        4096,
                                                        RADEON_DOMAIN_VRAM,
                                                        RADEON_FLAG_NO_CPU_ACCESS);
        if (!gsvs_ring_bo)
            goto fail;
    } else {
        gsvs_ring_bo = queue->gsvs_ring_bo;
        gsvs_ring_size = queue->gsvs_ring_size;
    }

    if (add_tess_rings) {
        tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                               tess_factor_ring_size,
                                                               256,
                                                               RADEON_DOMAIN_VRAM,
                                                               RADEON_FLAG_NO_CPU_ACCESS);
        if (!tess_factor_ring_bo)
            goto fail;
        tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                tess_offchip_ring_size,
                                                                256,
                                                                RADEON_DOMAIN_VRAM,
                                                                RADEON_FLAG_NO_CPU_ACCESS);
        if (!tess_offchip_ring_bo)
            goto fail;
    } else {
        tess_factor_ring_bo = queue->tess_factor_ring_bo;
        tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
    }

    if (scratch_bo != queue->scratch_bo ||
        esgs_ring_bo != queue->esgs_ring_bo ||
        gsvs_ring_bo != queue->gsvs_ring_bo ||
        tess_factor_ring_bo != queue->tess_factor_ring_bo ||
        tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
        uint32_t size = 0;
        if (gsvs_ring_bo || esgs_ring_bo ||
            tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
            size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
            if (add_sample_positions)
                size += 256; /* (16+8+4+2+1) samples * 4 * 2 = 248 bytes, padded. */
        }
        else if (scratch_bo)
            size = 8; /* 2 dword */

        descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                         size,
                                                         4096,
                                                         RADEON_DOMAIN_VRAM,
                                                         RADEON_FLAG_CPU_ACCESS);
        if (!descriptor_bo)
            goto fail;
    } else
        descriptor_bo = queue->descriptor_bo;

	for (int i = 0; i < 2; ++i) {
		struct radeon_winsys_cs *cs = NULL;
		cs = queue->device->ws->cs_create(queue->device->ws,
						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
		if (!cs)
			goto fail;

		dest_cs[i] = cs;

		if (scratch_bo)
			queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

		if (esgs_ring_bo)
			queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

		if (gsvs_ring_bo)
			queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

		if (tess_factor_ring_bo)
			queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);

		if (tess_offchip_ring_bo)
			queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);

		if (descriptor_bo)
			queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

		if (descriptor_bo != queue->descriptor_bo) {
			uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

			if (scratch_bo) {
				uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
				uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
						 S_008F04_SWIZZLE_ENABLE(1);
				map[0] = scratch_va;
				map[1] = rsrc1;
			}

			if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
			    add_sample_positions)
				fill_geom_tess_rings(queue, map, add_sample_positions,
						     esgs_ring_size, esgs_ring_bo,
						     gsvs_ring_size, gsvs_ring_bo,
						     tess_factor_ring_size, tess_factor_ring_bo,
						     tess_offchip_ring_size, tess_offchip_ring_bo);

			queue->device->ws->buffer_unmap(descriptor_bo);
		}

		if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
		}

		if (esgs_ring_bo || gsvs_ring_bo) {
			if (queue->device->physical_device->rad_info.chip_class >= CIK) {
				radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
				radeon_emit(cs, esgs_ring_size >> 8);
				radeon_emit(cs, gsvs_ring_size >> 8);
			} else {
				radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
				radeon_emit(cs, esgs_ring_size >> 8);
				radeon_emit(cs, gsvs_ring_size >> 8);
			}
		}

		if (tess_factor_ring_bo) {
			uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
			if (queue->device->physical_device->rad_info.chip_class >= CIK) {
				radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
						       S_030938_SIZE(tess_factor_ring_size / 4));
				radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
						       tf_va >> 8);
				if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
					radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
							       tf_va >> 40);
				}
				radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
			} else {
				radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
						      S_008988_SIZE(tess_factor_ring_size / 4));
				radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
						      tf_va >> 8);
				radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
						      hs_offchip_param);
			}
		}

		if (descriptor_bo) {
			uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
					   R_00B130_SPI_SHADER_USER_DATA_VS_0,
					   R_00B230_SPI_SHADER_USER_DATA_GS_0,
					   R_00B330_SPI_SHADER_USER_DATA_ES_0,
					   R_00B430_SPI_SHADER_USER_DATA_HS_0,
					   R_00B530_SPI_SHADER_USER_DATA_LS_0};

			uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

			for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
				radeon_set_sh_reg_seq(cs, regs[i], 2);
				radeon_emit(cs, va);
				radeon_emit(cs, va >> 32);
			}
		}

		if (compute_scratch_bo) {
			uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
					 S_008F04_SWIZZLE_ENABLE(1);

			queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

			radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
			radeon_emit(cs, scratch_va);
			radeon_emit(cs, rsrc1);
		}

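		/* Only the initial preamble (i == 0) invalidates caches; the
		 * continue preamble is replayed mid-submission, where a full
		 * flush would be redundant. */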
		if (!i) {
			si_cs_emit_cache_flush(cs,
					       queue->device->physical_device->rad_info.chip_class,
					       NULL, 0,
					       queue->queue_family_index == RING_COMPUTE &&
						 queue->device->physical_device->rad_info.chip_class >= CIK,
					       RADV_CMD_FLAG_INV_ICACHE |
					       RADV_CMD_FLAG_INV_SMEM_L1 |
					       RADV_CMD_FLAG_INV_VMEM_L1 |
					       RADV_CMD_FLAG_INV_GLOBAL_L2);
		}

		if (!queue->device->ws->cs_finalize(cs))
			goto fail;
	}

	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);

	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);

	queue->initial_preamble_cs = dest_cs[0];
	queue->continue_preamble_cs = dest_cs[1];

	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
		queue->tess_factor_ring_bo = tess_factor_ring_bo;
	}

	if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
		queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
		queue->has_tess_rings = true;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	if (add_sample_positions)
		queue->has_sample_positions = true;

	*initial_preamble_cs = queue->initial_preamble_cs;
	*continue_preamble_cs = queue->continue_preamble_cs;
	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
		*continue_preamble_cs = NULL;
	return VK_SUCCESS;
fail:
	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
		if (dest_cs[i])
			queue->device->ws->cs_destroy(dest_cs[i]);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(tess_factor_ring_bo);
	if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

VkResult radv_QueueSubmit(
	VkQueue _queue,
	uint32_t submitCount,
	const VkSubmitInfo* pSubmits,
	VkFence _fence)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);
	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
	int ret;
	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
	uint32_t scratch_size = 0;
	uint32_t compute_scratch_size = 0;
	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
	struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
	VkResult result;
	bool fence_emitted = false;
	bool tess_rings_needed = false;
	bool sample_positions_needed = false;

	/* Do this first so failing to allocate scratch buffers can't result in
	 * partially executed submissions. */
	for (uint32_t i = 0; i < submitCount; i++) {
		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);

			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
			compute_scratch_size = MAX2(compute_scratch_size,
						    cmd_buffer->compute_scratch_size_needed);
			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
			tess_rings_needed |= cmd_buffer->tess_rings_needed;
			sample_positions_needed |= cmd_buffer->sample_positions_needed;
		}
	}

	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
				      esgs_ring_size, gsvs_ring_size, tess_rings_needed,
				      sample_positions_needed,
				      &initial_preamble_cs, &continue_preamble_cs);
	if (result != VK_SUCCESS)
		return result;

	for (uint32_t i = 0; i < submitCount; i++) {
		struct radeon_winsys_cs **cs_array;
		bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
		bool can_patch = !do_flush;
		uint32_t advance;

		if (!pSubmits[i].commandBufferCount) {
			if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
				ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
								   &queue->device->empty_cs[queue->queue_family_index],
								   1, NULL, NULL,
								   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
								   pSubmits[i].waitSemaphoreCount,
								   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
								   pSubmits[i].signalSemaphoreCount,
								   false, base_fence);
				if (ret) {
					radv_loge("failed to submit CS %d\n", i);
					abort();
				}
				fence_emitted = true;
			}
			continue;
		}

		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
				  (pSubmits[i].commandBufferCount + do_flush));
		if (!cs_array)
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

		if (do_flush)
			cs_array[0] = pSubmits[i].waitSemaphoreCount ?
				queue->device->flush_shader_cs[queue->queue_family_index] :
				queue->device->flush_cs[queue->queue_family_index];

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);
			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

			cs_array[j + do_flush] = cmd_buffer->cs;
			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
				can_patch = false;
		}

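		/* Submit in chunks of at most max_cs_submission command
		 * streams. With a trace buffer active this is one CS per
		 * kernel submission, so a GPU hang can be pinned to a single
		 * CS by waiting for idle after each chunk. */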
		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
			advance = MIN2(max_cs_submission,
				       pSubmits[i].commandBufferCount + do_flush - j);
			bool b = j == 0;
			bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;

			if (queue->device->trace_bo)
				*queue->device->trace_id_ptr = 0;

			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
							   advance, initial_preamble_cs, continue_preamble_cs,
							   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
							   b ? pSubmits[i].waitSemaphoreCount : 0,
							   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
							   e ? pSubmits[i].signalSemaphoreCount : 0,
							   can_patch, base_fence);

			if (ret) {
				radv_loge("failed to submit CS %d\n", i);
				abort();
			}
			fence_emitted = true;
			if (queue->device->trace_bo) {
				bool success = queue->device->ws->ctx_wait_idle(
							queue->hw_ctx,
							radv_queue_family_to_ring(
								queue->queue_family_index),
							queue->queue_idx);

				if (!success) { /* Hang */
					radv_dump_trace(queue->device, cs_array[j]);
					abort();
				}
			}
		}
		free(cs_array);
	}

	if (fence) {
		if (!fence_emitted)
			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
							   &queue->device->empty_cs[queue->queue_family_index],
							   1, NULL, NULL, NULL, 0, NULL, 0,
							   false, base_fence);

		fence->submitted = true;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
	VkQueue _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
					 radv_queue_family_to_ring(queue->queue_family_index),
					 queue->queue_idx);
	return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
	VkDevice _device)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++) {
			radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
		}
	}
	return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice device,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

bool radv_get_memory_fd(struct radv_device *device,
			struct radv_device_memory *memory,
			int *pFD)
{
	struct radeon_bo_metadata metadata;

	if (memory->image) {
		radv_init_metadata(device, memory->image, &metadata);
		device->ws->buffer_set_metadata(memory->bo, &metadata);
	}

	return device->ws->buffer_get_fd(device->ws, memory->bo,
					 pFD);
}

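/* Allocation notes: the memory type index selects the heap and caching
 * behaviour. The GTT types map to RADEON_DOMAIN_GTT (write-combined or
 * cached), everything else to VRAM; pure VRAM allocations are marked
 * NO_CPU_ACCESS so the kernel never has to keep them CPU-mappable. */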
VkResult radv_AllocateMemory(
	VkDevice _device,
	const VkMemoryAllocateInfo* pAllocateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDeviceMemory* pMem)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;

	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* allocationSize == 0 is legal; return a null handle. */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	const VkImportMemoryFdInfoKHX *import_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
	const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
		vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (dedicate_info) {
		mem->image = radv_image_from_handle(dedicate_info->image);
		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
	} else {
		mem->image = NULL;
		mem->buffer = NULL;
	}

	if (import_info) {
		assert(import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
						     NULL, NULL);
		if (!mem->bo) {
			/* result was previously left uninitialized on this path. */
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
			goto fail;
		} else {
			goto out_success;
		}
	}

	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
		domain = RADEON_DOMAIN_GTT;
	else
		domain = RADEON_DOMAIN_VRAM;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
		flags |= RADEON_FLAG_NO_CPU_ACCESS;
	else
		flags |= RADEON_FLAG_CPU_ACCESS;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
		flags |= RADEON_FLAG_GTT_WC;

	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
					    domain, flags);

	if (!mem->bo) {
		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
		goto fail;
	}
	mem->type_index = pAllocateInfo->memoryTypeIndex;
out_success:
	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}

void radv_FreeMemory(
	VkDevice _device,
	VkDeviceMemory _mem,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

	if (mem == NULL)
		return;

	device->ws->buffer_destroy(mem->bo);
	mem->bo = NULL;

	vk_free2(&device->alloc, pAllocator, mem);
}

VkResult radv_MapMemory(
	VkDevice _device,
	VkDeviceMemory _memory,
	VkDeviceSize offset,
	VkDeviceSize size,
	VkMemoryMapFlags flags,
	void** ppData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL) {
		*ppData = NULL;
		return VK_SUCCESS;
	}

	*ppData = device->ws->buffer_map(mem->bo);
	if (*ppData) {
		*ppData += offset;
		return VK_SUCCESS;
	}

	return VK_ERROR_MEMORY_MAP_FAILED;
}

void radv_UnmapMemory(
	VkDevice _device,
	VkDeviceMemory _memory)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL)
		return;

	device->ws->buffer_unmap(mem->bo);
}

VkResult radv_FlushMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange* pMemoryRanges)
{
	return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange* pMemoryRanges)
{
	return VK_SUCCESS;
}

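/* The host-visible memory types exposed above are coherent from the driver's
 * point of view, which is why flush/invalidate of mapped ranges can be
 * no-ops. Buffer alignment below: sparse buffers must be page-aligned for
 * virtual binding; ordinary buffers only need 16 bytes for the worst-case
 * descriptor load. */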
void radv_GetBufferMemoryRequirements(
	VkDevice device,
	VkBuffer _buffer,
	VkMemoryRequirements* pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
		pMemoryRequirements->alignment = 4096;
	else
		pMemoryRequirements->alignment = 16;

	pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
}

void radv_GetImageMemoryRequirements(
	VkDevice device,
	VkImage _image,
	VkMemoryRequirements* pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = image->size;
	pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageSparseMemoryRequirements(
	VkDevice device,
	VkImage image,
	uint32_t* pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
{
	stub();
}

void radv_GetDeviceMemoryCommitment(
	VkDevice device,
	VkDeviceMemory memory,
	VkDeviceSize* pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory(
	VkDevice device,
	VkBuffer _buffer,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (mem) {
		buffer->bo = mem->bo;
		buffer->offset = memoryOffset;
	} else {
		buffer->bo = NULL;
		buffer->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
	VkDevice device,
	VkImage _image,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (mem) {
		image->bo = mem->bo;
		image->offset = memoryOffset;
	} else {
		image->bo = NULL;
		image->offset = 0;
	}

	return VK_SUCCESS;
}

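/* Sparse binding: the buffer/image BO was created with RADEON_FLAG_VIRTUAL,
 * i.e. it is only a GPU virtual address range. Each bind below maps a range
 * of that space to backing memory, or unmaps it when pBinds[i].memory is
 * VK_NULL_HANDLE. */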
static void
radv_sparse_buffer_bind_memory(struct radv_device *device,
			       const VkSparseBufferMemoryBindInfo *bind)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);

	for (uint32_t i = 0; i < bind->bindCount; ++i) {
		struct radv_device_memory *mem = NULL;

		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

		device->ws->buffer_virtual_bind(buffer->bo,
						bind->pBinds[i].resourceOffset,
						bind->pBinds[i].size,
						mem ? mem->bo : NULL,
						bind->pBinds[i].memoryOffset);
	}
}

static void
radv_sparse_image_opaque_bind_memory(struct radv_device *device,
				     const VkSparseImageOpaqueMemoryBindInfo *bind)
{
	RADV_FROM_HANDLE(radv_image, image, bind->image);

	for (uint32_t i = 0; i < bind->bindCount; ++i) {
		struct radv_device_memory *mem = NULL;

		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

		device->ws->buffer_virtual_bind(image->bo,
						bind->pBinds[i].resourceOffset,
						bind->pBinds[i].size,
						mem ? mem->bo : NULL,
						bind->pBinds[i].memoryOffset);
	}
}

VkResult radv_QueueBindSparse(
	VkQueue _queue,
	uint32_t bindInfoCount,
	const VkBindSparseInfo* pBindInfo,
	VkFence _fence)
{
	RADV_FROM_HANDLE(radv_fence, fence, _fence);
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
	bool fence_emitted = false;

	for (uint32_t i = 0; i < bindInfoCount; ++i) {
		for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
			radv_sparse_buffer_bind_memory(queue->device,
						       pBindInfo[i].pBufferBinds + j);
		}

		for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
			radv_sparse_image_opaque_bind_memory(queue->device,
							     pBindInfo[i].pImageOpaqueBinds + j);
		}

		if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
			queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
						     &queue->device->empty_cs[queue->queue_family_index],
						     1, NULL, NULL,
						     (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
						     pBindInfo[i].waitSemaphoreCount,
						     (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
						     pBindInfo[i].signalSemaphoreCount,
						     false, base_fence);
			fence_emitted = true;
			if (fence)
				fence->submitted = true;
		}
	}

	if (fence && !fence_emitted) {
		fence->signalled = true;
	}

	return VK_SUCCESS;
}

VkResult radv_CreateFence(
	VkDevice _device,
	const VkFenceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkFence* pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memset(fence, 0, sizeof(*fence));
	fence->submitted = false;
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->fence = device->ws->create_fence();
	if (!fence->fence) {
		vk_free2(&device->alloc, pAllocator, fence);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

void radv_DestroyFence(
	VkDevice _device,
	VkFence _fence,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;
	device->ws->destroy_fence(fence->fence);
	vk_free2(&device->alloc, pAllocator, fence);
}

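/* Convert a relative timeout into an absolute CLOCK_MONOTONIC deadline in
 * nanoseconds, clamping so that current_time + timeout cannot overflow
 * (Vulkan uses UINT64_MAX to mean "wait forever"). */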
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time;
	struct timespec tv;

	clock_gettime(CLOCK_MONOTONIC, &tv);
	current_time = tv.tv_nsec + tv.tv_sec * 1000000000ull;

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}

VkResult radv_WaitForFences(
	VkDevice _device,
	uint32_t fenceCount,
	const VkFence* pFences,
	VkBool32 waitAll,
	uint64_t timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	timeout = radv_get_absolute_timeout(timeout);

	if (!waitAll && fenceCount > 1) {
		fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
	}

	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool signaled = false;

		if (fence->signalled)
			continue;

		if (!fence->submitted)
			return VK_TIMEOUT;

		signaled = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
		if (!signaled)
			return VK_TIMEOUT;

		fence->signalled = true;
	}

	return VK_SUCCESS;
}

VkResult radv_ResetFences(VkDevice device,
			  uint32_t fenceCount,
			  const VkFence *pFences)
{
	for (unsigned i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		fence->submitted = fence->signalled = false;
	}

	return VK_SUCCESS;
}

VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (fence->signalled)
		return VK_SUCCESS;
	if (!fence->submitted)
		return VK_NOT_READY;

	if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
		return VK_NOT_READY;

	return VK_SUCCESS;
}


// Queue semaphore functions

VkResult radv_CreateSemaphore(
	VkDevice _device,
	const VkSemaphoreCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkSemaphore* pSemaphore)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_winsys_sem *sem;

	sem = device->ws->create_sem(device->ws);
	if (!sem)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	*pSemaphore = radeon_winsys_sem_to_handle(sem);
	return VK_SUCCESS;
}

void radv_DestroySemaphore(
	VkDevice _device,
	VkSemaphore _semaphore,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
	if (!_semaphore)
		return;

	device->ws->destroy_sem(sem);
}

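/* Events are backed by an 8-byte, CPU-mapped GTT buffer. Both the host (via
 * the entrypoints below) and the GPU (via the vkCmdSetEvent/vkCmdResetEvent
 * paths) read and write the same qword: 1 means set, 0 means reset. */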
VkResult radv_CreateEvent(
	VkDevice _device,
	const VkEventCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkEvent* pEvent)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*event), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!event)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	event->bo = device->ws->buffer_create(device->ws, 8, 8,
					      RADEON_DOMAIN_GTT,
					      RADEON_FLAG_CPU_ACCESS);
	if (!event->bo) {
		vk_free2(&device->alloc, pAllocator, event);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}

	event->map = (uint64_t*)device->ws->buffer_map(event->bo);

	*pEvent = radv_event_to_handle(event);

	return VK_SUCCESS;
}

void radv_DestroyEvent(
	VkDevice _device,
	VkEvent _event,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (!event)
		return;
	device->ws->buffer_destroy(event->bo);
	vk_free2(&device->alloc, pAllocator, event);
}

VkResult radv_GetEventStatus(
	VkDevice _device,
	VkEvent _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (*event->map == 1)
		return VK_EVENT_SET;
	return VK_EVENT_RESET;
}

VkResult radv_SetEvent(
	VkDevice _device,
	VkEvent _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 1;

	return VK_SUCCESS;
}

VkResult radv_ResetEvent(
	VkDevice _device,
	VkEvent _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 0;

	return VK_SUCCESS;
}

VkResult radv_CreateBuffer(
	VkDevice _device,
	const VkBufferCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkBuffer* pBuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer *buffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

	buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (buffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	buffer->size = pCreateInfo->size;
	buffer->usage = pCreateInfo->usage;
	buffer->bo = NULL;
	buffer->offset = 0;
	buffer->flags = pCreateInfo->flags;

	if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
		buffer->bo = device->ws->buffer_create(device->ws,
						       align64(buffer->size, 4096),
						       4096, 0, RADEON_FLAG_VIRTUAL);
		if (!buffer->bo) {
			vk_free2(&device->alloc, pAllocator, buffer);
			return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
		}
	}

	*pBuffer = radv_buffer_to_handle(buffer);

	return VK_SUCCESS;
}

void radv_DestroyBuffer(
	VkDevice _device,
	VkBuffer _buffer,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (!buffer)
		return;

	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
		device->ws->buffer_destroy(buffer->bo);

	vk_free2(&device->alloc, pAllocator, buffer);
}

static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
	if (stencil)
		return image->surface.u.legacy.stencil_tiling_index[level];
	else
		return image->surface.u.legacy.tiling_index[level];
}

static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
{
	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
}

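/* Fill the CB_COLOR* register state for a color attachment. GFX9 addresses
 * the surface through a swizzle mode plus epitch, while older chips (SI-VI)
 * use per-level tile mode indices and explicit pitch/slice sizes; both paths
 * also wire up the CMASK/FMASK/DCC metadata surfaces used for compression
 * and fast clears. */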
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	uint64_t va;
	const struct radeon_surf *surf = &iview->image->surface;

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		struct gfx9_surf_meta_flags meta;
		if (iview->image->dcc_offset)
			meta = iview->image->surface.u.gfx9.dcc;
		else
			meta = iview->image->surface.u.gfx9.cmask;

		cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
			S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
			S_028C74_RB_ALIGNED(meta.rb_aligned) |
			S_028C74_PIPE_ALIGNED(meta.pipe_aligned);

		va += iview->image->surface.u.gfx9.surf_offset >> 8;
	} else {
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

		va += level_info->offset;

		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
		tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);

		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
		cb->micro_tile_mode = iview->image->surface.micro_tile_mode;

		if (iview->image->fmask.size) {
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (device->physical_device->rad_info.chip_class >= CIK)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}
	}

	cb->cb_color_base = va >> 8;

	/* CMASK variables */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;

	uint32_t max_slice = radv_surface_layer_count(iview);
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);

	if (iview->image->info.samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->info.samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (iview->image->fmask.size) {
		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		cb->cb_color_fmask = va >> 8;
	} else {
		cb->cb_color_fmask = cb->cb_color_base;
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	 * 8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (iview->image->info.samples > 1 && iview->image->fmask.size)
		cb->cb_color_info |= S_028C70_COMPRESSION(1);

	if (iview->image->cmask.size &&
	    !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	if (device->physical_device->rad_info.chip_class >= VI) {
		unsigned max_uncompressed_block_size = 2;
		if (iview->image->info.samples > 1) {
			if (iview->image->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (iview->image->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
			S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!iview->image->fmask.size &&
	    device->physical_device->rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		uint32_t max_slice = radv_surface_layer_count(iview);
		unsigned mip0_depth = iview->base_layer + max_slice - 1;

		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
			S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
			S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
			S_028C68_MAX_MIP(iview->image->info.levels);

		cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
	}
}

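/* Fill the DB_* register state for a depth/stencil attachment. The initial
 * switch picks the polygon offset format bits to match the depth format's
 * precision (24-bit unorm, 16-bit unorm or 32-bit float); the rest mirrors
 * the color path, with an HTILE metadata surface taking the place of
 * CMASK/DCC. */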
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	memset(ds, 0, sizeof(*ds));
	switch (iview->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->vk_format);
	stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	uint32_t max_slice = radv_surface_layer_count(iview);
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);

	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		assert(iview->image->surface.u.gfx9.surf_offset == 0);
		s_offs += iview->image->surface.u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);

		ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
		ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
		ds->db_depth_view |= S_028008_MIPID(level);

		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		/* Only use HTILE for the first level. */
		if (iview->image->surface.htile_size && !level) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
				S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
		}
	} else {
		const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];

		if (stencil_only)
			level_info = &iview->image->surface.u.legacy.stencil_level[level];

		z_offs += iview->image->surface.u.legacy.level[level].offset;
		s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= CIK) {
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
			unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
			unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(iview->image, level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
		}

		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (iview->image->surface.htile_size && !level) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
				iview->image->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
		}
	}

	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

VkResult radv_CreateFramebuffer(
	VkDevice _device,
	const VkFramebufferCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkFramebuffer* pFramebuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_framebuffer *framebuffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

	size_t size = sizeof(*framebuffer) +
		sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
	framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
				VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (framebuffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	framebuffer->attachment_count = pCreateInfo->attachmentCount;
	framebuffer->width = pCreateInfo->width;
	framebuffer->height = pCreateInfo->height;
	framebuffer->layers = pCreateInfo->layers;
	for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
		VkImageView _iview = pCreateInfo->pAttachments[i];
		struct radv_image_view *iview = radv_image_view_from_handle(_iview);
		framebuffer->attachments[i].attachment = iview;
		if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
			radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
		} else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
		}
		framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
		framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
		framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
	}

	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
	return VK_SUCCESS;
}

void radv_DestroyFramebuffer(
	VkDevice _device,
	VkFramebuffer _fb,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

	if (!fb)
		return;
	vk_free2(&device->alloc, pAllocator, fb);
}

static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	default:
		unreachable("illegal tex wrap mode");
		break;
	}
}

static unsigned
radv_tex_compare(VkCompareOp op)
{
	switch (op) {
	case VK_COMPARE_OP_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case VK_COMPARE_OP_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case VK_COMPARE_OP_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case VK_COMPARE_OP_LESS_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case VK_COMPARE_OP_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case VK_COMPARE_OP_NOT_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case VK_COMPARE_OP_GREATER_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case VK_COMPARE_OP_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		unreachable("illegal compare mode");
		break;
	}
}

static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
					V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
					V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter\n");
		return 0;
	}
}

static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
	switch (mode) {
	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}

static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
	switch (bcolor) {
	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	default:
		break;
	}
	return 0;
}

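/* Map a maxAnisotropy value to the hardware's log2 ratio field:
 * 0 = 1x, 1 = 2x, 2 = 4x, 3 = 8x, 4 = 16x (the hardware maximum). */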
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
		return 3;
	return 4;
}

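/* The four sampler state dwords built below correspond to the hardware
 * SQ_IMG_SAMP sampler descriptor words: word0 holds addressing and
 * depth-compare state, word1 the LOD range, word2 the LOD bias and filters,
 * and word3 the border color selection. */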
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
			     (uint32_t) pCreateInfo->maxAnisotropy : 0;
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);

	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi));
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0) |
			     S_008F38_DISABLE_LSB_CEIL(1) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}

VkResult radv_CreateSampler(
	VkDevice _device,
	const VkSamplerCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkSampler* pSampler)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_sampler *sampler;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

	sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sampler)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_init_sampler(device, sampler, pCreateInfo);
	*pSampler = radv_sampler_to_handle(sampler);

	return VK_SUCCESS;
}

void radv_DestroySampler(
	VkDevice _device,
	VkSampler _sampler,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

	if (!sampler)
		return;
	vk_free2(&device->alloc, pAllocator, sampler);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	 * What follows is a condensed summary, to help you navigate the large and
	 * confusing official doc.
	 *
	 *    - Loader interface v0 is incompatible with later versions. We don't
	 *      support it.
	 *
	 *    - In loader interface v1:
	 *        - The first ICD entrypoint called by the loader is
	 *          vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	 *          entrypoint.
	 *        - The ICD must statically expose no other Vulkan symbol unless it is
	 *          linked with -Bsymbolic.
	 *        - Each dispatchable Vulkan handle created by the ICD must be
	 *          a pointer to a struct whose first member is VK_LOADER_DATA. The
	 *          ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	 *        - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	 *          vkDestroySurfaceKHR(). The ICD must be capable of working with
	 *          such loader-managed surfaces.
	 *
	 *    - Loader interface v2 differs from v1 in:
	 *        - The first ICD entrypoint called by the loader is
	 *          vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	 *          statically expose this entrypoint.
	 *
	 *    - Loader interface v3 differs from v2 in:
	 *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
	 *          because the loader no longer does so.
	 */
	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
	return VK_SUCCESS;
}

VkResult radv_GetMemoryFdKHX(VkDevice _device,
			     VkDeviceMemory _memory,
			     VkExternalMemoryHandleTypeFlagsKHX handleType,
			     int *pFD)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, memory, _memory);

	/* We support only one handle type. */
	assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);

	bool ret = radv_get_memory_fd(device, memory, pFD);
	if (ret == false)
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	return VK_SUCCESS;
}

VkResult radv_GetMemoryFdPropertiesKHX(VkDevice _device,
				       VkExternalMemoryHandleTypeFlagBitsKHX handleType,
				       int fd,
				       VkMemoryFdPropertiesKHX *pMemoryFdProperties)
{
	/* The valid usage section for this function says:
	 *
	 *    "handleType must not be one of the handle types defined as opaque."
	 *
	 * Since we only handle opaque handles for now, there are no FD properties.
	 */
	return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
}