vulkan/wsi: Do image creation in common code
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"

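/* The pipeline cache UUID is built from everything that can invalidate
 * compiled shaders: a Mesa build timestamp (4 bytes), an LLVM build
 * timestamp (4 bytes), the GPU family (2 bytes) and the literal string
 * "radv" in the remaining bytes.
 */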
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static const char *
get_chip_name(enum radeon_family family)
{
	switch (family) {
	case CHIP_TAHITI: return "AMD RADV TAHITI";
	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
	case CHIP_VERDE: return "AMD RADV CAPE VERDE";
	case CHIP_OLAND: return "AMD RADV OLAND";
	case CHIP_HAINAN: return "AMD RADV HAINAN";
	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
	case CHIP_KAVERI: return "AMD RADV KAVERI";
	case CHIP_KABINI: return "AMD RADV KABINI";
	case CHIP_HAWAII: return "AMD RADV HAWAII";
	case CHIP_MULLINS: return "AMD RADV MULLINS";
	case CHIP_TONGA: return "AMD RADV TONGA";
	case CHIP_ICELAND: return "AMD RADV ICELAND";
	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
	case CHIP_FIJI: return "AMD RADV FIJI";
	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
	case CHIP_STONEY: return "AMD RADV STONEY";
	case CHIP_VEGA10: return "AMD RADV VEGA";
	case CHIP_RAVEN: return "AMD RADV RAVEN";
	default: return "AMD RADV unknown";
	}
}

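/* Carve the GPU memory up into at most three Vulkan heaps: CPU-invisible
 * VRAM, CPU-visible VRAM and GTT. One memory type is then exposed per
 * heap/caching combination; the order below fixes the type indices that
 * applications see.
 */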
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
					  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}

	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	device->name = get_chip_name(device->rad_info.family);

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The GPU id is already embedded in the uuid, so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	radv_physical_device_init_mem_types(device);

	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		/* Force enable LLVM sisched for Talos because it looks safe
		 * and it gives a few more FPS.
		 */
		instance->perftest_flags |= RADV_PERFTEST_SISCHED;
	}
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_instance_extension_supported(ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
		/* Disable sisched when the user requests it. This is mostly
		 * useful when the driver force-enables sisched for the given
		 * application.
		 */
		instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
	}

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								     : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = false,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2KHR *pFeatures)
{
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
			VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		default:
			break;
		}
	}
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a
	 * signed 32-bit int, so the sum of all limits scaled by descriptor
	 * size has to be at most 2 GiB. A combined image & sampler object
	 * counts as one of each. This limit is for the pipeline layout, not
	 * for the set layout, but there is no set limit, so we just set a
	 * pipeline limit. I don't think any app is going to hit this soon.
	 */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
		(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
		 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
		 32 /* sampler, largest when combined with image */ +
		 64 /* sampled image */ +
		 64 /* storage image */);
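	/* With MAX_DYNAMIC_BUFFERS == 16 (its value in radv_private.h at the
	 * time of writing), this works out to (2^31 - 256) / 224, i.e.
	 * roughly 9.5 million descriptors per stage.
	 */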

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
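		/* clock_crystal_freq is in kHz, so this is the tick period in
		 * nanoseconds; e.g. a 25000 kHz reference clock (an
		 * illustrative value) would give 1000000 / 25000 = 40 ns.
		 */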
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties2KHR *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
			VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
				(VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		default:
			break;
		}
	}
}

static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device* pdevice,
	uint32_t* pCount,
	VkQueueFamilyProperties** pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT |
				      VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
					      VK_QUEUE_TRANSFER_BIT |
					      VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

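/* The entry points below pass an array of up to three per-family pointers;
 * radv currently exposes at most two queue families (general, plus an async
 * compute family on CIK+ unless it is disabled), so three slots suffice.
 */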
void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties* pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch (pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

static VkResult
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
	if (queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice physicalDevice,
	const VkDeviceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDevice* pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	bool keep_shader_info = false;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_physical_device_extension_supported(physical_device, ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);

		if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
			keep_shader_info = true;
	}

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++) {
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
			    sizeof(*device), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	mtx_init(&device->shader_slab_mutex, mtx_plain);
	list_inithead(&device->shader_slabs);

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8,
					       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of
	 * CUs. We can decrease the constant to decrease the scratch buffer
	 * size.
	 *
	 * scratch_waves must be >= the maximum possible size of one
	 * threadgroup, so that the hw doesn't hang from being unable to start
	 * any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);
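	/* For example, a chip with 64 compute units gets 32 * 64 = 2048
	 * scratch waves; the MAX2() guarantees at least 2048 / 64 = 32 waves,
	 * i.e. enough for one maximally sized threadgroup.
	 */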

	radv_device_init_gs_info(device);

	device->tess_offchip_block_dw_size =
		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
	device->has_distributed_tess =
		device->physical_device->rad_info.chip_class >= VI &&
		device->physical_device->rad_info.max_se >= 2;

	if (getenv("RADV_TRACE_FILE")) {
		keep_shader_info = true;

		if (!radv_init_trace(device))
			goto fail;
	}

	device->keep_shader_info = keep_shader_info;

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

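	/* Build a minimal command stream per queue family so there is always
	 * something valid to submit: the GFX ring gets a CONTEXT_CONTROL
	 * packet to enable state load/shadow, while the compute ring, which
	 * has no context state, just gets a NOP.
	 */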
	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (device->physical_device->rad_info.chip_class >= CIK)
		cik_create_gfx_config(device);

	VkPipelineCacheCreateInfo ci;
	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
	ci.pNext = NULL;
	ci.flags = 0;
	ci.pInitialData = NULL;
	ci.initialDataSize = 0;
	VkPipelineCache pc;
	result = radv_CreatePipelineCache(radv_device_to_handle(device),
					  &ci, NULL, &pc);
	if (result != VK_SUCCESS)
		goto fail;

	device->mem_cache = radv_pipeline_cache_from_handle(pc);

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice _device,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice _device,
	uint32_t queueFamilyIndex,
	uint32_t queueIndex,
	VkQueue* pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

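/* Layout of the descriptor BO filled in below: map[0..1] holds the scratch
 * buffer resource words, and map[4] onwards holds six 4-dword buffer
 * descriptors (the ES and GS views of the ES->GS ring, the VS and GS views
 * of the GS->VS ring, the tess factor ring and the tess offchip ring),
 * optionally followed by the 1x/2x/4x/8x/16x sample position tables.
 */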
static void
fill_geom_tess_rings(struct radv_queue *queue,
		     uint32_t *map,
		     bool add_sample_positions,
		     uint32_t esgs_ring_size,
		     struct radeon_winsys_bo *esgs_ring_bo,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_bo *gsvs_ring_bo,
		     uint32_t tess_factor_ring_size,
		     struct radeon_winsys_bo *tess_factor_ring_bo,
		     uint32_t tess_offchip_ring_size,
		     struct radeon_winsys_bo *tess_offchip_ring_bo)
{
	uint64_t esgs_va = 0, gsvs_va = 0;
	uint64_t tess_factor_va = 0, tess_offchip_va = 0;
	uint32_t *desc = &map[4];

	if (esgs_ring_bo)
		esgs_va = radv_buffer_get_va(esgs_ring_bo);
	if (gsvs_ring_bo)
		gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
	if (tess_factor_ring_bo)
		tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
	if (tess_offchip_ring_bo)
		tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	 * index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(3) |
		  S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	 * index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	 * index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	 * elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(1) |
		  S_008F0C_ADD_TID_ENABLE(true);
	desc += 4;

	desc[0] = tess_factor_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_factor_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	desc[0] = tess_offchip_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_offchip_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* add sample positions after all rings */
	if (add_sample_positions) {
		memcpy(desc, queue->device->sample_locations_1x, 8);
		desc += 2;
		memcpy(desc, queue->device->sample_locations_2x, 16);
		desc += 4;
		memcpy(desc, queue->device->sample_locations_4x, 32);
		desc += 8;
		memcpy(desc, queue->device->sample_locations_8x, 64);
		desc += 16;
		memcpy(desc, queue->device->sample_locations_16x, 128);
	}
}

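/* Compute the VGT_HS_OFFCHIP_PARAM value: how many offchip tessellation
 * buffers are available and at what granularity. The raw buffer count is
 * also returned through max_offchip_buffers_p so the caller can size the
 * offchip ring.
 */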
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
		device->physical_device->rad_info.family != CHIP_CARRIZO &&
		device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
		device->physical_device->rad_info.max_se;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;
	switch (device->tess_offchip_block_dw_size) {
	default:
		assert(0);
		/* fall through */
	case 8192:
		offchip_granularity = V_03093C_X_8K_DWORDS;
		break;
	case 4096:
		offchip_granularity = V_03093C_X_4K_DWORDS;
		break;
	}

	switch (device->physical_device->rad_info.chip_class) {
	case SI:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case CIK:
	case VI:
	case GFX9:
	default:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		if (device->physical_device->rad_info.chip_class >= VI)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}

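/* Build up to three preamble command streams for the queue: one that starts
 * with a full cache flush, one regular initial preamble and one continue
 * preamble, (re)allocating the scratch and ring buffers first whenever the
 * new requirements exceed what the queue already owns.
 */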
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
		     uint32_t scratch_size,
		     uint32_t compute_scratch_size,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     bool needs_tess_rings,
		     bool needs_sample_positions,
		     struct radeon_winsys_cs **initial_full_flush_preamble_cs,
		     struct radeon_winsys_cs **initial_preamble_cs,
		     struct radeon_winsys_cs **continue_preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
	struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
	struct radeon_winsys_cs *dest_cs[3] = {0};
	bool add_tess_rings = false, add_sample_positions = false;
	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
	unsigned max_offchip_buffers;
	unsigned hs_offchip_param = 0;
	uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
	if (!queue->has_tess_rings) {
		if (needs_tess_rings)
			add_tess_rings = true;
	}
	if (!queue->has_sample_positions) {
		if (needs_sample_positions)
			add_sample_positions = true;
	}
	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
						     &max_offchip_buffers);
	tess_offchip_ring_size = max_offchip_buffers *
		queue->device->tess_offchip_block_dw_size * 4;

	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size &&
	    !add_tess_rings && !add_sample_positions &&
	    queue->initial_preamble_cs) {
		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
		*initial_preamble_cs = queue->initial_preamble_cs;
		*continue_preamble_cs = queue->continue_preamble_cs;
		if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
		return VK_SUCCESS;
	}

	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
							      scratch_size,
							      4096,
							      RADEON_DOMAIN_VRAM,
							      ring_bo_flags);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
								      compute_scratch_size,
								      4096,
								      RADEON_DOMAIN_VRAM,
								      ring_bo_flags);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (add_tess_rings) {
		tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								       tess_factor_ring_size,
								       256,
								       RADEON_DOMAIN_VRAM,
								       ring_bo_flags);
		if (!tess_factor_ring_bo)
			goto fail;
		tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
									tess_offchip_ring_size,
									256,
									RADEON_DOMAIN_VRAM,
									ring_bo_flags);
		if (!tess_offchip_ring_bo)
			goto fail;
	} else {
		tess_factor_ring_bo = queue->tess_factor_ring_bo;
		tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
	}

	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo ||
	    tess_factor_ring_bo != queue->tess_factor_ring_bo ||
	    tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo ||
		    tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
			size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
			if (add_sample_positions)
				size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
		} else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
								 size,
								 4096,
								 RADEON_DOMAIN_VRAM,
								 RADEON_FLAG_CPU_ACCESS |
								 RADEON_FLAG_NO_INTERPROCESS_SHARING);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

1582 for(int i = 0; i < 3; ++i) {
1583 struct radeon_winsys_cs *cs = NULL;
1584 cs = queue->device->ws->cs_create(queue->device->ws,
1585 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1586 if (!cs)
1587 goto fail;
1588
1589 dest_cs[i] = cs;
1590
1591 if (scratch_bo)
1592 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1593
1594 if (esgs_ring_bo)
1595 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1596
1597 if (gsvs_ring_bo)
1598 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1599
1600 if (tess_factor_ring_bo)
1601 radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
1602
1603 if (tess_offchip_ring_bo)
1604 radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
1605
1606 if (descriptor_bo)
1607 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1608
1609 if (descriptor_bo != queue->descriptor_bo) {
1610 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1611
1612 if (scratch_bo) {
1613 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1614 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1615 S_008F04_SWIZZLE_ENABLE(1);
1616 map[0] = scratch_va;
1617 map[1] = rsrc1;
1618 }
1619
1620 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1621 add_sample_positions)
1622 fill_geom_tess_rings(queue, map, add_sample_positions,
1623 esgs_ring_size, esgs_ring_bo,
1624 gsvs_ring_size, gsvs_ring_bo,
1625 tess_factor_ring_size, tess_factor_ring_bo,
1626 tess_offchip_ring_size, tess_offchip_ring_bo);
1627
1628 queue->device->ws->buffer_unmap(descriptor_bo);
1629 }
1630
1631 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1632 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1633 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1634 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1635 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1636 }
1637
1638 if (esgs_ring_bo || gsvs_ring_bo) {
1639 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1640 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1641 radeon_emit(cs, esgs_ring_size >> 8);
1642 radeon_emit(cs, gsvs_ring_size >> 8);
1643 } else {
1644 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1645 radeon_emit(cs, esgs_ring_size >> 8);
1646 radeon_emit(cs, gsvs_ring_size >> 8);
1647 }
1648 }
1649
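/* Program the tess factor ring size and base address; GFX9 needs the upper
 * address bits in a separate register. */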
1650 if (tess_factor_ring_bo) {
1651 uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1652 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1653 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1654 S_030938_SIZE(tess_factor_ring_size / 4));
1655 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1656 tf_va >> 8);
1657 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1658 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1659 tf_va >> 40);
1660 }
1661 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1662 } else {
1663 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1664 S_008988_SIZE(tess_factor_ring_size / 4));
1665 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1666 tf_va >> 8);
1667 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1668 hs_offchip_param);
1669 }
1670 }
1671
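/* Make the ring descriptor BO address available to every shader stage via
 * its first two USER_DATA sgprs. GFX9 merges LS into HS and ES into GS,
 * hence the shorter register list. */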
1672 if (descriptor_bo) {
1673 uint64_t va = radv_buffer_get_va(descriptor_bo);
1674 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1675 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1676 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1677 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1678 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1679
1680 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1681 radeon_set_sh_reg_seq(cs, regs[i], 2);
1682 radeon_emit(cs, va);
1683 radeon_emit(cs, va >> 32);
1684 }
1685 } else {
1686 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1687 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1688 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1689 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1690 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1691 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1692
1693 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1694 radeon_set_sh_reg_seq(cs, regs[i], 2);
1695 radeon_emit(cs, va);
1696 radeon_emit(cs, va >> 32);
1697 }
1698 }
1699 }
1700
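/* Compute scratch uses its own buffer descriptor, passed through
 * COMPUTE_USER_DATA_0/1. */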
1701 if (compute_scratch_bo) {
1702 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1703 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1704 S_008F04_SWIZZLE_ENABLE(1);
1705
1706 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1707
1708 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1709 radeon_emit(cs, scratch_va);
1710 radeon_emit(cs, rsrc1);
1711 }
1712
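/* Preamble 0 performs the heavyweight partial flushes plus cache
 * invalidation, preamble 1 only invalidates caches, and the continue
 * preamble (i == 2) emits no flush at all. */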
1713 if (i == 0) {
1714 si_cs_emit_cache_flush(cs,
1715 false,
1716 queue->device->physical_device->rad_info.chip_class,
1717 NULL, 0,
1718 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1719 queue->device->physical_device->rad_info.chip_class >= CIK,
1720 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1721 RADV_CMD_FLAG_INV_ICACHE |
1722 RADV_CMD_FLAG_INV_SMEM_L1 |
1723 RADV_CMD_FLAG_INV_VMEM_L1 |
1724 RADV_CMD_FLAG_INV_GLOBAL_L2);
1725 } else if (i == 1) {
1726 si_cs_emit_cache_flush(cs,
1727 false,
1728 queue->device->physical_device->rad_info.chip_class,
1729 NULL, 0,
1730 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1731 queue->device->physical_device->rad_info.chip_class >= CIK,
1732 RADV_CMD_FLAG_INV_ICACHE |
1733 RADV_CMD_FLAG_INV_SMEM_L1 |
1734 RADV_CMD_FLAG_INV_VMEM_L1 |
1735 RADV_CMD_FLAG_INV_GLOBAL_L2);
1736 }
1737
1738 if (!queue->device->ws->cs_finalize(cs))
1739 goto fail;
1740 }
1741
1742 if (queue->initial_full_flush_preamble_cs)
1743 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1744
1745 if (queue->initial_preamble_cs)
1746 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1747
1748 if (queue->continue_preamble_cs)
1749 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1750
1751 queue->initial_full_flush_preamble_cs = dest_cs[0];
1752 queue->initial_preamble_cs = dest_cs[1];
1753 queue->continue_preamble_cs = dest_cs[2];
1754
1755 if (scratch_bo != queue->scratch_bo) {
1756 if (queue->scratch_bo)
1757 queue->device->ws->buffer_destroy(queue->scratch_bo);
1758 queue->scratch_bo = scratch_bo;
1759 queue->scratch_size = scratch_size;
1760 }
1761
1762 if (compute_scratch_bo != queue->compute_scratch_bo) {
1763 if (queue->compute_scratch_bo)
1764 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1765 queue->compute_scratch_bo = compute_scratch_bo;
1766 queue->compute_scratch_size = compute_scratch_size;
1767 }
1768
1769 if (esgs_ring_bo != queue->esgs_ring_bo) {
1770 if (queue->esgs_ring_bo)
1771 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1772 queue->esgs_ring_bo = esgs_ring_bo;
1773 queue->esgs_ring_size = esgs_ring_size;
1774 }
1775
1776 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1777 if (queue->gsvs_ring_bo)
1778 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1779 queue->gsvs_ring_bo = gsvs_ring_bo;
1780 queue->gsvs_ring_size = gsvs_ring_size;
1781 }
1782
1783 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1784 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1785 }
1786
1787 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1788 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1789 queue->has_tess_rings = true;
1790 }
1791
1792 if (descriptor_bo != queue->descriptor_bo) {
1793 if (queue->descriptor_bo)
1794 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1795
1796 queue->descriptor_bo = descriptor_bo;
1797 }
1798
1799 if (add_sample_positions)
1800 queue->has_sample_positions = true;
1801
1802 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1803 *initial_preamble_cs = queue->initial_preamble_cs;
1804 *continue_preamble_cs = queue->continue_preamble_cs;
1805 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1806 *continue_preamble_cs = NULL;
1807 return VK_SUCCESS;
1808 fail:
1809 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1810 if (dest_cs[i])
1811 queue->device->ws->cs_destroy(dest_cs[i]);
1812 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1813 queue->device->ws->buffer_destroy(descriptor_bo);
1814 if (scratch_bo && scratch_bo != queue->scratch_bo)
1815 queue->device->ws->buffer_destroy(scratch_bo);
1816 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1817 queue->device->ws->buffer_destroy(compute_scratch_bo);
1818 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1819 queue->device->ws->buffer_destroy(esgs_ring_bo);
1820 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1821 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1822 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1823 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1824 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1825 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1826 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1827 }
1828
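/* Split the given semaphores into kernel syncobjs and legacy winsys
 * semaphores so the winsys can handle them separately. Note that
 * reset_temp is currently unused. */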
1829 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1830 int num_sems,
1831 const VkSemaphore *sems,
1832 bool reset_temp)
1833 {
1834 int syncobj_idx = 0, sem_idx = 0;
1835
1836 if (num_sems == 0)
1837 return VK_SUCCESS;
1838 for (uint32_t i = 0; i < num_sems; i++) {
1839 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1840
1841 if (sem->temp_syncobj || sem->syncobj)
1842 counts->syncobj_count++;
1843 else
1844 counts->sem_count++;
1845 }
1846
1847 if (counts->syncobj_count) {
1848 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
1849 if (!counts->syncobj)
1850 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1851 }
1852
1853 if (counts->sem_count) {
1854 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
1855 if (!counts->sem) {
1856 free(counts->syncobj);
1857 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1858 }
1859 }
1860
1861 for (uint32_t i = 0; i < num_sems; i++) {
1862 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1863
1864 if (sem->temp_syncobj) {
1865 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
1866 }
1867 else if (sem->syncobj)
1868 counts->syncobj[syncobj_idx++] = sem->syncobj;
1869 else {
1870 assert(sem->sem);
1871 counts->sem[sem_idx++] = sem->sem;
1872 }
1873 }
1874
1875 return VK_SUCCESS;
1876 }
1877
1878 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
1879 {
1880 free(sem_info->wait.syncobj);
1881 free(sem_info->wait.sem);
1882 free(sem_info->signal.syncobj);
1883 free(sem_info->signal.sem);
1884 }
1885
1886
1887 static void radv_free_temp_syncobjs(struct radv_device *device,
1888 int num_sems,
1889 const VkSemaphore *sems)
1890 {
1891 for (uint32_t i = 0; i < num_sems; i++) {
1892 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1893
1894 if (sem->temp_syncobj) {
1895 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
1896 sem->temp_syncobj = 0;
1897 }
1898 }
1899 }
1900
1901 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
1902 int num_wait_sems,
1903 const VkSemaphore *wait_sems,
1904 int num_signal_sems,
1905 const VkSemaphore *signal_sems)
1906 {
1907 VkResult ret;
1908 memset(sem_info, 0, sizeof(*sem_info));
1909
1910 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
1911 if (ret)
1912 return ret;
1913 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
1914 if (ret)
1915 radv_free_sem_info(sem_info);
1916
1917 /* caller can override these */
1918 sem_info->cs_emit_wait = true;
1919 sem_info->cs_emit_signal = true;
1920 return ret;
1921 }
1922
1923 VkResult radv_QueueSubmit(
1924 VkQueue _queue,
1925 uint32_t submitCount,
1926 const VkSubmitInfo* pSubmits,
1927 VkFence _fence)
1928 {
1929 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1930 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1931 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1932 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1933 int ret;
1934 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1935 uint32_t scratch_size = 0;
1936 uint32_t compute_scratch_size = 0;
1937 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1938 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
1939 VkResult result;
1940 bool fence_emitted = false;
1941 bool tess_rings_needed = false;
1942 bool sample_positions_needed = false;
1943
1944 /* Do this first so failing to allocate scratch buffers can't result in
1945 * partially executed submissions. */
1946 for (uint32_t i = 0; i < submitCount; i++) {
1947 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1948 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1949 pSubmits[i].pCommandBuffers[j]);
1950
1951 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1952 compute_scratch_size = MAX2(compute_scratch_size,
1953 cmd_buffer->compute_scratch_size_needed);
1954 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1955 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1956 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1957 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1958 }
1959 }
1960
1961 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1962 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1963 sample_positions_needed, &initial_flush_preamble_cs,
1964 &initial_preamble_cs, &continue_preamble_cs);
1965 if (result != VK_SUCCESS)
1966 return result;
1967
1968 for (uint32_t i = 0; i < submitCount; i++) {
1969 struct radeon_winsys_cs **cs_array;
1970 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1971 bool can_patch = true;
1972 uint32_t advance;
1973 struct radv_winsys_sem_info sem_info;
1974
1975 result = radv_alloc_sem_info(&sem_info,
1976 pSubmits[i].waitSemaphoreCount,
1977 pSubmits[i].pWaitSemaphores,
1978 pSubmits[i].signalSemaphoreCount,
1979 pSubmits[i].pSignalSemaphores);
1980 if (result != VK_SUCCESS)
1981 return result;
1982
1983 if (!pSubmits[i].commandBufferCount) {
1984 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1985 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1986 &queue->device->empty_cs[queue->queue_family_index],
1987 1, NULL, NULL,
1988 &sem_info,
1989 false, base_fence);
1990 if (ret) {
1991 radv_loge("failed to submit CS %d\n", i);
1992 abort();
1993 }
1994 fence_emitted = true;
1995 }
1996 radv_free_sem_info(&sem_info);
1997 continue;
1998 }
1999
2000 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2001 (pSubmits[i].commandBufferCount));
if (!cs_array) {
radv_free_sem_info(&sem_info);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
2002
2003 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2004 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2005 pSubmits[i].pCommandBuffers[j]);
2006 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2007
2008 cs_array[j] = cmd_buffer->cs;
2009 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2010 can_patch = false;
2011 }
2012
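/* Chunk the command buffers: with a trace BO each CS is submitted
 * individually so a hang can be pinned to one CS. */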
2013 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2014 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2015 advance = MIN2(max_cs_submission,
2016 pSubmits[i].commandBufferCount - j);
2017
2018 if (queue->device->trace_bo)
2019 *queue->device->trace_id_ptr = 0;
2020
2021 sem_info.cs_emit_wait = j == 0;
2022 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2023
2024 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2025 advance, initial_preamble, continue_preamble_cs,
2026 &sem_info,
2027 can_patch, base_fence);
2028
2029 if (ret) {
2030 radv_loge("failed to submit CS %d\n", i);
2031 abort();
2032 }
2033 fence_emitted = true;
2034 if (queue->device->trace_bo) {
2035 radv_check_gpu_hangs(queue, cs_array[j]);
2036 }
2037 }
2038
2039 radv_free_temp_syncobjs(queue->device,
2040 pSubmits[i].waitSemaphoreCount,
2041 pSubmits[i].pWaitSemaphores);
2042 radv_free_sem_info(&sem_info);
2043 free(cs_array);
2044 }
2045
2046 if (fence) {
2047 if (!fence_emitted) {
2048 struct radv_winsys_sem_info sem_info = {0};
2049 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2050 &queue->device->empty_cs[queue->queue_family_index],
2051 1, NULL, NULL, &sem_info,
2052 false, base_fence);
2053 }
2054 fence->submitted = true;
2055 }
2056
2057 return VK_SUCCESS;
2058 }
2059
2060 VkResult radv_QueueWaitIdle(
2061 VkQueue _queue)
2062 {
2063 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2064
2065 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2066 radv_queue_family_to_ring(queue->queue_family_index),
2067 queue->queue_idx);
2068 return VK_SUCCESS;
2069 }
2070
2071 VkResult radv_DeviceWaitIdle(
2072 VkDevice _device)
2073 {
2074 RADV_FROM_HANDLE(radv_device, device, _device);
2075
2076 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2077 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2078 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2079 }
2080 }
2081 return VK_SUCCESS;
2082 }
2083
2084 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2085 VkInstance instance,
2086 const char* pName)
2087 {
2088 return radv_lookup_entrypoint(pName);
2089 }
2090
2091 /* The loader wants us to expose a second GetInstanceProcAddr function
2092 * to work around certain LD_PRELOAD issues seen in apps.
2093 */
2094 PUBLIC
2095 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2096 VkInstance instance,
2097 const char* pName);
2098
2099 PUBLIC
2100 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2101 VkInstance instance,
2102 const char* pName)
2103 {
2104 return radv_GetInstanceProcAddr(instance, pName);
2105 }
2106
2107 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2108 VkDevice device,
2109 const char* pName)
2110 {
2111 return radv_lookup_entrypoint(pName);
2112 }
2113
2114 bool radv_get_memory_fd(struct radv_device *device,
2115 struct radv_device_memory *memory,
2116 int *pFD)
2117 {
2118 struct radeon_bo_metadata metadata;
2119
2120 if (memory->image) {
2121 radv_init_metadata(device, memory->image, &metadata);
2122 device->ws->buffer_set_metadata(memory->bo, &metadata);
2123 }
2124
2125 return device->ws->buffer_get_fd(device->ws, memory->bo,
2126 pFD);
2127 }
2128
2129 VkResult radv_alloc_memory(VkDevice _device,
2130 const VkMemoryAllocateInfo* pAllocateInfo,
2131 const VkAllocationCallbacks* pAllocator,
2132 enum radv_mem_flags_bits mem_flags,
2133 VkDeviceMemory* pMem)
2134 {
2135 RADV_FROM_HANDLE(radv_device, device, _device);
2136 struct radv_device_memory *mem;
2137 VkResult result;
2138 enum radeon_bo_domain domain;
2139 uint32_t flags = 0;
2140 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2141
2142 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2143
2144 if (pAllocateInfo->allocationSize == 0) {
2145 /* Apparently, this is allowed */
2146 *pMem = VK_NULL_HANDLE;
2147 return VK_SUCCESS;
2148 }
2149
2150 const VkImportMemoryFdInfoKHR *import_info =
2151 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2152 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2153 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2154
2155 const struct wsi_memory_allocate_info *wsi_info =
2156 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2157
2158 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2159 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2160 if (mem == NULL)
2161 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2162
2163 if (wsi_info && wsi_info->implicit_sync)
2164 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2165
2166 if (dedicate_info) {
2167 mem->image = radv_image_from_handle(dedicate_info->image);
2168 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2169 } else {
2170 mem->image = NULL;
2171 mem->buffer = NULL;
2172 }
2173
2174 if (import_info) {
2175 assert(import_info->handleType ==
2176 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2177 import_info->handleType ==
2178 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2179 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2180 NULL, NULL);
2181 if (!mem->bo) {
2182 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2183 goto fail;
2184 } else {
2185 close(import_info->fd);
2186 goto out_success;
2187 }
2188 }
2189
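/* Derive the winsys domain and flags from the Vulkan memory type. */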
2190 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2191 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2192 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2193 domain = RADEON_DOMAIN_GTT;
2194 else
2195 domain = RADEON_DOMAIN_VRAM;
2196
2197 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2198 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2199 else
2200 flags |= RADEON_FLAG_CPU_ACCESS;
2201
2202 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2203 flags |= RADEON_FLAG_GTT_WC;
2204
2205 if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
2206 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2207
2208 if (!dedicate_info && !import_info)
2209 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2210
2211 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2212 domain, flags);
2213
2214 if (!mem->bo) {
2215 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2216 goto fail;
2217 }
2218 mem->type_index = mem_type_index;
2219 out_success:
2220 *pMem = radv_device_memory_to_handle(mem);
2221
2222 return VK_SUCCESS;
2223
2224 fail:
2225 vk_free2(&device->alloc, pAllocator, mem);
2226
2227 return result;
2228 }
2229
2230 VkResult radv_AllocateMemory(
2231 VkDevice _device,
2232 const VkMemoryAllocateInfo* pAllocateInfo,
2233 const VkAllocationCallbacks* pAllocator,
2234 VkDeviceMemory* pMem)
2235 {
2236 return radv_alloc_memory(_device, pAllocateInfo, pAllocator, 0, pMem);
2237 }
2238
2239 void radv_FreeMemory(
2240 VkDevice _device,
2241 VkDeviceMemory _mem,
2242 const VkAllocationCallbacks* pAllocator)
2243 {
2244 RADV_FROM_HANDLE(radv_device, device, _device);
2245 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2246
2247 if (mem == NULL)
2248 return;
2249
2250 device->ws->buffer_destroy(mem->bo);
2251 mem->bo = NULL;
2252
2253 vk_free2(&device->alloc, pAllocator, mem);
2254 }
2255
2256 VkResult radv_MapMemory(
2257 VkDevice _device,
2258 VkDeviceMemory _memory,
2259 VkDeviceSize offset,
2260 VkDeviceSize size,
2261 VkMemoryMapFlags flags,
2262 void** ppData)
2263 {
2264 RADV_FROM_HANDLE(radv_device, device, _device);
2265 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2266
2267 if (mem == NULL) {
2268 *ppData = NULL;
2269 return VK_SUCCESS;
2270 }
2271
2272 *ppData = device->ws->buffer_map(mem->bo);
2273 if (*ppData) {
2274 *ppData += offset;
2275 return VK_SUCCESS;
2276 }
2277
2278 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2279 }
2280
2281 void radv_UnmapMemory(
2282 VkDevice _device,
2283 VkDeviceMemory _memory)
2284 {
2285 RADV_FROM_HANDLE(radv_device, device, _device);
2286 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2287
2288 if (mem == NULL)
2289 return;
2290
2291 device->ws->buffer_unmap(mem->bo);
2292 }
2293
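/* radv only exposes host-visible memory types that are also host-coherent,
 * so flushing and invalidating mapped ranges are no-ops. */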
2294 VkResult radv_FlushMappedMemoryRanges(
2295 VkDevice _device,
2296 uint32_t memoryRangeCount,
2297 const VkMappedMemoryRange* pMemoryRanges)
2298 {
2299 return VK_SUCCESS;
2300 }
2301
2302 VkResult radv_InvalidateMappedMemoryRanges(
2303 VkDevice _device,
2304 uint32_t memoryRangeCount,
2305 const VkMappedMemoryRange* pMemoryRanges)
2306 {
2307 return VK_SUCCESS;
2308 }
2309
2310 void radv_GetBufferMemoryRequirements(
2311 VkDevice _device,
2312 VkBuffer _buffer,
2313 VkMemoryRequirements* pMemoryRequirements)
2314 {
2315 RADV_FROM_HANDLE(radv_device, device, _device);
2316 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2317
2318 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2319
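/* Sparse (virtual) buffers bind pages at 4 KiB granularity; plain buffers
 * use a conservative 16-byte alignment. */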
2320 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2321 pMemoryRequirements->alignment = 4096;
2322 else
2323 pMemoryRequirements->alignment = 16;
2324
2325 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2326 }
2327
2328 void radv_GetBufferMemoryRequirements2KHR(
2329 VkDevice device,
2330 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2331 VkMemoryRequirements2KHR* pMemoryRequirements)
2332 {
2333 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2334 &pMemoryRequirements->memoryRequirements);
2335 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2336 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2337 switch (ext->sType) {
2338 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2339 VkMemoryDedicatedRequirementsKHR *req =
2340 (VkMemoryDedicatedRequirementsKHR *) ext;
2341 req->requiresDedicatedAllocation = buffer->shareable;
2342 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2343 break;
2344 }
2345 default:
2346 break;
2347 }
2348 }
2349 }
2350
2351 void radv_GetImageMemoryRequirements(
2352 VkDevice _device,
2353 VkImage _image,
2354 VkMemoryRequirements* pMemoryRequirements)
2355 {
2356 RADV_FROM_HANDLE(radv_device, device, _device);
2357 RADV_FROM_HANDLE(radv_image, image, _image);
2358
2359 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2360
2361 pMemoryRequirements->size = image->size;
2362 pMemoryRequirements->alignment = image->alignment;
2363 }
2364
2365 void radv_GetImageMemoryRequirements2KHR(
2366 VkDevice device,
2367 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2368 VkMemoryRequirements2KHR* pMemoryRequirements)
2369 {
2370 radv_GetImageMemoryRequirements(device, pInfo->image,
2371 &pMemoryRequirements->memoryRequirements);
2372
2373 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2374
2375 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2376 switch (ext->sType) {
2377 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2378 VkMemoryDedicatedRequirementsKHR *req =
2379 (VkMemoryDedicatedRequirementsKHR *) ext;
2380 req->requiresDedicatedAllocation = image->shareable;
2381 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2382 break;
2383 }
2384 default:
2385 break;
2386 }
2387 }
2388 }
2389
2390 void radv_GetImageSparseMemoryRequirements(
2391 VkDevice device,
2392 VkImage image,
2393 uint32_t* pSparseMemoryRequirementCount,
2394 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2395 {
2396 stub();
2397 }
2398
2399 void radv_GetImageSparseMemoryRequirements2KHR(
2400 VkDevice device,
2401 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2402 uint32_t* pSparseMemoryRequirementCount,
2403 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2404 {
2405 stub();
2406 }
2407
2408 void radv_GetDeviceMemoryCommitment(
2409 VkDevice device,
2410 VkDeviceMemory memory,
2411 VkDeviceSize* pCommittedMemoryInBytes)
2412 {
2413 *pCommittedMemoryInBytes = 0;
2414 }
2415
2416 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2417 uint32_t bindInfoCount,
2418 const VkBindBufferMemoryInfoKHR *pBindInfos)
2419 {
2420 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2421 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2422 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2423
2424 if (mem) {
2425 buffer->bo = mem->bo;
2426 buffer->offset = pBindInfos[i].memoryOffset;
2427 } else {
2428 buffer->bo = NULL;
2429 }
2430 }
2431 return VK_SUCCESS;
2432 }
2433
2434 VkResult radv_BindBufferMemory(
2435 VkDevice device,
2436 VkBuffer buffer,
2437 VkDeviceMemory memory,
2438 VkDeviceSize memoryOffset)
2439 {
2440 const VkBindBufferMemoryInfoKHR info = {
2441 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2442 .buffer = buffer,
2443 .memory = memory,
2444 .memoryOffset = memoryOffset
2445 };
2446
2447 return radv_BindBufferMemory2KHR(device, 1, &info);
2448 }
2449
2450 VkResult radv_BindImageMemory2KHR(VkDevice device,
2451 uint32_t bindInfoCount,
2452 const VkBindImageMemoryInfoKHR *pBindInfos)
2453 {
2454 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2455 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2456 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2457
2458 if (mem) {
2459 image->bo = mem->bo;
2460 image->offset = pBindInfos[i].memoryOffset;
2461 } else {
2462 image->bo = NULL;
2463 image->offset = 0;
2464 }
2465 }
2466 return VK_SUCCESS;
2467 }
2468
2469
2470 VkResult radv_BindImageMemory(
2471 VkDevice device,
2472 VkImage image,
2473 VkDeviceMemory memory,
2474 VkDeviceSize memoryOffset)
2475 {
2476 const VkBindImageMemoryInfoKHR info = {
2477 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2478 .image = image,
2479 .memory = memory,
2480 .memoryOffset = memoryOffset
2481 };
2482
2483 return radv_BindImageMemory2KHR(device, 1, &info);
2484 }
2485
2486
2487 static void
2488 radv_sparse_buffer_bind_memory(struct radv_device *device,
2489 const VkSparseBufferMemoryBindInfo *bind)
2490 {
2491 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2492
2493 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2494 struct radv_device_memory *mem = NULL;
2495
2496 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2497 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2498
2499 device->ws->buffer_virtual_bind(buffer->bo,
2500 bind->pBinds[i].resourceOffset,
2501 bind->pBinds[i].size,
2502 mem ? mem->bo : NULL,
2503 bind->pBinds[i].memoryOffset);
2504 }
2505 }
2506
2507 static void
2508 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2509 const VkSparseImageOpaqueMemoryBindInfo *bind)
2510 {
2511 RADV_FROM_HANDLE(radv_image, image, bind->image);
2512
2513 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2514 struct radv_device_memory *mem = NULL;
2515
2516 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2517 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2518
2519 device->ws->buffer_virtual_bind(image->bo,
2520 bind->pBinds[i].resourceOffset,
2521 bind->pBinds[i].size,
2522 mem ? mem->bo : NULL,
2523 bind->pBinds[i].memoryOffset);
2524 }
2525 }
2526
2527 VkResult radv_QueueBindSparse(
2528 VkQueue _queue,
2529 uint32_t bindInfoCount,
2530 const VkBindSparseInfo* pBindInfo,
2531 VkFence _fence)
2532 {
2533 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2534 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2535 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2536 bool fence_emitted = false;
2537
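/* The binds are applied on the winsys immediately; a CS submission is only
 * needed to wait on/signal the semaphores and the fence. */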
2538 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2539 struct radv_winsys_sem_info sem_info;
2540 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2541 radv_sparse_buffer_bind_memory(queue->device,
2542 pBindInfo[i].pBufferBinds + j);
2543 }
2544
2545 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2546 radv_sparse_image_opaque_bind_memory(queue->device,
2547 pBindInfo[i].pImageOpaqueBinds + j);
2548 }
2549
2550 VkResult result;
2551 result = radv_alloc_sem_info(&sem_info,
2552 pBindInfo[i].waitSemaphoreCount,
2553 pBindInfo[i].pWaitSemaphores,
2554 pBindInfo[i].signalSemaphoreCount,
2555 pBindInfo[i].pSignalSemaphores);
2556 if (result != VK_SUCCESS)
2557 return result;
2558
2559 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2560 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2561 &queue->device->empty_cs[queue->queue_family_index],
2562 1, NULL, NULL,
2563 &sem_info,
2564 false, base_fence);
2565 fence_emitted = true;
2566 if (fence)
2567 fence->submitted = true;
2568 }
2569
2570 radv_free_sem_info(&sem_info);
2571
2572 }
2573
2574 if (fence && !fence_emitted) {
2575 fence->signalled = true;
2576 }
2577
2578 return VK_SUCCESS;
2579 }
2580
2581 VkResult radv_CreateFence(
2582 VkDevice _device,
2583 const VkFenceCreateInfo* pCreateInfo,
2584 const VkAllocationCallbacks* pAllocator,
2585 VkFence* pFence)
2586 {
2587 RADV_FROM_HANDLE(radv_device, device, _device);
2588 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2589 sizeof(*fence), 8,
2590 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2591
2592 if (!fence)
2593 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2594
2595 fence->submitted = false;
2596 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2597 fence->fence = device->ws->create_fence();
2598 if (!fence->fence) {
2599 vk_free2(&device->alloc, pAllocator, fence);
2600 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2601 }
2602
2603 *pFence = radv_fence_to_handle(fence);
2604
2605 return VK_SUCCESS;
2606 }
2607
2608 void radv_DestroyFence(
2609 VkDevice _device,
2610 VkFence _fence,
2611 const VkAllocationCallbacks* pAllocator)
2612 {
2613 RADV_FROM_HANDLE(radv_device, device, _device);
2614 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2615
2616 if (!fence)
2617 return;
2618 device->ws->destroy_fence(fence->fence);
2619 vk_free2(&device->alloc, pAllocator, fence);
2620 }
2621
2622 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2623 {
2624 uint64_t current_time;
2625 struct timespec tv;
2626
2627 clock_gettime(CLOCK_MONOTONIC, &tv);
2628 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2629
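/* Clamp so that current_time + timeout cannot overflow uint64_t. */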
2630 timeout = MIN2(UINT64_MAX - current_time, timeout);
2631
2632 return current_time + timeout;
2633 }
2634
2635 VkResult radv_WaitForFences(
2636 VkDevice _device,
2637 uint32_t fenceCount,
2638 const VkFence* pFences,
2639 VkBool32 waitAll,
2640 uint64_t timeout)
2641 {
2642 RADV_FROM_HANDLE(radv_device, device, _device);
2643 timeout = radv_get_absolute_timeout(timeout);
2644
2645 if (!waitAll && fenceCount > 1) {
2646 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2647 }
2648
2649 for (uint32_t i = 0; i < fenceCount; ++i) {
2650 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2651 bool expired = false;
2652
2653 if (fence->signalled)
2654 continue;
2655
2656 if (!fence->submitted)
2657 return VK_TIMEOUT;
2658
2659 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2660 if (!expired)
2661 return VK_TIMEOUT;
2662
2663 fence->signalled = true;
2664 }
2665
2666 return VK_SUCCESS;
2667 }
2668
2669 VkResult radv_ResetFences(VkDevice device,
2670 uint32_t fenceCount,
2671 const VkFence *pFences)
2672 {
2673 for (unsigned i = 0; i < fenceCount; ++i) {
2674 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2675 fence->submitted = fence->signalled = false;
2676 }
2677
2678 return VK_SUCCESS;
2679 }
2680
2681 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2682 {
2683 RADV_FROM_HANDLE(radv_device, device, _device);
2684 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2685
2686 if (fence->signalled)
2687 return VK_SUCCESS;
2688 if (!fence->submitted)
2689 return VK_NOT_READY;
2690
2691 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2692 return VK_NOT_READY;
2693
2694 return VK_SUCCESS;
2695 }
2696
2697
2698 // Queue semaphore functions
2699
2700 VkResult radv_CreateSemaphore(
2701 VkDevice _device,
2702 const VkSemaphoreCreateInfo* pCreateInfo,
2703 const VkAllocationCallbacks* pAllocator,
2704 VkSemaphore* pSemaphore)
2705 {
2706 RADV_FROM_HANDLE(radv_device, device, _device);
2707 const VkExportSemaphoreCreateInfoKHR *export =
2708 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
2709 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
2710 export ? export->handleTypes : 0;
2711
2712 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
2713 sizeof(*sem), 8,
2714 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2715 if (!sem)
2716 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2717
2718 sem->temp_syncobj = 0;
2719 /* create a syncobject if we are going to export this semaphore */
2720 if (handleTypes) {
2721 assert (device->physical_device->rad_info.has_syncobj);
2722 assert (handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2723 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
2724 if (ret) {
2725 vk_free2(&device->alloc, pAllocator, sem);
2726 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2727 }
2728 sem->sem = NULL;
2729 } else {
2730 sem->sem = device->ws->create_sem(device->ws);
2731 if (!sem->sem) {
2732 vk_free2(&device->alloc, pAllocator, sem);
2733 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2734 }
2735 sem->syncobj = 0;
2736 }
2737
2738 *pSemaphore = radv_semaphore_to_handle(sem);
2739 return VK_SUCCESS;
2740 }
2741
2742 void radv_DestroySemaphore(
2743 VkDevice _device,
2744 VkSemaphore _semaphore,
2745 const VkAllocationCallbacks* pAllocator)
2746 {
2747 RADV_FROM_HANDLE(radv_device, device, _device);
2748 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
2749 if (!_semaphore)
2750 return;
2751
2752 if (sem->syncobj)
2753 device->ws->destroy_syncobj(device->ws, sem->syncobj);
2754 else
2755 device->ws->destroy_sem(sem->sem);
2756 vk_free2(&device->alloc, pAllocator, sem);
2757 }
2758
2759 VkResult radv_CreateEvent(
2760 VkDevice _device,
2761 const VkEventCreateInfo* pCreateInfo,
2762 const VkAllocationCallbacks* pAllocator,
2763 VkEvent* pEvent)
2764 {
2765 RADV_FROM_HANDLE(radv_device, device, _device);
2766 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2767 sizeof(*event), 8,
2768 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2769
2770 if (!event)
2771 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2772
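/* An event is a single uint64 in GTT memory, written by both the CPU and
 * the GPU; RADEON_FLAG_VA_UNCACHED keeps the GPU view uncached so updates
 * become visible promptly. */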
2773 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2774 RADEON_DOMAIN_GTT,
2775 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
2776 if (!event->bo) {
2777 vk_free2(&device->alloc, pAllocator, event);
2778 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2779 }
2780
2781 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2782
2783 *pEvent = radv_event_to_handle(event);
2784
2785 return VK_SUCCESS;
2786 }
2787
2788 void radv_DestroyEvent(
2789 VkDevice _device,
2790 VkEvent _event,
2791 const VkAllocationCallbacks* pAllocator)
2792 {
2793 RADV_FROM_HANDLE(radv_device, device, _device);
2794 RADV_FROM_HANDLE(radv_event, event, _event);
2795
2796 if (!event)
2797 return;
2798 device->ws->buffer_destroy(event->bo);
2799 vk_free2(&device->alloc, pAllocator, event);
2800 }
2801
2802 VkResult radv_GetEventStatus(
2803 VkDevice _device,
2804 VkEvent _event)
2805 {
2806 RADV_FROM_HANDLE(radv_event, event, _event);
2807
2808 if (*event->map == 1)
2809 return VK_EVENT_SET;
2810 return VK_EVENT_RESET;
2811 }
2812
2813 VkResult radv_SetEvent(
2814 VkDevice _device,
2815 VkEvent _event)
2816 {
2817 RADV_FROM_HANDLE(radv_event, event, _event);
2818 *event->map = 1;
2819
2820 return VK_SUCCESS;
2821 }
2822
2823 VkResult radv_ResetEvent(
2824 VkDevice _device,
2825 VkEvent _event)
2826 {
2827 RADV_FROM_HANDLE(radv_event, event, _event);
2828 *event->map = 0;
2829
2830 return VK_SUCCESS;
2831 }
2832
2833 VkResult radv_CreateBuffer(
2834 VkDevice _device,
2835 const VkBufferCreateInfo* pCreateInfo,
2836 const VkAllocationCallbacks* pAllocator,
2837 VkBuffer* pBuffer)
2838 {
2839 RADV_FROM_HANDLE(radv_device, device, _device);
2840 struct radv_buffer *buffer;
2841
2842 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2843
2844 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2845 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2846 if (buffer == NULL)
2847 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2848
2849 buffer->size = pCreateInfo->size;
2850 buffer->usage = pCreateInfo->usage;
2851 buffer->bo = NULL;
2852 buffer->offset = 0;
2853 buffer->flags = pCreateInfo->flags;
2854
2855 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
2856 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
2857
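/* Sparse buffers get a purely virtual allocation up front; actual pages
 * are bound later via vkQueueBindSparse(). */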
2858 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2859 buffer->bo = device->ws->buffer_create(device->ws,
2860 align64(buffer->size, 4096),
2861 4096, 0, RADEON_FLAG_VIRTUAL);
2862 if (!buffer->bo) {
2863 vk_free2(&device->alloc, pAllocator, buffer);
2864 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2865 }
2866 }
2867
2868 *pBuffer = radv_buffer_to_handle(buffer);
2869
2870 return VK_SUCCESS;
2871 }
2872
2873 void radv_DestroyBuffer(
2874 VkDevice _device,
2875 VkBuffer _buffer,
2876 const VkAllocationCallbacks* pAllocator)
2877 {
2878 RADV_FROM_HANDLE(radv_device, device, _device);
2879 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2880
2881 if (!buffer)
2882 return;
2883
2884 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2885 device->ws->buffer_destroy(buffer->bo);
2886
2887 vk_free2(&device->alloc, pAllocator, buffer);
2888 }
2889
2890 static inline unsigned
2891 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2892 {
2893 if (stencil)
2894 return image->surface.u.legacy.stencil_tiling_index[level];
2895 else
2896 return image->surface.u.legacy.tiling_index[level];
2897 }
2898
2899 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2900 {
2901 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2902 }
2903
2904 static void
2905 radv_initialise_color_surface(struct radv_device *device,
2906 struct radv_color_buffer_info *cb,
2907 struct radv_image_view *iview)
2908 {
2909 const struct vk_format_description *desc;
2910 unsigned ntype, format, swap, endian;
2911 unsigned blend_clamp = 0, blend_bypass = 0;
2912 uint64_t va;
2913 const struct radeon_surf *surf = &iview->image->surface;
2914
2915 desc = vk_format_description(iview->vk_format);
2916
2917 memset(cb, 0, sizeof(*cb));
2918
2919 /* Intensity is implemented as Red, so treat it that way. */
2920 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2921
2922 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2923
2924 cb->cb_color_base = va >> 8;
2925
2926 if (device->physical_device->rad_info.chip_class >= GFX9) {
2927 struct gfx9_surf_meta_flags meta;
2928 if (iview->image->dcc_offset)
2929 meta = iview->image->surface.u.gfx9.dcc;
2930 else
2931 meta = iview->image->surface.u.gfx9.cmask;
2932
2933 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2934 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2935 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2936 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2937
2938 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
2939 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2940 } else {
2941 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2942 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2943
2944 cb->cb_color_base += level_info->offset >> 8;
2945 if (level_info->mode == RADEON_SURF_MODE_2D)
2946 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2947
2948 pitch_tile_max = level_info->nblk_x / 8 - 1;
2949 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2950 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2951
2952 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2953 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2954 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2955
2956 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2957
2958 if (iview->image->fmask.size) {
2959 if (device->physical_device->rad_info.chip_class >= CIK)
2960 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2961 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2962 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2963 } else {
2964 /* This must be set for fast clear to work without FMASK. */
2965 if (device->physical_device->rad_info.chip_class >= CIK)
2966 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2967 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2968 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2969 }
2970 }
2971
2972 /* CMASK variables */
2973 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2974 va += iview->image->cmask.offset;
2975 cb->cb_color_cmask = va >> 8;
2976
2977 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2978 va += iview->image->dcc_offset;
2979 cb->cb_dcc_base = va >> 8;
2980 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
2981
2982 uint32_t max_slice = radv_surface_layer_count(iview);
2983 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2984 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2985
2986 if (iview->image->info.samples > 1) {
2987 unsigned log_samples = util_logbase2(iview->image->info.samples);
2988
2989 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2990 S_028C74_NUM_FRAGMENTS(log_samples);
2991 }
2992
2993 if (iview->image->fmask.size) {
2994 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2995 cb->cb_color_fmask = va >> 8;
2996 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
2997 } else {
2998 cb->cb_color_fmask = cb->cb_color_base;
2999 }
3000
3001 ntype = radv_translate_color_numformat(iview->vk_format,
3002 desc,
3003 vk_format_get_first_non_void_channel(iview->vk_format));
3004 format = radv_translate_colorformat(iview->vk_format);
3005 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3006 radv_finishme("Illegal color\n");
3007 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3008 endian = radv_colorformat_endian_swap(format);
3009
3010 /* blend clamp should be set for all NORM/SRGB types */
3011 if (ntype == V_028C70_NUMBER_UNORM ||
3012 ntype == V_028C70_NUMBER_SNORM ||
3013 ntype == V_028C70_NUMBER_SRGB)
3014 blend_clamp = 1;
3015
3016 /* set blend bypass according to docs if SINT/UINT or
3017 8/24 COLOR variants */
3018 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3019 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3020 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3021 blend_clamp = 0;
3022 blend_bypass = 1;
3023 }
3024 #if 0
3025 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3026 (format == V_028C70_COLOR_8 ||
3027 format == V_028C70_COLOR_8_8 ||
3028 format == V_028C70_COLOR_8_8_8_8))
3029 ->color_is_int8 = true;
3030 #endif
3031 cb->cb_color_info = S_028C70_FORMAT(format) |
3032 S_028C70_COMP_SWAP(swap) |
3033 S_028C70_BLEND_CLAMP(blend_clamp) |
3034 S_028C70_BLEND_BYPASS(blend_bypass) |
3035 S_028C70_SIMPLE_FLOAT(1) |
3036 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3037 ntype != V_028C70_NUMBER_SNORM &&
3038 ntype != V_028C70_NUMBER_SRGB &&
3039 format != V_028C70_COLOR_8_24 &&
3040 format != V_028C70_COLOR_24_8) |
3041 S_028C70_NUMBER_TYPE(ntype) |
3042 S_028C70_ENDIAN(endian);
3043 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3044 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3045 if (device->physical_device->rad_info.chip_class == SI) {
3046 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3047 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3048 }
3049 }
3050
3051 if (iview->image->cmask.size &&
3052 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3053 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3054
3055 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3056 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3057
3058 if (device->physical_device->rad_info.chip_class >= VI) {
3059 unsigned max_uncompressed_block_size = 2;
3060 if (iview->image->info.samples > 1) {
3061 if (iview->image->surface.bpe == 1)
3062 max_uncompressed_block_size = 0;
3063 else if (iview->image->surface.bpe == 2)
3064 max_uncompressed_block_size = 1;
3065 }
3066
3067 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3068 S_028C78_INDEPENDENT_64B_BLOCKS(1);
3069 }
3070
3071 /* This must be set for fast clear to work without FMASK. */
3072 if (!iview->image->fmask.size &&
3073 device->physical_device->rad_info.chip_class == SI) {
3074 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3075 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3076 }
3077
3078 if (device->physical_device->rad_info.chip_class >= GFX9) {
3079 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3080 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3081
3082 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3083 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3084 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3085 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3086 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3087 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3088 }
3089 }
3090
3091 static void
3092 radv_initialise_ds_surface(struct radv_device *device,
3093 struct radv_ds_buffer_info *ds,
3094 struct radv_image_view *iview)
3095 {
3096 unsigned level = iview->base_mip;
3097 unsigned format, stencil_format;
3098 uint64_t va, s_offs, z_offs;
3099 bool stencil_only = false;
3100 memset(ds, 0, sizeof(*ds));
3101 switch (iview->image->vk_format) {
3102 case VK_FORMAT_D24_UNORM_S8_UINT:
3103 case VK_FORMAT_X8_D24_UNORM_PACK32:
3104 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3105 ds->offset_scale = 2.0f;
3106 break;
3107 case VK_FORMAT_D16_UNORM:
3108 case VK_FORMAT_D16_UNORM_S8_UINT:
3109 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3110 ds->offset_scale = 4.0f;
3111 break;
3112 case VK_FORMAT_D32_SFLOAT:
3113 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3114 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3115 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3116 ds->offset_scale = 1.0f;
3117 break;
3118 case VK_FORMAT_S8_UINT:
3119 stencil_only = true;
3120 break;
3121 default:
3122 break;
3123 }
3124
3125 format = radv_translate_dbformat(iview->image->vk_format);
3126 stencil_format = iview->image->surface.has_stencil ?
3127 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3128
3129 uint32_t max_slice = radv_surface_layer_count(iview);
3130 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3131 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
3132
3133 ds->db_htile_data_base = 0;
3134 ds->db_htile_surface = 0;
3135
3136 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3137 s_offs = z_offs = va;
3138
3139 if (device->physical_device->rad_info.chip_class >= GFX9) {
3140 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3141 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3142
3143 ds->db_z_info = S_028038_FORMAT(format) |
3144 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3145 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3146 S_028038_MAXMIP(iview->image->info.levels - 1);
3147 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3148 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3149
3150 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3151 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3152 ds->db_depth_view |= S_028008_MIPID(level);
3153
3154 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3155 S_02801C_Y_MAX(iview->image->info.height - 1);
3156
3157 if (radv_htile_enabled(iview->image, level)) {
3158 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3159
3160 if (iview->image->tc_compatible_htile) {
3161 unsigned max_zplanes = 4;
3162
3163 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3164 iview->image->info.samples > 1)
3165 max_zplanes = 2;
3166
3167 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3168 S_028038_ITERATE_FLUSH(1);
3169 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3170 }
3171
3172 if (!iview->image->surface.has_stencil)
3173 /* Use all of the htile_buffer for depth if there's no stencil. */
3174 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3175 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3176 iview->image->htile_offset;
3177 ds->db_htile_data_base = va >> 8;
3178 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3179 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3180 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3181 }
3182 } else {
3183 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3184
3185 if (stencil_only)
3186 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3187
3188 z_offs += iview->image->surface.u.legacy.level[level].offset;
3189 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3190
3191 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3192 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3193 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3194
3195 if (iview->image->info.samples > 1)
3196 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3197
3198 if (device->physical_device->rad_info.chip_class >= CIK) {
3199 struct radeon_info *info = &device->physical_device->rad_info;
3200 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3201 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3202 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3203 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3204 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3205 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3206
3207 if (stencil_only)
3208 tile_mode = stencil_tile_mode;
3209
3210 ds->db_depth_info |=
3211 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3212 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3213 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3214 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3215 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3216 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3217 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3218 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3219 } else {
3220 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3221 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3222 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3223 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3224 if (stencil_only)
3225 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3226 }
3227
3228 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3229 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3230 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3231
3232 if (radv_htile_enabled(iview->image, level)) {
3233 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3234
3235 if (!iview->image->surface.has_stencil &&
3236 !iview->image->tc_compatible_htile)
3237 /* Use all of the htile_buffer for depth if there's no stencil. */
3238 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3239
3240 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3241 iview->image->htile_offset;
3242 ds->db_htile_data_base = va >> 8;
3243 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3244
3245 if (iview->image->tc_compatible_htile) {
3246 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3247
3248 if (iview->image->info.samples <= 1)
3249 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3250 else if (iview->image->info.samples <= 4)
3251 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3252 else
3253 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3254 }
3255 }
3256 }
3257
3258 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3259 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3260 }
3261
3262 VkResult radv_CreateFramebuffer(
3263 VkDevice _device,
3264 const VkFramebufferCreateInfo* pCreateInfo,
3265 const VkAllocationCallbacks* pAllocator,
3266 VkFramebuffer* pFramebuffer)
3267 {
3268 RADV_FROM_HANDLE(radv_device, device, _device);
3269 struct radv_framebuffer *framebuffer;
3270
3271 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3272
3273 size_t size = sizeof(*framebuffer) +
3274 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3275 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3276 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3277 if (framebuffer == NULL)
3278 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3279
3280 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3281 framebuffer->width = pCreateInfo->width;
3282 framebuffer->height = pCreateInfo->height;
3283 framebuffer->layers = pCreateInfo->layers;
3284 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3285 VkImageView _iview = pCreateInfo->pAttachments[i];
3286 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3287 framebuffer->attachments[i].attachment = iview;
3288 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3289 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3290 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3291 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3292 }
3293 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3294 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3295 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3296 }
3297
3298 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3299 return VK_SUCCESS;
3300 }
3301
3302 void radv_DestroyFramebuffer(
3303 VkDevice _device,
3304 VkFramebuffer _fb,
3305 const VkAllocationCallbacks* pAllocator)
3306 {
3307 RADV_FROM_HANDLE(radv_device, device, _device);
3308 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3309
3310 if (!fb)
3311 return;
3312 vk_free2(&device->alloc, pAllocator, fb);
3313 }
3314
3315 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3316 {
3317 switch (address_mode) {
3318 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3319 return V_008F30_SQ_TEX_WRAP;
3320 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3321 return V_008F30_SQ_TEX_MIRROR;
3322 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3323 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3324 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3325 return V_008F30_SQ_TEX_CLAMP_BORDER;
3326 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3327 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3328 default:
3329 unreachable("illegal tex wrap mode");
3330 break;
3331 }
3332 }
3333
3334 static unsigned
3335 radv_tex_compare(VkCompareOp op)
3336 {
3337 switch (op) {
3338 case VK_COMPARE_OP_NEVER:
3339 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3340 case VK_COMPARE_OP_LESS:
3341 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3342 case VK_COMPARE_OP_EQUAL:
3343 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3344 case VK_COMPARE_OP_LESS_OR_EQUAL:
3345 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3346 case VK_COMPARE_OP_GREATER:
3347 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3348 case VK_COMPARE_OP_NOT_EQUAL:
3349 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3350 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3351 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3352 case VK_COMPARE_OP_ALWAYS:
3353 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3354 default:
3355 unreachable("illegal compare mode");
3356 break;
3357 }
3358 }
3359
3360 static unsigned
3361 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3362 {
3363 switch (filter) {
3364 case VK_FILTER_NEAREST:
3365 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3366 V_008F38_SQ_TEX_XY_FILTER_POINT);
3367 case VK_FILTER_LINEAR:
3368 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3369 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3370 case VK_FILTER_CUBIC_IMG:
3371 default:
3372 fprintf(stderr, "illegal texture filter\n");
3373 return 0;
3374 }
3375 }
3376
3377 static unsigned
3378 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3379 {
3380 switch (mode) {
3381 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3382 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3383 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3384 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3385 default:
3386 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3387 }
3388 }
3389
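/* Map a Vulkan border color onto one of the hardware's built-in border
 * colors; anything unrecognized falls back to 0, i.e. transparent black.
 */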
3390 static unsigned
3391 radv_tex_bordercolor(VkBorderColor bcolor)
3392 {
3393 switch (bcolor) {
3394 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3395 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3396 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3397 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3398 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3399 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3400 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3401 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3402 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3403 default:
3404 break;
3405 }
3406 return 0;
3407 }
3408
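/* Convert a maxAnisotropy value into the hardware ratio code, i.e.
 * floor(log2(filter)) clamped to 4:
 *   1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x (and up) -> 4.
 */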
3409 static unsigned
3410 radv_tex_aniso_filter(unsigned filter)
3411 {
3412 if (filter < 2)
3413 return 0;
3414 if (filter < 4)
3415 return 1;
3416 if (filter < 8)
3417 return 2;
3418 if (filter < 16)
3419 return 3;
3420 return 4;
3421 }
3422
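/* Pack the VkSamplerCreateInfo into the four SQ_IMG_SAMP dwords.
 * S_FIXED(x, 8) converts to fixed point with 8 fractional bits, so
 * e.g. a minLod of 2.5 is encoded as 0x280.
 */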
3423 static void
3424 radv_init_sampler(struct radv_device *device,
3425 struct radv_sampler *sampler,
3426 const VkSamplerCreateInfo *pCreateInfo)
3427 {
3428 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3429 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3430 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3431 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3432
3433 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3434 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3435 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3436 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3437 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3438 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3439 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3440 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3441 S_008F30_DISABLE_CUBE_WRAP(0) |
3442 S_008F30_COMPAT_MODE(is_vi));
3443 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3444 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3445 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3446 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3447 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3448 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3449 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3450 S_008F38_MIP_POINT_PRECLAMP(0) |
3451 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
3452 S_008F38_FILTER_PREC_FIX(1) |
3453 S_008F38_ANISO_OVERRIDE(is_vi));
3454 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3455 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3456 }
3457
3458 VkResult radv_CreateSampler(
3459 VkDevice _device,
3460 const VkSamplerCreateInfo* pCreateInfo,
3461 const VkAllocationCallbacks* pAllocator,
3462 VkSampler* pSampler)
3463 {
3464 RADV_FROM_HANDLE(radv_device, device, _device);
3465 struct radv_sampler *sampler;
3466
3467 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3468
3469 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3470 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3471 if (!sampler)
3472 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3473
3474 radv_init_sampler(device, sampler, pCreateInfo);
3475 *pSampler = radv_sampler_to_handle(sampler);
3476
3477 return VK_SUCCESS;
3478 }
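/* Illustrative (hypothetical) application-side usage, for reference:
 *
 *    VkSamplerCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *       .magFilter = VK_FILTER_LINEAR,
 *       .minFilter = VK_FILTER_LINEAR,
 *       .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *    };
 *    VkSampler sampler;
 *    vkCreateSampler(device, &info, NULL, &sampler);
 *
 * The loader dispatches vkCreateSampler to radv_CreateSampler above.
 */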
3479
3480 void radv_DestroySampler(
3481 VkDevice _device,
3482 VkSampler _sampler,
3483 const VkAllocationCallbacks* pAllocator)
3484 {
3485 RADV_FROM_HANDLE(radv_device, device, _device);
3486 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3487
3488 if (!sampler)
3489 return;
3490 vk_free2(&device->alloc, pAllocator, sampler);
3491 }
3492
3493 /* vk_icd.h does not declare this function, so we declare it here to
3494 * suppress Wmissing-prototypes.
3495 */
3496 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3497 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3498
3499 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3500 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3501 {
3502 /* For the full details on loader interface versioning, see
3503 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3504 * What follows is a condensed summary, to help you navigate the large and
3505 * confusing official doc.
3506 *
3507 * - Loader interface v0 is incompatible with later versions. We don't
3508 * support it.
3509 *
3510 * - In loader interface v1:
3511 * - The first ICD entrypoint called by the loader is
3512 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3513 * entrypoint.
3514 * - The ICD must statically expose no other Vulkan symbol unless it is
3515 * linked with -Bsymbolic.
3516 * - Each dispatchable Vulkan handle created by the ICD must be
3517 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3518 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3519 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3520 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3521 * such loader-managed surfaces.
3522 *
3523 * - Loader interface v2 differs from v1 in:
3524 * - The first ICD entrypoint called by the loader is
3525 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3526 * statically expose this entrypoint.
3527 *
3528 * - Loader interface v3 differs from v2 in:
3529 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3530 * vkDestroySurfaceKHR(), and all other APIs that use VkSurfaceKHR,
3531 * because the loader no longer does so.
3532 */
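/* Negotiation itself is just a clamp: e.g. a loader offering v4 is
 * talked down to our maximum of 3, while a v2 loader stays at 2.
 */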
3533 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3534 return VK_SUCCESS;
3535 }
3536
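/* Export a memory object as a file descriptor. Per VK_KHR_external_memory_fd,
 * the returned fd is owned by the caller, who is responsible for closing it.
 */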
3537 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3538 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3539 int *pFD)
3540 {
3541 RADV_FROM_HANDLE(radv_device, device, _device);
3542 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3543
3544 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3545
3546 /* At the moment, we only support the handle types checked below. */
3547 assert(pGetFdInfo->handleType ==
3548 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3549 pGetFdInfo->handleType ==
3550 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3551
3552 bool ret = radv_get_memory_fd(device, memory, pFD);
3553 if (!ret)
3554 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3555 return VK_SUCCESS;
3556 }
3557
3558 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3559 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3560 int fd,
3561 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3562 {
3563 switch (handleType) {
3564 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3565 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
3566 return VK_SUCCESS;
3567
3568 default:
3569 /* The valid usage section for this function says:
3570 *
3571 * "handleType must not be one of the handle types defined as
3572 * opaque."
3573 *
3574 * So opaque handle types fall into the default "unsupported" case.
3575 */
3576 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3577 }
3578 }
3579
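/* Import a syncobj fd into a semaphore. With a TEMPORARY import the
 * handle goes into temp_syncobj, which takes precedence over the
 * permanent payload until it is consumed; otherwise it replaces the
 * permanent payload. Either way the import takes ownership of the fd,
 * so we close it once the winsys holds its own reference.
 */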
3580 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
3581 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
3582 {
3583 RADV_FROM_HANDLE(radv_device, device, _device);
3584 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
3585 uint32_t syncobj_handle = 0;
3586 uint32_t *syncobj_dst = NULL;
3587 assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3588
3589 int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
3590 if (ret != 0)
3591 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3592
3593 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
3594 syncobj_dst = &sem->temp_syncobj;
3595 } else {
3596 syncobj_dst = &sem->syncobj;
3597 }
3598
3599 if (*syncobj_dst)
3600 device->ws->destroy_syncobj(device->ws, *syncobj_dst);
3601
3602 *syncobj_dst = syncobj_handle;
3603 close(pImportSemaphoreFdInfo->fd);
3604 return VK_SUCCESS;
3605 }
3606
3607 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
3608 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
3609 int *pFd)
3610 {
3611 RADV_FROM_HANDLE(radv_device, device, _device);
3612 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
3613 int ret;
3614 uint32_t syncobj_handle;
3615
3616 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3617 if (sem->temp_syncobj)
3618 syncobj_handle = sem->temp_syncobj;
3619 else
3620 syncobj_handle = sem->syncobj;
3621 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3622 if (ret)
3623 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3624 return VK_SUCCESS;
3625 }
3626
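/* Only opaque fd handles (backed by DRM syncobjs) can be imported and
 * exported; report no external-semaphore features for anything else.
 */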
3627 void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
3628 VkPhysicalDevice physicalDevice,
3629 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
3630 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
3631 {
3632 if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
3633 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3634 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3635 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
3636 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
3637 } else {
3638 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
3639 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
3640 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
3641 }
3642 }