1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "addrlib/gfx9/chip/gfx9_enum.h"
48 #include "util/debug.h"
49
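/* Cache UUID layout (VK_UUID_SIZE = 16 bytes): bytes 0-3 hold the Mesa
 * build timestamp, 4-7 the LLVM build timestamp, 8-9 the radeon_family,
 * and 10+ the string "radv".
 */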
50 static int
51 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
52 {
53 uint32_t mesa_timestamp, llvm_timestamp;
54 uint16_t f = family;
55 memset(uuid, 0, VK_UUID_SIZE);
56 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
57 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
58 return -1;
59
60 memcpy(uuid, &mesa_timestamp, 4);
61 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
62 memcpy((char*)uuid + 8, &f, 2);
63 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
64 return 0;
65 }
66
67 static void
68 radv_get_driver_uuid(void *uuid)
69 {
70 ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
71 }
72
73 static void
74 radv_get_device_uuid(struct radeon_info *info, void *uuid)
75 {
76 ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
77 }
78
79 static void
80 radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
81 {
82 const char *chip_string;
83 char llvm_string[32] = {};
84
85 switch (family) {
86 case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
87 case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
88 case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
89 case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
90 case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
91 case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
92 case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
93 case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
94 case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
95 case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
96 case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
97 case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
98 case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
99 case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
100 case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
101 case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
102 case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
103 case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
104 case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
105 case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
106 case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
107 case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
108 default: chip_string = "AMD RADV unknown"; break;
109 }
110
111 if (HAVE_LLVM > 0) {
112 snprintf(llvm_string, sizeof(llvm_string),
113 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
114 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
115 }
116
117 snprintf(name, name_len, "%s%s", chip_string, llvm_string);
118 }
119
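/* Advertise up to three heaps (CPU-invisible VRAM, CPU-visible VRAM,
 * GTT) and up to four memory types on top of them. On APUs without
 * dedicated VRAM, the GTT heap and types are marked DEVICE_LOCAL too.
 */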
120 static void
121 radv_physical_device_init_mem_types(struct radv_physical_device *device)
122 {
123 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
124 uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
125 device->rad_info.vram_vis_size);
126
127 int vram_index = -1, visible_vram_index = -1, gart_index = -1;
128 device->memory_properties.memoryHeapCount = 0;
129 if (device->rad_info.vram_size - visible_vram_size > 0) {
130 vram_index = device->memory_properties.memoryHeapCount++;
131 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
132 .size = device->rad_info.vram_size - visible_vram_size,
133 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
134 };
135 }
136 if (visible_vram_size) {
137 visible_vram_index = device->memory_properties.memoryHeapCount++;
138 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
139 .size = visible_vram_size,
140 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
141 };
142 }
143 if (device->rad_info.gart_size > 0) {
144 gart_index = device->memory_properties.memoryHeapCount++;
145 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
146 .size = device->rad_info.gart_size,
147 .flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
148 };
149 }
150
151 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
152 unsigned type_count = 0;
153 if (vram_index >= 0) {
154 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
155 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
156 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
157 .heapIndex = vram_index,
158 };
159 }
160 if (gart_index >= 0) {
161 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
162 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
163 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
164 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
165 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
166 .heapIndex = gart_index,
167 };
168 }
169 if (visible_vram_index >= 0) {
170 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
171 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
172 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
173 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
174 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
175 .heapIndex = visible_vram_index,
176 };
177 }
178 if (gart_index >= 0) {
179 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
180 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
181 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
182 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
183 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
184 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
185 .heapIndex = gart_index,
186 };
187 }
188 device->memory_properties.memoryTypeCount = type_count;
189 }
190
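/* Debugging aid: RADV_FORCE_FAMILY makes the driver report a different
 * GPU than the one probed. Values are LLVM processor names, e.g.
 * RADV_FORCE_FAMILY=polaris10.
 */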
191 static void
192 radv_handle_env_var_force_family(struct radv_physical_device *device)
193 {
194 const char *family = getenv("RADV_FORCE_FAMILY");
195 unsigned i;
196
197 if (!family)
198 return;
199
200 for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
201 if (!strcmp(family, ac_get_llvm_processor_name(i))) {
202 /* Override family and chip_class. */
203 device->rad_info.family = i;
204
205 if (i >= CHIP_VEGA10)
206 device->rad_info.chip_class = GFX9;
207 else if (i >= CHIP_TONGA)
208 device->rad_info.chip_class = VI;
209 else if (i >= CHIP_BONAIRE)
210 device->rad_info.chip_class = CIK;
211 else
212 device->rad_info.chip_class = SI;
213
214 return;
215 }
216 }
217
218 fprintf(stderr, "radv: Unknown family: %s\n", family);
219 exit(1);
220 }
221
222 static VkResult
223 radv_physical_device_init(struct radv_physical_device *device,
224 struct radv_instance *instance,
225 drmDevicePtr drm_device)
226 {
227 const char *path = drm_device->nodes[DRM_NODE_RENDER];
228 VkResult result;
229 drmVersionPtr version;
230 int fd;
231
232 fd = open(path, O_RDWR | O_CLOEXEC);
233 if (fd < 0)
234 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
235
236 version = drmGetVersion(fd);
237 if (!version) {
238 close(fd);
239 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
240 "failed to get version %s: %m", path);
241 }
242
243 if (strcmp(version->name, "amdgpu")) {
244 drmFreeVersion(version);
245 close(fd);
246 return VK_ERROR_INCOMPATIBLE_DRIVER;
247 }
248 drmFreeVersion(version);
249
250 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
251 device->instance = instance;
252 assert(strlen(path) < ARRAY_SIZE(device->path));
253 strncpy(device->path, path, ARRAY_SIZE(device->path));
254
255 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
256 instance->perftest_flags);
257 if (!device->ws) {
258 result = VK_ERROR_INCOMPATIBLE_DRIVER;
259 goto fail;
260 }
261
262 device->local_fd = fd;
263 device->ws->query_info(device->ws, &device->rad_info);
264
265 radv_handle_env_var_force_family(device);
266
267 radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
268
269 if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
270 device->ws->destroy(device->ws);
271 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
272 "cannot generate UUID");
273 goto fail;
274 }
275
276 /* These flags affect shader compilation. */
277 uint64_t shader_env_flags =
278 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
279 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
280
281 /* The gpu id is already embedded in the uuid so we just pass "radv"
282 * when creating the cache.
283 */
284 char buf[VK_UUID_SIZE * 2 + 1];
285 disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
286 device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
287
288 if (device->rad_info.chip_class < VI ||
289 device->rad_info.chip_class > GFX9)
290 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
291
292 radv_get_driver_uuid(&device->driver_uuid);
293 radv_get_device_uuid(&device->rad_info, &device->device_uuid);
294
295 if (device->rad_info.family == CHIP_STONEY ||
296 device->rad_info.chip_class >= GFX9) {
297 device->has_rbplus = true;
298 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
299 device->rad_info.family == CHIP_VEGA12 ||
300 device->rad_info.family == CHIP_RAVEN;
301 }
302
303 /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
304 * on SI.
305 */
306 device->has_clear_state = device->rad_info.chip_class >= CIK;
307
308 device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
309
310 /* Vega10/Raven need a special workaround for a hardware bug. */
311 device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
312 device->rad_info.family == CHIP_RAVEN;
313
314 /* Out-of-order primitive rasterization. */
315 device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
316 device->rad_info.max_se >= 2;
317 device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
318 !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
319
320 device->dcc_msaa_allowed = device->rad_info.chip_class == VI &&
321 (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
322
323 radv_physical_device_init_mem_types(device);
324 radv_fill_device_extension_table(device, &device->supported_extensions);
325
326 result = radv_init_wsi(device);
327 if (result != VK_SUCCESS) {
328 device->ws->destroy(device->ws);
329 goto fail;
330 }
331
332 if ((device->instance->debug_flags & RADV_DEBUG_INFO))
333 ac_print_gpu_info(&device->rad_info);
334
335 return VK_SUCCESS;
336
337 fail:
338 close(fd);
339 return result;
340 }
341
342 static void
343 radv_physical_device_finish(struct radv_physical_device *device)
344 {
345 radv_finish_wsi(device);
346 device->ws->destroy(device->ws);
347 disk_cache_destroy(device->disk_cache);
348 close(device->local_fd);
349 }
350
351 static void *
352 default_alloc_func(void *pUserData, size_t size, size_t align,
353 VkSystemAllocationScope allocationScope)
354 {
355 return malloc(size);
356 }
357
358 static void *
359 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
360 size_t align, VkSystemAllocationScope allocationScope)
361 {
362 return realloc(pOriginal, size);
363 }
364
365 static void
366 default_free_func(void *pUserData, void *pMemory)
367 {
368 free(pMemory);
369 }
370
371 static const VkAllocationCallbacks default_alloc = {
372 .pUserData = NULL,
373 .pfnAllocation = default_alloc_func,
374 .pfnReallocation = default_realloc_func,
375 .pfnFree = default_free_func,
376 };
377
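/* Debug options are parsed from the RADV_DEBUG environment variable as
 * a comma-separated list, e.g. RADV_DEBUG=nodcc,info. RADV_PERFTEST
 * works the same way with the table further below.
 */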
378 static const struct debug_control radv_debug_options[] = {
379 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
380 {"nodcc", RADV_DEBUG_NO_DCC},
381 {"shaders", RADV_DEBUG_DUMP_SHADERS},
382 {"nocache", RADV_DEBUG_NO_CACHE},
383 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
384 {"nohiz", RADV_DEBUG_NO_HIZ},
385 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
386 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
387 {"allbos", RADV_DEBUG_ALL_BOS},
388 {"noibs", RADV_DEBUG_NO_IBS},
389 {"spirv", RADV_DEBUG_DUMP_SPIRV},
390 {"vmfaults", RADV_DEBUG_VM_FAULTS},
391 {"zerovram", RADV_DEBUG_ZERO_VRAM},
392 {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
393 {"nosisched", RADV_DEBUG_NO_SISCHED},
394 {"preoptir", RADV_DEBUG_PREOPTIR},
395 {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
396 {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
397 {"info", RADV_DEBUG_INFO},
398 {NULL, 0}
399 };
400
401 const char *
402 radv_get_debug_option_name(int id)
403 {
404 assert(id < ARRAY_SIZE(radv_debug_options) - 1);
405 return radv_debug_options[id].string;
406 }
407
408 static const struct debug_control radv_perftest_options[] = {
409 {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
410 {"sisched", RADV_PERFTEST_SISCHED},
411 {"localbos", RADV_PERFTEST_LOCAL_BOS},
412 {"binning", RADV_PERFTEST_BINNING},
413 {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
414 {NULL, 0}
415 };
416
417 const char *
418 radv_get_perftest_option_name(int id)
419 {
420 assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
421 return radv_perftest_options[id].string;
422 }
423
424 static void
425 radv_handle_per_app_options(struct radv_instance *instance,
426 const VkApplicationInfo *info)
427 {
428 const char *name = info ? info->pApplicationName : NULL;
429
430 if (!name)
431 return;
432
433 if (!strcmp(name, "Talos - Linux - 32bit") ||
434 !strcmp(name, "Talos - Linux - 64bit")) {
435 if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
436 /* Force-enable LLVM sisched for Talos: it appears
437 * safe and gives a few more FPS.
438 */
439 instance->perftest_flags |= RADV_PERFTEST_SISCHED;
440 }
441 }
442 }
443
444 static int radv_get_instance_extension_index(const char *name)
445 {
446 for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
447 if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
448 return i;
449 }
450 return -1;
451 }
452
453
454 VkResult radv_CreateInstance(
455 const VkInstanceCreateInfo* pCreateInfo,
456 const VkAllocationCallbacks* pAllocator,
457 VkInstance* pInstance)
458 {
459 struct radv_instance *instance;
460 VkResult result;
461
462 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
463
464 uint32_t client_version;
465 if (pCreateInfo->pApplicationInfo &&
466 pCreateInfo->pApplicationInfo->apiVersion != 0) {
467 client_version = pCreateInfo->pApplicationInfo->apiVersion;
468 } else {
469 radv_EnumerateInstanceVersion(&client_version);
470 }
471
472 instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
473 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
474 if (!instance)
475 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
476
477 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
478
479 if (pAllocator)
480 instance->alloc = *pAllocator;
481 else
482 instance->alloc = default_alloc;
483
484 instance->apiVersion = client_version;
485 instance->physicalDeviceCount = -1;
486
487 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
488 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
489 int index = radv_get_instance_extension_index(ext_name);
490
491 if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
492 vk_free2(&default_alloc, pAllocator, instance);
493 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
494 }
495
496 instance->enabled_extensions.extensions[index] = true;
497 }
498
499 result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
500 if (result != VK_SUCCESS) {
501 vk_free2(&default_alloc, pAllocator, instance);
502 return vk_error(result);
503 }
504
505 _mesa_locale_init();
506
507 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
508
509 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
510 radv_debug_options);
511
512 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
513 radv_perftest_options);
514
515 radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
516
517 *pInstance = radv_instance_to_handle(instance);
518
519 return VK_SUCCESS;
520 }
521
522 void radv_DestroyInstance(
523 VkInstance _instance,
524 const VkAllocationCallbacks* pAllocator)
525 {
526 RADV_FROM_HANDLE(radv_instance, instance, _instance);
527
528 if (!instance)
529 return;
530
531 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
532 radv_physical_device_finish(instance->physicalDevices + i);
533 }
534
535 VG(VALGRIND_DESTROY_MEMPOOL(instance));
536
537 _mesa_locale_fini();
538
539 vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
540
541 vk_free(&instance->alloc, instance);
542 }
543
544 static VkResult
545 radv_enumerate_devices(struct radv_instance *instance)
546 {
547 /* TODO: Check for more devices? */
548 drmDevicePtr devices[8];
549 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
550 int max_devices;
551
552 instance->physicalDeviceCount = 0;
553
554 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
555 if (max_devices < 1)
556 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
557
558 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
559 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
560 devices[i]->bustype == DRM_BUS_PCI &&
561 devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
562
563 result = radv_physical_device_init(instance->physicalDevices +
564 instance->physicalDeviceCount,
565 instance,
566 devices[i]);
567 if (result == VK_SUCCESS)
568 ++instance->physicalDeviceCount;
569 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
570 break;
571 }
572 }
573 drmFreeDevices(devices, max_devices);
574
575 return result;
576 }
577
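/* Standard Vulkan two-call idiom: when pPhysicalDevices is NULL, only
 * the count is written; otherwise up to *pPhysicalDeviceCount handles
 * are filled in and VK_INCOMPLETE is returned if the caller's array was
 * too small.
 */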
578 VkResult radv_EnumeratePhysicalDevices(
579 VkInstance _instance,
580 uint32_t* pPhysicalDeviceCount,
581 VkPhysicalDevice* pPhysicalDevices)
582 {
583 RADV_FROM_HANDLE(radv_instance, instance, _instance);
584 VkResult result;
585
586 if (instance->physicalDeviceCount < 0) {
587 result = radv_enumerate_devices(instance);
588 if (result != VK_SUCCESS &&
589 result != VK_ERROR_INCOMPATIBLE_DRIVER)
590 return result;
591 }
592
593 if (!pPhysicalDevices) {
594 *pPhysicalDeviceCount = instance->physicalDeviceCount;
595 } else {
596 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
597 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
598 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
599 }
600
601 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
602 : VK_SUCCESS;
603 }
604
605 VkResult radv_EnumeratePhysicalDeviceGroups(
606 VkInstance _instance,
607 uint32_t* pPhysicalDeviceGroupCount,
608 VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
609 {
610 RADV_FROM_HANDLE(radv_instance, instance, _instance);
611 VkResult result;
612
613 if (instance->physicalDeviceCount < 0) {
614 result = radv_enumerate_devices(instance);
615 if (result != VK_SUCCESS &&
616 result != VK_ERROR_INCOMPATIBLE_DRIVER)
617 return result;
618 }
619
620 if (!pPhysicalDeviceGroupProperties) {
621 *pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
622 } else {
623 *pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
624 for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
625 pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
626 pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
627 pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
628 }
629 }
630 return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
631 : VK_SUCCESS;
632 }
633
634 void radv_GetPhysicalDeviceFeatures(
635 VkPhysicalDevice physicalDevice,
636 VkPhysicalDeviceFeatures* pFeatures)
637 {
638 memset(pFeatures, 0, sizeof(*pFeatures));
639
640 *pFeatures = (VkPhysicalDeviceFeatures) {
641 .robustBufferAccess = true,
642 .fullDrawIndexUint32 = true,
643 .imageCubeArray = true,
644 .independentBlend = true,
645 .geometryShader = true,
646 .tessellationShader = true,
647 .sampleRateShading = true,
648 .dualSrcBlend = true,
649 .logicOp = true,
650 .multiDrawIndirect = true,
651 .drawIndirectFirstInstance = true,
652 .depthClamp = true,
653 .depthBiasClamp = true,
654 .fillModeNonSolid = true,
655 .depthBounds = true,
656 .wideLines = true,
657 .largePoints = true,
658 .alphaToOne = true,
659 .multiViewport = true,
660 .samplerAnisotropy = true,
661 .textureCompressionETC2 = false,
662 .textureCompressionASTC_LDR = false,
663 .textureCompressionBC = true,
664 .occlusionQueryPrecise = true,
665 .pipelineStatisticsQuery = true,
666 .vertexPipelineStoresAndAtomics = true,
667 .fragmentStoresAndAtomics = true,
668 .shaderTessellationAndGeometryPointSize = true,
669 .shaderImageGatherExtended = true,
670 .shaderStorageImageExtendedFormats = true,
671 .shaderStorageImageMultisample = false,
672 .shaderUniformBufferArrayDynamicIndexing = true,
673 .shaderSampledImageArrayDynamicIndexing = true,
674 .shaderStorageBufferArrayDynamicIndexing = true,
675 .shaderStorageImageArrayDynamicIndexing = true,
676 .shaderStorageImageReadWithoutFormat = true,
677 .shaderStorageImageWriteWithoutFormat = true,
678 .shaderClipDistance = true,
679 .shaderCullDistance = true,
680 .shaderFloat64 = true,
681 .shaderInt64 = true,
682 .shaderInt16 = false,
683 .sparseBinding = true,
684 .variableMultisampleRate = true,
685 .inheritedQueries = true,
686 };
687 }
688
689 void radv_GetPhysicalDeviceFeatures2(
690 VkPhysicalDevice physicalDevice,
691 VkPhysicalDeviceFeatures2KHR *pFeatures)
692 {
693 vk_foreach_struct(ext, pFeatures->pNext) {
694 switch (ext->sType) {
695 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
696 VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
697 features->variablePointersStorageBuffer = true;
698 features->variablePointers = false;
699 break;
700 }
701 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
702 VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
703 features->multiview = true;
704 features->multiviewGeometryShader = true;
705 features->multiviewTessellationShader = true;
706 break;
707 }
708 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
709 VkPhysicalDeviceShaderDrawParameterFeatures *features =
710 (VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
711 features->shaderDrawParameters = true;
712 break;
713 }
714 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
715 VkPhysicalDeviceProtectedMemoryFeatures *features =
716 (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
717 features->protectedMemory = false;
718 break;
719 }
720 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
721 VkPhysicalDevice16BitStorageFeatures *features =
722 (VkPhysicalDevice16BitStorageFeatures*)ext;
723 features->storageBuffer16BitAccess = false;
724 features->uniformAndStorageBuffer16BitAccess = false;
725 features->storagePushConstant16 = false;
726 features->storageInputOutput16 = false;
727 break;
728 }
729 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
730 VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
731 (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
732 features->samplerYcbcrConversion = false;
733 break;
734 }
735 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
736 VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
737 (VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
738 features->shaderInputAttachmentArrayDynamicIndexing = true;
739 features->shaderUniformTexelBufferArrayDynamicIndexing = true;
740 features->shaderStorageTexelBufferArrayDynamicIndexing = true;
741 features->shaderUniformBufferArrayNonUniformIndexing = false;
742 features->shaderSampledImageArrayNonUniformIndexing = false;
743 features->shaderStorageBufferArrayNonUniformIndexing = false;
744 features->shaderStorageImageArrayNonUniformIndexing = false;
745 features->shaderInputAttachmentArrayNonUniformIndexing = false;
746 features->shaderUniformTexelBufferArrayNonUniformIndexing = false;
747 features->shaderStorageTexelBufferArrayNonUniformIndexing = false;
748 features->descriptorBindingUniformBufferUpdateAfterBind = true;
749 features->descriptorBindingSampledImageUpdateAfterBind = true;
750 features->descriptorBindingStorageImageUpdateAfterBind = true;
751 features->descriptorBindingStorageBufferUpdateAfterBind = true;
752 features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
753 features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
754 features->descriptorBindingUpdateUnusedWhilePending = true;
755 features->descriptorBindingPartiallyBound = true;
756 features->descriptorBindingVariableDescriptorCount = true;
757 features->runtimeDescriptorArray = true;
758 break;
759 }
760 default:
761 break;
762 }
763 }
764 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
765 }
766
767 void radv_GetPhysicalDeviceProperties(
768 VkPhysicalDevice physicalDevice,
769 VkPhysicalDeviceProperties* pProperties)
770 {
771 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
772 VkSampleCountFlags sample_counts = 0xf;
773
774 /* Make sure that the entire descriptor set is addressable with a signed
775 * 32-bit int, i.e. the sum of all limits scaled by descriptor size must
776 * be at most 2 GiB. A combined image & sampler descriptor counts against
777 * both limits. This limit is for the pipeline layout, not for the set
778 * layout, but there is no set limit, so we just set a pipeline limit.
779 * No app is likely to hit this soon. */
780 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
781 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
782 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
783 32 /* sampler, largest when combined with image */ +
784 64 /* sampled image */ +
785 64 /* storage image */);
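/* With the per-descriptor sizes assumed above, the denominator is
 * 32 + 32 + 32 + 64 + 64 = 224 bytes, so this works out to roughly
 * 2^31 / 224 ~= 9.6 million descriptors per stage.
 */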
786
787 VkPhysicalDeviceLimits limits = {
788 .maxImageDimension1D = (1 << 14),
789 .maxImageDimension2D = (1 << 14),
790 .maxImageDimension3D = (1 << 11),
791 .maxImageDimensionCube = (1 << 14),
792 .maxImageArrayLayers = (1 << 11),
793 .maxTexelBufferElements = 128 * 1024 * 1024,
794 .maxUniformBufferRange = UINT32_MAX,
795 .maxStorageBufferRange = UINT32_MAX,
796 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
797 .maxMemoryAllocationCount = UINT32_MAX,
798 .maxSamplerAllocationCount = 64 * 1024,
799 .bufferImageGranularity = 64, /* A cache line */
800 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
801 .maxBoundDescriptorSets = MAX_SETS,
802 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
803 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
804 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
805 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
806 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
807 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
808 .maxPerStageResources = max_descriptor_set_size,
809 .maxDescriptorSetSamplers = max_descriptor_set_size,
810 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
811 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
812 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
813 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
814 .maxDescriptorSetSampledImages = max_descriptor_set_size,
815 .maxDescriptorSetStorageImages = max_descriptor_set_size,
816 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
817 .maxVertexInputAttributes = 32,
818 .maxVertexInputBindings = 32,
819 .maxVertexInputAttributeOffset = 2047,
820 .maxVertexInputBindingStride = 2048,
821 .maxVertexOutputComponents = 128,
822 .maxTessellationGenerationLevel = 64,
823 .maxTessellationPatchSize = 32,
824 .maxTessellationControlPerVertexInputComponents = 128,
825 .maxTessellationControlPerVertexOutputComponents = 128,
826 .maxTessellationControlPerPatchOutputComponents = 120,
827 .maxTessellationControlTotalOutputComponents = 4096,
828 .maxTessellationEvaluationInputComponents = 128,
829 .maxTessellationEvaluationOutputComponents = 128,
830 .maxGeometryShaderInvocations = 127,
831 .maxGeometryInputComponents = 64,
832 .maxGeometryOutputComponents = 128,
833 .maxGeometryOutputVertices = 256,
834 .maxGeometryTotalOutputComponents = 1024,
835 .maxFragmentInputComponents = 128,
836 .maxFragmentOutputAttachments = 8,
837 .maxFragmentDualSrcAttachments = 1,
838 .maxFragmentCombinedOutputResources = 8,
839 .maxComputeSharedMemorySize = 32768,
840 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
841 .maxComputeWorkGroupInvocations = 2048,
842 .maxComputeWorkGroupSize = {
843 2048,
844 2048,
845 2048
846 },
847 .subPixelPrecisionBits = 4 /* FIXME */,
848 .subTexelPrecisionBits = 4 /* FIXME */,
849 .mipmapPrecisionBits = 4 /* FIXME */,
850 .maxDrawIndexedIndexValue = UINT32_MAX,
851 .maxDrawIndirectCount = UINT32_MAX,
852 .maxSamplerLodBias = 16,
853 .maxSamplerAnisotropy = 16,
854 .maxViewports = MAX_VIEWPORTS,
855 .maxViewportDimensions = { (1 << 14), (1 << 14) },
856 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
857 .viewportSubPixelBits = 8,
858 .minMemoryMapAlignment = 4096, /* A page */
859 .minTexelBufferOffsetAlignment = 1,
860 .minUniformBufferOffsetAlignment = 4,
861 .minStorageBufferOffsetAlignment = 4,
862 .minTexelOffset = -32,
863 .maxTexelOffset = 31,
864 .minTexelGatherOffset = -32,
865 .maxTexelGatherOffset = 31,
866 .minInterpolationOffset = -2,
867 .maxInterpolationOffset = 2,
868 .subPixelInterpolationOffsetBits = 8,
869 .maxFramebufferWidth = (1 << 14),
870 .maxFramebufferHeight = (1 << 14),
871 .maxFramebufferLayers = (1 << 10),
872 .framebufferColorSampleCounts = sample_counts,
873 .framebufferDepthSampleCounts = sample_counts,
874 .framebufferStencilSampleCounts = sample_counts,
875 .framebufferNoAttachmentsSampleCounts = sample_counts,
876 .maxColorAttachments = MAX_RTS,
877 .sampledImageColorSampleCounts = sample_counts,
878 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
879 .sampledImageDepthSampleCounts = sample_counts,
880 .sampledImageStencilSampleCounts = sample_counts,
881 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
882 .maxSampleMaskWords = 1,
883 .timestampComputeAndGraphics = true,
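/* clock_crystal_freq is reported in kHz, so this yields nanoseconds
 * per timestamp tick; e.g. a 25000 kHz reference clock (illustrative
 * value) would give a 40 ns period.
 */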
884 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
885 .maxClipDistances = 8,
886 .maxCullDistances = 8,
887 .maxCombinedClipAndCullDistances = 8,
888 .discreteQueuePriorities = 1,
889 .pointSizeRange = { 0.125, 255.875 },
890 .lineWidthRange = { 0.0, 7.9921875 },
891 .pointSizeGranularity = (1.0 / 8.0),
892 .lineWidthGranularity = (1.0 / 128.0),
893 .strictLines = false, /* FINISHME */
894 .standardSampleLocations = true,
895 .optimalBufferCopyOffsetAlignment = 128,
896 .optimalBufferCopyRowPitchAlignment = 128,
897 .nonCoherentAtomSize = 64,
898 };
899
900 *pProperties = (VkPhysicalDeviceProperties) {
901 .apiVersion = radv_physical_device_api_version(pdevice),
902 .driverVersion = vk_get_driver_version(),
903 .vendorID = ATI_VENDOR_ID,
904 .deviceID = pdevice->rad_info.pci_id,
905 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
906 .limits = limits,
907 .sparseProperties = {0},
908 };
909
910 strcpy(pProperties->deviceName, pdevice->name);
911 memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
912 }
913
914 void radv_GetPhysicalDeviceProperties2(
915 VkPhysicalDevice physicalDevice,
916 VkPhysicalDeviceProperties2KHR *pProperties)
917 {
918 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
919 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
920
921 vk_foreach_struct(ext, pProperties->pNext) {
922 switch (ext->sType) {
923 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
924 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
925 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
926 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
927 break;
928 }
929 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
930 VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
931 memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
932 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
933 properties->deviceLUIDValid = false;
934 break;
935 }
936 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
937 VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
938 properties->maxMultiviewViewCount = MAX_VIEWS;
939 properties->maxMultiviewInstanceIndex = INT_MAX;
940 break;
941 }
942 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
943 VkPhysicalDevicePointClippingPropertiesKHR *properties =
944 (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
945 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
946 break;
947 }
948 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
949 VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
950 (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
951 properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
952 break;
953 }
954 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
955 VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
956 (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
957 properties->minImportedHostPointerAlignment = 4096;
958 break;
959 }
960 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
961 VkPhysicalDeviceSubgroupProperties *properties =
962 (VkPhysicalDeviceSubgroupProperties*)ext;
963 properties->subgroupSize = 64;
964 properties->supportedStages = VK_SHADER_STAGE_ALL;
965 properties->supportedOperations =
966 VK_SUBGROUP_FEATURE_BASIC_BIT |
967 VK_SUBGROUP_FEATURE_BALLOT_BIT |
968 VK_SUBGROUP_FEATURE_QUAD_BIT |
969 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
970 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
971 VK_SUBGROUP_FEATURE_VOTE_BIT;
972 properties->quadOperationsInAllStages = true;
973 break;
974 }
975 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
976 VkPhysicalDeviceMaintenance3Properties *properties =
977 (VkPhysicalDeviceMaintenance3Properties*)ext;
978 /* Make sure everything is addressable by a signed 32-bit int, and
979 * our largest descriptors are 96 bytes. */
980 properties->maxPerSetDescriptors = (1ull << 31) / 96;
981 /* Our buffer size fields allow only this much */
982 properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
983 break;
984 }
985 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
986 VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
987 (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
988 /* GFX6-8 only support single channel min/max filter. */
989 properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
990 properties->filterMinmaxSingleComponentFormats = true;
991 break;
992 }
993 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
994 VkPhysicalDeviceShaderCorePropertiesAMD *properties =
995 (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
996
997 /* Shader engines. */
998 properties->shaderEngineCount =
999 pdevice->rad_info.max_se;
1000 properties->shaderArraysPerEngineCount =
1001 pdevice->rad_info.max_sh_per_se;
1002 properties->computeUnitsPerShaderArray =
1003 pdevice->rad_info.num_good_compute_units /
1004 (pdevice->rad_info.max_se *
1005 pdevice->rad_info.max_sh_per_se);
1006 properties->simdPerComputeUnit = 4;
1007 properties->wavefrontsPerSimd =
1008 pdevice->rad_info.family == CHIP_TONGA ||
1009 pdevice->rad_info.family == CHIP_ICELAND ||
1010 pdevice->rad_info.family == CHIP_POLARIS10 ||
1011 pdevice->rad_info.family == CHIP_POLARIS11 ||
1012 pdevice->rad_info.family == CHIP_POLARIS12 ||
1013 pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
1014 properties->wavefrontSize = 64;
1015
1016 /* SGPR. */
1017 properties->sgprsPerSimd =
1018 radv_get_num_physical_sgprs(pdevice);
1019 properties->minSgprAllocation =
1020 pdevice->rad_info.chip_class >= VI ? 16 : 8;
1021 properties->maxSgprAllocation =
1022 pdevice->rad_info.family == CHIP_TONGA ||
1023 pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
1024 properties->sgprAllocationGranularity =
1025 pdevice->rad_info.chip_class >= VI ? 16 : 8;
1026
1027 /* VGPR. */
1028 properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
1029 properties->minVgprAllocation = 4;
1030 properties->maxVgprAllocation = 256;
1031 properties->vgprAllocationGranularity = 4;
1032 break;
1033 }
1034 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
1035 VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
1036 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
1037 properties->maxVertexAttribDivisor = UINT32_MAX;
1038 break;
1039 }
1040 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
1041 VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
1042 (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
1043 properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1044 properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
1045 properties->shaderSampledImageArrayNonUniformIndexingNative = false;
1046 properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
1047 properties->shaderStorageImageArrayNonUniformIndexingNative = false;
1048 properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1049 properties->robustBufferAccessUpdateAfterBind = false;
1050 properties->quadDivergentImplicitLod = false;
1051
1052 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
1053 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1054 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1055 32 /* sampler, largest when combined with image */ +
1056 64 /* sampled image */ +
1057 64 /* storage image */);
1058 properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1059 properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1060 properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1061 properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1062 properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1063 properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1064 properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1065 properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1066 properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1067 properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1068 properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1069 properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1070 properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1071 properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1072 properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1073 break;
1074 }
1075 default:
1076 break;
1077 }
1078 }
1079 }
1080
1081 static void radv_get_physical_device_queue_family_properties(
1082 struct radv_physical_device* pdevice,
1083 uint32_t* pCount,
1084 VkQueueFamilyProperties** pQueueFamilyProperties)
1085 {
1086 int num_queue_families = 1;
1087 int idx;
1088 if (pdevice->rad_info.num_compute_rings > 0 &&
1089 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
1090 num_queue_families++;
1091
1092 if (pQueueFamilyProperties == NULL) {
1093 *pCount = num_queue_families;
1094 return;
1095 }
1096
1097 if (!*pCount)
1098 return;
1099
1100 idx = 0;
1101 if (*pCount >= 1) {
1102 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1103 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1104 VK_QUEUE_COMPUTE_BIT |
1105 VK_QUEUE_TRANSFER_BIT |
1106 VK_QUEUE_SPARSE_BINDING_BIT,
1107 .queueCount = 1,
1108 .timestampValidBits = 64,
1109 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1110 };
1111 idx++;
1112 }
1113
1114 if (pdevice->rad_info.num_compute_rings > 0 &&
1115 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
1116 if (*pCount > idx) {
1117 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1118 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1119 VK_QUEUE_TRANSFER_BIT |
1120 VK_QUEUE_SPARSE_BINDING_BIT,
1121 .queueCount = pdevice->rad_info.num_compute_rings,
1122 .timestampValidBits = 64,
1123 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1124 };
1125 idx++;
1126 }
1127 }
1128 *pCount = idx;
1129 }
1130
1131 void radv_GetPhysicalDeviceQueueFamilyProperties(
1132 VkPhysicalDevice physicalDevice,
1133 uint32_t* pCount,
1134 VkQueueFamilyProperties* pQueueFamilyProperties)
1135 {
1136 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1137 if (!pQueueFamilyProperties) {
1138 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1139 return;
1140 }
1141 VkQueueFamilyProperties *properties[] = {
1142 pQueueFamilyProperties + 0,
1143 pQueueFamilyProperties + 1,
1144 pQueueFamilyProperties + 2,
1145 };
1146 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1147 assert(*pCount <= 3);
1148 }
1149
1150 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1151 VkPhysicalDevice physicalDevice,
1152 uint32_t* pCount,
1153 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
1154 {
1155 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1156 if (!pQueueFamilyProperties) {
1157 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1158 return;
1159 }
1160 VkQueueFamilyProperties *properties[] = {
1161 &pQueueFamilyProperties[0].queueFamilyProperties,
1162 &pQueueFamilyProperties[1].queueFamilyProperties,
1163 &pQueueFamilyProperties[2].queueFamilyProperties,
1164 };
1165 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1166 assert(*pCount <= 3);
1167 }
1168
1169 void radv_GetPhysicalDeviceMemoryProperties(
1170 VkPhysicalDevice physicalDevice,
1171 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1172 {
1173 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1174
1175 *pMemoryProperties = physical_device->memory_properties;
1176 }
1177
1178 void radv_GetPhysicalDeviceMemoryProperties2(
1179 VkPhysicalDevice physicalDevice,
1180 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
1181 {
1182 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1183 &pMemoryProperties->memoryProperties);
1184 }
1185
1186 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1187 VkDevice _device,
1188 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
1189 const void *pHostPointer,
1190 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
1191 {
1192 RADV_FROM_HANDLE(radv_device, device, _device);
1193
1194 switch (handleType)
1195 {
1196 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1197 const struct radv_physical_device *physical_device = device->physical_device;
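/* Imported host allocations can only be placed in host-visible
 * cached GTT, so report (at most) that single memory type bit.
 */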
1198 uint32_t memoryTypeBits = 0;
1199 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1200 if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1201 memoryTypeBits = (1 << i);
1202 break;
1203 }
1204 }
1205 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1206 return VK_SUCCESS;
1207 }
1208 default:
1209 return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
1210 }
1211 }
1212
1213 static enum radeon_ctx_priority
1214 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1215 {
1216 /* Default to MEDIUM when a specific global priority isn't requested */
1217 if (!pObj)
1218 return RADEON_CTX_PRIORITY_MEDIUM;
1219
1220 switch(pObj->globalPriority) {
1221 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1222 return RADEON_CTX_PRIORITY_REALTIME;
1223 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1224 return RADEON_CTX_PRIORITY_HIGH;
1225 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1226 return RADEON_CTX_PRIORITY_MEDIUM;
1227 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1228 return RADEON_CTX_PRIORITY_LOW;
1229 default:
1230 unreachable("Illegal global priority value");
1231 return RADEON_CTX_PRIORITY_INVALID;
1232 }
1233 }
1234
1235 static int
1236 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1237 uint32_t queue_family_index, int idx,
1238 VkDeviceQueueCreateFlags flags,
1239 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1240 {
1241 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1242 queue->device = device;
1243 queue->queue_family_index = queue_family_index;
1244 queue->queue_idx = idx;
1245 queue->priority = radv_get_queue_global_priority(global_priority);
1246 queue->flags = flags;
1247
1248 queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1249 if (!queue->hw_ctx)
1250 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1251
1252 return VK_SUCCESS;
1253 }
1254
1255 static void
1256 radv_queue_finish(struct radv_queue *queue)
1257 {
1258 if (queue->hw_ctx)
1259 queue->device->ws->ctx_destroy(queue->hw_ctx);
1260
1261 if (queue->initial_full_flush_preamble_cs)
1262 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1263 if (queue->initial_preamble_cs)
1264 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1265 if (queue->continue_preamble_cs)
1266 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1267 if (queue->descriptor_bo)
1268 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1269 if (queue->scratch_bo)
1270 queue->device->ws->buffer_destroy(queue->scratch_bo);
1271 if (queue->esgs_ring_bo)
1272 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1273 if (queue->gsvs_ring_bo)
1274 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1275 if (queue->tess_rings_bo)
1276 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1277 if (queue->compute_scratch_bo)
1278 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1279 }
1280
1281 static void
1282 radv_bo_list_init(struct radv_bo_list *bo_list)
1283 {
1284 pthread_mutex_init(&bo_list->mutex, NULL);
1285 bo_list->list.count = bo_list->capacity = 0;
1286 bo_list->list.bos = NULL;
1287 }
1288
1289 static void
1290 radv_bo_list_finish(struct radv_bo_list *bo_list)
1291 {
1292 free(bo_list->list.bos);
1293 pthread_mutex_destroy(&bo_list->mutex);
1294 }
1295
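/* Append a BO to the global list with amortized O(1) growth: the array
 * doubles in capacity (minimum 4) whenever it fills up. The mutex guards
 * against concurrent adds/removes from multiple threads.
 */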
1296 static VkResult radv_bo_list_add(struct radv_device *device,
1297 struct radeon_winsys_bo *bo)
1298 {
1299 struct radv_bo_list *bo_list = &device->bo_list;
1300
1301 if (unlikely(!device->use_global_bo_list))
1302 return VK_SUCCESS;
1303
1304 pthread_mutex_lock(&bo_list->mutex);
1305 if (bo_list->list.count == bo_list->capacity) {
1306 unsigned capacity = MAX2(4, bo_list->capacity * 2);
1307 void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
1308
1309 if (!data) {
1310 pthread_mutex_unlock(&bo_list->mutex);
1311 return VK_ERROR_OUT_OF_HOST_MEMORY;
1312 }
1313
1314 bo_list->list.bos = (struct radeon_winsys_bo**)data;
1315 bo_list->capacity = capacity;
1316 }
1317
1318 bo_list->list.bos[bo_list->list.count++] = bo;
1319 pthread_mutex_unlock(&bo_list->mutex);
1320 return VK_SUCCESS;
1321 }
1322
1323 static void radv_bo_list_remove(struct radv_device *device,
1324 struct radeon_winsys_bo *bo)
1325 {
1326 struct radv_bo_list *bo_list = &device->bo_list;
1327
1328 if (unlikely(!device->use_global_bo_list))
1329 return;
1330
1331 pthread_mutex_lock(&bo_list->mutex);
1332 for(unsigned i = 0; i < bo_list->list.count; ++i) {
1333 if (bo_list->list.bos[i] == bo) {
1334 bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
1335 --bo_list->list.count;
1336 break;
1337 }
1338 }
1339 pthread_mutex_unlock(&bo_list->mutex);
1340 }
1341
1342 static void
1343 radv_device_init_gs_info(struct radv_device *device)
1344 {
1345 device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
1346 device->physical_device->rad_info.family);
1347 }
1348
1349 static int radv_get_device_extension_index(const char *name)
1350 {
1351 for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1352 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1353 return i;
1354 }
1355 return -1;
1356 }
1357
1358 VkResult radv_CreateDevice(
1359 VkPhysicalDevice physicalDevice,
1360 const VkDeviceCreateInfo* pCreateInfo,
1361 const VkAllocationCallbacks* pAllocator,
1362 VkDevice* pDevice)
1363 {
1364 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1365 VkResult result;
1366 struct radv_device *device;
1367
1368 bool keep_shader_info = false;
1369
1370 /* Check enabled features */
1371 if (pCreateInfo->pEnabledFeatures) {
1372 VkPhysicalDeviceFeatures supported_features;
1373 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1374 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1375 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1376 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1377 for (uint32_t i = 0; i < num_features; i++) {
1378 if (enabled_feature[i] && !supported_feature[i])
1379 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1380 }
1381 }
1382
1383 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1384 sizeof(*device), 8,
1385 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1386 if (!device)
1387 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1388
1389 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1390 device->instance = physical_device->instance;
1391 device->physical_device = physical_device;
1392
1393 device->ws = physical_device->ws;
1394 if (pAllocator)
1395 device->alloc = *pAllocator;
1396 else
1397 device->alloc = physical_device->instance->alloc;
1398
1399 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1400 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1401 int index = radv_get_device_extension_index(ext_name);
1402 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1403 vk_free(&device->alloc, device);
1404 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1405 }
1406
1407 device->enabled_extensions.extensions[index] = true;
1408 }
1409
1410 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1411
1412 /* With update after bind we can't attach BOs to the command buffer
1413 * from the descriptor set anymore, so we have to use a global BO list.
1414 */
1415 device->use_global_bo_list =
1416 device->enabled_extensions.EXT_descriptor_indexing;
1417
1418 mtx_init(&device->shader_slab_mutex, mtx_plain);
1419 list_inithead(&device->shader_slabs);
1420
1421 radv_bo_list_init(&device->bo_list);
1422
1423 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1424 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1425 uint32_t qfi = queue_create->queueFamilyIndex;
1426 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1427 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1428
1429 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1430
1431 device->queues[qfi] = vk_alloc(&device->alloc,
1432 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1433 if (!device->queues[qfi]) {
1434 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1435 goto fail;
1436 }
1437
1438 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1439
1440 device->queue_count[qfi] = queue_create->queueCount;
1441
1442 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1443 result = radv_queue_init(device, &device->queues[qfi][q],
1444 qfi, q, queue_create->flags,
1445 global_priority);
1446 if (result != VK_SUCCESS)
1447 goto fail;
1448 }
1449 }
1450
1451 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1452 (device->instance->perftest_flags & RADV_PERFTEST_BINNING);
1453
1454 /* Disabled and not implemented for now. */
1455 device->dfsm_allowed = device->pbb_allowed && false;
1456
1457 #ifdef ANDROID
1458 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1459 #endif
1460
1461 /* The maximum number of scratch waves. Scratch space isn't divided
1462 * evenly between CUs. The number is only a function of the number of CUs.
1463 * We can decrease the constant to decrease the scratch buffer size.
1464 *
1465 * scratch_waves must be >= the maximum possible size of
1466 * 1 threadgroup, so that the hw doesn't hang from being unable
1467 * to start any.
1468 *
1469 * The recommended value is 4 per CU at most. Higher numbers don't
1470 * bring much benefit, but they still occupy chip resources (think
1471 * async compute). I've seen ~2% performance difference between 4 and 32.
1472 */
1473 uint32_t max_threads_per_block = 2048;
1474 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1475 max_threads_per_block / 64);
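/* Worked example: a hypothetical 16-CU GPU gets
 * MAX2(32 * 16, 2048 / 64) = 512 scratch waves.
 */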
1476
1477 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1478
1479 if (device->physical_device->rad_info.chip_class >= CIK) {
1480 /* If the KMD allows it (there is a KMD hw register for it),
1481 * allow launching waves out-of-order.
1482 */
1483 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1484 }
1485
1486 radv_device_init_gs_info(device);
1487
1488 device->tess_offchip_block_dw_size =
1489 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1490 device->has_distributed_tess =
1491 device->physical_device->rad_info.chip_class >= VI &&
1492 device->physical_device->rad_info.max_se >= 2;
1493
1494 if (getenv("RADV_TRACE_FILE")) {
1495 const char *filename = getenv("RADV_TRACE_FILE");
1496
1497 keep_shader_info = true;
1498
1499 if (!radv_init_trace(device))
1500 goto fail;
1501
1502 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1503 radv_dump_enabled_options(device, stderr);
1504 }
1505
1506 device->keep_shader_info = keep_shader_info;
1507
1508 result = radv_device_init_meta(device);
1509 if (result != VK_SUCCESS)
1510 goto fail;
1511
1512 radv_device_init_msaa(device);
1513
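/* Pre-build a minimal no-op command stream per queue family; these are
 * used for submissions that carry no user commands (e.g. fence-only
 * QueueSubmit calls).
 */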
1514 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1515 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1516 switch (family) {
1517 case RADV_QUEUE_GENERAL:
1518 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1519 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1520 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1521 break;
1522 case RADV_QUEUE_COMPUTE:
1523 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1524 radeon_emit(device->empty_cs[family], 0);
1525 break;
1526 }
1527 device->ws->cs_finalize(device->empty_cs[family]);
1528 }
1529
1530 if (device->physical_device->rad_info.chip_class >= CIK)
1531 cik_create_gfx_config(device);
1532
1533 VkPipelineCacheCreateInfo ci;
1534 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1535 ci.pNext = NULL;
1536 ci.flags = 0;
1537 ci.pInitialData = NULL;
1538 ci.initialDataSize = 0;
1539 VkPipelineCache pc;
1540 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1541 &ci, NULL, &pc);
1542 if (result != VK_SUCCESS)
1543 goto fail_meta;
1544
1545 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1546
1547 *pDevice = radv_device_to_handle(device);
1548 return VK_SUCCESS;
1549
1550 fail_meta:
1551 radv_device_finish_meta(device);
1552 fail:
1553 radv_bo_list_finish(&device->bo_list);
1554
1555 if (device->trace_bo)
1556 device->ws->buffer_destroy(device->trace_bo);
1557
1558 if (device->gfx_init)
1559 device->ws->buffer_destroy(device->gfx_init);
1560
1561 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1562 for (unsigned q = 0; q < device->queue_count[i]; q++)
1563 radv_queue_finish(&device->queues[i][q]);
1564 if (device->queue_count[i])
1565 vk_free(&device->alloc, device->queues[i]);
1566 }
1567
1568 vk_free(&device->alloc, device);
1569 return result;
1570 }
1571
1572 void radv_DestroyDevice(
1573 VkDevice _device,
1574 const VkAllocationCallbacks* pAllocator)
1575 {
1576 RADV_FROM_HANDLE(radv_device, device, _device);
1577
1578 if (!device)
1579 return;
1580
1581 if (device->trace_bo)
1582 device->ws->buffer_destroy(device->trace_bo);
1583
1584 if (device->gfx_init)
1585 device->ws->buffer_destroy(device->gfx_init);
1586
1587 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1588 for (unsigned q = 0; q < device->queue_count[i]; q++)
1589 radv_queue_finish(&device->queues[i][q]);
1590 if (device->queue_count[i])
1591 vk_free(&device->alloc, device->queues[i]);
1592 if (device->empty_cs[i])
1593 device->ws->cs_destroy(device->empty_cs[i]);
1594 }
1595 radv_device_finish_meta(device);
1596
1597 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1598 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1599
1600 radv_destroy_shader_slabs(device);
1601
1602 radv_bo_list_finish(&device->bo_list);
1603 vk_free(&device->alloc, device);
1604 }
1605
1606 VkResult radv_EnumerateInstanceLayerProperties(
1607 uint32_t* pPropertyCount,
1608 VkLayerProperties* pProperties)
1609 {
1610 if (pProperties == NULL) {
1611 *pPropertyCount = 0;
1612 return VK_SUCCESS;
1613 }
1614
1615 /* None supported at this time */
1616 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1617 }
1618
1619 VkResult radv_EnumerateDeviceLayerProperties(
1620 VkPhysicalDevice physicalDevice,
1621 uint32_t* pPropertyCount,
1622 VkLayerProperties* pProperties)
1623 {
1624 if (pProperties == NULL) {
1625 *pPropertyCount = 0;
1626 return VK_SUCCESS;
1627 }
1628
1629 /* None supported at this time */
1630 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1631 }
1632
1633 void radv_GetDeviceQueue2(
1634 VkDevice _device,
1635 const VkDeviceQueueInfo2* pQueueInfo,
1636 VkQueue* pQueue)
1637 {
1638 RADV_FROM_HANDLE(radv_device, device, _device);
1639 struct radv_queue *queue;
1640
1641 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
1642 if (pQueueInfo->flags != queue->flags) {
1643 /* From the Vulkan 1.1.70 spec:
1644 *
1645 * "The queue returned by vkGetDeviceQueue2 must have the same
1646 * flags value from this structure as that used at device
1647 * creation time in a VkDeviceQueueCreateInfo instance. If no
1648 * matching flags were specified at device creation time then
1649 * pQueue will return VK_NULL_HANDLE."
1650 */
1651 *pQueue = VK_NULL_HANDLE;
1652 return;
1653 }
1654
1655 *pQueue = radv_queue_to_handle(queue);
1656 }
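/* Illustrative application-side usage (not driver code): a queue created
 * with non-zero flags has to be retrieved with those same flags, e.g.
 *
 *    VkDeviceQueueInfo2 info = {
 *            .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
 *            .flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT,
 *            .queueFamilyIndex = 0,
 *            .queueIndex = 0,
 *    };
 *    vkGetDeviceQueue2(device, &info, &queue);
 *
 * Whether any such flags are actually supported is a separate question;
 * the point is the matching rule that the check above implements.
 */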
1657
1658 void radv_GetDeviceQueue(
1659 VkDevice _device,
1660 uint32_t queueFamilyIndex,
1661 uint32_t queueIndex,
1662 VkQueue* pQueue)
1663 {
1664 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1665 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1666 .queueFamilyIndex = queueFamilyIndex,
1667 .queueIndex = queueIndex
1668 };
1669
1670 radv_GetDeviceQueue2(_device, &info, pQueue);
1671 }
1672
1673 static void
1674 fill_geom_tess_rings(struct radv_queue *queue,
1675 uint32_t *map,
1676 bool add_sample_positions,
1677 uint32_t esgs_ring_size,
1678 struct radeon_winsys_bo *esgs_ring_bo,
1679 uint32_t gsvs_ring_size,
1680 struct radeon_winsys_bo *gsvs_ring_bo,
1681 uint32_t tess_factor_ring_size,
1682 uint32_t tess_offchip_ring_offset,
1683 uint32_t tess_offchip_ring_size,
1684 struct radeon_winsys_bo *tess_rings_bo)
1685 {
1686 uint64_t esgs_va = 0, gsvs_va = 0;
1687 uint64_t tess_va = 0, tess_offchip_va = 0;
1688 uint32_t *desc = &map[4];
1689
1690 if (esgs_ring_bo)
1691 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1692 if (gsvs_ring_bo)
1693 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1694 if (tess_rings_bo) {
1695 tess_va = radv_buffer_get_va(tess_rings_bo);
1696 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1697 }
1698
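	/* Field encodings used below, per the GCN buffer resource layout
	 * (stated here for readability; the numeric meanings come from the
	 * ISA documentation, not from anything in this file):
	 * ELEMENT_SIZE 0/1/2/3 selects 2/4/8/16 bytes and INDEX_STRIDE
	 * 0/1/2/3 selects 8/16/32/64, so ELEMENT_SIZE(1) | INDEX_STRIDE(3)
	 * is the "elsize4, index stride 64" swizzled layout mentioned in
	 * the comments.
	 */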
1699 /* stride 0, num records - size, add tid, swizzle, elsize4,
1700 index stride 64 */
1701 desc[0] = esgs_va;
1702 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1703 S_008F04_STRIDE(0) |
1704 S_008F04_SWIZZLE_ENABLE(true);
1705 desc[2] = esgs_ring_size;
1706 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1707 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1708 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1709 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1710 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1711 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1712 S_008F0C_ELEMENT_SIZE(1) |
1713 S_008F0C_INDEX_STRIDE(3) |
1714 S_008F0C_ADD_TID_ENABLE(true);
1715
1716 desc += 4;
1717 /* GS entry for ES->GS ring */
1718 /* stride 0, num records - size, elsize0,
1719 index stride 0 */
1720 desc[0] = esgs_va;
1721 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1722 S_008F04_STRIDE(0) |
1723 S_008F04_SWIZZLE_ENABLE(false);
1724 desc[2] = esgs_ring_size;
1725 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1726 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1727 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1728 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1729 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1730 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1731 S_008F0C_ELEMENT_SIZE(0) |
1732 S_008F0C_INDEX_STRIDE(0) |
1733 S_008F0C_ADD_TID_ENABLE(false);
1734
1735 desc += 4;
1736 /* VS entry for GS->VS ring */
1737 /* stride 0, num records - size, elsize0,
1738 index stride 0 */
1739 desc[0] = gsvs_va;
1740 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1741 S_008F04_STRIDE(0) |
1742 S_008F04_SWIZZLE_ENABLE(false);
1743 desc[2] = gsvs_ring_size;
1744 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1745 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1746 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1747 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1748 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1749 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1750 S_008F0C_ELEMENT_SIZE(0) |
1751 S_008F0C_INDEX_STRIDE(0) |
1752 S_008F0C_ADD_TID_ENABLE(false);
1753 desc += 4;
1754
1755 /* stride gsvs_itemsize, num records 64
1756 elsize 4, index stride 16 */
1757 /* shader will patch stride and desc[2] */
1758 desc[0] = gsvs_va;
1759 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1760 S_008F04_STRIDE(0) |
1761 S_008F04_SWIZZLE_ENABLE(true);
1762 desc[2] = 0;
1763 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1764 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1765 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1766 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1767 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1768 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1769 S_008F0C_ELEMENT_SIZE(1) |
1770 S_008F0C_INDEX_STRIDE(1) |
1771 S_008F0C_ADD_TID_ENABLE(true);
1772 desc += 4;
1773
1774 desc[0] = tess_va;
1775 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1776 S_008F04_STRIDE(0) |
1777 S_008F04_SWIZZLE_ENABLE(false);
1778 desc[2] = tess_factor_ring_size;
1779 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1780 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1781 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1782 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1783 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1784 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1785 S_008F0C_ELEMENT_SIZE(0) |
1786 S_008F0C_INDEX_STRIDE(0) |
1787 S_008F0C_ADD_TID_ENABLE(false);
1788 desc += 4;
1789
1790 desc[0] = tess_offchip_va;
1791 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1792 S_008F04_STRIDE(0) |
1793 S_008F04_SWIZZLE_ENABLE(false);
1794 desc[2] = tess_offchip_ring_size;
1795 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1796 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1797 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1798 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1799 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1800 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1801 S_008F0C_ELEMENT_SIZE(0) |
1802 S_008F0C_INDEX_STRIDE(0) |
1803 S_008F0C_ADD_TID_ENABLE(false);
1804 desc += 4;
1805
1806 /* add sample positions after all rings */
1807 memcpy(desc, queue->device->sample_locations_1x, 8);
1808 desc += 2;
1809 memcpy(desc, queue->device->sample_locations_2x, 16);
1810 desc += 4;
1811 memcpy(desc, queue->device->sample_locations_4x, 32);
1812 desc += 8;
1813 memcpy(desc, queue->device->sample_locations_8x, 64);
1814 desc += 16;
1815 memcpy(desc, queue->device->sample_locations_16x, 128);
1816 }
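/* For reference, the map[] layout written above, in dwords (derived from
 * the code, assuming all rings plus sample positions are present):
 *
 *    0  scratch rsrc (2 dwords, filled in by the caller) + 2 padding
 *    4  ES entry for the ES->GS ring (swizzled write)
 *    8  GS entry for the ES->GS ring (read)
 *   12  VS entry for the GS->VS ring (read)
 *   16  GS entry for the GS->VS ring (swizzled write)
 *   20  tess factor ring
 *   24  tess offchip ring
 *   28  1x/2x/4x/8x/16x sample positions: 2+4+8+16+32 dwords = 248 bytes
 *
 * which matches the 112 + 256 byte descriptor BO reservation made in
 * radv_get_preamble_cs() below.
 */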
1817
1818 static unsigned
1819 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1820 {
1821 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1822 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1823 device->physical_device->rad_info.family != CHIP_STONEY;
1824 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1825 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1826 device->physical_device->rad_info.max_se;
1827 unsigned offchip_granularity;
1828 unsigned hs_offchip_param;
1829 switch (device->tess_offchip_block_dw_size) {
1830 default:
1831 assert(0);
1832 /* fall through */
1833 case 8192:
1834 offchip_granularity = V_03093C_X_8K_DWORDS;
1835 break;
1836 case 4096:
1837 offchip_granularity = V_03093C_X_4K_DWORDS;
1838 break;
1839 }
1840
1841 switch (device->physical_device->rad_info.chip_class) {
1842 case SI:
1843 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1844 break;
1845 case CIK:
1846 case VI:
1847 case GFX9:
1848 default:
1849 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1850 break;
1851 }
1852
1853 *max_offchip_buffers_p = max_offchip_buffers;
1854 if (device->physical_device->rad_info.chip_class >= CIK) {
1855 if (device->physical_device->rad_info.chip_class >= VI)
1856 --max_offchip_buffers;
1857 hs_offchip_param =
1858 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1859 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1860 } else {
1861 hs_offchip_param =
1862 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1863 }
1864 return hs_offchip_param;
1865 }
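/* Worked example (the numbers follow directly from the code above): a
 * GFX9 part with max_se = 4 and double buffering gets 128 * 4 = 512
 * buffers, clamped to 508. The CIK+ path then programs OFFCHIP_BUFFERING
 * with 508 - 1 = 507 (VI+ decrement) and an 8K-dword granularity, while
 * *max_offchip_buffers_p still reports 508 to the caller.
 */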
1866
1867 static VkResult
1868 radv_get_preamble_cs(struct radv_queue *queue,
1869 uint32_t scratch_size,
1870 uint32_t compute_scratch_size,
1871 uint32_t esgs_ring_size,
1872 uint32_t gsvs_ring_size,
1873 bool needs_tess_rings,
1874 bool needs_sample_positions,
1875 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1876 struct radeon_winsys_cs **initial_preamble_cs,
1877 struct radeon_winsys_cs **continue_preamble_cs)
1878 {
1879 struct radeon_winsys_bo *scratch_bo = NULL;
1880 struct radeon_winsys_bo *descriptor_bo = NULL;
1881 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1882 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1883 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1884 struct radeon_winsys_bo *tess_rings_bo = NULL;
1885 struct radeon_winsys_cs *dest_cs[3] = {0};
1886 bool add_tess_rings = false, add_sample_positions = false;
1887 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1888 unsigned max_offchip_buffers;
1889 unsigned hs_offchip_param = 0;
1890 unsigned tess_offchip_ring_offset;
1891 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1892 if (!queue->has_tess_rings) {
1893 if (needs_tess_rings)
1894 add_tess_rings = true;
1895 }
1896 if (!queue->has_sample_positions) {
1897 if (needs_sample_positions)
1898 add_sample_positions = true;
1899 }
1900 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1901 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1902 &max_offchip_buffers);
1903 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
1904 tess_offchip_ring_size = max_offchip_buffers *
1905 queue->device->tess_offchip_block_dw_size * 4;
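	/* E.g. with max_se = 4 and the default 8K-dword block size this is a
	 * 128 KiB tess factor ring, an offchip offset aligned up to 128 KiB,
	 * and 508 * 8192 * 4 bytes (~16 MiB) of offchip storage, all carved
	 * out of the single tess_rings_bo allocated below.
	 */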
1906
1907 if (scratch_size <= queue->scratch_size &&
1908 compute_scratch_size <= queue->compute_scratch_size &&
1909 esgs_ring_size <= queue->esgs_ring_size &&
1910 gsvs_ring_size <= queue->gsvs_ring_size &&
1911 !add_tess_rings && !add_sample_positions &&
1912 queue->initial_preamble_cs) {
1913 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1914 *initial_preamble_cs = queue->initial_preamble_cs;
1915 *continue_preamble_cs = queue->continue_preamble_cs;
1916 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1917 *continue_preamble_cs = NULL;
1918 return VK_SUCCESS;
1919 }
1920
1921 if (scratch_size > queue->scratch_size) {
1922 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1923 scratch_size,
1924 4096,
1925 RADEON_DOMAIN_VRAM,
1926 ring_bo_flags);
1927 if (!scratch_bo)
1928 goto fail;
1929 } else
1930 scratch_bo = queue->scratch_bo;
1931
1932 if (compute_scratch_size > queue->compute_scratch_size) {
1933 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1934 compute_scratch_size,
1935 4096,
1936 RADEON_DOMAIN_VRAM,
1937 ring_bo_flags);
1938 if (!compute_scratch_bo)
1939 goto fail;
1940
1941 } else
1942 compute_scratch_bo = queue->compute_scratch_bo;
1943
1944 if (esgs_ring_size > queue->esgs_ring_size) {
1945 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1946 esgs_ring_size,
1947 4096,
1948 RADEON_DOMAIN_VRAM,
1949 ring_bo_flags);
1950 if (!esgs_ring_bo)
1951 goto fail;
1952 } else {
1953 esgs_ring_bo = queue->esgs_ring_bo;
1954 esgs_ring_size = queue->esgs_ring_size;
1955 }
1956
1957 if (gsvs_ring_size > queue->gsvs_ring_size) {
1958 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1959 gsvs_ring_size,
1960 4096,
1961 RADEON_DOMAIN_VRAM,
1962 ring_bo_flags);
1963 if (!gsvs_ring_bo)
1964 goto fail;
1965 } else {
1966 gsvs_ring_bo = queue->gsvs_ring_bo;
1967 gsvs_ring_size = queue->gsvs_ring_size;
1968 }
1969
1970 if (add_tess_rings) {
1971 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
1972 tess_offchip_ring_offset + tess_offchip_ring_size,
1973 256,
1974 RADEON_DOMAIN_VRAM,
1975 ring_bo_flags);
1976 if (!tess_rings_bo)
1977 goto fail;
1978 } else {
1979 tess_rings_bo = queue->tess_rings_bo;
1980 }
1981
1982 if (scratch_bo != queue->scratch_bo ||
1983 esgs_ring_bo != queue->esgs_ring_bo ||
1984 gsvs_ring_bo != queue->gsvs_ring_bo ||
1985 tess_rings_bo != queue->tess_rings_bo ||
1986 add_sample_positions) {
1987 uint32_t size = 0;
1988 if (gsvs_ring_bo || esgs_ring_bo ||
1989 tess_rings_bo || add_sample_positions) {
1990 			size = 112; /* (2 + 2 + 4 * 6) dwords = 112 bytes */
1991 if (add_sample_positions)
1992 				size += 256; /* (16+8+4+2+1) samples * 4 * 2 = 248 bytes, padded to 256. */
1993 }
1994 else if (scratch_bo)
1995 size = 8; /* 2 dword */
1996
1997 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1998 size,
1999 4096,
2000 RADEON_DOMAIN_VRAM,
2001 RADEON_FLAG_CPU_ACCESS |
2002 RADEON_FLAG_NO_INTERPROCESS_SHARING |
2003 RADEON_FLAG_READ_ONLY);
2004 if (!descriptor_bo)
2005 goto fail;
2006 } else
2007 descriptor_bo = queue->descriptor_bo;
2008
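	/* The loop below builds three preamble variants that differ only in
	 * their cache handling: i == 0 performs a full flush plus cache
	 * invalidation (used when a submission must flush first), i == 1
	 * only invalidates caches, and i == 2 emits no flush at all (the
	 * "continue" preamble chained between command buffers). The
	 * scratch/ring setup is identical in all three.
	 */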
2009 for(int i = 0; i < 3; ++i) {
2010 struct radeon_winsys_cs *cs = NULL;
2011 cs = queue->device->ws->cs_create(queue->device->ws,
2012 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2013 if (!cs)
2014 goto fail;
2015
2016 dest_cs[i] = cs;
2017
2018 if (scratch_bo)
2019 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
2020
2021 if (esgs_ring_bo)
2022 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
2023
2024 if (gsvs_ring_bo)
2025 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
2026
2027 if (tess_rings_bo)
2028 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo, 8);
2029
2030 if (descriptor_bo)
2031 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
2032
2033 if (descriptor_bo != queue->descriptor_bo) {
2034 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2035
2036 if (scratch_bo) {
2037 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2038 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2039 S_008F04_SWIZZLE_ENABLE(1);
2040 map[0] = scratch_va;
2041 map[1] = rsrc1;
2042 }
2043
2044 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
2045 add_sample_positions)
2046 fill_geom_tess_rings(queue, map, add_sample_positions,
2047 esgs_ring_size, esgs_ring_bo,
2048 gsvs_ring_size, gsvs_ring_bo,
2049 tess_factor_ring_size,
2050 tess_offchip_ring_offset,
2051 tess_offchip_ring_size,
2052 tess_rings_bo);
2053
2054 queue->device->ws->buffer_unmap(descriptor_bo);
2055 }
2056
2057 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
2058 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2059 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2060 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2061 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2062 }
2063
2064 if (esgs_ring_bo || gsvs_ring_bo) {
2065 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2066 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2067 radeon_emit(cs, esgs_ring_size >> 8);
2068 radeon_emit(cs, gsvs_ring_size >> 8);
2069 } else {
2070 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2071 radeon_emit(cs, esgs_ring_size >> 8);
2072 radeon_emit(cs, gsvs_ring_size >> 8);
2073 }
2074 }
2075
2076 if (tess_rings_bo) {
2077 uint64_t tf_va = radv_buffer_get_va(tess_rings_bo);
2078 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2079 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
2080 S_030938_SIZE(tess_factor_ring_size / 4));
2081 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
2082 tf_va >> 8);
2083 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2084 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
2085 S_030944_BASE_HI(tf_va >> 40));
2086 }
2087 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
2088 } else {
2089 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
2090 S_008988_SIZE(tess_factor_ring_size / 4));
2091 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
2092 tf_va >> 8);
2093 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
2094 hs_offchip_param);
2095 }
2096 }
2097
2098 if (descriptor_bo) {
2099 uint64_t va = radv_buffer_get_va(descriptor_bo);
2100 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2101 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2102 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2103 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2104 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2105
2106 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2107 radeon_set_sh_reg_seq(cs, regs[i], 2);
2108 radeon_emit(cs, va);
2109 radeon_emit(cs, va >> 32);
2110 }
2111 } else {
2112 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2113 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2114 R_00B230_SPI_SHADER_USER_DATA_GS_0,
2115 R_00B330_SPI_SHADER_USER_DATA_ES_0,
2116 R_00B430_SPI_SHADER_USER_DATA_HS_0,
2117 R_00B530_SPI_SHADER_USER_DATA_LS_0};
2118
2119 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2120 radeon_set_sh_reg_seq(cs, regs[i], 2);
2121 radeon_emit(cs, va);
2122 radeon_emit(cs, va >> 32);
2123 }
2124 }
2125 }
2126
2127 if (compute_scratch_bo) {
2128 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
2129 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2130 S_008F04_SWIZZLE_ENABLE(1);
2131
2132 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
2133
2134 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2135 radeon_emit(cs, scratch_va);
2136 radeon_emit(cs, rsrc1);
2137 }
2138
2139 if (i == 0) {
2140 si_cs_emit_cache_flush(cs,
2141 queue->device->physical_device->rad_info.chip_class,
2142 NULL, 0,
2143 queue->queue_family_index == RING_COMPUTE &&
2144 queue->device->physical_device->rad_info.chip_class >= CIK,
2145 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2146 RADV_CMD_FLAG_INV_ICACHE |
2147 RADV_CMD_FLAG_INV_SMEM_L1 |
2148 RADV_CMD_FLAG_INV_VMEM_L1 |
2149 RADV_CMD_FLAG_INV_GLOBAL_L2);
2150 } else if (i == 1) {
2151 si_cs_emit_cache_flush(cs,
2152 queue->device->physical_device->rad_info.chip_class,
2153 NULL, 0,
2154 queue->queue_family_index == RING_COMPUTE &&
2155 queue->device->physical_device->rad_info.chip_class >= CIK,
2156 RADV_CMD_FLAG_INV_ICACHE |
2157 RADV_CMD_FLAG_INV_SMEM_L1 |
2158 RADV_CMD_FLAG_INV_VMEM_L1 |
2159 RADV_CMD_FLAG_INV_GLOBAL_L2);
2160 }
2161
2162 if (!queue->device->ws->cs_finalize(cs))
2163 goto fail;
2164 }
2165
2166 if (queue->initial_full_flush_preamble_cs)
2167 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2168
2169 if (queue->initial_preamble_cs)
2170 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2171
2172 if (queue->continue_preamble_cs)
2173 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2174
2175 queue->initial_full_flush_preamble_cs = dest_cs[0];
2176 queue->initial_preamble_cs = dest_cs[1];
2177 queue->continue_preamble_cs = dest_cs[2];
2178
2179 if (scratch_bo != queue->scratch_bo) {
2180 if (queue->scratch_bo)
2181 queue->device->ws->buffer_destroy(queue->scratch_bo);
2182 queue->scratch_bo = scratch_bo;
2183 queue->scratch_size = scratch_size;
2184 }
2185
2186 if (compute_scratch_bo != queue->compute_scratch_bo) {
2187 if (queue->compute_scratch_bo)
2188 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2189 queue->compute_scratch_bo = compute_scratch_bo;
2190 queue->compute_scratch_size = compute_scratch_size;
2191 }
2192
2193 if (esgs_ring_bo != queue->esgs_ring_bo) {
2194 if (queue->esgs_ring_bo)
2195 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2196 queue->esgs_ring_bo = esgs_ring_bo;
2197 queue->esgs_ring_size = esgs_ring_size;
2198 }
2199
2200 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2201 if (queue->gsvs_ring_bo)
2202 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2203 queue->gsvs_ring_bo = gsvs_ring_bo;
2204 queue->gsvs_ring_size = gsvs_ring_size;
2205 }
2206
2207 if (tess_rings_bo != queue->tess_rings_bo) {
2208 queue->tess_rings_bo = tess_rings_bo;
2209 queue->has_tess_rings = true;
2210 }
2211
2212 if (descriptor_bo != queue->descriptor_bo) {
2213 if (queue->descriptor_bo)
2214 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2215
2216 queue->descriptor_bo = descriptor_bo;
2217 }
2218
2219 if (add_sample_positions)
2220 queue->has_sample_positions = true;
2221
2222 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2223 *initial_preamble_cs = queue->initial_preamble_cs;
2224 *continue_preamble_cs = queue->continue_preamble_cs;
2225 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2226 *continue_preamble_cs = NULL;
2227 return VK_SUCCESS;
2228 fail:
2229 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2230 if (dest_cs[i])
2231 queue->device->ws->cs_destroy(dest_cs[i]);
2232 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2233 queue->device->ws->buffer_destroy(descriptor_bo);
2234 if (scratch_bo && scratch_bo != queue->scratch_bo)
2235 queue->device->ws->buffer_destroy(scratch_bo);
2236 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2237 queue->device->ws->buffer_destroy(compute_scratch_bo);
2238 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2239 queue->device->ws->buffer_destroy(esgs_ring_bo);
2240 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2241 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2242 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2243 queue->device->ws->buffer_destroy(tess_rings_bo);
2244 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2245 }
2246
2247 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
2248 int num_sems,
2249 const VkSemaphore *sems,
2250 VkFence _fence,
2251 bool reset_temp)
2252 {
2253 int syncobj_idx = 0, sem_idx = 0;
2254
2255 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2256 return VK_SUCCESS;
2257
2258 for (uint32_t i = 0; i < num_sems; i++) {
2259 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2260
2261 if (sem->temp_syncobj || sem->syncobj)
2262 counts->syncobj_count++;
2263 else
2264 counts->sem_count++;
2265 }
2266
2267 if (_fence != VK_NULL_HANDLE) {
2268 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2269 if (fence->temp_syncobj || fence->syncobj)
2270 counts->syncobj_count++;
2271 }
2272
2273 if (counts->syncobj_count) {
2274 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2275 if (!counts->syncobj)
2276 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2277 }
2278
2279 if (counts->sem_count) {
2280 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2281 if (!counts->sem) {
2282 free(counts->syncobj);
2283 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2284 }
2285 }
2286
2287 for (uint32_t i = 0; i < num_sems; i++) {
2288 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2289
2290 if (sem->temp_syncobj) {
2291 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2292 }
2293 else if (sem->syncobj)
2294 counts->syncobj[syncobj_idx++] = sem->syncobj;
2295 else {
2296 assert(sem->sem);
2297 counts->sem[sem_idx++] = sem->sem;
2298 }
2299 }
2300
2301 if (_fence != VK_NULL_HANDLE) {
2302 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2303 if (fence->temp_syncobj)
2304 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2305 else if (fence->syncobj)
2306 counts->syncobj[syncobj_idx++] = fence->syncobj;
2307 }
2308
2309 return VK_SUCCESS;
2310 }
2311
2312 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2313 {
2314 free(sem_info->wait.syncobj);
2315 free(sem_info->wait.sem);
2316 free(sem_info->signal.syncobj);
2317 free(sem_info->signal.sem);
2318 }
2319
2320
2321 static void radv_free_temp_syncobjs(struct radv_device *device,
2322 int num_sems,
2323 const VkSemaphore *sems)
2324 {
2325 for (uint32_t i = 0; i < num_sems; i++) {
2326 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2327
2328 if (sem->temp_syncobj) {
2329 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2330 sem->temp_syncobj = 0;
2331 }
2332 }
2333 }
2334
2335 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
2336 int num_wait_sems,
2337 const VkSemaphore *wait_sems,
2338 int num_signal_sems,
2339 const VkSemaphore *signal_sems,
2340 VkFence fence)
2341 {
2342 VkResult ret;
2343 memset(sem_info, 0, sizeof(*sem_info));
2344
2345 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2346 if (ret)
2347 return ret;
2348 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
2349 if (ret)
2350 radv_free_sem_info(sem_info);
2351
2352 /* caller can override these */
2353 sem_info->cs_emit_wait = true;
2354 sem_info->cs_emit_signal = true;
2355 return ret;
2356 }
2357
2358 /* Signals the fence once all work previously submitted to the queue has completed. */
2359 static VkResult radv_signal_fence(struct radv_queue *queue,
2360 struct radv_fence *fence)
2361 {
2362 int ret;
2363 VkResult result;
2364 struct radv_winsys_sem_info sem_info;
2365
2366 result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
2367 radv_fence_to_handle(fence));
2368 if (result != VK_SUCCESS)
2369 return result;
2370
2371 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2372 &queue->device->empty_cs[queue->queue_family_index],
2373 1, NULL, NULL, &sem_info, NULL,
2374 false, fence->fence);
2375 radv_free_sem_info(&sem_info);
2376
2377 /* TODO: find a better error */
2378 if (ret)
2379 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2380
2381 return VK_SUCCESS;
2382 }
2383
2384 VkResult radv_QueueSubmit(
2385 VkQueue _queue,
2386 uint32_t submitCount,
2387 const VkSubmitInfo* pSubmits,
2388 VkFence _fence)
2389 {
2390 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2391 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2392 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2393 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2394 int ret;
2395 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
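	/* With an active trace BO, submit one CS at a time so that a hang
	 * can be attributed to a specific command buffer (see the
	 * radv_check_gpu_hangs() call below). */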
2396 uint32_t scratch_size = 0;
2397 uint32_t compute_scratch_size = 0;
2398 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2399 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2400 VkResult result;
2401 bool fence_emitted = false;
2402 bool tess_rings_needed = false;
2403 bool sample_positions_needed = false;
2404
2405 /* Do this first so failing to allocate scratch buffers can't result in
2406 * partially executed submissions. */
2407 for (uint32_t i = 0; i < submitCount; i++) {
2408 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2409 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2410 pSubmits[i].pCommandBuffers[j]);
2411
2412 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2413 compute_scratch_size = MAX2(compute_scratch_size,
2414 cmd_buffer->compute_scratch_size_needed);
2415 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2416 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2417 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2418 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2419 }
2420 }
2421
2422 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2423 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2424 sample_positions_needed, &initial_flush_preamble_cs,
2425 &initial_preamble_cs, &continue_preamble_cs);
2426 if (result != VK_SUCCESS)
2427 return result;
2428
2429 for (uint32_t i = 0; i < submitCount; i++) {
2430 struct radeon_winsys_cs **cs_array;
2431 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2432 bool can_patch = true;
2433 uint32_t advance;
2434 struct radv_winsys_sem_info sem_info;
2435
2436 result = radv_alloc_sem_info(&sem_info,
2437 pSubmits[i].waitSemaphoreCount,
2438 pSubmits[i].pWaitSemaphores,
2439 pSubmits[i].signalSemaphoreCount,
2440 pSubmits[i].pSignalSemaphores,
2441 _fence);
2442 if (result != VK_SUCCESS)
2443 return result;
2444
2445 if (!pSubmits[i].commandBufferCount) {
2446 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2447 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2448 &queue->device->empty_cs[queue->queue_family_index],
2449 1, NULL, NULL,
2450 &sem_info, NULL,
2451 false, base_fence);
2452 if (ret) {
2453 radv_loge("failed to submit CS %d\n", i);
2454 abort();
2455 }
2456 fence_emitted = true;
2457 }
2458 radv_free_sem_info(&sem_info);
2459 continue;
2460 }
2461
2462 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2463 (pSubmits[i].commandBufferCount));
2464
2465 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2466 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2467 pSubmits[i].pCommandBuffers[j]);
2468 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2469
2470 cs_array[j] = cmd_buffer->cs;
2471 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2472 can_patch = false;
2473
2474 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2475 }
2476
2477 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2478 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2479 const struct radv_winsys_bo_list *bo_list = NULL;
2480
2481 advance = MIN2(max_cs_submission,
2482 pSubmits[i].commandBufferCount - j);
2483
2484 if (queue->device->trace_bo)
2485 *queue->device->trace_id_ptr = 0;
2486
2487 sem_info.cs_emit_wait = j == 0;
2488 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2489
2490 if (unlikely(queue->device->use_global_bo_list)) {
2491 pthread_mutex_lock(&queue->device->bo_list.mutex);
2492 bo_list = &queue->device->bo_list.list;
2493 }
2494
2495 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2496 advance, initial_preamble, continue_preamble_cs,
2497 &sem_info, bo_list,
2498 can_patch, base_fence);
2499
2500 if (unlikely(queue->device->use_global_bo_list))
2501 pthread_mutex_unlock(&queue->device->bo_list.mutex);
2502
2503 if (ret) {
2504 radv_loge("failed to submit CS %d\n", i);
2505 abort();
2506 }
2507 fence_emitted = true;
2508 if (queue->device->trace_bo) {
2509 radv_check_gpu_hangs(queue, cs_array[j]);
2510 }
2511 }
2512
2513 radv_free_temp_syncobjs(queue->device,
2514 pSubmits[i].waitSemaphoreCount,
2515 pSubmits[i].pWaitSemaphores);
2516 radv_free_sem_info(&sem_info);
2517 free(cs_array);
2518 }
2519
2520 if (fence) {
2521 if (!fence_emitted) {
2522 radv_signal_fence(queue, fence);
2523 }
2524 fence->submitted = true;
2525 }
2526
2527 return VK_SUCCESS;
2528 }
2529
2530 VkResult radv_QueueWaitIdle(
2531 VkQueue _queue)
2532 {
2533 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2534
2535 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2536 radv_queue_family_to_ring(queue->queue_family_index),
2537 queue->queue_idx);
2538 return VK_SUCCESS;
2539 }
2540
2541 VkResult radv_DeviceWaitIdle(
2542 VkDevice _device)
2543 {
2544 RADV_FROM_HANDLE(radv_device, device, _device);
2545
2546 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2547 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2548 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2549 }
2550 }
2551 return VK_SUCCESS;
2552 }
2553
2554 VkResult radv_EnumerateInstanceExtensionProperties(
2555 const char* pLayerName,
2556 uint32_t* pPropertyCount,
2557 VkExtensionProperties* pProperties)
2558 {
2559 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2560
2561 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2562 if (radv_supported_instance_extensions.extensions[i]) {
2563 vk_outarray_append(&out, prop) {
2564 *prop = radv_instance_extensions[i];
2565 }
2566 }
2567 }
2568
2569 return vk_outarray_status(&out);
2570 }
2571
2572 VkResult radv_EnumerateDeviceExtensionProperties(
2573 VkPhysicalDevice physicalDevice,
2574 const char* pLayerName,
2575 uint32_t* pPropertyCount,
2576 VkExtensionProperties* pProperties)
2577 {
2578 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2579 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2580
2581 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2582 if (device->supported_extensions.extensions[i]) {
2583 vk_outarray_append(&out, prop) {
2584 *prop = radv_device_extensions[i];
2585 }
2586 }
2587 }
2588
2589 return vk_outarray_status(&out);
2590 }
2591
2592 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2593 VkInstance _instance,
2594 const char* pName)
2595 {
2596 RADV_FROM_HANDLE(radv_instance, instance, _instance);
2597
2598 return radv_lookup_entrypoint_checked(pName,
2599 instance ? instance->apiVersion : 0,
2600 instance ? &instance->enabled_extensions : NULL,
2601 NULL);
2602 }
2603
2604 /* The loader wants us to expose a second GetInstanceProcAddr function
2605 * to work around certain LD_PRELOAD issues seen in apps.
2606 */
2607 PUBLIC
2608 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2609 VkInstance instance,
2610 const char* pName);
2611
2612 PUBLIC
2613 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2614 VkInstance instance,
2615 const char* pName)
2616 {
2617 return radv_GetInstanceProcAddr(instance, pName);
2618 }
2619
2620 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2621 VkDevice _device,
2622 const char* pName)
2623 {
2624 RADV_FROM_HANDLE(radv_device, device, _device);
2625
2626 return radv_lookup_entrypoint_checked(pName,
2627 device->instance->apiVersion,
2628 &device->instance->enabled_extensions,
2629 &device->enabled_extensions);
2630 }
2631
2632 bool radv_get_memory_fd(struct radv_device *device,
2633 struct radv_device_memory *memory,
2634 int *pFD)
2635 {
2636 struct radeon_bo_metadata metadata;
2637
2638 if (memory->image) {
2639 radv_init_metadata(device, memory->image, &metadata);
2640 device->ws->buffer_set_metadata(memory->bo, &metadata);
2641 }
2642
2643 return device->ws->buffer_get_fd(device->ws, memory->bo,
2644 pFD);
2645 }
2646
2647 static VkResult radv_alloc_memory(struct radv_device *device,
2648 const VkMemoryAllocateInfo* pAllocateInfo,
2649 const VkAllocationCallbacks* pAllocator,
2650 VkDeviceMemory* pMem)
2651 {
2652 struct radv_device_memory *mem;
2653 VkResult result;
2654 enum radeon_bo_domain domain;
2655 uint32_t flags = 0;
2656 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2657
2658 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2659
2660 if (pAllocateInfo->allocationSize == 0) {
2661 /* Apparently, this is allowed */
2662 *pMem = VK_NULL_HANDLE;
2663 return VK_SUCCESS;
2664 }
2665
2666 const VkImportMemoryFdInfoKHR *import_info =
2667 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2668 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2669 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2670 const VkExportMemoryAllocateInfoKHR *export_info =
2671 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2672 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2673 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2674
2675 const struct wsi_memory_allocate_info *wsi_info =
2676 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2677
2678 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2679 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2680 if (mem == NULL)
2681 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2682
2683 if (wsi_info && wsi_info->implicit_sync)
2684 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2685
2686 if (dedicate_info) {
2687 mem->image = radv_image_from_handle(dedicate_info->image);
2688 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2689 } else {
2690 mem->image = NULL;
2691 mem->buffer = NULL;
2692 }
2693
2694 mem->user_ptr = NULL;
2695
2696 if (import_info) {
2697 assert(import_info->handleType ==
2698 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2699 import_info->handleType ==
2700 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2701 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2702 NULL, NULL);
2703 if (!mem->bo) {
2704 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2705 goto fail;
2706 } else {
2707 close(import_info->fd);
2708 }
2709 } else if (host_ptr_info) {
2710 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2711 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2712 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2713 pAllocateInfo->allocationSize);
2714 if (!mem->bo) {
2715 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2716 goto fail;
2717 } else {
2718 mem->user_ptr = host_ptr_info->pHostPointer;
2719 }
2720 } else {
2721 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2722 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2723 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2724 domain = RADEON_DOMAIN_GTT;
2725 else
2726 domain = RADEON_DOMAIN_VRAM;
2727
2728 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2729 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2730 else
2731 flags |= RADEON_FLAG_CPU_ACCESS;
2732
2733 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2734 flags |= RADEON_FLAG_GTT_WC;
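		/* Summary of the mapping above (the remaining radv memory type,
		 * VRAM with CPU access, is the implicit else branch of both
		 * tests):
		 *   GTT_WRITE_COMBINE -> GTT domain, CPU access, write-combined
		 *   GTT_CACHED        -> GTT domain, CPU access, cacheable
		 *   VRAM              -> VRAM domain, no CPU access
		 *   (otherwise)       -> VRAM domain, CPU access
		 */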
2735
2736 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
2737 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2738
2739 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2740 domain, flags);
2741
2742 if (!mem->bo) {
2743 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2744 goto fail;
2745 }
2746 mem->type_index = mem_type_index;
2747 }
2748
2749 result = radv_bo_list_add(device, mem->bo);
2750 if (result != VK_SUCCESS)
2751 goto fail_bo;
2752
2753 *pMem = radv_device_memory_to_handle(mem);
2754
2755 return VK_SUCCESS;
2756
2757 fail_bo:
2758 device->ws->buffer_destroy(mem->bo);
2759 fail:
2760 vk_free2(&device->alloc, pAllocator, mem);
2761
2762 return result;
2763 }
2764
2765 VkResult radv_AllocateMemory(
2766 VkDevice _device,
2767 const VkMemoryAllocateInfo* pAllocateInfo,
2768 const VkAllocationCallbacks* pAllocator,
2769 VkDeviceMemory* pMem)
2770 {
2771 RADV_FROM_HANDLE(radv_device, device, _device);
2772 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2773 }
2774
2775 void radv_FreeMemory(
2776 VkDevice _device,
2777 VkDeviceMemory _mem,
2778 const VkAllocationCallbacks* pAllocator)
2779 {
2780 RADV_FROM_HANDLE(radv_device, device, _device);
2781 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2782
2783 if (mem == NULL)
2784 return;
2785
2786 radv_bo_list_remove(device, mem->bo);
2787 device->ws->buffer_destroy(mem->bo);
2788 mem->bo = NULL;
2789
2790 vk_free2(&device->alloc, pAllocator, mem);
2791 }
2792
2793 VkResult radv_MapMemory(
2794 VkDevice _device,
2795 VkDeviceMemory _memory,
2796 VkDeviceSize offset,
2797 VkDeviceSize size,
2798 VkMemoryMapFlags flags,
2799 void** ppData)
2800 {
2801 RADV_FROM_HANDLE(radv_device, device, _device);
2802 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2803
2804 if (mem == NULL) {
2805 *ppData = NULL;
2806 return VK_SUCCESS;
2807 }
2808
2809 if (mem->user_ptr)
2810 *ppData = mem->user_ptr;
2811 else
2812 *ppData = device->ws->buffer_map(mem->bo);
2813
2814 if (*ppData) {
2815 *ppData += offset;
2816 return VK_SUCCESS;
2817 }
2818
2819 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2820 }
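/* Illustrative application-side usage (not driver code): map, write,
 * unmap. No explicit flush is needed because, as the no-op
 * FlushMappedMemoryRanges below suggests, every host-visible memory type
 * radv exposes is also host-coherent.
 *
 *    void *ptr;
 *    vkMapMemory(device, memory, 0, VK_WHOLE_SIZE, 0, &ptr);
 *    memcpy(ptr, data, size);
 *    vkUnmapMemory(device, memory);
 */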
2821
2822 void radv_UnmapMemory(
2823 VkDevice _device,
2824 VkDeviceMemory _memory)
2825 {
2826 RADV_FROM_HANDLE(radv_device, device, _device);
2827 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2828
2829 if (mem == NULL)
2830 return;
2831
2832 if (mem->user_ptr == NULL)
2833 device->ws->buffer_unmap(mem->bo);
2834 }
2835
2836 VkResult radv_FlushMappedMemoryRanges(
2837 VkDevice _device,
2838 uint32_t memoryRangeCount,
2839 const VkMappedMemoryRange* pMemoryRanges)
2840 {
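	/* No-op: every host-visible memory type radv exposes is also
	 * host-coherent, so there is nothing to flush. */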
2841 return VK_SUCCESS;
2842 }
2843
2844 VkResult radv_InvalidateMappedMemoryRanges(
2845 VkDevice _device,
2846 uint32_t memoryRangeCount,
2847 const VkMappedMemoryRange* pMemoryRanges)
2848 {
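	/* No-op for the same reason as radv_FlushMappedMemoryRanges. */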
2849 return VK_SUCCESS;
2850 }
2851
2852 void radv_GetBufferMemoryRequirements(
2853 VkDevice _device,
2854 VkBuffer _buffer,
2855 VkMemoryRequirements* pMemoryRequirements)
2856 {
2857 RADV_FROM_HANDLE(radv_device, device, _device);
2858 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2859
2860 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2861
2862 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2863 pMemoryRequirements->alignment = 4096;
2864 else
2865 pMemoryRequirements->alignment = 16;
2866
2867 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2868 }
2869
2870 void radv_GetBufferMemoryRequirements2(
2871 VkDevice device,
2872 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2873 VkMemoryRequirements2KHR* pMemoryRequirements)
2874 {
2875 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2876 &pMemoryRequirements->memoryRequirements);
2877 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2878 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2879 switch (ext->sType) {
2880 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2881 VkMemoryDedicatedRequirementsKHR *req =
2882 (VkMemoryDedicatedRequirementsKHR *) ext;
2883 req->requiresDedicatedAllocation = buffer->shareable;
2884 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2885 break;
2886 }
2887 default:
2888 break;
2889 }
2890 }
2891 }
2892
2893 void radv_GetImageMemoryRequirements(
2894 VkDevice _device,
2895 VkImage _image,
2896 VkMemoryRequirements* pMemoryRequirements)
2897 {
2898 RADV_FROM_HANDLE(radv_device, device, _device);
2899 RADV_FROM_HANDLE(radv_image, image, _image);
2900
2901 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2902
2903 pMemoryRequirements->size = image->size;
2904 pMemoryRequirements->alignment = image->alignment;
2905 }
2906
2907 void radv_GetImageMemoryRequirements2(
2908 VkDevice device,
2909 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2910 VkMemoryRequirements2KHR* pMemoryRequirements)
2911 {
2912 radv_GetImageMemoryRequirements(device, pInfo->image,
2913 &pMemoryRequirements->memoryRequirements);
2914
2915 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2916
2917 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2918 switch (ext->sType) {
2919 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2920 VkMemoryDedicatedRequirementsKHR *req =
2921 (VkMemoryDedicatedRequirementsKHR *) ext;
2922 req->requiresDedicatedAllocation = image->shareable;
2923 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2924 break;
2925 }
2926 default:
2927 break;
2928 }
2929 }
2930 }
2931
2932 void radv_GetImageSparseMemoryRequirements(
2933 VkDevice device,
2934 VkImage image,
2935 uint32_t* pSparseMemoryRequirementCount,
2936 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2937 {
2938 stub();
2939 }
2940
2941 void radv_GetImageSparseMemoryRequirements2(
2942 VkDevice device,
2943 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2944 uint32_t* pSparseMemoryRequirementCount,
2945 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2946 {
2947 stub();
2948 }
2949
2950 void radv_GetDeviceMemoryCommitment(
2951 VkDevice device,
2952 VkDeviceMemory memory,
2953 VkDeviceSize* pCommittedMemoryInBytes)
2954 {
2955 *pCommittedMemoryInBytes = 0;
2956 }
2957
2958 VkResult radv_BindBufferMemory2(VkDevice device,
2959 uint32_t bindInfoCount,
2960 const VkBindBufferMemoryInfoKHR *pBindInfos)
2961 {
2962 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2963 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2964 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2965
2966 if (mem) {
2967 buffer->bo = mem->bo;
2968 buffer->offset = pBindInfos[i].memoryOffset;
2969 } else {
2970 buffer->bo = NULL;
2971 }
2972 }
2973 return VK_SUCCESS;
2974 }
2975
2976 VkResult radv_BindBufferMemory(
2977 VkDevice device,
2978 VkBuffer buffer,
2979 VkDeviceMemory memory,
2980 VkDeviceSize memoryOffset)
2981 {
2982 const VkBindBufferMemoryInfoKHR info = {
2983 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2984 .buffer = buffer,
2985 .memory = memory,
2986 .memoryOffset = memoryOffset
2987 };
2988
2989 return radv_BindBufferMemory2(device, 1, &info);
2990 }
2991
2992 VkResult radv_BindImageMemory2(VkDevice device,
2993 uint32_t bindInfoCount,
2994 const VkBindImageMemoryInfoKHR *pBindInfos)
2995 {
2996 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2997 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2998 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2999
3000 if (mem) {
3001 image->bo = mem->bo;
3002 image->offset = pBindInfos[i].memoryOffset;
3003 } else {
3004 image->bo = NULL;
3005 image->offset = 0;
3006 }
3007 }
3008 return VK_SUCCESS;
3009 }
3010
3011
3012 VkResult radv_BindImageMemory(
3013 VkDevice device,
3014 VkImage image,
3015 VkDeviceMemory memory,
3016 VkDeviceSize memoryOffset)
3017 {
3018 const VkBindImageMemoryInfoKHR info = {
3019 		.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
3020 .image = image,
3021 .memory = memory,
3022 .memoryOffset = memoryOffset
3023 };
3024
3025 return radv_BindImageMemory2(device, 1, &info);
3026 }
3027
3028
3029 static void
3030 radv_sparse_buffer_bind_memory(struct radv_device *device,
3031 const VkSparseBufferMemoryBindInfo *bind)
3032 {
3033 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3034
3035 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3036 struct radv_device_memory *mem = NULL;
3037
3038 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3039 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3040
3041 device->ws->buffer_virtual_bind(buffer->bo,
3042 bind->pBinds[i].resourceOffset,
3043 bind->pBinds[i].size,
3044 mem ? mem->bo : NULL,
3045 bind->pBinds[i].memoryOffset);
3046 }
3047 }
3048
3049 static void
3050 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3051 const VkSparseImageOpaqueMemoryBindInfo *bind)
3052 {
3053 RADV_FROM_HANDLE(radv_image, image, bind->image);
3054
3055 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3056 struct radv_device_memory *mem = NULL;
3057
3058 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3059 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3060
3061 device->ws->buffer_virtual_bind(image->bo,
3062 bind->pBinds[i].resourceOffset,
3063 bind->pBinds[i].size,
3064 mem ? mem->bo : NULL,
3065 bind->pBinds[i].memoryOffset);
3066 }
3067 }
3068
3069 VkResult radv_QueueBindSparse(
3070 VkQueue _queue,
3071 uint32_t bindInfoCount,
3072 const VkBindSparseInfo* pBindInfo,
3073 VkFence _fence)
3074 {
3075 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3076 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3077 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3078 bool fence_emitted = false;
3079
3080 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3081 struct radv_winsys_sem_info sem_info;
3082 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3083 radv_sparse_buffer_bind_memory(queue->device,
3084 pBindInfo[i].pBufferBinds + j);
3085 }
3086
3087 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3088 radv_sparse_image_opaque_bind_memory(queue->device,
3089 pBindInfo[i].pImageOpaqueBinds + j);
3090 }
3091
3092 VkResult result;
3093 result = radv_alloc_sem_info(&sem_info,
3094 pBindInfo[i].waitSemaphoreCount,
3095 pBindInfo[i].pWaitSemaphores,
3096 pBindInfo[i].signalSemaphoreCount,
3097 pBindInfo[i].pSignalSemaphores,
3098 _fence);
3099 if (result != VK_SUCCESS)
3100 return result;
3101
3102 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3103 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3104 &queue->device->empty_cs[queue->queue_family_index],
3105 1, NULL, NULL,
3106 &sem_info, NULL,
3107 false, base_fence);
3108 fence_emitted = true;
3109 if (fence)
3110 fence->submitted = true;
3111 }
3112
3113 radv_free_sem_info(&sem_info);
3114
3115 }
3116
3117 if (fence) {
3118 if (!fence_emitted) {
3119 radv_signal_fence(queue, fence);
3120 }
3121 fence->submitted = true;
3122 }
3123
3124 return VK_SUCCESS;
3125 }
3126
3127 VkResult radv_CreateFence(
3128 VkDevice _device,
3129 const VkFenceCreateInfo* pCreateInfo,
3130 const VkAllocationCallbacks* pAllocator,
3131 VkFence* pFence)
3132 {
3133 RADV_FROM_HANDLE(radv_device, device, _device);
3134 const VkExportFenceCreateInfoKHR *export =
3135 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
3136 VkExternalFenceHandleTypeFlagsKHR handleTypes =
3137 export ? export->handleTypes : 0;
3138
3139 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3140 sizeof(*fence), 8,
3141 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3142
3143 if (!fence)
3144 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3145
3146 fence->submitted = false;
3147 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
3148 fence->temp_syncobj = 0;
3149 if (device->always_use_syncobj || handleTypes) {
3150 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3151 if (ret) {
3152 vk_free2(&device->alloc, pAllocator, fence);
3153 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3154 }
3155 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3156 device->ws->signal_syncobj(device->ws, fence->syncobj);
3157 }
3158 fence->fence = NULL;
3159 } else {
3160 fence->fence = device->ws->create_fence();
3161 if (!fence->fence) {
3162 vk_free2(&device->alloc, pAllocator, fence);
3163 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3164 }
3165 fence->syncobj = 0;
3166 }
3167
3168 *pFence = radv_fence_to_handle(fence);
3169
3170 return VK_SUCCESS;
3171 }
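/* A radv_fence thus carries one of two permanent payloads - a DRM syncobj
 * (when syncobjs are always used or an external handle type was
 * requested) or a plain winsys fence - plus an optional temp_syncobj that
 * temporarily overrides the permanent payload after an import, mirroring
 * the temporary/permanent semantics of VK_KHR_external_fence.
 */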
3172
3173 void radv_DestroyFence(
3174 VkDevice _device,
3175 VkFence _fence,
3176 const VkAllocationCallbacks* pAllocator)
3177 {
3178 RADV_FROM_HANDLE(radv_device, device, _device);
3179 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3180
3181 if (!fence)
3182 return;
3183
3184 if (fence->temp_syncobj)
3185 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3186 if (fence->syncobj)
3187 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3188 if (fence->fence)
3189 device->ws->destroy_fence(fence->fence);
3190 vk_free2(&device->alloc, pAllocator, fence);
3191 }
3192
3193
3194 static uint64_t radv_get_current_time(void)
3195 {
3196 struct timespec tv;
3197 clock_gettime(CLOCK_MONOTONIC, &tv);
3198 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3199 }
3200
3201 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3202 {
3203 uint64_t current_time = radv_get_current_time();
3204
3205 timeout = MIN2(UINT64_MAX - current_time, timeout);
3206
3207 return current_time + timeout;
3208 }
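/* E.g. a caller passing timeout = UINT64_MAX ("wait forever") gets
 * current_time + (UINT64_MAX - current_time) = UINT64_MAX back; the MIN2
 * above exists purely to keep that addition from overflowing.
 */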
3209
3210
3211 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3212 {
3213 for (uint32_t i = 0; i < fenceCount; ++i) {
3214 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3215 if (fence->syncobj || fence->temp_syncobj || (!fence->signalled && !fence->submitted))
3216 return false;
3217 }
3218 return true;
3219 }
3220
3221 VkResult radv_WaitForFences(
3222 VkDevice _device,
3223 uint32_t fenceCount,
3224 const VkFence* pFences,
3225 VkBool32 waitAll,
3226 uint64_t timeout)
3227 {
3228 RADV_FROM_HANDLE(radv_device, device, _device);
3229 timeout = radv_get_absolute_timeout(timeout);
3230
3231 if (device->always_use_syncobj) {
3232 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3233 if (!handles)
3234 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3235
3236 for (uint32_t i = 0; i < fenceCount; ++i) {
3237 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3238 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3239 }
3240
3241 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3242
3243 free(handles);
3244 return success ? VK_SUCCESS : VK_TIMEOUT;
3245 }
3246
3247 if (!waitAll && fenceCount > 1) {
3248 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3249 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3250 uint32_t wait_count = 0;
3251 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3252 if (!fences)
3253 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3254
3255 for (uint32_t i = 0; i < fenceCount; ++i) {
3256 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3257
3258 if (fence->signalled) {
3259 free(fences);
3260 return VK_SUCCESS;
3261 }
3262
3263 fences[wait_count++] = fence->fence;
3264 }
3265
3266 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3267 waitAll, timeout - radv_get_current_time());
3268
3269 free(fences);
3270 return success ? VK_SUCCESS : VK_TIMEOUT;
3271 }
3272
3273 while(radv_get_current_time() <= timeout) {
3274 for (uint32_t i = 0; i < fenceCount; ++i) {
3275 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3276 return VK_SUCCESS;
3277 }
3278 }
3279 return VK_TIMEOUT;
3280 }
3281
3282 for (uint32_t i = 0; i < fenceCount; ++i) {
3283 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3284 bool expired = false;
3285
3286 if (fence->temp_syncobj) {
3287 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3288 return VK_TIMEOUT;
3289 continue;
3290 }
3291
3292 if (fence->syncobj) {
3293 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3294 return VK_TIMEOUT;
3295 continue;
3296 }
3297
3298 if (fence->signalled)
3299 continue;
3300
3301 if (!fence->submitted) {
3302 			while (radv_get_current_time() <= timeout && !fence->submitted)
3303 /* Do nothing */;
3304
3305 if (!fence->submitted)
3306 return VK_TIMEOUT;
3307
3308 /* Recheck as it may have been set by submitting operations. */
3309 if (fence->signalled)
3310 continue;
3311 }
3312
3313 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
3314 if (!expired)
3315 return VK_TIMEOUT;
3316
3317 fence->signalled = true;
3318 }
3319
3320 return VK_SUCCESS;
3321 }
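
/* Application-side sketch (illustrative; 'dev', 'fence_a' and 'fence_b'
 * are hypothetical, valid handles). With waitAll = VK_FALSE and more
 * than one plain fence, the wait-any fast path above is taken on
 * sufficiently new kernels (drm_minor >= 10).
 */
#if 0
VkFence fences[2] = { fence_a, fence_b };
VkResult r = vkWaitForFences(dev, 2, fences, VK_FALSE,
                             1000000000ull /* 1 second in ns */);
if (r == VK_TIMEOUT) {
	/* Neither fence signalled within one second. */
}
#endif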
3322
3323 VkResult radv_ResetFences(VkDevice _device,
3324 uint32_t fenceCount,
3325 const VkFence *pFences)
3326 {
3327 RADV_FROM_HANDLE(radv_device, device, _device);
3328
3329 for (unsigned i = 0; i < fenceCount; ++i) {
3330 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3331 fence->submitted = fence->signalled = false;
3332
3333 /* Per spec, we first restore the permanent payload, and then reset, so
3334 * having a temp syncobj should not skip resetting the permanent syncobj. */
3335 if (fence->temp_syncobj) {
3336 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3337 fence->temp_syncobj = 0;
3338 }
3339
3340 if (fence->syncobj) {
3341 device->ws->reset_syncobj(device->ws, fence->syncobj);
3342 }
3343 }
3344
3345 return VK_SUCCESS;
3346 }
3347
3348 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3349 {
3350 RADV_FROM_HANDLE(radv_device, device, _device);
3351 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3352
3353 if (fence->temp_syncobj) {
3354 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3355 return success ? VK_SUCCESS : VK_NOT_READY;
3356 }
3357
3358 if (fence->syncobj) {
3359 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3360 return success ? VK_SUCCESS : VK_NOT_READY;
3361 }
3362
3363 if (fence->signalled)
3364 return VK_SUCCESS;
3365 if (!fence->submitted)
3366 return VK_NOT_READY;
3367 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3368 return VK_NOT_READY;
3369
3370 return VK_SUCCESS;
3371 }
3372
3373
3374 // Queue semaphore functions
3375
3376 VkResult radv_CreateSemaphore(
3377 VkDevice _device,
3378 const VkSemaphoreCreateInfo* pCreateInfo,
3379 const VkAllocationCallbacks* pAllocator,
3380 VkSemaphore* pSemaphore)
3381 {
3382 RADV_FROM_HANDLE(radv_device, device, _device);
3383 const VkExportSemaphoreCreateInfoKHR *export =
3384 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3385 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3386 export ? export->handleTypes : 0;
3387
3388 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3389 sizeof(*sem), 8,
3390 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3391 if (!sem)
3392 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3393
3394 sem->temp_syncobj = 0;
3395 	/* Create a syncobj if we are going to export this semaphore or if the device always uses syncobjs. */
3396 if (device->always_use_syncobj || handleTypes) {
3397 assert (device->physical_device->rad_info.has_syncobj);
3398 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3399 if (ret) {
3400 vk_free2(&device->alloc, pAllocator, sem);
3401 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3402 }
3403 sem->sem = NULL;
3404 } else {
3405 sem->sem = device->ws->create_sem(device->ws);
3406 if (!sem->sem) {
3407 vk_free2(&device->alloc, pAllocator, sem);
3408 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3409 }
3410 sem->syncobj = 0;
3411 }
3412
3413 *pSemaphore = radv_semaphore_to_handle(sem);
3414 return VK_SUCCESS;
3415 }
3416
3417 void radv_DestroySemaphore(
3418 VkDevice _device,
3419 VkSemaphore _semaphore,
3420 const VkAllocationCallbacks* pAllocator)
3421 {
3422 RADV_FROM_HANDLE(radv_device, device, _device);
3423 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3424 if (!_semaphore)
3425 return;
3426
3427 if (sem->syncobj)
3428 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3429 else
3430 device->ws->destroy_sem(sem->sem);
3431 vk_free2(&device->alloc, pAllocator, sem);
3432 }
3433
3434 VkResult radv_CreateEvent(
3435 VkDevice _device,
3436 const VkEventCreateInfo* pCreateInfo,
3437 const VkAllocationCallbacks* pAllocator,
3438 VkEvent* pEvent)
3439 {
3440 RADV_FROM_HANDLE(radv_device, device, _device);
3441 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3442 sizeof(*event), 8,
3443 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3444
3445 if (!event)
3446 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3447
3448 event->bo = device->ws->buffer_create(device->ws, 8, 8,
3449 RADEON_DOMAIN_GTT,
3450 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3451 if (!event->bo) {
3452 vk_free2(&device->alloc, pAllocator, event);
3453 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3454 }
3455
3456 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3457
3458 *pEvent = radv_event_to_handle(event);
3459
3460 return VK_SUCCESS;
3461 }
3462
3463 void radv_DestroyEvent(
3464 VkDevice _device,
3465 VkEvent _event,
3466 const VkAllocationCallbacks* pAllocator)
3467 {
3468 RADV_FROM_HANDLE(radv_device, device, _device);
3469 RADV_FROM_HANDLE(radv_event, event, _event);
3470
3471 if (!event)
3472 return;
3473 device->ws->buffer_destroy(event->bo);
3474 vk_free2(&device->alloc, pAllocator, event);
3475 }
3476
3477 VkResult radv_GetEventStatus(
3478 VkDevice _device,
3479 VkEvent _event)
3480 {
3481 RADV_FROM_HANDLE(radv_event, event, _event);
3482
3483 if (*event->map == 1)
3484 return VK_EVENT_SET;
3485 return VK_EVENT_RESET;
3486 }
3487
3488 VkResult radv_SetEvent(
3489 VkDevice _device,
3490 VkEvent _event)
3491 {
3492 RADV_FROM_HANDLE(radv_event, event, _event);
3493 *event->map = 1;
3494
3495 return VK_SUCCESS;
3496 }
3497
3498 VkResult radv_ResetEvent(
3499 VkDevice _device,
3500 VkEvent _event)
3501 {
3502 RADV_FROM_HANDLE(radv_event, event, _event);
3503 *event->map = 0;
3504
3505 return VK_SUCCESS;
3506 }
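
/* Host-side event lifecycle sketch (illustrative; 'dev' and 'event' are
 * hypothetical, valid handles). Because the event is backed by an
 * 8-byte CPU-visible GTT buffer, set/reset/status are plain stores and
 * loads of the mapped word.
 */
#if 0
vkSetEvent(dev, event);
assert(vkGetEventStatus(dev, event) == VK_EVENT_SET);
vkResetEvent(dev, event);
assert(vkGetEventStatus(dev, event) == VK_EVENT_RESET);
#endif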
3507
3508 VkResult radv_CreateBuffer(
3509 VkDevice _device,
3510 const VkBufferCreateInfo* pCreateInfo,
3511 const VkAllocationCallbacks* pAllocator,
3512 VkBuffer* pBuffer)
3513 {
3514 RADV_FROM_HANDLE(radv_device, device, _device);
3515 struct radv_buffer *buffer;
3516
3517 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3518
3519 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3520 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3521 if (buffer == NULL)
3522 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3523
3524 buffer->size = pCreateInfo->size;
3525 buffer->usage = pCreateInfo->usage;
3526 buffer->bo = NULL;
3527 buffer->offset = 0;
3528 buffer->flags = pCreateInfo->flags;
3529
3530 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3531 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3532
3533 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3534 buffer->bo = device->ws->buffer_create(device->ws,
3535 align64(buffer->size, 4096),
3536 4096, 0, RADEON_FLAG_VIRTUAL);
3537 if (!buffer->bo) {
3538 vk_free2(&device->alloc, pAllocator, buffer);
3539 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3540 }
3541 }
3542
3543 *pBuffer = radv_buffer_to_handle(buffer);
3544
3545 return VK_SUCCESS;
3546 }
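
/* Worked example: a sparse buffer created with size = 6000 gets a
 * virtual BO of align64(6000, 4096) = 8192 bytes, aligned to 4096, so
 * sparse binding can operate on whole 4 KiB pages.
 */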
3547
3548 void radv_DestroyBuffer(
3549 VkDevice _device,
3550 VkBuffer _buffer,
3551 const VkAllocationCallbacks* pAllocator)
3552 {
3553 RADV_FROM_HANDLE(radv_device, device, _device);
3554 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3555
3556 if (!buffer)
3557 return;
3558
3559 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3560 device->ws->buffer_destroy(buffer->bo);
3561
3562 vk_free2(&device->alloc, pAllocator, buffer);
3563 }
3564
3565 static inline unsigned
3566 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3567 {
3568 if (stencil)
3569 return image->surface.u.legacy.stencil_tiling_index[level];
3570 else
3571 return image->surface.u.legacy.tiling_index[level];
3572 }
3573
3574 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3575 {
3576 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3577 }
3578
3579 static uint32_t
3580 radv_init_dcc_control_reg(struct radv_device *device,
3581 struct radv_image_view *iview)
3582 {
3583 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3584 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3585 unsigned max_compressed_block_size;
3586 unsigned independent_64b_blocks;
3587
3588 if (device->physical_device->rad_info.chip_class < VI)
3589 return 0;
3590
3591 if (iview->image->info.samples > 1) {
3592 if (iview->image->surface.bpe == 1)
3593 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3594 else if (iview->image->surface.bpe == 2)
3595 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3596 }
3597
3598 if (!device->physical_device->rad_info.has_dedicated_vram) {
3599 /* amdvlk: [min-compressed-block-size] should be set to 32 for
3600 * dGPU and 64 for APU because all of our APUs to date use
3601 * DIMMs which have a request granularity size of 64B while all
3602 * other chips have a 32B request size.
3603 */
3604 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3605 }
3606
3607 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
3608 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3609 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3610 /* If this DCC image is potentially going to be used in texture
3611 * fetches, we need some special settings.
3612 */
3613 independent_64b_blocks = 1;
3614 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3615 } else {
3616 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
3617 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
3618 * big as possible for better compression state.
3619 */
3620 independent_64b_blocks = 0;
3621 max_compressed_block_size = max_uncompressed_block_size;
3622 }
3623
3624 return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3625 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3626 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3627 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3628 }
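
/* Worked example: for a single-sample DCC surface with
 * VK_IMAGE_USAGE_SAMPLED_BIT on a dGPU, the code above selects
 * MAX_UNCOMPRESSED_BLOCK_SIZE = 256B, MIN_COMPRESSED_BLOCK_SIZE = 32B,
 * MAX_COMPRESSED_BLOCK_SIZE = 64B and INDEPENDENT_64B_BLOCKS = 1.
 */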
3629
3630 static void
3631 radv_initialise_color_surface(struct radv_device *device,
3632 struct radv_color_buffer_info *cb,
3633 struct radv_image_view *iview)
3634 {
3635 const struct vk_format_description *desc;
3636 unsigned ntype, format, swap, endian;
3637 unsigned blend_clamp = 0, blend_bypass = 0;
3638 uint64_t va;
3639 const struct radeon_surf *surf = &iview->image->surface;
3640
3641 desc = vk_format_description(iview->vk_format);
3642
3643 memset(cb, 0, sizeof(*cb));
3644
3645 /* Intensity is implemented as Red, so treat it that way. */
3646 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3647
3648 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3649
3650 cb->cb_color_base = va >> 8;
3651
3652 if (device->physical_device->rad_info.chip_class >= GFX9) {
3653 struct gfx9_surf_meta_flags meta;
3654 if (iview->image->dcc_offset)
3655 meta = iview->image->surface.u.gfx9.dcc;
3656 else
3657 meta = iview->image->surface.u.gfx9.cmask;
3658
3659 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3660 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3661 S_028C74_RB_ALIGNED(meta.rb_aligned) |
3662 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3663
3664 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3665 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3666 } else {
3667 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3668 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3669
3670 cb->cb_color_base += level_info->offset >> 8;
3671 if (level_info->mode == RADEON_SURF_MODE_2D)
3672 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3673
3674 pitch_tile_max = level_info->nblk_x / 8 - 1;
3675 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3676 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3677
3678 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3679 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3680 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3681
3682 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3683
3684 if (radv_image_has_fmask(iview->image)) {
3685 if (device->physical_device->rad_info.chip_class >= CIK)
3686 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
3687 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
3688 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
3689 } else {
3690 /* This must be set for fast clear to work without FMASK. */
3691 if (device->physical_device->rad_info.chip_class >= CIK)
3692 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
3693 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
3694 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
3695 }
3696 }
3697
3698 /* CMASK variables */
3699 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3700 va += iview->image->cmask.offset;
3701 cb->cb_color_cmask = va >> 8;
3702
3703 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3704 va += iview->image->dcc_offset;
3705 cb->cb_dcc_base = va >> 8;
3706 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
3707
3708 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3709 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
3710 S_028C6C_SLICE_MAX(max_slice);
3711
3712 if (iview->image->info.samples > 1) {
3713 unsigned log_samples = util_logbase2(iview->image->info.samples);
3714
3715 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
3716 S_028C74_NUM_FRAGMENTS(log_samples);
3717 }
3718
3719 if (radv_image_has_fmask(iview->image)) {
3720 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
3721 cb->cb_color_fmask = va >> 8;
3722 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
3723 } else {
3724 cb->cb_color_fmask = cb->cb_color_base;
3725 }
3726
3727 ntype = radv_translate_color_numformat(iview->vk_format,
3728 desc,
3729 vk_format_get_first_non_void_channel(iview->vk_format));
3730 format = radv_translate_colorformat(iview->vk_format);
3731 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3732 radv_finishme("Illegal color\n");
3733 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3734 endian = radv_colorformat_endian_swap(format);
3735
3736 /* blend clamp should be set for all NORM/SRGB types */
3737 if (ntype == V_028C70_NUMBER_UNORM ||
3738 ntype == V_028C70_NUMBER_SNORM ||
3739 ntype == V_028C70_NUMBER_SRGB)
3740 blend_clamp = 1;
3741
3742 /* set blend bypass according to docs if SINT/UINT or
3743 8/24 COLOR variants */
3744 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3745 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3746 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3747 blend_clamp = 0;
3748 blend_bypass = 1;
3749 }
3750 #if 0
3751 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3752 (format == V_028C70_COLOR_8 ||
3753 format == V_028C70_COLOR_8_8 ||
3754 format == V_028C70_COLOR_8_8_8_8))
3755 ->color_is_int8 = true;
3756 #endif
3757 cb->cb_color_info = S_028C70_FORMAT(format) |
3758 S_028C70_COMP_SWAP(swap) |
3759 S_028C70_BLEND_CLAMP(blend_clamp) |
3760 S_028C70_BLEND_BYPASS(blend_bypass) |
3761 S_028C70_SIMPLE_FLOAT(1) |
3762 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3763 ntype != V_028C70_NUMBER_SNORM &&
3764 ntype != V_028C70_NUMBER_SRGB &&
3765 format != V_028C70_COLOR_8_24 &&
3766 format != V_028C70_COLOR_24_8) |
3767 S_028C70_NUMBER_TYPE(ntype) |
3768 S_028C70_ENDIAN(endian);
3769 if (radv_image_has_fmask(iview->image)) {
3770 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3771 if (device->physical_device->rad_info.chip_class == SI) {
3772 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3773 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3774 }
3775 }
3776
3777 if (radv_image_has_cmask(iview->image) &&
3778 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3779 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3780
3781 if (radv_dcc_enabled(iview->image, iview->base_mip))
3782 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3783
3784 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
3785
3786 /* This must be set for fast clear to work without FMASK. */
3787 if (!radv_image_has_fmask(iview->image) &&
3788 device->physical_device->rad_info.chip_class == SI) {
3789 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3790 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3791 }
3792
3793 if (device->physical_device->rad_info.chip_class >= GFX9) {
3794 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3795 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3796
3797 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3798 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3799 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3800 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3801 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3802 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3803 }
3804 }
3805
3806 static unsigned
3807 radv_calc_decompress_on_z_planes(struct radv_device *device,
3808 struct radv_image_view *iview)
3809 {
3810 unsigned max_zplanes = 0;
3811
3812 assert(radv_image_is_tc_compat_htile(iview->image));
3813
3814 if (device->physical_device->rad_info.chip_class >= GFX9) {
3815 /* Default value for 32-bit depth surfaces. */
3816 max_zplanes = 4;
3817
3818 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3819 iview->image->info.samples > 1)
3820 max_zplanes = 2;
3821
3822 max_zplanes = max_zplanes + 1;
3823 } else {
3824 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
3825 /* Do not enable Z plane compression for 16-bit depth
3826 			 * surfaces because it isn't supported on GFX8. Only
3827 			 * 32-bit depth surfaces are supported by the hardware.
3828 			 * This keeps shader compatibility and reduces the
3829 			 * number of depth decompressions.
3830 */
3831 max_zplanes = 1;
3832 } else {
3833 if (iview->image->info.samples <= 1)
3834 max_zplanes = 5;
3835 else if (iview->image->info.samples <= 4)
3836 max_zplanes = 3;
3837 else
3838 max_zplanes = 2;
3839 }
3840 }
3841
3842 return max_zplanes;
3843 }
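
/* Worked examples of the mapping above:
 * - GFX9, 32-bit depth, single-sample: 4 + 1 = 5 planes.
 * - GFX9, D16 multisample: 2 + 1 = 3 planes.
 * - Pre-GFX9, D16: 1 plane (Z plane compression disabled).
 * - Pre-GFX9, 32-bit depth, 4x MSAA: 3 planes.
 */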
3844
3845 static void
3846 radv_initialise_ds_surface(struct radv_device *device,
3847 struct radv_ds_buffer_info *ds,
3848 struct radv_image_view *iview)
3849 {
3850 unsigned level = iview->base_mip;
3851 unsigned format, stencil_format;
3852 uint64_t va, s_offs, z_offs;
3853 bool stencil_only = false;
3854 memset(ds, 0, sizeof(*ds));
3855 switch (iview->image->vk_format) {
3856 case VK_FORMAT_D24_UNORM_S8_UINT:
3857 case VK_FORMAT_X8_D24_UNORM_PACK32:
3858 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3859 ds->offset_scale = 2.0f;
3860 break;
3861 case VK_FORMAT_D16_UNORM:
3862 case VK_FORMAT_D16_UNORM_S8_UINT:
3863 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3864 ds->offset_scale = 4.0f;
3865 break;
3866 case VK_FORMAT_D32_SFLOAT:
3867 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3868 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3869 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3870 ds->offset_scale = 1.0f;
3871 break;
3872 case VK_FORMAT_S8_UINT:
3873 stencil_only = true;
3874 break;
3875 default:
3876 break;
3877 }
3878
3879 format = radv_translate_dbformat(iview->image->vk_format);
3880 stencil_format = iview->image->surface.has_stencil ?
3881 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3882
3883 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
3884 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3885 S_028008_SLICE_MAX(max_slice);
3886
3887 ds->db_htile_data_base = 0;
3888 ds->db_htile_surface = 0;
3889
3890 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3891 s_offs = z_offs = va;
3892
3893 if (device->physical_device->rad_info.chip_class >= GFX9) {
3894 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3895 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3896
3897 ds->db_z_info = S_028038_FORMAT(format) |
3898 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3899 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3900 S_028038_MAXMIP(iview->image->info.levels - 1);
3901 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3902 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3903
3904 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3905 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3906 ds->db_depth_view |= S_028008_MIPID(level);
3907
3908 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3909 S_02801C_Y_MAX(iview->image->info.height - 1);
3910
3911 if (radv_htile_enabled(iview->image, level)) {
3912 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3913
3914 if (radv_image_is_tc_compat_htile(iview->image)) {
3915 unsigned max_zplanes =
3916 radv_calc_decompress_on_z_planes(device, iview);
3917
3918 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
3919 S_028038_ITERATE_FLUSH(1);
3920 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3921 }
3922
3923 if (!iview->image->surface.has_stencil)
3924 /* Use all of the htile_buffer for depth if there's no stencil. */
3925 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3926 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3927 iview->image->htile_offset;
3928 ds->db_htile_data_base = va >> 8;
3929 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3930 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3931 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3932 }
3933 } else {
3934 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3935
3936 if (stencil_only)
3937 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3938
3939 z_offs += iview->image->surface.u.legacy.level[level].offset;
3940 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3941
3942 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
3943 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3944 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3945
3946 if (iview->image->info.samples > 1)
3947 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3948
3949 if (device->physical_device->rad_info.chip_class >= CIK) {
3950 struct radeon_info *info = &device->physical_device->rad_info;
3951 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3952 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3953 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3954 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3955 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3956 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3957
3958 if (stencil_only)
3959 tile_mode = stencil_tile_mode;
3960
3961 ds->db_depth_info |=
3962 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3963 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3964 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3965 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3966 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3967 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3968 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3969 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3970 } else {
3971 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3972 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3973 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3974 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3975 if (stencil_only)
3976 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3977 }
3978
3979 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3980 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3981 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3982
3983 if (radv_htile_enabled(iview->image, level)) {
3984 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3985
3986 if (!iview->image->surface.has_stencil &&
3987 !radv_image_is_tc_compat_htile(iview->image))
3988 /* Use all of the htile_buffer for depth if there's no stencil. */
3989 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3990
3991 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3992 iview->image->htile_offset;
3993 ds->db_htile_data_base = va >> 8;
3994 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3995
3996 if (radv_image_is_tc_compat_htile(iview->image)) {
3997 unsigned max_zplanes =
3998 radv_calc_decompress_on_z_planes(device, iview);
3999
4000 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4001 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4002 }
4003 }
4004 }
4005
4006 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4007 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4008 }
4009
4010 VkResult radv_CreateFramebuffer(
4011 VkDevice _device,
4012 const VkFramebufferCreateInfo* pCreateInfo,
4013 const VkAllocationCallbacks* pAllocator,
4014 VkFramebuffer* pFramebuffer)
4015 {
4016 RADV_FROM_HANDLE(radv_device, device, _device);
4017 struct radv_framebuffer *framebuffer;
4018
4019 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4020
4021 size_t size = sizeof(*framebuffer) +
4022 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
4023 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4024 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4025 if (framebuffer == NULL)
4026 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
4027
4028 framebuffer->attachment_count = pCreateInfo->attachmentCount;
4029 framebuffer->width = pCreateInfo->width;
4030 framebuffer->height = pCreateInfo->height;
4031 framebuffer->layers = pCreateInfo->layers;
4032 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4033 VkImageView _iview = pCreateInfo->pAttachments[i];
4034 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4035 framebuffer->attachments[i].attachment = iview;
4036 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
4037 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
4038 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4039 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
4040 }
4041 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4042 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4043 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4044 }
4045
4046 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4047 return VK_SUCCESS;
4048 }
4049
4050 void radv_DestroyFramebuffer(
4051 VkDevice _device,
4052 VkFramebuffer _fb,
4053 const VkAllocationCallbacks* pAllocator)
4054 {
4055 RADV_FROM_HANDLE(radv_device, device, _device);
4056 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4057
4058 if (!fb)
4059 return;
4060 vk_free2(&device->alloc, pAllocator, fb);
4061 }
4062
4063 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4064 {
4065 switch (address_mode) {
4066 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4067 return V_008F30_SQ_TEX_WRAP;
4068 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4069 return V_008F30_SQ_TEX_MIRROR;
4070 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4071 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4072 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4073 return V_008F30_SQ_TEX_CLAMP_BORDER;
4074 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4075 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4076 default:
4077 unreachable("illegal tex wrap mode");
4078 break;
4079 }
4080 }
4081
4082 static unsigned
4083 radv_tex_compare(VkCompareOp op)
4084 {
4085 switch (op) {
4086 case VK_COMPARE_OP_NEVER:
4087 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4088 case VK_COMPARE_OP_LESS:
4089 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4090 case VK_COMPARE_OP_EQUAL:
4091 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4092 case VK_COMPARE_OP_LESS_OR_EQUAL:
4093 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4094 case VK_COMPARE_OP_GREATER:
4095 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4096 case VK_COMPARE_OP_NOT_EQUAL:
4097 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4098 case VK_COMPARE_OP_GREATER_OR_EQUAL:
4099 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4100 case VK_COMPARE_OP_ALWAYS:
4101 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4102 default:
4103 unreachable("illegal compare mode");
4104 break;
4105 }
4106 }
4107
4108 static unsigned
4109 radv_tex_filter(VkFilter filter, unsigned max_aniso)
4110 {
4111 	switch (filter) {
4112 	case VK_FILTER_NEAREST:
4113 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4114 			V_008F38_SQ_TEX_XY_FILTER_POINT);
4115 	case VK_FILTER_LINEAR:
4116 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4117 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4118 	case VK_FILTER_CUBIC_IMG:
4119 	default:
4120 		fprintf(stderr, "illegal texture filter\n");
4121 		return 0;
4122 	}
4123 }
4124
4125 static unsigned
4126 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4127 {
4128 switch (mode) {
4129 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4130 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4131 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4132 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4133 default:
4134 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4135 }
4136 }
4137
4138 static unsigned
4139 radv_tex_bordercolor(VkBorderColor bcolor)
4140 {
4141 switch (bcolor) {
4142 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4143 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4144 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4145 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4146 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4147 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4148 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
4149 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
4150 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
4151 default:
4152 break;
4153 }
4154 return 0;
4155 }
4156
4157 static unsigned
4158 radv_tex_aniso_filter(unsigned filter)
4159 {
4160 if (filter < 2)
4161 return 0;
4162 if (filter < 4)
4163 return 1;
4164 if (filter < 8)
4165 return 2;
4166 if (filter < 16)
4167 return 3;
4168 return 4;
4169 }
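
/* Worked mapping: the returned value is roughly log2 of the requested
 * maximum anisotropy, i.e. 1x -> 0, 2-3x -> 1, 4-7x -> 2, 8-15x -> 3
 * and 16x (or more) -> 4.
 */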
4170
4171 static unsigned
4172 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
4173 {
4174 switch (mode) {
4175 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
4176 return SQ_IMG_FILTER_MODE_BLEND;
4177 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
4178 return SQ_IMG_FILTER_MODE_MIN;
4179 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
4180 return SQ_IMG_FILTER_MODE_MAX;
4181 default:
4182 break;
4183 }
4184 return 0;
4185 }
4186
4187 static void
4188 radv_init_sampler(struct radv_device *device,
4189 struct radv_sampler *sampler,
4190 const VkSamplerCreateInfo *pCreateInfo)
4191 {
4192 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
4193 (uint32_t) pCreateInfo->maxAnisotropy : 0;
4194 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
4195 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
4196 unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND;
4197
4198 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
4199 vk_find_struct_const(pCreateInfo->pNext,
4200 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
4201 if (sampler_reduction)
4202 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
4203
4204 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
4205 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
4206 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
4207 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4208 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
4209 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
4210 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4211 S_008F30_ANISO_BIAS(max_aniso_ratio) |
4212 S_008F30_DISABLE_CUBE_WRAP(0) |
4213 S_008F30_COMPAT_MODE(is_vi) |
4214 S_008F30_FILTER_MODE(filter_mode));
4215 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
4216 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
4217 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4218 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
4219 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
4220 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
4221 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
4222 S_008F38_MIP_POINT_PRECLAMP(0) |
4223 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
4224 S_008F38_FILTER_PREC_FIX(1) |
4225 S_008F38_ANISO_OVERRIDE(is_vi));
4226 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4227 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4228 }
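
/* Worked example of the 8.8 fixed-point LOD encoding used above
 * (assuming the usual S_FIXED(value, frac_bits) = value * (1 << frac_bits)
 * helper): maxLod = 2.5 becomes S_FIXED(CLAMP(2.5, 0, 15), 8) = 640
 * (0x280) in the MAX_LOD field.
 */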
4229
4230 VkResult radv_CreateSampler(
4231 VkDevice _device,
4232 const VkSamplerCreateInfo* pCreateInfo,
4233 const VkAllocationCallbacks* pAllocator,
4234 VkSampler* pSampler)
4235 {
4236 RADV_FROM_HANDLE(radv_device, device, _device);
4237 struct radv_sampler *sampler;
4238
4239 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4240
4241 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4242 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4243 if (!sampler)
4244 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
4245
4246 radv_init_sampler(device, sampler, pCreateInfo);
4247 *pSampler = radv_sampler_to_handle(sampler);
4248
4249 return VK_SUCCESS;
4250 }
4251
4252 void radv_DestroySampler(
4253 VkDevice _device,
4254 VkSampler _sampler,
4255 const VkAllocationCallbacks* pAllocator)
4256 {
4257 RADV_FROM_HANDLE(radv_device, device, _device);
4258 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4259
4260 if (!sampler)
4261 return;
4262 vk_free2(&device->alloc, pAllocator, sampler);
4263 }
4264
4265 /* vk_icd.h does not declare this function, so we declare it here to
4266  * suppress -Wmissing-prototypes.
4267 */
4268 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4269 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4270
4271 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4272 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4273 {
4274 /* For the full details on loader interface versioning, see
4275 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4276 * What follows is a condensed summary, to help you navigate the large and
4277 * confusing official doc.
4278 *
4279 * - Loader interface v0 is incompatible with later versions. We don't
4280 * support it.
4281 *
4282 * - In loader interface v1:
4283 * - The first ICD entrypoint called by the loader is
4284 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4285 * entrypoint.
4286 * - The ICD must statically expose no other Vulkan symbol unless it is
4287 * linked with -Bsymbolic.
4288 * - Each dispatchable Vulkan handle created by the ICD must be
4289 * a pointer to a struct whose first member is VK_LOADER_DATA. The
4290 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4291 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4292 * vkDestroySurfaceKHR(). The ICD must be capable of working with
4293 * such loader-managed surfaces.
4294 *
4295 * - Loader interface v2 differs from v1 in:
4296 * - The first ICD entrypoint called by the loader is
4297 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4298 * statically expose this entrypoint.
4299 *
4300 * - Loader interface v3 differs from v2 in:
4301 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4302 	 *       vkDestroySurfaceKHR(), and other API that uses VkSurfaceKHR,
4303 * because the loader no longer does so.
4304 */
4305 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4306 return VK_SUCCESS;
4307 }
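
/* Worked example: a loader advertising interface version 5 is clamped
 * to MIN2(5, 3) = 3, while a loader advertising version 2 keeps 2 and
 * the v2 rules above apply.
 */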
4308
4309 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4310 const VkMemoryGetFdInfoKHR *pGetFdInfo,
4311 int *pFD)
4312 {
4313 RADV_FROM_HANDLE(radv_device, device, _device);
4314 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4315
4316 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4317
4318 /* At the moment, we support only the below handle types. */
4319 assert(pGetFdInfo->handleType ==
4320 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4321 pGetFdInfo->handleType ==
4322 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4323
4324 bool ret = radv_get_memory_fd(device, memory, pFD);
4325 if (ret == false)
4326 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
4327 return VK_SUCCESS;
4328 }
4329
4330 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4331 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4332 int fd,
4333 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4334 {
4335 switch (handleType) {
4336 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4337 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4338 return VK_SUCCESS;
4339
4340 default:
4341 /* The valid usage section for this function says:
4342 *
4343 * "handleType must not be one of the handle types defined as
4344 * opaque."
4345 *
4346 * So opaque handle types fall into the default "unsupported" case.
4347 */
4348 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4349 }
4350 }
4351
4352 static VkResult radv_import_opaque_fd(struct radv_device *device,
4353 int fd,
4354 uint32_t *syncobj)
4355 {
4356 uint32_t syncobj_handle = 0;
4357 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4358 if (ret != 0)
4359 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4360
4361 if (*syncobj)
4362 device->ws->destroy_syncobj(device->ws, *syncobj);
4363
4364 *syncobj = syncobj_handle;
4365 close(fd);
4366
4367 return VK_SUCCESS;
4368 }
4369
4370 static VkResult radv_import_sync_fd(struct radv_device *device,
4371 int fd,
4372 uint32_t *syncobj)
4373 {
4374 /* If we create a syncobj we do it locally so that if we have an error, we don't
4375 * leave a syncobj in an undetermined state in the fence. */
4376 uint32_t syncobj_handle = *syncobj;
4377 if (!syncobj_handle) {
4378 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4379 if (ret) {
4380 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4381 }
4382 }
4383
4384 if (fd == -1) {
4385 device->ws->signal_syncobj(device->ws, syncobj_handle);
4386 } else {
4387 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4388 if (ret != 0)
4389 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4390 }
4391
4392 *syncobj = syncobj_handle;
4393 if (fd != -1)
4394 close(fd);
4395
4396 return VK_SUCCESS;
4397 }
4398
4399 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4400 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4401 {
4402 RADV_FROM_HANDLE(radv_device, device, _device);
4403 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4404 uint32_t *syncobj_dst = NULL;
4405
4406 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4407 syncobj_dst = &sem->temp_syncobj;
4408 } else {
4409 syncobj_dst = &sem->syncobj;
4410 }
4411
4412 	switch (pImportSemaphoreFdInfo->handleType) {
4413 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4414 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4415 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4416 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4417 default:
4418 unreachable("Unhandled semaphore handle type");
4419 }
4420 }
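
/* Application-side sketch (illustrative; 'dev', 'sem' and 'fd' are
 * hypothetical, valid objects). A temporary sync_fd import only
 * replaces the payload until the next wait, which is why it lands in
 * temp_syncobj above rather than the permanent syncobj.
 */
#if 0
VkImportSemaphoreFdInfoKHR info = {
	.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
	.semaphore = sem,
	.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR,
	.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR,
	.fd = fd, /* ownership transfers to the driver on success */
};
VkResult r = vkImportSemaphoreFdKHR(dev, &info);
#endif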
4421
4422 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4423 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4424 int *pFd)
4425 {
4426 RADV_FROM_HANDLE(radv_device, device, _device);
4427 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4428 int ret;
4429 uint32_t syncobj_handle;
4430
4431 if (sem->temp_syncobj)
4432 syncobj_handle = sem->temp_syncobj;
4433 else
4434 syncobj_handle = sem->syncobj;
4435
4436 	switch (pGetFdInfo->handleType) {
4437 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4438 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4439 break;
4440 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4441 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4442 if (!ret) {
4443 if (sem->temp_syncobj) {
4444 				device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
4445 sem->temp_syncobj = 0;
4446 } else {
4447 device->ws->reset_syncobj(device->ws, syncobj_handle);
4448 }
4449 }
4450 break;
4451 default:
4452 unreachable("Unhandled semaphore handle type");
4453 }
4454
4455 if (ret)
4456 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4457 return VK_SUCCESS;
4458 }
4459
4460 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4461 VkPhysicalDevice physicalDevice,
4462 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4463 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
4464 {
4465 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4466
4467 	/* Require has_syncobj_wait_for_submit because the syncobj signal ioctl was introduced at virtually the same time. */
4468 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4469 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4470 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4471 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4472 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4473 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4474 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4475 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4476 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4477 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4478 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4479 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4480 } else {
4481 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4482 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4483 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4484 }
4485 }
4486
4487 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4488 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4489 {
4490 RADV_FROM_HANDLE(radv_device, device, _device);
4491 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4492 uint32_t *syncobj_dst = NULL;
4493
4495 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4496 syncobj_dst = &fence->temp_syncobj;
4497 } else {
4498 syncobj_dst = &fence->syncobj;
4499 }
4500
4501 	switch (pImportFenceFdInfo->handleType) {
4502 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4503 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4504 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4505 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4506 default:
4507 unreachable("Unhandled fence handle type");
4508 }
4509 }
4510
4511 VkResult radv_GetFenceFdKHR(VkDevice _device,
4512 const VkFenceGetFdInfoKHR *pGetFdInfo,
4513 int *pFd)
4514 {
4515 RADV_FROM_HANDLE(radv_device, device, _device);
4516 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
4517 int ret;
4518 uint32_t syncobj_handle;
4519
4520 if (fence->temp_syncobj)
4521 syncobj_handle = fence->temp_syncobj;
4522 else
4523 syncobj_handle = fence->syncobj;
4524
4525 	switch (pGetFdInfo->handleType) {
4526 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4527 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4528 break;
4529 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4530 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4531 if (!ret) {
4532 if (fence->temp_syncobj) {
4533 				device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
4534 fence->temp_syncobj = 0;
4535 } else {
4536 device->ws->reset_syncobj(device->ws, syncobj_handle);
4537 }
4538 }
4539 break;
4540 default:
4541 unreachable("Unhandled fence handle type");
4542 }
4543
4544 if (ret)
4545 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4546 return VK_SUCCESS;
4547 }
4548
4549 void radv_GetPhysicalDeviceExternalFenceProperties(
4550 VkPhysicalDevice physicalDevice,
4551 const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
4552 VkExternalFencePropertiesKHR* pExternalFenceProperties)
4553 {
4554 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4555
4556 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4557 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4558 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4559 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4560 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4561 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
4562 			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
4563 } else {
4564 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
4565 pExternalFenceProperties->compatibleHandleTypes = 0;
4566 pExternalFenceProperties->externalFenceFeatures = 0;
4567 }
4568 }
4569
4570 VkResult
4571 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
4572 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
4573 const VkAllocationCallbacks* pAllocator,
4574 VkDebugReportCallbackEXT* pCallback)
4575 {
4576 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4577 return vk_create_debug_report_callback(&instance->debug_report_callbacks,
4578 pCreateInfo, pAllocator, &instance->alloc,
4579 pCallback);
4580 }
4581
4582 void
4583 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
4584 VkDebugReportCallbackEXT _callback,
4585 const VkAllocationCallbacks* pAllocator)
4586 {
4587 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4588 vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
4589 _callback, pAllocator, &instance->alloc);
4590 }
4591
4592 void
4593 radv_DebugReportMessageEXT(VkInstance _instance,
4594 VkDebugReportFlagsEXT flags,
4595 VkDebugReportObjectTypeEXT objectType,
4596 uint64_t object,
4597 size_t location,
4598 int32_t messageCode,
4599 const char* pLayerPrefix,
4600 const char* pMessage)
4601 {
4602 RADV_FROM_HANDLE(radv_instance, instance, _instance);
4603 vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
4604 object, location, messageCode, pLayerPrefix, pMessage);
4605 }
4606
4607 void
4608 radv_GetDeviceGroupPeerMemoryFeatures(
4609 VkDevice device,
4610 uint32_t heapIndex,
4611 uint32_t localDeviceIndex,
4612 uint32_t remoteDeviceIndex,
4613 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
4614 {
4615 assert(localDeviceIndex == remoteDeviceIndex);
4616
4617 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
4618 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
4619 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
4620 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
4621 }