radv: Add support for VK_KHR_driver_properties.
src/amd/vulkan/radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
#include "gfx9d.h"
#include "addrlib/gfx9/chip/gfx9_enum.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"

static bool
radv_get_build_id(void *ptr, struct mesa_sha1 *ctx)
{
	uint32_t timestamp;

#ifdef HAVE_DL_ITERATE_PHDR
	const struct build_id_note *note = NULL;
	if ((note = build_id_find_nhdr_for_addr(ptr))) {
		_mesa_sha1_update(ctx, build_id_data(note), build_id_length(note));
	} else
#endif
	if (disk_cache_get_function_timestamp(ptr, &timestamp)) {
		_mesa_sha1_update(ctx, &timestamp, sizeof(timestamp));
	} else
		return false;
	return true;
}

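/* The pipeline cache UUID below is a SHA-1 over the radv build id, the LLVM
 * build id, the GPU family and the pointer size, so on-disk shader caches
 * are invalidated whenever the driver binary, the compiler or the target
 * GPU changes.
 */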
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	unsigned ptr_size = sizeof(void*);

	memset(uuid, 0, VK_UUID_SIZE);
	_mesa_sha1_init(&ctx);

	if (!radv_get_build_id(radv_device_get_cache_uuid, &ctx) ||
	    !radv_get_build_id(LLVMInitializeAMDGPUTargetInfo, &ctx))
		return -1;

	_mesa_sha1_update(&ctx, &family, sizeof(family));
	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
	_mesa_sha1_final(&ctx, sha1);

	memcpy(uuid, sha1, VK_UUID_SIZE);
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;
	char llvm_string[32] = {};

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	snprintf(llvm_string, sizeof(llvm_string),
		 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
		 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
	snprintf(name, name_len, "%s%s", chip_string, llvm_string);
}

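/* Advertise up to three heaps (device-local VRAM, CPU-visible VRAM, GART)
 * and up to four memory types on top of them. On APUs without dedicated
 * VRAM, the GART heap and types are also marked device-local.
 */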
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
	                                  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
			(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

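/* RADV_FORCE_FAMILY overrides the reported GPU family and chip class, which
 * is mainly useful for compiling shaders for hardware that isn't present.
 * The accepted values are the LLVM processor names returned by
 * ac_get_llvm_processor_name(), e.g. (illustrative, the exact strings
 * depend on the LLVM build):
 *
 *    RADV_FORCE_FAMILY=polaris10 vulkaninfo
 */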
static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
	const char *family = getenv("RADV_FORCE_FAMILY");
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			device->rad_info.family = i;

			if (i >= CHIP_VEGA10)
				device->rad_info.chip_class = GFX9;
			else if (i >= CHIP_TONGA)
				device->rad_info.chip_class = VI;
			else if (i >= CHIP_BONAIRE)
				device->rad_info.chip_class = CIK;
			else
				device->rad_info.chip_class = SI;

			return;
		}
	}

	fprintf(stderr, "radv: Unknown family: %s\n", family);
	exit(1);
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;
	int master_fd = -1;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not open device '%s'", path);

		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
	}

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not get the kernel driver version for device '%s'", path);

		return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found compatible device '%s'.", path);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		goto fail;
	}

	if (instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_handle_env_var_force_family(device);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The gpu id is already embedded in the cache uuid so we just pass
	 * the device name when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	if (device->rad_info.chip_class < VI ||
	    device->rad_info.chip_class > GFX9)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
					 device->rad_info.family == CHIP_VEGA12 ||
					 device->rad_info.family == CHIP_RAVEN;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	/* Out-of-order primitive rasterization. */
	device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
					device->rad_info.max_se >= 2;
	device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	radv_physical_device_init_mem_types(device);
	radv_fill_device_extension_table(device, &device->supported_extensions);

	device->bus_info = *drm_device->businfo.pci;

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	/* The WSI is structured as a layer on top of the driver, so this has
	 * to be the last part of initialization (at least until we get other
	 * semi-layers).
	 */
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		vk_error(instance, result);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{NULL, 0}
};
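
/* Example (illustrative): RADV_DEBUG=info,startup,shaders turns on several
 * of the flags above at once; parse_debug_string() matches each table entry
 * against the comma-separated value of the environment variable.
 */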

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"binning", RADV_PERFTEST_BINNING},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{NULL, 0}
};
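
/* Example (illustrative): RADV_PERFTEST=sisched,localbos opts into
 * experimental performance features that the driver does not enable
 * by default.
 */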

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
			/* Force enable LLVM sisched for Talos because it
			 * looks safe and it gives a few more FPS.
			 */
			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
		}
	} else if (!strcmp(name, "DOOM_VFR")) {
		/* Work around a Doom VFR game bug */
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
	}
}

static int radv_get_instance_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}


VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		radv_EnumerateInstanceVersion(&client_version);
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);


	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Created an instance");

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(instance, result);
	}

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
	                                                             : VK_SUCCESS;
}

VkResult radv_EnumeratePhysicalDeviceGroups(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceGroupCount,
	VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDeviceGroupProperties) {
		*pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
			pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
			pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
			pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
		}
	}
	return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
	                                                                  : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 ||
		                          pdevice->rad_info.family == CHIP_STONEY,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
			VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
			VkPhysicalDeviceShaderDrawParameterFeatures *features =
				(VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
			features->shaderDrawParameters = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
				(VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			features->protectedMemory = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
				(VkPhysicalDevice16BitStorageFeatures*)ext;
			bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI;
			features->storageBuffer16BitAccess = enabled;
			features->uniformAndStorageBuffer16BitAccess = enabled;
			features->storagePushConstant16 = enabled;
			features->storageInputOutput16 = enabled;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
				(VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			features->samplerYcbcrConversion = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
			features->shaderInputAttachmentArrayDynamicIndexing = true;
			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
			features->shaderUniformBufferArrayNonUniformIndexing = false;
			features->shaderSampledImageArrayNonUniformIndexing = false;
			features->shaderStorageBufferArrayNonUniformIndexing = false;
			features->shaderStorageImageArrayNonUniformIndexing = false;
			features->shaderInputAttachmentArrayNonUniformIndexing = false;
			features->shaderUniformTexelBufferArrayNonUniformIndexing = false;
			features->shaderStorageTexelBufferArrayNonUniformIndexing = false;
			features->descriptorBindingUniformBufferUpdateAfterBind = true;
			features->descriptorBindingSampledImageUpdateAfterBind = true;
			features->descriptorBindingStorageImageUpdateAfterBind = true;
			features->descriptorBindingStorageBufferUpdateAfterBind = true;
			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
			features->descriptorBindingUpdateUnusedWhilePending = true;
			features->descriptorBindingPartiallyBound = true;
			features->descriptorBindingVariableDescriptorCount = true;
			features->runtimeDescriptorArray = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
			break;
		}
		default:
			break;
		}
	}
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
	 * be at most 2 GiB. A combined image & sampler object counts as one of
	 * both. This limit is for the pipeline layout, not for the set layout,
	 * but there is no set limit, so we just set a pipeline limit. I don't
	 * think any app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 8,
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
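		/* Timestamps tick at the crystal clock rate; with
		 * clock_crystal_freq in kHz (as reported by the amdgpu
		 * winsys), 1000000.0 / f gives nanoseconds per tick.
		 */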
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 2,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2KHR             *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
			VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
				(VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
				(VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
				(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
				(VkPhysicalDeviceSubgroupProperties*)ext;
			properties->subgroupSize = 64;
			properties->supportedStages = VK_SHADER_STAGE_ALL;
			/* TODO: Enable VK_SUBGROUP_FEATURE_VOTE_BIT when wwm
			 * is fixed in LLVM.
			 */
			properties->supportedOperations =
				VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
				VK_SUBGROUP_FEATURE_BASIC_BIT |
				VK_SUBGROUP_FEATURE_BALLOT_BIT |
				VK_SUBGROUP_FEATURE_QUAD_BIT;
			if (pdevice->rad_info.chip_class >= VI) {
				properties->supportedOperations |=
					VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
					VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
			}
			properties->quadOperationsInAllStages = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
				(VkPhysicalDeviceMaintenance3Properties*)ext;
			/* Make sure everything is addressable by a signed 32-bit int, and
			 * our largest descriptors are 96 bytes. */
			properties->maxPerSetDescriptors = (1ull << 31) / 96;
			/* Our buffer size fields allow only this much */
			properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
			VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
			/* GFX6-8 only support single channel min/max filter. */
			properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
			properties->filterMinmaxSingleComponentFormats = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.num_good_compute_units /
					(pdevice->rad_info.max_se *
					 pdevice->rad_info.max_sh_per_se);
			properties->simdPerComputeUnit = 4;
			properties->wavefrontsPerSimd =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ||
				pdevice->rad_info.family == CHIP_POLARIS10 ||
				pdevice->rad_info.family == CHIP_POLARIS11 ||
				pdevice->rad_info.family == CHIP_POLARIS12 ||
				pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
			properties->wavefrontSize = 64;

			/* SGPR. */
			properties->sgprsPerSimd =
				radv_get_num_physical_sgprs(pdevice);
			properties->minSgprAllocation =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;
			properties->maxSgprAllocation =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;

			/* VGPR. */
			properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
			properties->minVgprAllocation = 4;
			properties->maxVgprAllocation = 256;
			properties->vgprAllocationGranularity = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
			VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
				(VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
			properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
			properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
			properties->shaderSampledImageArrayNonUniformIndexingNative = false;
			properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
			properties->shaderStorageImageArrayNonUniformIndexingNative = false;
			properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
			properties->robustBufferAccessUpdateAfterBind = false;
			properties->quadDivergentImplicitLod = false;

			size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
			          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
			           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
			           32 /* sampler, largest when combined with image */ +
			           64 /* sampled image */ +
			           64 /* storage image */);
			properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
			properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			properties->protectedNoFault = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = VK_FALSE;
			properties->conservativePointAndLineRasterization = VK_FALSE;
			properties->degenerateTrianglesRasterized = VK_FALSE;
			properties->degenerateLinesRasterized = VK_FALSE;
			properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
			properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
			properties->pciDomain = pdevice->bus_info.domain;
			properties->pciBus = pdevice->bus_info.bus;
			properties->pciDevice = pdevice->bus_info.dev;
			properties->pciFunction = pdevice->bus_info.func;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
			VkPhysicalDeviceDriverPropertiesKHR *driver_props =
				(VkPhysicalDeviceDriverPropertiesKHR *) ext;

			driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
			memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
			strcpy(driver_props->driverName, "radv");

			memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
			snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
				 "Mesa " PACKAGE_VERSION " (" MESA_GIT_SHA1 ")"
				 " (LLVM %d.%d.%d)",
				 (HAVE_LLVM >> 8) & 0xff, HAVE_LLVM & 0xff,
				 MESA_LLVM_VERSION_PATCH);

			driver_props->conformanceVersion = (VkConformanceVersionKHR) {
				.major = 1,
				.minor = 1,
				.subminor = 2,
				.patch = 0,
			};
			break;
		}
		default:
			break;
		}
	}
}
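
/* Illustrative application-side use of the VK_KHR_driver_properties data
 * filled in above (a sketch, not part of the driver; the variable names
 * are made up):
 *
 *    VkPhysicalDeviceDriverPropertiesKHR driver_props = {
 *        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *        .pNext = &driver_props,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 *
 * After the call, driver_props.driverID is VK_DRIVER_ID_MESA_RADV_KHR,
 * driverName is "radv" and driverInfo carries the Mesa and LLVM versions.
 */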

static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
			              VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
				              VK_QUEUE_TRANSFER_BIT |
				              VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
	return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
						      &pMemoryProperties->memoryProperties);
}

VkResult radv_GetMemoryHostPointerPropertiesEXT(
	VkDevice                                    _device,
	VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
	const void                                 *pHostPointer,
	VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	switch (handleType)
	{
	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
		const struct radv_physical_device *physical_device = device->physical_device;
		uint32_t memoryTypeBits = 0;
		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
			if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
				memoryTypeBits = (1 << i);
				break;
			}
		}
		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
		return VK_SUCCESS;
	}
	default:
		return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
	}
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch(pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		VkDeviceQueueCreateFlags flags,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);
	queue->flags = flags;

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_bo_list_init(struct radv_bo_list *bo_list)
{
	pthread_mutex_init(&bo_list->mutex, NULL);
	bo_list->list.count = bo_list->capacity = 0;
	bo_list->list.bos = NULL;
}

static void
radv_bo_list_finish(struct radv_bo_list *bo_list)
{
	free(bo_list->list.bos);
	pthread_mutex_destroy(&bo_list->mutex);
}

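/* With VK_EXT_descriptor_indexing's update-after-bind feature, command
 * buffers can no longer gather all their BOs from the descriptor sets, so
 * every allocation is tracked in this single global list instead (see
 * use_global_bo_list in radv_CreateDevice below). The array grows by
 * doubling so insertion stays amortized O(1).
 */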
static VkResult radv_bo_list_add(struct radv_device *device,
				 struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (unlikely(!device->use_global_bo_list))
		return VK_SUCCESS;

	pthread_mutex_lock(&bo_list->mutex);
	if (bo_list->list.count == bo_list->capacity) {
		unsigned capacity = MAX2(4, bo_list->capacity * 2);
		void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));

		if (!data) {
			pthread_mutex_unlock(&bo_list->mutex);
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		}

		bo_list->list.bos = (struct radeon_winsys_bo**)data;
		bo_list->capacity = capacity;
	}

	bo_list->list.bos[bo_list->list.count++] = bo;
	pthread_mutex_unlock(&bo_list->mutex);
	return VK_SUCCESS;
}

static void radv_bo_list_remove(struct radv_device *device,
				struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (unlikely(!device->use_global_bo_list))
		return;

	pthread_mutex_lock(&bo_list->mutex);
	for(unsigned i = 0; i < bo_list->list.count; ++i) {
		if (bo_list->list.bos[i] == bo) {
			bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
			--bo_list->list.count;
			break;
		}
	}
	pthread_mutex_unlock(&bo_list->mutex);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
						       device->physical_device->rad_info.family);
}

static int radv_get_device_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

static int
radv_get_int_debug_option(const char *name, int default_value)
{
	const char *str;
	int result;

	str = getenv(name);
	if (!str) {
		result = default_value;
	} else {
		char *endptr;

		result = strtol(str, &endptr, 0);
		if (str == endptr) {
1531 /* No digits found. */
1532 result = default_value;
1533 }
1534 }
1535
1536 return result;
1537 }
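/* Example: with RADV_TEX_ANISO=8 in the environment this returns 8. strtol()
 * is called with base 0, so hexadecimal (RADV_TEX_ANISO=0x10 -> 16) and
 * octal forms are accepted too. */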
1538
1539 VkResult radv_CreateDevice(
1540 VkPhysicalDevice physicalDevice,
1541 const VkDeviceCreateInfo* pCreateInfo,
1542 const VkAllocationCallbacks* pAllocator,
1543 VkDevice* pDevice)
1544 {
1545 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1546 VkResult result;
1547 struct radv_device *device;
1548
1549 bool keep_shader_info = false;
1550
1551 /* Check enabled features */
1552 if (pCreateInfo->pEnabledFeatures) {
1553 VkPhysicalDeviceFeatures supported_features;
1554 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1555 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1556 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1557 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1558 for (uint32_t i = 0; i < num_features; i++) {
1559 if (enabled_feature[i] && !supported_feature[i])
1560 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1561 }
1562 }
1563
1564 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1565 sizeof(*device), 8,
1566 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1567 if (!device)
1568 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1569
1570 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1571 device->instance = physical_device->instance;
1572 device->physical_device = physical_device;
1573
1574 device->ws = physical_device->ws;
1575 if (pAllocator)
1576 device->alloc = *pAllocator;
1577 else
1578 device->alloc = physical_device->instance->alloc;
1579
1580 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1581 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1582 int index = radv_get_device_extension_index(ext_name);
1583 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1584 vk_free(&device->alloc, device);
1585 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1586 }
1587
1588 device->enabled_extensions.extensions[index] = true;
1589 }
1590
1591 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1592
1593 /* With update-after-bind we can't attach BOs to the command buffer
1594 * from the descriptor set anymore, so we have to use a global BO list.
1595 */
1596 device->use_global_bo_list =
1597 device->enabled_extensions.EXT_descriptor_indexing;
1598
1599 mtx_init(&device->shader_slab_mutex, mtx_plain);
1600 list_inithead(&device->shader_slabs);
1601
1602 radv_bo_list_init(&device->bo_list);
1603
1604 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1605 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1606 uint32_t qfi = queue_create->queueFamilyIndex;
1607 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1608 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1609
1610 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1611
1612 device->queues[qfi] = vk_alloc(&device->alloc,
1613 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1614 if (!device->queues[qfi]) {
1615 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1616 goto fail;
1617 }
1618
1619 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1620
1621 device->queue_count[qfi] = queue_create->queueCount;
1622
1623 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1624 result = radv_queue_init(device, &device->queues[qfi][q],
1625 qfi, q, queue_create->flags,
1626 global_priority);
1627 if (result != VK_SUCCESS)
1628 goto fail;
1629 }
1630 }
1631
1632 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1633 ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) ||
1634 device->physical_device->rad_info.family == CHIP_RAVEN);
1635
1636 /* Disabled and not implemented for now. */
1637 device->dfsm_allowed = device->pbb_allowed &&
1638 device->physical_device->rad_info.family == CHIP_RAVEN;
1639
1640 #ifdef ANDROID
1641 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1642 #endif
1643
1644 /* The maximum number of scratch waves. Scratch space isn't divided
1645 * evenly between CUs. The number is only a function of the number of CUs.
1646 * We can decrease the constant to decrease the scratch buffer size.
1647 *
1648 * sctx->scratch_waves must be >= the maximum possible size of
1649 * 1 threadgroup, so that the hw doesn't hang from being unable
1650 * to start any.
1651 *
1652 * The recommended value is 4 per CU at most. Higher numbers don't
1653 * bring much benefit, but they still occupy chip resources (think
1654 * async compute). I've seen ~2% performance difference between 4 and 32.
1655 */
1656 uint32_t max_threads_per_block = 2048;
1657 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1658 max_threads_per_block / 64);
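	/* Illustrative: a 64-CU part gets MAX2(32 * 64, 2048 / 64) = 2048
	 * scratch waves, i.e. 32 waves per CU. */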
1659
1660 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1661
1662 if (device->physical_device->rad_info.chip_class >= CIK) {
1663 /* If the KMD allows it (there is a KMD hw register for it),
1664 * allow launching waves out-of-order.
1665 */
1666 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1667 }
1668
1669 radv_device_init_gs_info(device);
1670
1671 device->tess_offchip_block_dw_size =
1672 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1673 device->has_distributed_tess =
1674 device->physical_device->rad_info.chip_class >= VI &&
1675 device->physical_device->rad_info.max_se >= 2;
1676
1677 if (getenv("RADV_TRACE_FILE")) {
1678 const char *filename = getenv("RADV_TRACE_FILE");
1679
1680 keep_shader_info = true;
1681
1682 if (!radv_init_trace(device))
1683 goto fail;
1684
1685 fprintf(stderr, "*****************************************************************************\n");
1686 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
1687 fprintf(stderr, "*****************************************************************************\n");
1688
1689 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1690 radv_dump_enabled_options(device, stderr);
1691 }
1692
1693 device->keep_shader_info = keep_shader_info;
1694
1695 result = radv_device_init_meta(device);
1696 if (result != VK_SUCCESS)
1697 goto fail;
1698
1699 radv_device_init_msaa(device);
1700
1701 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1702 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1703 switch (family) {
1704 case RADV_QUEUE_GENERAL:
1705 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1706 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1707 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1708 break;
1709 case RADV_QUEUE_COMPUTE:
1710 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1711 radeon_emit(device->empty_cs[family], 0);
1712 break;
1713 }
1714 device->ws->cs_finalize(device->empty_cs[family]);
1715 }
1716
1717 if (device->physical_device->rad_info.chip_class >= CIK)
1718 cik_create_gfx_config(device);
1719
1720 VkPipelineCacheCreateInfo ci;
1721 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1722 ci.pNext = NULL;
1723 ci.flags = 0;
1724 ci.pInitialData = NULL;
1725 ci.initialDataSize = 0;
1726 VkPipelineCache pc;
1727 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1728 &ci, NULL, &pc);
1729 if (result != VK_SUCCESS)
1730 goto fail_meta;
1731
1732 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1733
1734 device->force_aniso =
1735 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
1736 if (device->force_aniso >= 0) {
1737 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
1738 1 << util_logbase2(device->force_aniso));
1739 }
1740
1741 *pDevice = radv_device_to_handle(device);
1742 return VK_SUCCESS;
1743
1744 fail_meta:
1745 radv_device_finish_meta(device);
1746 fail:
1747 radv_bo_list_finish(&device->bo_list);
1748
1749 if (device->trace_bo)
1750 device->ws->buffer_destroy(device->trace_bo);
1751
1752 if (device->gfx_init)
1753 device->ws->buffer_destroy(device->gfx_init);
1754
1755 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1756 for (unsigned q = 0; q < device->queue_count[i]; q++)
1757 radv_queue_finish(&device->queues[i][q]);
1758 if (device->queue_count[i])
1759 vk_free(&device->alloc, device->queues[i]);
1760 }
1761
1762 vk_free(&device->alloc, device);
1763 return result;
1764 }
1765
1766 void radv_DestroyDevice(
1767 VkDevice _device,
1768 const VkAllocationCallbacks* pAllocator)
1769 {
1770 RADV_FROM_HANDLE(radv_device, device, _device);
1771
1772 if (!device)
1773 return;
1774
1775 if (device->trace_bo)
1776 device->ws->buffer_destroy(device->trace_bo);
1777
1778 if (device->gfx_init)
1779 device->ws->buffer_destroy(device->gfx_init);
1780
1781 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1782 for (unsigned q = 0; q < device->queue_count[i]; q++)
1783 radv_queue_finish(&device->queues[i][q]);
1784 if (device->queue_count[i])
1785 vk_free(&device->alloc, device->queues[i]);
1786 if (device->empty_cs[i])
1787 device->ws->cs_destroy(device->empty_cs[i]);
1788 }
1789 radv_device_finish_meta(device);
1790
1791 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1792 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1793
1794 radv_destroy_shader_slabs(device);
1795
1796 radv_bo_list_finish(&device->bo_list);
1797 vk_free(&device->alloc, device);
1798 }
1799
1800 VkResult radv_EnumerateInstanceLayerProperties(
1801 uint32_t* pPropertyCount,
1802 VkLayerProperties* pProperties)
1803 {
1804 if (pProperties == NULL) {
1805 *pPropertyCount = 0;
1806 return VK_SUCCESS;
1807 }
1808
1809 /* None supported at this time */
1810 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1811 }
1812
1813 VkResult radv_EnumerateDeviceLayerProperties(
1814 VkPhysicalDevice physicalDevice,
1815 uint32_t* pPropertyCount,
1816 VkLayerProperties* pProperties)
1817 {
1818 if (pProperties == NULL) {
1819 *pPropertyCount = 0;
1820 return VK_SUCCESS;
1821 }
1822
1823 /* None supported at this time */
1824 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1825 }
1826
1827 void radv_GetDeviceQueue2(
1828 VkDevice _device,
1829 const VkDeviceQueueInfo2* pQueueInfo,
1830 VkQueue* pQueue)
1831 {
1832 RADV_FROM_HANDLE(radv_device, device, _device);
1833 struct radv_queue *queue;
1834
1835 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
1836 if (pQueueInfo->flags != queue->flags) {
1837 /* From the Vulkan 1.1.70 spec:
1838 *
1839 * "The queue returned by vkGetDeviceQueue2 must have the same
1840 * flags value from this structure as that used at device
1841 * creation time in a VkDeviceQueueCreateInfo instance. If no
1842 * matching flags were specified at device creation time then
1843 * pQueue will return VK_NULL_HANDLE."
1844 */
1845 *pQueue = VK_NULL_HANDLE;
1846 return;
1847 }
1848
1849 *pQueue = radv_queue_to_handle(queue);
1850 }
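/* Illustrative caller-side sketch (not part of this driver): a queue created
 * with non-zero flags must be retrieved with the same flags, e.g.
 *
 *     VkDeviceQueueInfo2 info = {
 *         .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
 *         .flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT,
 *         .queueFamilyIndex = 0,
 *         .queueIndex = 0,
 *     };
 *     vkGetDeviceQueue2(device, &info, &queue);
 *
 * Mismatched flags yield VK_NULL_HANDLE, per the spec text quoted above. */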
1851
1852 void radv_GetDeviceQueue(
1853 VkDevice _device,
1854 uint32_t queueFamilyIndex,
1855 uint32_t queueIndex,
1856 VkQueue* pQueue)
1857 {
1858 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1859 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1860 .queueFamilyIndex = queueFamilyIndex,
1861 .queueIndex = queueIndex
1862 };
1863
1864 radv_GetDeviceQueue2(_device, &info, pQueue);
1865 }
1866
1867 static void
1868 fill_geom_tess_rings(struct radv_queue *queue,
1869 uint32_t *map,
1870 bool add_sample_positions,
1871 uint32_t esgs_ring_size,
1872 struct radeon_winsys_bo *esgs_ring_bo,
1873 uint32_t gsvs_ring_size,
1874 struct radeon_winsys_bo *gsvs_ring_bo,
1875 uint32_t tess_factor_ring_size,
1876 uint32_t tess_offchip_ring_offset,
1877 uint32_t tess_offchip_ring_size,
1878 struct radeon_winsys_bo *tess_rings_bo)
1879 {
1880 uint64_t esgs_va = 0, gsvs_va = 0;
1881 uint64_t tess_va = 0, tess_offchip_va = 0;
1882 uint32_t *desc = &map[4];
1883
1884 if (esgs_ring_bo)
1885 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1886 if (gsvs_ring_bo)
1887 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1888 if (tess_rings_bo) {
1889 tess_va = radv_buffer_get_va(tess_rings_bo);
1890 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1891 }
1892
1893 /* stride 0, num records - size, add tid, swizzle, elsize4,
1894 index stride 64 */
1895 desc[0] = esgs_va;
1896 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1897 S_008F04_STRIDE(0) |
1898 S_008F04_SWIZZLE_ENABLE(true);
1899 desc[2] = esgs_ring_size;
1900 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1901 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1902 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1903 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1904 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1905 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1906 S_008F0C_ELEMENT_SIZE(1) |
1907 S_008F0C_INDEX_STRIDE(3) |
1908 S_008F0C_ADD_TID_ENABLE(true);
1909
1910 desc += 4;
1911 /* GS entry for ES->GS ring */
1912 /* stride 0, num records - size, elsize0,
1913 index stride 0 */
1914 desc[0] = esgs_va;
1915 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1916 S_008F04_STRIDE(0) |
1917 S_008F04_SWIZZLE_ENABLE(false);
1918 desc[2] = esgs_ring_size;
1919 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1920 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1921 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1922 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1923 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1924 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1925 S_008F0C_ELEMENT_SIZE(0) |
1926 S_008F0C_INDEX_STRIDE(0) |
1927 S_008F0C_ADD_TID_ENABLE(false);
1928
1929 desc += 4;
1930 /* VS entry for GS->VS ring */
1931 /* stride 0, num records - size, elsize0,
1932 index stride 0 */
1933 desc[0] = gsvs_va;
1934 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1935 S_008F04_STRIDE(0) |
1936 S_008F04_SWIZZLE_ENABLE(false);
1937 desc[2] = gsvs_ring_size;
1938 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1939 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1940 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1941 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1942 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1943 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1944 S_008F0C_ELEMENT_SIZE(0) |
1945 S_008F0C_INDEX_STRIDE(0) |
1946 S_008F0C_ADD_TID_ENABLE(false);
1947 desc += 4;
1948
1949 /* stride gsvs_itemsize, num records 64
1950 elsize 4, index stride 16 */
1951 /* shader will patch stride and desc[2] */
1952 desc[0] = gsvs_va;
1953 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1954 S_008F04_STRIDE(0) |
1955 S_008F04_SWIZZLE_ENABLE(true);
1956 desc[2] = 0;
1957 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1958 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1959 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1960 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1961 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1962 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1963 S_008F0C_ELEMENT_SIZE(1) |
1964 S_008F0C_INDEX_STRIDE(1) |
1965 S_008F0C_ADD_TID_ENABLE(true);
1966 desc += 4;
1967
1968 desc[0] = tess_va;
1969 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1970 S_008F04_STRIDE(0) |
1971 S_008F04_SWIZZLE_ENABLE(false);
1972 desc[2] = tess_factor_ring_size;
1973 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1974 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1975 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1976 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1977 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1978 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1979 S_008F0C_ELEMENT_SIZE(0) |
1980 S_008F0C_INDEX_STRIDE(0) |
1981 S_008F0C_ADD_TID_ENABLE(false);
1982 desc += 4;
1983
1984 desc[0] = tess_offchip_va;
1985 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1986 S_008F04_STRIDE(0) |
1987 S_008F04_SWIZZLE_ENABLE(false);
1988 desc[2] = tess_offchip_ring_size;
1989 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1990 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1991 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1992 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1993 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1994 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1995 S_008F0C_ELEMENT_SIZE(0) |
1996 S_008F0C_INDEX_STRIDE(0) |
1997 S_008F0C_ADD_TID_ENABLE(false);
1998 desc += 4;
1999
2000 /* add sample positions after all rings */
2001 memcpy(desc, queue->device->sample_locations_1x, 8);
2002 desc += 2;
2003 memcpy(desc, queue->device->sample_locations_2x, 16);
2004 desc += 4;
2005 memcpy(desc, queue->device->sample_locations_4x, 32);
2006 desc += 8;
2007 memcpy(desc, queue->device->sample_locations_8x, 64);
2008 desc += 16;
2009 memcpy(desc, queue->device->sample_locations_16x, 128);
2010 }
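/* Layout summary (derived from the code above): map[0..3] hold the scratch
 * descriptor plus padding, followed by six 4-dword ring descriptors, then
 * 8+16+32+64+128 = 248 bytes of sample positions. The matching allocation
 * size is computed in radv_get_preamble_cs(). */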
2011
2012 static unsigned
2013 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
2014 {
2015 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
2016 device->physical_device->rad_info.family != CHIP_CARRIZO &&
2017 device->physical_device->rad_info.family != CHIP_STONEY;
2018 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
2019 unsigned max_offchip_buffers;
2020 unsigned offchip_granularity;
2021 unsigned hs_offchip_param;
2022
2023 /*
2024 * Per RadeonSI:
2025 * This must be one less than the maximum number due to a hw limitation.
2026 * Various hardware bugs in SI, CIK, and GFX9 need this.
2027 *
2028 * Per AMDVLK:
2029 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
2030 * Gfx7 should limit max_offchip_buffers to 508
2031 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
2032 *
2033 * Follow AMDVLK here.
2034 */
2035 if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
2036 device->physical_device->rad_info.chip_class == CIK ||
2037 device->physical_device->rad_info.chip_class == SI)
2038 --max_offchip_buffers_per_se;
2039
2040 max_offchip_buffers = max_offchip_buffers_per_se *
2041 device->physical_device->rad_info.max_se;
2042
2043 switch (device->tess_offchip_block_dw_size) {
2044 default:
2045 assert(0);
2046 /* fall through */
2047 case 8192:
2048 offchip_granularity = V_03093C_X_8K_DWORDS;
2049 break;
2050 case 4096:
2051 offchip_granularity = V_03093C_X_4K_DWORDS;
2052 break;
2053 }
2054
2055 switch (device->physical_device->rad_info.chip_class) {
2056 case SI:
2057 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
2058 break;
2059 case CIK:
2060 case VI:
2061 case GFX9:
2062 default:
2063 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
2064 break;
2065 }
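	/* Example: Vega10 has 4 SEs with double off-chip buffering, so
	 * (128 - 1) * 4 = 508, matching the AMDVLK limit quoted above. */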
2066
2067 *max_offchip_buffers_p = max_offchip_buffers;
2068 if (device->physical_device->rad_info.chip_class >= CIK) {
2069 if (device->physical_device->rad_info.chip_class >= VI)
2070 --max_offchip_buffers;
2071 hs_offchip_param =
2072 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
2073 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
2074 } else {
2075 hs_offchip_param =
2076 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
2077 }
2078 return hs_offchip_param;
2079 }
2080
2081 static void
2082 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2083 struct radeon_winsys_bo *esgs_ring_bo,
2084 uint32_t esgs_ring_size,
2085 struct radeon_winsys_bo *gsvs_ring_bo,
2086 uint32_t gsvs_ring_size)
2087 {
2088 if (!esgs_ring_bo && !gsvs_ring_bo)
2089 return;
2090
2091 if (esgs_ring_bo)
2092 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
2093
2094 if (gsvs_ring_bo)
2095 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
2096
2097 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2098 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2099 radeon_emit(cs, esgs_ring_size >> 8);
2100 radeon_emit(cs, gsvs_ring_size >> 8);
2101 } else {
2102 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2103 radeon_emit(cs, esgs_ring_size >> 8);
2104 radeon_emit(cs, gsvs_ring_size >> 8);
2105 }
2106 }
2107
2108 static void
2109 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2110 unsigned hs_offchip_param, unsigned tf_ring_size,
2111 struct radeon_winsys_bo *tess_rings_bo)
2112 {
2113 uint64_t tf_va;
2114
2115 if (!tess_rings_bo)
2116 return;
2117
2118 tf_va = radv_buffer_get_va(tess_rings_bo);
2119
2120 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
2121
2122 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2123 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
2124 S_030938_SIZE(tf_ring_size / 4));
2125 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
2126 tf_va >> 8);
2127 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2128 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
2129 S_030944_BASE_HI(tf_va >> 40));
2130 }
2131 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
2132 hs_offchip_param);
2133 } else {
2134 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
2135 S_008988_SIZE(tf_ring_size / 4));
2136 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
2137 tf_va >> 8);
2138 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
2139 hs_offchip_param);
2140 }
2141 }
2142
2143 static void
2144 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2145 struct radeon_winsys_bo *compute_scratch_bo)
2146 {
2147 uint64_t scratch_va;
2148
2149 if (!compute_scratch_bo)
2150 return;
2151
2152 scratch_va = radv_buffer_get_va(compute_scratch_bo);
2153
2154 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2155
2156 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2157 radeon_emit(cs, scratch_va);
2158 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2159 S_008F04_SWIZZLE_ENABLE(1));
2160 }
2161
2162 static void
2163 radv_emit_global_shader_pointers(struct radv_queue *queue,
2164 struct radeon_cmdbuf *cs,
2165 struct radeon_winsys_bo *descriptor_bo)
2166 {
2167 uint64_t va;
2168
2169 if (!descriptor_bo)
2170 return;
2171
2172 va = radv_buffer_get_va(descriptor_bo);
2173
2174 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2175
2176 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2177 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2178 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2179 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2180 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2181
2182 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2183 radv_emit_shader_pointer(queue->device, cs, regs[i],
2184 va, true);
2185 }
2186 } else {
2187 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2188 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2189 R_00B230_SPI_SHADER_USER_DATA_GS_0,
2190 R_00B330_SPI_SHADER_USER_DATA_ES_0,
2191 R_00B430_SPI_SHADER_USER_DATA_HS_0,
2192 R_00B530_SPI_SHADER_USER_DATA_LS_0};
2193
2194 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2195 radv_emit_shader_pointer(queue->device, cs, regs[i],
2196 va, true);
2197 }
2198 }
2199 }
2200
2201 static void
2202 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2203 {
2204 struct radv_device *device = queue->device;
2205
2206 if (device->gfx_init) {
2207 uint64_t va = radv_buffer_get_va(device->gfx_init);
2208
2209 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
2210 radeon_emit(cs, va);
2211 radeon_emit(cs, va >> 32);
2212 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
2213
2214 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
2215 } else {
2216 struct radv_physical_device *physical_device = device->physical_device;
2217 si_emit_graphics(physical_device, cs);
2218 }
2219 }
2220
2221 static void
2222 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2223 {
2224 struct radv_physical_device *physical_device = queue->device->physical_device;
2225 si_emit_compute(physical_device, cs);
2226 }
2227
2228 static VkResult
2229 radv_get_preamble_cs(struct radv_queue *queue,
2230 uint32_t scratch_size,
2231 uint32_t compute_scratch_size,
2232 uint32_t esgs_ring_size,
2233 uint32_t gsvs_ring_size,
2234 bool needs_tess_rings,
2235 bool needs_sample_positions,
2236 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
2237 struct radeon_cmdbuf **initial_preamble_cs,
2238 struct radeon_cmdbuf **continue_preamble_cs)
2239 {
2240 struct radeon_winsys_bo *scratch_bo = NULL;
2241 struct radeon_winsys_bo *descriptor_bo = NULL;
2242 struct radeon_winsys_bo *compute_scratch_bo = NULL;
2243 struct radeon_winsys_bo *esgs_ring_bo = NULL;
2244 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
2245 struct radeon_winsys_bo *tess_rings_bo = NULL;
2246 struct radeon_cmdbuf *dest_cs[3] = {0};
2247 bool add_tess_rings = false, add_sample_positions = false;
2248 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
2249 unsigned max_offchip_buffers;
2250 unsigned hs_offchip_param = 0;
2251 unsigned tess_offchip_ring_offset;
2252 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
2253 if (!queue->has_tess_rings) {
2254 if (needs_tess_rings)
2255 add_tess_rings = true;
2256 }
2257 if (!queue->has_sample_positions) {
2258 if (needs_sample_positions)
2259 add_sample_positions = true;
2260 }
2261 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
2262 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
2263 &max_offchip_buffers);
2264 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
2265 tess_offchip_ring_size = max_offchip_buffers *
2266 queue->device->tess_offchip_block_dw_size * 4;
2267
2268 if (scratch_size <= queue->scratch_size &&
2269 compute_scratch_size <= queue->compute_scratch_size &&
2270 esgs_ring_size <= queue->esgs_ring_size &&
2271 gsvs_ring_size <= queue->gsvs_ring_size &&
2272 !add_tess_rings && !add_sample_positions &&
2273 queue->initial_preamble_cs) {
2274 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2275 *initial_preamble_cs = queue->initial_preamble_cs;
2276 *continue_preamble_cs = queue->continue_preamble_cs;
2277 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2278 *continue_preamble_cs = NULL;
2279 return VK_SUCCESS;
2280 }
2281
2282 if (scratch_size > queue->scratch_size) {
2283 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2284 scratch_size,
2285 4096,
2286 RADEON_DOMAIN_VRAM,
2287 ring_bo_flags);
2288 if (!scratch_bo)
2289 goto fail;
2290 } else
2291 scratch_bo = queue->scratch_bo;
2292
2293 if (compute_scratch_size > queue->compute_scratch_size) {
2294 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2295 compute_scratch_size,
2296 4096,
2297 RADEON_DOMAIN_VRAM,
2298 ring_bo_flags);
2299 if (!compute_scratch_bo)
2300 goto fail;
2301
2302 } else
2303 compute_scratch_bo = queue->compute_scratch_bo;
2304
2305 if (esgs_ring_size > queue->esgs_ring_size) {
2306 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2307 esgs_ring_size,
2308 4096,
2309 RADEON_DOMAIN_VRAM,
2310 ring_bo_flags);
2311 if (!esgs_ring_bo)
2312 goto fail;
2313 } else {
2314 esgs_ring_bo = queue->esgs_ring_bo;
2315 esgs_ring_size = queue->esgs_ring_size;
2316 }
2317
2318 if (gsvs_ring_size > queue->gsvs_ring_size) {
2319 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2320 gsvs_ring_size,
2321 4096,
2322 RADEON_DOMAIN_VRAM,
2323 ring_bo_flags);
2324 if (!gsvs_ring_bo)
2325 goto fail;
2326 } else {
2327 gsvs_ring_bo = queue->gsvs_ring_bo;
2328 gsvs_ring_size = queue->gsvs_ring_size;
2329 }
2330
2331 if (add_tess_rings) {
2332 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
2333 tess_offchip_ring_offset + tess_offchip_ring_size,
2334 256,
2335 RADEON_DOMAIN_VRAM,
2336 ring_bo_flags);
2337 if (!tess_rings_bo)
2338 goto fail;
2339 } else {
2340 tess_rings_bo = queue->tess_rings_bo;
2341 }
2342
2343 if (scratch_bo != queue->scratch_bo ||
2344 esgs_ring_bo != queue->esgs_ring_bo ||
2345 gsvs_ring_bo != queue->gsvs_ring_bo ||
2346 tess_rings_bo != queue->tess_rings_bo ||
2347 add_sample_positions) {
2348 uint32_t size = 0;
2349 if (gsvs_ring_bo || esgs_ring_bo ||
2350 tess_rings_bo || add_sample_positions) {
2351 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
2352 if (add_sample_positions)
2353 size += 256; /* (1+2+4+8+16) samples * 4 * 2 = 248 bytes, padded to 256. */
2354 }
2355 else if (scratch_bo)
2356 size = 8; /* 2 dword */
2357
2358 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
2359 size,
2360 4096,
2361 RADEON_DOMAIN_VRAM,
2362 RADEON_FLAG_CPU_ACCESS |
2363 RADEON_FLAG_NO_INTERPROCESS_SHARING |
2364 RADEON_FLAG_READ_ONLY);
2365 if (!descriptor_bo)
2366 goto fail;
2367 } else
2368 descriptor_bo = queue->descriptor_bo;
2369
2370 for(int i = 0; i < 3; ++i) {
2371 struct radeon_cmdbuf *cs = NULL;
2372 cs = queue->device->ws->cs_create(queue->device->ws,
2373 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2374 if (!cs)
2375 goto fail;
2376
2377 dest_cs[i] = cs;
2378
2379 if (scratch_bo)
2380 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
2381
2382 /* Emit initial configuration. */
2383 switch (queue->queue_family_index) {
2384 case RADV_QUEUE_GENERAL:
2385 radv_init_graphics_state(cs, queue);
2386 break;
2387 case RADV_QUEUE_COMPUTE:
2388 radv_init_compute_state(cs, queue);
2389 break;
2390 case RADV_QUEUE_TRANSFER:
2391 break;
2392 }
2393
2394 if (descriptor_bo != queue->descriptor_bo) {
2395 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2396
2397 if (scratch_bo) {
2398 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2399 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2400 S_008F04_SWIZZLE_ENABLE(1);
2401 map[0] = scratch_va;
2402 map[1] = rsrc1;
2403 }
2404
2405 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
2406 add_sample_positions)
2407 fill_geom_tess_rings(queue, map, add_sample_positions,
2408 esgs_ring_size, esgs_ring_bo,
2409 gsvs_ring_size, gsvs_ring_bo,
2410 tess_factor_ring_size,
2411 tess_offchip_ring_offset,
2412 tess_offchip_ring_size,
2413 tess_rings_bo);
2414
2415 queue->device->ws->buffer_unmap(descriptor_bo);
2416 }
2417
2418 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
2419 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2420 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2421 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2422 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2423 }
2424
2425 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
2426 gsvs_ring_bo, gsvs_ring_size);
2427 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
2428 tess_factor_ring_size, tess_rings_bo);
2429 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
2430 radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
2431
2432 if (i == 0) {
2433 si_cs_emit_cache_flush(cs,
2434 queue->device->physical_device->rad_info.chip_class,
2435 NULL, 0,
2436 queue->queue_family_index == RING_COMPUTE &&
2437 queue->device->physical_device->rad_info.chip_class >= CIK,
2438 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2439 RADV_CMD_FLAG_INV_ICACHE |
2440 RADV_CMD_FLAG_INV_SMEM_L1 |
2441 RADV_CMD_FLAG_INV_VMEM_L1 |
2442 RADV_CMD_FLAG_INV_GLOBAL_L2 |
2443 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2444 } else if (i == 1) {
2445 si_cs_emit_cache_flush(cs,
2446 queue->device->physical_device->rad_info.chip_class,
2447 NULL, 0,
2448 queue->queue_family_index == RING_COMPUTE &&
2449 queue->device->physical_device->rad_info.chip_class >= CIK,
2450 RADV_CMD_FLAG_INV_ICACHE |
2451 RADV_CMD_FLAG_INV_SMEM_L1 |
2452 RADV_CMD_FLAG_INV_VMEM_L1 |
2453 RADV_CMD_FLAG_INV_GLOBAL_L2 |
2454 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2455 }
2456
2457 if (!queue->device->ws->cs_finalize(cs))
2458 goto fail;
2459 }
2460
2461 if (queue->initial_full_flush_preamble_cs)
2462 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2463
2464 if (queue->initial_preamble_cs)
2465 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2466
2467 if (queue->continue_preamble_cs)
2468 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2469
2470 queue->initial_full_flush_preamble_cs = dest_cs[0];
2471 queue->initial_preamble_cs = dest_cs[1];
2472 queue->continue_preamble_cs = dest_cs[2];
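	/* The three preambles built above differ only in cache handling:
	 * dest_cs[0] does a full flush and invalidation, dest_cs[1] only
	 * invalidates caches, and dest_cs[2] (the continue preamble) emits
	 * no flush at all. */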
2473
2474 if (scratch_bo != queue->scratch_bo) {
2475 if (queue->scratch_bo)
2476 queue->device->ws->buffer_destroy(queue->scratch_bo);
2477 queue->scratch_bo = scratch_bo;
2478 queue->scratch_size = scratch_size;
2479 }
2480
2481 if (compute_scratch_bo != queue->compute_scratch_bo) {
2482 if (queue->compute_scratch_bo)
2483 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2484 queue->compute_scratch_bo = compute_scratch_bo;
2485 queue->compute_scratch_size = compute_scratch_size;
2486 }
2487
2488 if (esgs_ring_bo != queue->esgs_ring_bo) {
2489 if (queue->esgs_ring_bo)
2490 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2491 queue->esgs_ring_bo = esgs_ring_bo;
2492 queue->esgs_ring_size = esgs_ring_size;
2493 }
2494
2495 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2496 if (queue->gsvs_ring_bo)
2497 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2498 queue->gsvs_ring_bo = gsvs_ring_bo;
2499 queue->gsvs_ring_size = gsvs_ring_size;
2500 }
2501
2502 if (tess_rings_bo != queue->tess_rings_bo) {
2503 queue->tess_rings_bo = tess_rings_bo;
2504 queue->has_tess_rings = true;
2505 }
2506
2507 if (descriptor_bo != queue->descriptor_bo) {
2508 if (queue->descriptor_bo)
2509 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2510
2511 queue->descriptor_bo = descriptor_bo;
2512 }
2513
2514 if (add_sample_positions)
2515 queue->has_sample_positions = true;
2516
2517 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2518 *initial_preamble_cs = queue->initial_preamble_cs;
2519 *continue_preamble_cs = queue->continue_preamble_cs;
2520 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2521 *continue_preamble_cs = NULL;
2522 return VK_SUCCESS;
2523 fail:
2524 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2525 if (dest_cs[i])
2526 queue->device->ws->cs_destroy(dest_cs[i]);
2527 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2528 queue->device->ws->buffer_destroy(descriptor_bo);
2529 if (scratch_bo && scratch_bo != queue->scratch_bo)
2530 queue->device->ws->buffer_destroy(scratch_bo);
2531 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2532 queue->device->ws->buffer_destroy(compute_scratch_bo);
2533 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2534 queue->device->ws->buffer_destroy(esgs_ring_bo);
2535 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2536 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2537 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2538 queue->device->ws->buffer_destroy(tess_rings_bo);
2539 return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2540 }
2541
2542 static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
2543 struct radv_winsys_sem_counts *counts,
2544 int num_sems,
2545 const VkSemaphore *sems,
2546 VkFence _fence,
2547 bool reset_temp)
2548 {
2549 int syncobj_idx = 0, sem_idx = 0;
2550
2551 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2552 return VK_SUCCESS;
2553
2554 for (uint32_t i = 0; i < num_sems; i++) {
2555 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2556
2557 if (sem->temp_syncobj || sem->syncobj)
2558 counts->syncobj_count++;
2559 else
2560 counts->sem_count++;
2561 }
2562
2563 if (_fence != VK_NULL_HANDLE) {
2564 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2565 if (fence->temp_syncobj || fence->syncobj)
2566 counts->syncobj_count++;
2567 }
2568
2569 if (counts->syncobj_count) {
2570 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2571 if (!counts->syncobj)
2572 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2573 }
2574
2575 if (counts->sem_count) {
2576 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2577 if (!counts->sem) {
2578 free(counts->syncobj);
2579 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2580 }
2581 }
2582
2583 for (uint32_t i = 0; i < num_sems; i++) {
2584 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2585
2586 if (sem->temp_syncobj) {
2587 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2588 }
2589 else if (sem->syncobj)
2590 counts->syncobj[syncobj_idx++] = sem->syncobj;
2591 else {
2592 assert(sem->sem);
2593 counts->sem[sem_idx++] = sem->sem;
2594 }
2595 }
2596
2597 if (_fence != VK_NULL_HANDLE) {
2598 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2599 if (fence->temp_syncobj)
2600 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2601 else if (fence->syncobj)
2602 counts->syncobj[syncobj_idx++] = fence->syncobj;
2603 }
2604
2605 return VK_SUCCESS;
2606 }
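/* Note: two passes are made over the semaphores, first to size the syncobj
 * and legacy-semaphore arrays, then to fill them; the fence, when it carries
 * a syncobj, is appended last. */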
2607
2608 static void
2609 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2610 {
2611 free(sem_info->wait.syncobj);
2612 free(sem_info->wait.sem);
2613 free(sem_info->signal.syncobj);
2614 free(sem_info->signal.sem);
2615 }
2616
2617
2618 static void radv_free_temp_syncobjs(struct radv_device *device,
2619 int num_sems,
2620 const VkSemaphore *sems)
2621 {
2622 for (uint32_t i = 0; i < num_sems; i++) {
2623 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2624
2625 if (sem->temp_syncobj) {
2626 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2627 sem->temp_syncobj = 0;
2628 }
2629 }
2630 }
2631
2632 static VkResult
2633 radv_alloc_sem_info(struct radv_instance *instance,
2634 struct radv_winsys_sem_info *sem_info,
2635 int num_wait_sems,
2636 const VkSemaphore *wait_sems,
2637 int num_signal_sems,
2638 const VkSemaphore *signal_sems,
2639 VkFence fence)
2640 {
2641 VkResult ret;
2642 memset(sem_info, 0, sizeof(*sem_info));
2643
2644 ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2645 if (ret)
2646 return ret;
2647 ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
2648 if (ret)
2649 radv_free_sem_info(sem_info);
2650
2651 /* caller can override these */
2652 sem_info->cs_emit_wait = true;
2653 sem_info->cs_emit_signal = true;
2654 return ret;
2655 }
2656
2657 /* Signals the fence as soon as all work currently submitted to the queue is done. */
2658 static VkResult radv_signal_fence(struct radv_queue *queue,
2659 struct radv_fence *fence)
2660 {
2661 int ret;
2662 VkResult result;
2663 struct radv_winsys_sem_info sem_info;
2664
2665 result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
2666 radv_fence_to_handle(fence));
2667 if (result != VK_SUCCESS)
2668 return result;
2669
2670 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2671 &queue->device->empty_cs[queue->queue_family_index],
2672 1, NULL, NULL, &sem_info, NULL,
2673 false, fence->fence);
2674 radv_free_sem_info(&sem_info);
2675
2676 if (ret)
2677 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
2678
2679 return VK_SUCCESS;
2680 }
2681
2682 VkResult radv_QueueSubmit(
2683 VkQueue _queue,
2684 uint32_t submitCount,
2685 const VkSubmitInfo* pSubmits,
2686 VkFence _fence)
2687 {
2688 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2689 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2690 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2691 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2692 int ret;
2693 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2694 uint32_t scratch_size = 0;
2695 uint32_t compute_scratch_size = 0;
2696 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2697 struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2698 VkResult result;
2699 bool fence_emitted = false;
2700 bool tess_rings_needed = false;
2701 bool sample_positions_needed = false;
2702
2703 /* Do this first so failing to allocate scratch buffers can't result in
2704 * partially executed submissions. */
2705 for (uint32_t i = 0; i < submitCount; i++) {
2706 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2707 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2708 pSubmits[i].pCommandBuffers[j]);
2709
2710 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2711 compute_scratch_size = MAX2(compute_scratch_size,
2712 cmd_buffer->compute_scratch_size_needed);
2713 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2714 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2715 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2716 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2717 }
2718 }
2719
2720 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2721 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2722 sample_positions_needed, &initial_flush_preamble_cs,
2723 &initial_preamble_cs, &continue_preamble_cs);
2724 if (result != VK_SUCCESS)
2725 return result;
2726
2727 for (uint32_t i = 0; i < submitCount; i++) {
2728 struct radeon_cmdbuf **cs_array;
2729 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2730 bool can_patch = true;
2731 uint32_t advance;
2732 struct radv_winsys_sem_info sem_info;
2733
2734 result = radv_alloc_sem_info(queue->device->instance,
2735 &sem_info,
2736 pSubmits[i].waitSemaphoreCount,
2737 pSubmits[i].pWaitSemaphores,
2738 pSubmits[i].signalSemaphoreCount,
2739 pSubmits[i].pSignalSemaphores,
2740 _fence);
2741 if (result != VK_SUCCESS)
2742 return result;
2743
2744 if (!pSubmits[i].commandBufferCount) {
2745 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2746 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2747 &queue->device->empty_cs[queue->queue_family_index],
2748 1, NULL, NULL,
2749 &sem_info, NULL,
2750 false, base_fence);
2751 if (ret) {
2752 radv_loge("failed to submit CS %d\n", i);
2753 abort();
2754 }
2755 fence_emitted = true;
2756 }
2757 radv_free_sem_info(&sem_info);
2758 continue;
2759 }
2760
2761 cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
2762 (pSubmits[i].commandBufferCount));
2763
2764 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2765 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2766 pSubmits[i].pCommandBuffers[j]);
2767 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2768
2769 cs_array[j] = cmd_buffer->cs;
2770 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2771 can_patch = false;
2772
2773 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2774 }
2775
2776 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2777 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2778 const struct radv_winsys_bo_list *bo_list = NULL;
2779
2780 advance = MIN2(max_cs_submission,
2781 pSubmits[i].commandBufferCount - j);
2782
2783 if (queue->device->trace_bo)
2784 *queue->device->trace_id_ptr = 0;
2785
2786 sem_info.cs_emit_wait = j == 0;
2787 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2788
2789 if (unlikely(queue->device->use_global_bo_list)) {
2790 pthread_mutex_lock(&queue->device->bo_list.mutex);
2791 bo_list = &queue->device->bo_list.list;
2792 }
2793
2794 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2795 advance, initial_preamble, continue_preamble_cs,
2796 &sem_info, bo_list,
2797 can_patch, base_fence);
2798
2799 if (unlikely(queue->device->use_global_bo_list))
2800 pthread_mutex_unlock(&queue->device->bo_list.mutex);
2801
2802 if (ret) {
2803 radv_loge("failed to submit CS %d\n", i);
2804 abort();
2805 }
2806 fence_emitted = true;
2807 if (queue->device->trace_bo) {
2808 radv_check_gpu_hangs(queue, cs_array[j]);
2809 }
2810 }
2811
2812 radv_free_temp_syncobjs(queue->device,
2813 pSubmits[i].waitSemaphoreCount,
2814 pSubmits[i].pWaitSemaphores);
2815 radv_free_sem_info(&sem_info);
2816 free(cs_array);
2817 }
2818
2819 if (fence) {
2820 if (!fence_emitted) {
2821 result = radv_signal_fence(queue, fence);
2822 if (result != VK_SUCCESS)
2823 return result;
2824 }
2825 fence->submitted = true;
2826 }
2827
2828 return VK_SUCCESS;
2829 }
2830
2831 VkResult radv_QueueWaitIdle(
2832 VkQueue _queue)
2833 {
2834 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2835
2836 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2837 radv_queue_family_to_ring(queue->queue_family_index),
2838 queue->queue_idx);
2839 return VK_SUCCESS;
2840 }
2841
2842 VkResult radv_DeviceWaitIdle(
2843 VkDevice _device)
2844 {
2845 RADV_FROM_HANDLE(radv_device, device, _device);
2846
2847 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2848 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2849 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2850 }
2851 }
2852 return VK_SUCCESS;
2853 }
2854
2855 VkResult radv_EnumerateInstanceExtensionProperties(
2856 const char* pLayerName,
2857 uint32_t* pPropertyCount,
2858 VkExtensionProperties* pProperties)
2859 {
2860 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2861
2862 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2863 if (radv_supported_instance_extensions.extensions[i]) {
2864 vk_outarray_append(&out, prop) {
2865 *prop = radv_instance_extensions[i];
2866 }
2867 }
2868 }
2869
2870 return vk_outarray_status(&out);
2871 }
2872
2873 VkResult radv_EnumerateDeviceExtensionProperties(
2874 VkPhysicalDevice physicalDevice,
2875 const char* pLayerName,
2876 uint32_t* pPropertyCount,
2877 VkExtensionProperties* pProperties)
2878 {
2879 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2880 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2881
2882 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2883 if (device->supported_extensions.extensions[i]) {
2884 vk_outarray_append(&out, prop) {
2885 *prop = radv_device_extensions[i];
2886 }
2887 }
2888 }
2889
2890 return vk_outarray_status(&out);
2891 }
2892
2893 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2894 VkInstance _instance,
2895 const char* pName)
2896 {
2897 RADV_FROM_HANDLE(radv_instance, instance, _instance);
2898
2899 return radv_lookup_entrypoint_checked(pName,
2900 instance ? instance->apiVersion : 0,
2901 instance ? &instance->enabled_extensions : NULL,
2902 NULL);
2903 }
2904
2905 /* The loader wants us to expose a second GetInstanceProcAddr function
2906 * to work around certain LD_PRELOAD issues seen in apps.
2907 */
2908 PUBLIC
2909 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2910 VkInstance instance,
2911 const char* pName);
2912
2913 PUBLIC
2914 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2915 VkInstance instance,
2916 const char* pName)
2917 {
2918 return radv_GetInstanceProcAddr(instance, pName);
2919 }
2920
2921 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2922 VkDevice _device,
2923 const char* pName)
2924 {
2925 RADV_FROM_HANDLE(radv_device, device, _device);
2926
2927 return radv_lookup_entrypoint_checked(pName,
2928 device->instance->apiVersion,
2929 &device->instance->enabled_extensions,
2930 &device->enabled_extensions);
2931 }
2932
2933 bool radv_get_memory_fd(struct radv_device *device,
2934 struct radv_device_memory *memory,
2935 int *pFD)
2936 {
2937 struct radeon_bo_metadata metadata;
2938
2939 if (memory->image) {
2940 radv_init_metadata(device, memory->image, &metadata);
2941 device->ws->buffer_set_metadata(memory->bo, &metadata);
2942 }
2943
2944 return device->ws->buffer_get_fd(device->ws, memory->bo,
2945 pFD);
2946 }
2947
2948 static VkResult radv_alloc_memory(struct radv_device *device,
2949 const VkMemoryAllocateInfo* pAllocateInfo,
2950 const VkAllocationCallbacks* pAllocator,
2951 VkDeviceMemory* pMem)
2952 {
2953 struct radv_device_memory *mem;
2954 VkResult result;
2955 enum radeon_bo_domain domain;
2956 uint32_t flags = 0;
2957 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2958
2959 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2960
2961 if (pAllocateInfo->allocationSize == 0) {
2962 /* Apparently, this is allowed */
2963 *pMem = VK_NULL_HANDLE;
2964 return VK_SUCCESS;
2965 }
2966
2967 const VkImportMemoryFdInfoKHR *import_info =
2968 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2969 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2970 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2971 const VkExportMemoryAllocateInfoKHR *export_info =
2972 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2973 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2974 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2975
2976 const struct wsi_memory_allocate_info *wsi_info =
2977 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2978
2979 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2980 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2981 if (mem == NULL)
2982 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2983
2984 if (wsi_info && wsi_info->implicit_sync)
2985 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2986
2987 if (dedicate_info) {
2988 mem->image = radv_image_from_handle(dedicate_info->image);
2989 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2990 } else {
2991 mem->image = NULL;
2992 mem->buffer = NULL;
2993 }
2994
2995 mem->user_ptr = NULL;
2996
2997 if (import_info) {
2998 assert(import_info->handleType ==
2999 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3000 import_info->handleType ==
3001 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3002 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
3003 NULL, NULL);
3004 if (!mem->bo) {
3005 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
3006 goto fail;
3007 } else {
3008 close(import_info->fd);
3009 }
3010 } else if (host_ptr_info) {
3011 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3012 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
3013 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
3014 pAllocateInfo->allocationSize);
3015 if (!mem->bo) {
3016 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
3017 goto fail;
3018 } else {
3019 mem->user_ptr = host_ptr_info->pHostPointer;
3020 }
3021 } else {
3022 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
3023 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
3024 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
3025 domain = RADEON_DOMAIN_GTT;
3026 else
3027 domain = RADEON_DOMAIN_VRAM;
3028
3029 if (mem_type_index == RADV_MEM_TYPE_VRAM)
3030 flags |= RADEON_FLAG_NO_CPU_ACCESS;
3031 else
3032 flags |= RADEON_FLAG_CPU_ACCESS;
3033
3034 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
3035 flags |= RADEON_FLAG_GTT_WC;
3036
3037 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
3038 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
3039
3040 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
3041 domain, flags);
3042
3043 if (!mem->bo) {
3044 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
3045 goto fail;
3046 }
3047 mem->type_index = mem_type_index;
3048 }
3049
3050 result = radv_bo_list_add(device, mem->bo);
3051 if (result != VK_SUCCESS)
3052 goto fail_bo;
3053
3054 *pMem = radv_device_memory_to_handle(mem);
3055
3056 return VK_SUCCESS;
3057
3058 fail_bo:
3059 device->ws->buffer_destroy(mem->bo);
3060 fail:
3061 vk_free2(&device->alloc, pAllocator, mem);
3062
3063 return result;
3064 }
3065
3066 VkResult radv_AllocateMemory(
3067 VkDevice _device,
3068 const VkMemoryAllocateInfo* pAllocateInfo,
3069 const VkAllocationCallbacks* pAllocator,
3070 VkDeviceMemory* pMem)
3071 {
3072 RADV_FROM_HANDLE(radv_device, device, _device);
3073 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
3074 }
3075
3076 void radv_FreeMemory(
3077 VkDevice _device,
3078 VkDeviceMemory _mem,
3079 const VkAllocationCallbacks* pAllocator)
3080 {
3081 RADV_FROM_HANDLE(radv_device, device, _device);
3082 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
3083
3084 if (mem == NULL)
3085 return;
3086
3087 radv_bo_list_remove(device, mem->bo);
3088 device->ws->buffer_destroy(mem->bo);
3089 mem->bo = NULL;
3090
3091 vk_free2(&device->alloc, pAllocator, mem);
3092 }
3093
3094 VkResult radv_MapMemory(
3095 VkDevice _device,
3096 VkDeviceMemory _memory,
3097 VkDeviceSize offset,
3098 VkDeviceSize size,
3099 VkMemoryMapFlags flags,
3100 void** ppData)
3101 {
3102 RADV_FROM_HANDLE(radv_device, device, _device);
3103 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3104
3105 if (mem == NULL) {
3106 *ppData = NULL;
3107 return VK_SUCCESS;
3108 }
3109
3110 if (mem->user_ptr)
3111 *ppData = mem->user_ptr;
3112 else
3113 *ppData = device->ws->buffer_map(mem->bo);
3114
3115 if (*ppData) {
3116 *ppData += offset;
3117 return VK_SUCCESS;
3118 }
3119
3120 return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
3121 }
3122
3123 void radv_UnmapMemory(
3124 VkDevice _device,
3125 VkDeviceMemory _memory)
3126 {
3127 RADV_FROM_HANDLE(radv_device, device, _device);
3128 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3129
3130 if (mem == NULL)
3131 return;
3132
3133 if (mem->user_ptr == NULL)
3134 device->ws->buffer_unmap(mem->bo);
3135 }
3136
3137 VkResult radv_FlushMappedMemoryRanges(
3138 VkDevice _device,
3139 uint32_t memoryRangeCount,
3140 const VkMappedMemoryRange* pMemoryRanges)
3141 {
3142 return VK_SUCCESS;
3143 }
3144
3145 VkResult radv_InvalidateMappedMemoryRanges(
3146 VkDevice _device,
3147 uint32_t memoryRangeCount,
3148 const VkMappedMemoryRange* pMemoryRanges)
3149 {
3150 return VK_SUCCESS;
3151 }
3152
3153 void radv_GetBufferMemoryRequirements(
3154 VkDevice _device,
3155 VkBuffer _buffer,
3156 VkMemoryRequirements* pMemoryRequirements)
3157 {
3158 RADV_FROM_HANDLE(radv_device, device, _device);
3159 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3160
3161 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3162
3163 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3164 pMemoryRequirements->alignment = 4096;
3165 else
3166 pMemoryRequirements->alignment = 16;
3167
3168 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
3169 }
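/* Example: a 100-byte non-sparse buffer reports alignment 16 and size
 * align64(100, 16) = 112; a sparse-binding buffer is page-aligned (4096). */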
3170
3171 void radv_GetBufferMemoryRequirements2(
3172 VkDevice device,
3173 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
3174 VkMemoryRequirements2KHR* pMemoryRequirements)
3175 {
3176 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
3177 &pMemoryRequirements->memoryRequirements);
3178 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
3179 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3180 switch (ext->sType) {
3181 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
3182 VkMemoryDedicatedRequirementsKHR *req =
3183 (VkMemoryDedicatedRequirementsKHR *) ext;
3184 req->requiresDedicatedAllocation = buffer->shareable;
3185 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3186 break;
3187 }
3188 default:
3189 break;
3190 }
3191 }
3192 }
3193
3194 void radv_GetImageMemoryRequirements(
3195 VkDevice _device,
3196 VkImage _image,
3197 VkMemoryRequirements* pMemoryRequirements)
3198 {
3199 RADV_FROM_HANDLE(radv_device, device, _device);
3200 RADV_FROM_HANDLE(radv_image, image, _image);
3201
3202 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3203
3204 pMemoryRequirements->size = image->size;
3205 pMemoryRequirements->alignment = image->alignment;
3206 }
3207
3208 void radv_GetImageMemoryRequirements2(
3209 VkDevice device,
3210 const VkImageMemoryRequirementsInfo2KHR* pInfo,
3211 VkMemoryRequirements2KHR* pMemoryRequirements)
3212 {
3213 radv_GetImageMemoryRequirements(device, pInfo->image,
3214 &pMemoryRequirements->memoryRequirements);
3215
3216 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3217
3218 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3219 switch (ext->sType) {
3220 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
3221 VkMemoryDedicatedRequirementsKHR *req =
3222 (VkMemoryDedicatedRequirementsKHR *) ext;
3223 req->requiresDedicatedAllocation = image->shareable;
3224 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3225 break;
3226 }
3227 default:
3228 break;
3229 }
3230 }
3231 }
3232
3233 void radv_GetImageSparseMemoryRequirements(
3234 VkDevice device,
3235 VkImage image,
3236 uint32_t* pSparseMemoryRequirementCount,
3237 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
3238 {
3239 stub();
3240 }
3241
3242 void radv_GetImageSparseMemoryRequirements2(
3243 VkDevice device,
3244 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
3245 uint32_t* pSparseMemoryRequirementCount,
3246 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
3247 {
3248 stub();
3249 }
3250
3251 void radv_GetDeviceMemoryCommitment(
3252 VkDevice device,
3253 VkDeviceMemory memory,
3254 VkDeviceSize* pCommittedMemoryInBytes)
3255 {
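	/* No lazily-allocated memory types are exposed, so nothing is
	 * ever committed on demand. */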
3256 *pCommittedMemoryInBytes = 0;
3257 }
3258
3259 VkResult radv_BindBufferMemory2(VkDevice device,
3260 uint32_t bindInfoCount,
3261 const VkBindBufferMemoryInfoKHR *pBindInfos)
3262 {
3263 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3264 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3265 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3266
3267 if (mem) {
3268 buffer->bo = mem->bo;
3269 buffer->offset = pBindInfos[i].memoryOffset;
3270 } else {
3271 buffer->bo = NULL;
3272 }
3273 }
3274 return VK_SUCCESS;
3275 }
3276
3277 VkResult radv_BindBufferMemory(
3278 VkDevice device,
3279 VkBuffer buffer,
3280 VkDeviceMemory memory,
3281 VkDeviceSize memoryOffset)
3282 {
3283 const VkBindBufferMemoryInfoKHR info = {
3284 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
3285 .buffer = buffer,
3286 .memory = memory,
3287 .memoryOffset = memoryOffset
3288 };
3289
3290 return radv_BindBufferMemory2(device, 1, &info);
3291 }
3292
3293 VkResult radv_BindImageMemory2(VkDevice device,
3294 uint32_t bindInfoCount,
3295 const VkBindImageMemoryInfoKHR *pBindInfos)
3296 {
3297 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3298 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3299 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3300
3301 if (mem) {
3302 image->bo = mem->bo;
3303 image->offset = pBindInfos[i].memoryOffset;
3304 } else {
3305 image->bo = NULL;
3306 image->offset = 0;
3307 }
3308 }
3309 return VK_SUCCESS;
3310 }
3311
3312
3313 VkResult radv_BindImageMemory(
3314 VkDevice device,
3315 VkImage image,
3316 VkDeviceMemory memory,
3317 VkDeviceSize memoryOffset)
3318 {
3319 const VkBindImageMemoryInfoKHR info = {
3320 		.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
3321 .image = image,
3322 .memory = memory,
3323 .memoryOffset = memoryOffset
3324 };
3325
3326 return radv_BindImageMemory2(device, 1, &info);
3327 }
3328
3329
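/* Bind (or unbind, when no memory is given) each requested range of the
 * buffer's virtual address space to the backing BO. */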
3330 static void
3331 radv_sparse_buffer_bind_memory(struct radv_device *device,
3332 const VkSparseBufferMemoryBindInfo *bind)
3333 {
3334 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3335
3336 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3337 struct radv_device_memory *mem = NULL;
3338
3339 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3340 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3341
3342 device->ws->buffer_virtual_bind(buffer->bo,
3343 bind->pBinds[i].resourceOffset,
3344 bind->pBinds[i].size,
3345 mem ? mem->bo : NULL,
3346 bind->pBinds[i].memoryOffset);
3347 }
3348 }
3349
3350 static void
3351 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3352 const VkSparseImageOpaqueMemoryBindInfo *bind)
3353 {
3354 RADV_FROM_HANDLE(radv_image, image, bind->image);
3355
3356 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3357 struct radv_device_memory *mem = NULL;
3358
3359 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3360 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3361
3362 device->ws->buffer_virtual_bind(image->bo,
3363 bind->pBinds[i].resourceOffset,
3364 bind->pBinds[i].size,
3365 mem ? mem->bo : NULL,
3366 bind->pBinds[i].memoryOffset);
3367 }
3368 }
3369
3370 VkResult radv_QueueBindSparse(
3371 VkQueue _queue,
3372 uint32_t bindInfoCount,
3373 const VkBindSparseInfo* pBindInfo,
3374 VkFence _fence)
3375 {
3376 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3377 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3378 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3379 bool fence_emitted = false;
3380 VkResult result;
3381 int ret;
3382
3383 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3384 struct radv_winsys_sem_info sem_info;
3385 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3386 radv_sparse_buffer_bind_memory(queue->device,
3387 pBindInfo[i].pBufferBinds + j);
3388 }
3389
3390 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3391 radv_sparse_image_opaque_bind_memory(queue->device,
3392 pBindInfo[i].pImageOpaqueBinds + j);
3393 }
3394
3396 result = radv_alloc_sem_info(queue->device->instance,
3397 &sem_info,
3398 pBindInfo[i].waitSemaphoreCount,
3399 pBindInfo[i].pWaitSemaphores,
3400 pBindInfo[i].signalSemaphoreCount,
3401 pBindInfo[i].pSignalSemaphores,
3402 _fence);
3403 if (result != VK_SUCCESS)
3404 return result;
3405
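		/* Semaphores and fences are processed by the kernel as part of
		 * a CS submission, so submit an empty CS to handle them. */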
3406 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3407 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3408 &queue->device->empty_cs[queue->queue_family_index],
3409 1, NULL, NULL,
3410 &sem_info, NULL,
3411 false, base_fence);
3412 if (ret) {
3413 radv_loge("failed to submit CS %d\n", i);
3414 abort();
3415 }
3416
3417 fence_emitted = true;
3418 if (fence)
3419 fence->submitted = true;
3420 }
3421
3422 radv_free_sem_info(&sem_info);
3423
3424 }
3425
3426 if (fence) {
3427 if (!fence_emitted) {
3428 result = radv_signal_fence(queue, fence);
3429 if (result != VK_SUCCESS)
3430 return result;
3431 }
3432 fence->submitted = true;
3433 }
3434
3435 return VK_SUCCESS;
3436 }
3437
3438 VkResult radv_CreateFence(
3439 VkDevice _device,
3440 const VkFenceCreateInfo* pCreateInfo,
3441 const VkAllocationCallbacks* pAllocator,
3442 VkFence* pFence)
3443 {
3444 RADV_FROM_HANDLE(radv_device, device, _device);
3445 const VkExportFenceCreateInfoKHR *export =
3446 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
3447 VkExternalFenceHandleTypeFlagsKHR handleTypes =
3448 export ? export->handleTypes : 0;
3449
3450 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3451 sizeof(*fence), 8,
3452 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3453
3454 if (!fence)
3455 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3456
3457 fence->fence_wsi = NULL;
3458 fence->submitted = false;
3459 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
3460 fence->temp_syncobj = 0;
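	/* Back the fence with a kernel syncobj when it may be shared
	 * externally (or the winsys requires it); otherwise a plain
	 * winsys fence is cheaper. */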
3461 if (device->always_use_syncobj || handleTypes) {
3462 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3463 if (ret) {
3464 vk_free2(&device->alloc, pAllocator, fence);
3465 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3466 }
3467 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3468 device->ws->signal_syncobj(device->ws, fence->syncobj);
3469 }
3470 fence->fence = NULL;
3471 } else {
3472 fence->fence = device->ws->create_fence();
3473 if (!fence->fence) {
3474 vk_free2(&device->alloc, pAllocator, fence);
3475 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3476 }
3477 fence->syncobj = 0;
3478 }
3479
3480 *pFence = radv_fence_to_handle(fence);
3481
3482 return VK_SUCCESS;
3483 }
3484
3485 void radv_DestroyFence(
3486 VkDevice _device,
3487 VkFence _fence,
3488 const VkAllocationCallbacks* pAllocator)
3489 {
3490 RADV_FROM_HANDLE(radv_device, device, _device);
3491 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3492
3493 if (!fence)
3494 return;
3495
3496 if (fence->temp_syncobj)
3497 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3498 if (fence->syncobj)
3499 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3500 if (fence->fence)
3501 device->ws->destroy_fence(fence->fence);
3502 if (fence->fence_wsi)
3503 fence->fence_wsi->destroy(fence->fence_wsi);
3504 vk_free2(&device->alloc, pAllocator, fence);
3505 }
3506
3507
3508 static uint64_t radv_get_current_time(void)
3509 {
3510 struct timespec tv;
3511 clock_gettime(CLOCK_MONOTONIC, &tv);
3512 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3513 }
3514
3515 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3516 {
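	/* Convert the relative timeout to an absolute one, clamping so
	 * that current_time + timeout cannot wrap around UINT64_MAX;
	 * e.g. a timeout of UINT64_MAX saturates rather than overflowing. */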
3517 uint64_t current_time = radv_get_current_time();
3518
3519 timeout = MIN2(UINT64_MAX - current_time, timeout);
3520
3521 return current_time + timeout;
3522 }
3523
3524
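/* Returns true when every fence is a plain, already-submitted winsys
 * fence, so the batched fences_wait() path can be used. */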
3525 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3526 {
3527 for (uint32_t i = 0; i < fenceCount; ++i) {
3528 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3529 if (fence->fence == NULL || fence->syncobj ||
3530 fence->temp_syncobj ||
3531 (!fence->signalled && !fence->submitted))
3532 return false;
3533 }
3534 return true;
3535 }
3536
3537 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3538 {
3539 for (uint32_t i = 0; i < fenceCount; ++i) {
3540 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3541 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3542 return false;
3543 }
3544 return true;
3545 }
3546
3547 VkResult radv_WaitForFences(
3548 VkDevice _device,
3549 uint32_t fenceCount,
3550 const VkFence* pFences,
3551 VkBool32 waitAll,
3552 uint64_t timeout)
3553 {
3554 RADV_FROM_HANDLE(radv_device, device, _device);
3555 timeout = radv_get_absolute_timeout(timeout);
3556
3557 if (device->always_use_syncobj &&
3558 radv_all_fences_syncobj(fenceCount, pFences))
3559 {
3560 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3561 if (!handles)
3562 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3563
3564 for (uint32_t i = 0; i < fenceCount; ++i) {
3565 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3566 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3567 }
3568
3569 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3570
3571 free(handles);
3572 return success ? VK_SUCCESS : VK_TIMEOUT;
3573 }
3574
3575 if (!waitAll && fenceCount > 1) {
3576 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3577 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3578 uint32_t wait_count = 0;
3579 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3580 if (!fences)
3581 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3582
3583 for (uint32_t i = 0; i < fenceCount; ++i) {
3584 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3585
3586 if (fence->signalled) {
3587 free(fences);
3588 return VK_SUCCESS;
3589 }
3590
3591 fences[wait_count++] = fence->fence;
3592 }
3593
3594 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3595 waitAll, timeout - radv_get_current_time());
3596
3597 free(fences);
3598 return success ? VK_SUCCESS : VK_TIMEOUT;
3599 }
3600
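		/* Fallback: poll the status of each fence until one signals
		 * or the absolute timeout expires. */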
3601 while(radv_get_current_time() <= timeout) {
3602 for (uint32_t i = 0; i < fenceCount; ++i) {
3603 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3604 return VK_SUCCESS;
3605 }
3606 }
3607 return VK_TIMEOUT;
3608 }
3609
3610 for (uint32_t i = 0; i < fenceCount; ++i) {
3611 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3612 bool expired = false;
3613
3614 if (fence->temp_syncobj) {
3615 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3616 return VK_TIMEOUT;
3617 continue;
3618 }
3619
3620 if (fence->syncobj) {
3621 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3622 return VK_TIMEOUT;
3623 continue;
3624 }
3625
3626 if (fence->signalled)
3627 continue;
3628
3629 if (fence->fence) {
3630 if (!fence->submitted) {
3631 while(radv_get_current_time() <= timeout &&
3632 !fence->submitted)
3633 /* Do nothing */;
3634
3635 if (!fence->submitted)
3636 return VK_TIMEOUT;
3637
3638 				/* Recheck the signalled flag, as it may have been
3639 				 * set by the submitting thread in the meantime. */
3640
3641 if (fence->signalled)
3642 continue;
3643 }
3644
3645 expired = device->ws->fence_wait(device->ws,
3646 fence->fence,
3647 true, timeout);
3648 if (!expired)
3649 return VK_TIMEOUT;
3650 }
3651
3652 if (fence->fence_wsi) {
3653 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
3654 if (result != VK_SUCCESS)
3655 return result;
3656 }
3657
3658 fence->signalled = true;
3659 }
3660
3661 return VK_SUCCESS;
3662 }
3663
3664 VkResult radv_ResetFences(VkDevice _device,
3665 uint32_t fenceCount,
3666 const VkFence *pFences)
3667 {
3668 RADV_FROM_HANDLE(radv_device, device, _device);
3669
3670 for (unsigned i = 0; i < fenceCount; ++i) {
3671 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3672 fence->submitted = fence->signalled = false;
3673
3674 /* Per spec, we first restore the permanent payload, and then reset, so
3675 * having a temp syncobj should not skip resetting the permanent syncobj. */
3676 if (fence->temp_syncobj) {
3677 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3678 fence->temp_syncobj = 0;
3679 }
3680
3681 if (fence->syncobj) {
3682 device->ws->reset_syncobj(device->ws, fence->syncobj);
3683 }
3684 }
3685
3686 return VK_SUCCESS;
3687 }
3688
3689 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3690 {
3691 RADV_FROM_HANDLE(radv_device, device, _device);
3692 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3693
3694 if (fence->temp_syncobj) {
3695 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3696 return success ? VK_SUCCESS : VK_NOT_READY;
3697 }
3698
3699 if (fence->syncobj) {
3700 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3701 return success ? VK_SUCCESS : VK_NOT_READY;
3702 }
3703
3704 if (fence->signalled)
3705 return VK_SUCCESS;
3706 if (!fence->submitted)
3707 return VK_NOT_READY;
3708 if (fence->fence) {
3709 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3710 return VK_NOT_READY;
3711 }
3712 if (fence->fence_wsi) {
3713 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
3714
3715 if (result != VK_SUCCESS) {
3716 if (result == VK_TIMEOUT)
3717 return VK_NOT_READY;
3718 return result;
3719 }
3720 }
3721 return VK_SUCCESS;
3722 }
3723
3724
3725 // Queue semaphore functions
3726
3727 VkResult radv_CreateSemaphore(
3728 VkDevice _device,
3729 const VkSemaphoreCreateInfo* pCreateInfo,
3730 const VkAllocationCallbacks* pAllocator,
3731 VkSemaphore* pSemaphore)
3732 {
3733 RADV_FROM_HANDLE(radv_device, device, _device);
3734 const VkExportSemaphoreCreateInfoKHR *export =
3735 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3736 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3737 export ? export->handleTypes : 0;
3738
3739 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3740 sizeof(*sem), 8,
3741 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3742 if (!sem)
3743 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3744
3745 sem->temp_syncobj = 0;
3746 	/* Create a syncobj if this semaphore may be exported or the device always prefers syncobjs. */
3747 if (device->always_use_syncobj || handleTypes) {
3748 assert (device->physical_device->rad_info.has_syncobj);
3749 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3750 if (ret) {
3751 vk_free2(&device->alloc, pAllocator, sem);
3752 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3753 }
3754 sem->sem = NULL;
3755 } else {
3756 sem->sem = device->ws->create_sem(device->ws);
3757 if (!sem->sem) {
3758 vk_free2(&device->alloc, pAllocator, sem);
3759 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3760 }
3761 sem->syncobj = 0;
3762 }
3763
3764 *pSemaphore = radv_semaphore_to_handle(sem);
3765 return VK_SUCCESS;
3766 }
3767
3768 void radv_DestroySemaphore(
3769 VkDevice _device,
3770 VkSemaphore _semaphore,
3771 const VkAllocationCallbacks* pAllocator)
3772 {
3773 RADV_FROM_HANDLE(radv_device, device, _device);
3774 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3775 if (!_semaphore)
3776 return;
3777
3778 if (sem->syncobj)
3779 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3780 else
3781 device->ws->destroy_sem(sem->sem);
3782 vk_free2(&device->alloc, pAllocator, sem);
3783 }
3784
3785 VkResult radv_CreateEvent(
3786 VkDevice _device,
3787 const VkEventCreateInfo* pCreateInfo,
3788 const VkAllocationCallbacks* pAllocator,
3789 VkEvent* pEvent)
3790 {
3791 RADV_FROM_HANDLE(radv_device, device, _device);
3792 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3793 sizeof(*event), 8,
3794 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3795
3796 if (!event)
3797 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3798
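	/* Events are backed by a single uncached 64-bit word in GTT that
	 * both the CPU and the GPU can access directly. */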
3799 event->bo = device->ws->buffer_create(device->ws, 8, 8,
3800 RADEON_DOMAIN_GTT,
3801 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3802 if (!event->bo) {
3803 vk_free2(&device->alloc, pAllocator, event);
3804 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3805 }
3806
3807 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3808
3809 *pEvent = radv_event_to_handle(event);
3810
3811 return VK_SUCCESS;
3812 }
3813
3814 void radv_DestroyEvent(
3815 VkDevice _device,
3816 VkEvent _event,
3817 const VkAllocationCallbacks* pAllocator)
3818 {
3819 RADV_FROM_HANDLE(radv_device, device, _device);
3820 RADV_FROM_HANDLE(radv_event, event, _event);
3821
3822 if (!event)
3823 return;
3824 device->ws->buffer_destroy(event->bo);
3825 vk_free2(&device->alloc, pAllocator, event);
3826 }
3827
3828 VkResult radv_GetEventStatus(
3829 VkDevice _device,
3830 VkEvent _event)
3831 {
3832 RADV_FROM_HANDLE(radv_event, event, _event);
3833
3834 if (*event->map == 1)
3835 return VK_EVENT_SET;
3836 return VK_EVENT_RESET;
3837 }
3838
3839 VkResult radv_SetEvent(
3840 VkDevice _device,
3841 VkEvent _event)
3842 {
3843 RADV_FROM_HANDLE(radv_event, event, _event);
3844 *event->map = 1;
3845
3846 return VK_SUCCESS;
3847 }
3848
3849 VkResult radv_ResetEvent(
3850 VkDevice _device,
3851 VkEvent _event)
3852 {
3853 RADV_FROM_HANDLE(radv_event, event, _event);
3854 *event->map = 0;
3855
3856 return VK_SUCCESS;
3857 }
3858
3859 VkResult radv_CreateBuffer(
3860 VkDevice _device,
3861 const VkBufferCreateInfo* pCreateInfo,
3862 const VkAllocationCallbacks* pAllocator,
3863 VkBuffer* pBuffer)
3864 {
3865 RADV_FROM_HANDLE(radv_device, device, _device);
3866 struct radv_buffer *buffer;
3867
3868 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3869
3870 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3871 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3872 if (buffer == NULL)
3873 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3874
3875 buffer->size = pCreateInfo->size;
3876 buffer->usage = pCreateInfo->usage;
3877 buffer->bo = NULL;
3878 buffer->offset = 0;
3879 buffer->flags = pCreateInfo->flags;
3880
3881 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3882 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3883
3884 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3885 buffer->bo = device->ws->buffer_create(device->ws,
3886 align64(buffer->size, 4096),
3887 4096, 0, RADEON_FLAG_VIRTUAL);
3888 if (!buffer->bo) {
3889 vk_free2(&device->alloc, pAllocator, buffer);
3890 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3891 }
3892 }
3893
3894 *pBuffer = radv_buffer_to_handle(buffer);
3895
3896 return VK_SUCCESS;
3897 }
3898
3899 void radv_DestroyBuffer(
3900 VkDevice _device,
3901 VkBuffer _buffer,
3902 const VkAllocationCallbacks* pAllocator)
3903 {
3904 RADV_FROM_HANDLE(radv_device, device, _device);
3905 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3906
3907 if (!buffer)
3908 return;
3909
3910 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3911 device->ws->buffer_destroy(buffer->bo);
3912
3913 vk_free2(&device->alloc, pAllocator, buffer);
3914 }
3915
3916 static inline unsigned
3917 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3918 {
3919 if (stencil)
3920 return image->surface.u.legacy.stencil_tiling_index[level];
3921 else
3922 return image->surface.u.legacy.tiling_index[level];
3923 }
3924
3925 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3926 {
3927 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3928 }
3929
3930 static uint32_t
3931 radv_init_dcc_control_reg(struct radv_device *device,
3932 struct radv_image_view *iview)
3933 {
3934 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3935 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3936 unsigned max_compressed_block_size;
3937 unsigned independent_64b_blocks;
3938
3939 if (!radv_image_has_dcc(iview->image))
3940 return 0;
3941
3942 if (iview->image->info.samples > 1) {
3943 if (iview->image->surface.bpe == 1)
3944 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3945 else if (iview->image->surface.bpe == 2)
3946 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3947 }
3948
3949 if (!device->physical_device->rad_info.has_dedicated_vram) {
3950 /* amdvlk: [min-compressed-block-size] should be set to 32 for
3951 * dGPU and 64 for APU because all of our APUs to date use
3952 * DIMMs which have a request granularity size of 64B while all
3953 * other chips have a 32B request size.
3954 */
3955 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3956 }
3957
3958 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
3959 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3960 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3961 /* If this DCC image is potentially going to be used in texture
3962 * fetches, we need some special settings.
3963 */
3964 independent_64b_blocks = 1;
3965 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3966 } else {
3967 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
3968 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
3969 * big as possible for better compression state.
3970 */
3971 independent_64b_blocks = 0;
3972 max_compressed_block_size = max_uncompressed_block_size;
3973 }
3974
3975 return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3976 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3977 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3978 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3979 }
3980
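/* Fill the CB_* register values needed to bind this image view as a
 * color attachment. */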
3981 static void
3982 radv_initialise_color_surface(struct radv_device *device,
3983 struct radv_color_buffer_info *cb,
3984 struct radv_image_view *iview)
3985 {
3986 const struct vk_format_description *desc;
3987 unsigned ntype, format, swap, endian;
3988 unsigned blend_clamp = 0, blend_bypass = 0;
3989 uint64_t va;
3990 const struct radeon_surf *surf = &iview->image->surface;
3991
3992 desc = vk_format_description(iview->vk_format);
3993
3994 memset(cb, 0, sizeof(*cb));
3995
3996 /* Intensity is implemented as Red, so treat it that way. */
3997 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3998
3999 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4000
4001 cb->cb_color_base = va >> 8;
4002
4003 if (device->physical_device->rad_info.chip_class >= GFX9) {
4004 struct gfx9_surf_meta_flags meta;
4005 if (iview->image->dcc_offset)
4006 meta = iview->image->surface.u.gfx9.dcc;
4007 else
4008 meta = iview->image->surface.u.gfx9.cmask;
4009
4010 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
4011 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
4012 S_028C74_RB_ALIGNED(meta.rb_aligned) |
4013 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
4014
4015 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
4016 cb->cb_color_base |= iview->image->surface.tile_swizzle;
4017 } else {
4018 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
4019 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
4020
4021 cb->cb_color_base += level_info->offset >> 8;
4022 if (level_info->mode == RADEON_SURF_MODE_2D)
4023 cb->cb_color_base |= iview->image->surface.tile_swizzle;
4024
4025 pitch_tile_max = level_info->nblk_x / 8 - 1;
4026 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
4027 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
4028
4029 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
4030 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
4031 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
4032
4033 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
4034
4035 if (radv_image_has_fmask(iview->image)) {
4036 if (device->physical_device->rad_info.chip_class >= CIK)
4037 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
4038 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
4039 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
4040 } else {
4041 /* This must be set for fast clear to work without FMASK. */
4042 if (device->physical_device->rad_info.chip_class >= CIK)
4043 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
4044 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
4045 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
4046 }
4047 }
4048
4049 /* CMASK variables */
4050 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4051 va += iview->image->cmask.offset;
4052 cb->cb_color_cmask = va >> 8;
4053
4054 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4055 va += iview->image->dcc_offset;
4056 cb->cb_dcc_base = va >> 8;
4057 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
4058
4059 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4060 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
4061 S_028C6C_SLICE_MAX(max_slice);
4062
4063 if (iview->image->info.samples > 1) {
4064 unsigned log_samples = util_logbase2(iview->image->info.samples);
4065
4066 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
4067 S_028C74_NUM_FRAGMENTS(log_samples);
4068 }
4069
4070 if (radv_image_has_fmask(iview->image)) {
4071 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
4072 cb->cb_color_fmask = va >> 8;
4073 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
4074 } else {
4075 cb->cb_color_fmask = cb->cb_color_base;
4076 }
4077
4078 ntype = radv_translate_color_numformat(iview->vk_format,
4079 desc,
4080 vk_format_get_first_non_void_channel(iview->vk_format));
4081 format = radv_translate_colorformat(iview->vk_format);
4082 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
4083 radv_finishme("Illegal color\n");
4084 swap = radv_translate_colorswap(iview->vk_format, FALSE);
4085 endian = radv_colorformat_endian_swap(format);
4086
4087 /* blend clamp should be set for all NORM/SRGB types */
4088 if (ntype == V_028C70_NUMBER_UNORM ||
4089 ntype == V_028C70_NUMBER_SNORM ||
4090 ntype == V_028C70_NUMBER_SRGB)
4091 blend_clamp = 1;
4092
4093 /* set blend bypass according to docs if SINT/UINT or
4094 8/24 COLOR variants */
4095 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
4096 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
4097 format == V_028C70_COLOR_X24_8_32_FLOAT) {
4098 blend_clamp = 0;
4099 blend_bypass = 1;
4100 }
4101 #if 0
4102 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
4103 (format == V_028C70_COLOR_8 ||
4104 format == V_028C70_COLOR_8_8 ||
4105 format == V_028C70_COLOR_8_8_8_8))
4106 ->color_is_int8 = true;
4107 #endif
4108 cb->cb_color_info = S_028C70_FORMAT(format) |
4109 S_028C70_COMP_SWAP(swap) |
4110 S_028C70_BLEND_CLAMP(blend_clamp) |
4111 S_028C70_BLEND_BYPASS(blend_bypass) |
4112 S_028C70_SIMPLE_FLOAT(1) |
4113 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
4114 ntype != V_028C70_NUMBER_SNORM &&
4115 ntype != V_028C70_NUMBER_SRGB &&
4116 format != V_028C70_COLOR_8_24 &&
4117 format != V_028C70_COLOR_24_8) |
4118 S_028C70_NUMBER_TYPE(ntype) |
4119 S_028C70_ENDIAN(endian);
4120 if (radv_image_has_fmask(iview->image)) {
4121 cb->cb_color_info |= S_028C70_COMPRESSION(1);
4122 if (device->physical_device->rad_info.chip_class == SI) {
4123 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
4124 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
4125 }
4126 }
4127
4128 if (radv_image_has_cmask(iview->image) &&
4129 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
4130 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
4131
4132 if (radv_dcc_enabled(iview->image, iview->base_mip))
4133 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
4134
4135 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
4136
4137 /* This must be set for fast clear to work without FMASK. */
4138 if (!radv_image_has_fmask(iview->image) &&
4139 device->physical_device->rad_info.chip_class == SI) {
4140 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
4141 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
4142 }
4143
4144 if (device->physical_device->rad_info.chip_class >= GFX9) {
4145 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
4146 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
4147
4148 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
4149 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
4150 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
4151 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
4152 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
4153 S_028C68_MAX_MIP(iview->image->info.levels - 1);
4154 }
4155 }
4156
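/* Compute the DECOMPRESS_ON_N_ZPLANES value used with TC-compatible
 * HTILE; the limits differ per generation and sample count. */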
4157 static unsigned
4158 radv_calc_decompress_on_z_planes(struct radv_device *device,
4159 struct radv_image_view *iview)
4160 {
4161 unsigned max_zplanes = 0;
4162
4163 assert(radv_image_is_tc_compat_htile(iview->image));
4164
4165 if (device->physical_device->rad_info.chip_class >= GFX9) {
4166 /* Default value for 32-bit depth surfaces. */
4167 max_zplanes = 4;
4168
4169 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
4170 iview->image->info.samples > 1)
4171 max_zplanes = 2;
4172
4173 max_zplanes = max_zplanes + 1;
4174 } else {
4175 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
4176 /* Do not enable Z plane compression for 16-bit depth
4177 			 * surfaces because it isn't supported on GFX8. Only
4178 			 * 32-bit depth surfaces are supported by the hardware.
4179 			 * This allows us to maintain shader compatibility and to
4180 * reduce the number of depth decompressions.
4181 */
4182 max_zplanes = 1;
4183 } else {
4184 if (iview->image->info.samples <= 1)
4185 max_zplanes = 5;
4186 else if (iview->image->info.samples <= 4)
4187 max_zplanes = 3;
4188 else
4189 max_zplanes = 2;
4190 }
4191 }
4192
4193 return max_zplanes;
4194 }
4195
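/* Fill the DB_* register values needed to bind this image view as a
 * depth/stencil attachment. */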
4196 static void
4197 radv_initialise_ds_surface(struct radv_device *device,
4198 struct radv_ds_buffer_info *ds,
4199 struct radv_image_view *iview)
4200 {
4201 unsigned level = iview->base_mip;
4202 unsigned format, stencil_format;
4203 uint64_t va, s_offs, z_offs;
4204 bool stencil_only = false;
4205 memset(ds, 0, sizeof(*ds));
4206 switch (iview->image->vk_format) {
4207 case VK_FORMAT_D24_UNORM_S8_UINT:
4208 case VK_FORMAT_X8_D24_UNORM_PACK32:
4209 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
4210 ds->offset_scale = 2.0f;
4211 break;
4212 case VK_FORMAT_D16_UNORM:
4213 case VK_FORMAT_D16_UNORM_S8_UINT:
4214 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
4215 ds->offset_scale = 4.0f;
4216 break;
4217 case VK_FORMAT_D32_SFLOAT:
4218 case VK_FORMAT_D32_SFLOAT_S8_UINT:
4219 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
4220 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
4221 ds->offset_scale = 1.0f;
4222 break;
4223 case VK_FORMAT_S8_UINT:
4224 stencil_only = true;
4225 break;
4226 default:
4227 break;
4228 }
4229
4230 format = radv_translate_dbformat(iview->image->vk_format);
4231 stencil_format = iview->image->surface.has_stencil ?
4232 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
4233
4234 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4235 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
4236 S_028008_SLICE_MAX(max_slice);
4237
4238 ds->db_htile_data_base = 0;
4239 ds->db_htile_surface = 0;
4240
4241 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4242 s_offs = z_offs = va;
4243
4244 if (device->physical_device->rad_info.chip_class >= GFX9) {
4245 assert(iview->image->surface.u.gfx9.surf_offset == 0);
4246 s_offs += iview->image->surface.u.gfx9.stencil_offset;
4247
4248 ds->db_z_info = S_028038_FORMAT(format) |
4249 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
4250 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
4251 S_028038_MAXMIP(iview->image->info.levels - 1) |
4252 S_028038_ZRANGE_PRECISION(1);
4253 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
4254 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
4255
4256 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
4257 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
4258 ds->db_depth_view |= S_028008_MIPID(level);
4259
4260 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
4261 S_02801C_Y_MAX(iview->image->info.height - 1);
4262
4263 if (radv_htile_enabled(iview->image, level)) {
4264 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
4265
4266 if (radv_image_is_tc_compat_htile(iview->image)) {
4267 unsigned max_zplanes =
4268 radv_calc_decompress_on_z_planes(device, iview);
4269
4270 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
4271 S_028038_ITERATE_FLUSH(1);
4272 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
4273 }
4274
4275 if (!iview->image->surface.has_stencil)
4276 /* Use all of the htile_buffer for depth if there's no stencil. */
4277 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
4278 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4279 iview->image->htile_offset;
4280 ds->db_htile_data_base = va >> 8;
4281 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
4282 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
4283 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
4284 }
4285 } else {
4286 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
4287
4288 if (stencil_only)
4289 level_info = &iview->image->surface.u.legacy.stencil_level[level];
4290
4291 z_offs += iview->image->surface.u.legacy.level[level].offset;
4292 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
4293
4294 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
4295 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
4296 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
4297
4298 if (iview->image->info.samples > 1)
4299 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
4300
4301 if (device->physical_device->rad_info.chip_class >= CIK) {
4302 struct radeon_info *info = &device->physical_device->rad_info;
4303 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
4304 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
4305 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
4306 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
4307 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
4308 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
4309
4310 if (stencil_only)
4311 tile_mode = stencil_tile_mode;
4312
4313 ds->db_depth_info |=
4314 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
4315 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
4316 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
4317 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
4318 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
4319 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
4320 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
4321 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
4322 } else {
4323 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
4324 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4325 tile_mode_index = si_tile_mode_index(iview->image, level, true);
4326 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
4327 if (stencil_only)
4328 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4329 }
4330
4331 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
4332 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
4333 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
4334
4335 if (radv_htile_enabled(iview->image, level)) {
4336 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
4337
4338 if (!iview->image->surface.has_stencil &&
4339 !radv_image_is_tc_compat_htile(iview->image))
4340 /* Use all of the htile_buffer for depth if there's no stencil. */
4341 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
4342
4343 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4344 iview->image->htile_offset;
4345 ds->db_htile_data_base = va >> 8;
4346 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
4347
4348 if (radv_image_is_tc_compat_htile(iview->image)) {
4349 unsigned max_zplanes =
4350 radv_calc_decompress_on_z_planes(device, iview);
4351
4352 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4353 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4354 }
4355 }
4356 }
4357
4358 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4359 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4360 }
4361
4362 VkResult radv_CreateFramebuffer(
4363 VkDevice _device,
4364 const VkFramebufferCreateInfo* pCreateInfo,
4365 const VkAllocationCallbacks* pAllocator,
4366 VkFramebuffer* pFramebuffer)
4367 {
4368 RADV_FROM_HANDLE(radv_device, device, _device);
4369 struct radv_framebuffer *framebuffer;
4370
4371 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4372
4373 size_t size = sizeof(*framebuffer) +
4374 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
4375 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4376 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4377 if (framebuffer == NULL)
4378 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4379
4380 framebuffer->attachment_count = pCreateInfo->attachmentCount;
4381 framebuffer->width = pCreateInfo->width;
4382 framebuffer->height = pCreateInfo->height;
4383 framebuffer->layers = pCreateInfo->layers;
4384 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4385 VkImageView _iview = pCreateInfo->pAttachments[i];
4386 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4387 framebuffer->attachments[i].attachment = iview;
4388 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
4389 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
4390 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4391 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
4392 }
4393 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4394 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4395 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4396 }
4397
4398 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4399 return VK_SUCCESS;
4400 }
4401
4402 void radv_DestroyFramebuffer(
4403 VkDevice _device,
4404 VkFramebuffer _fb,
4405 const VkAllocationCallbacks* pAllocator)
4406 {
4407 RADV_FROM_HANDLE(radv_device, device, _device);
4408 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4409
4410 if (!fb)
4411 return;
4412 vk_free2(&device->alloc, pAllocator, fb);
4413 }
4414
4415 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4416 {
4417 switch (address_mode) {
4418 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4419 return V_008F30_SQ_TEX_WRAP;
4420 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4421 return V_008F30_SQ_TEX_MIRROR;
4422 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4423 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4424 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4425 return V_008F30_SQ_TEX_CLAMP_BORDER;
4426 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4427 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4428 default:
4429 unreachable("illegal tex wrap mode");
4430 break;
4431 }
4432 }
4433
4434 static unsigned
4435 radv_tex_compare(VkCompareOp op)
4436 {
4437 switch (op) {
4438 case VK_COMPARE_OP_NEVER:
4439 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4440 case VK_COMPARE_OP_LESS:
4441 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4442 case VK_COMPARE_OP_EQUAL:
4443 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4444 case VK_COMPARE_OP_LESS_OR_EQUAL:
4445 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4446 case VK_COMPARE_OP_GREATER:
4447 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4448 case VK_COMPARE_OP_NOT_EQUAL:
4449 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4450 case VK_COMPARE_OP_GREATER_OR_EQUAL:
4451 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4452 case VK_COMPARE_OP_ALWAYS:
4453 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4454 default:
4455 unreachable("illegal compare mode");
4456 break;
4457 }
4458 }
4459
4460 static unsigned
4461 radv_tex_filter(VkFilter filter, unsigned max_ansio)
4462 {
4463 switch (filter) {
4464 case VK_FILTER_NEAREST:
4465 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4466 V_008F38_SQ_TEX_XY_FILTER_POINT);
4467 case VK_FILTER_LINEAR:
4468 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4469 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4470 case VK_FILTER_CUBIC_IMG:
4471 default:
4472 		fprintf(stderr, "illegal texture filter\n");
4473 return 0;
4474 }
4475 }
4476
4477 static unsigned
4478 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4479 {
4480 switch (mode) {
4481 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4482 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4483 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4484 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4485 default:
4486 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4487 }
4488 }
4489
4490 static unsigned
4491 radv_tex_bordercolor(VkBorderColor bcolor)
4492 {
4493 switch (bcolor) {
4494 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4495 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4496 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4497 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4498 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4499 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4500 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
4501 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
4502 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
4503 default:
4504 break;
4505 }
4506 return 0;
4507 }
4508
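/* Convert the maximum anisotropy to the hardware's log2-style ratio
 * encoding: 1x -> 0, 2-3x -> 1, 4-7x -> 2, 8-15x -> 3, 16x+ -> 4. */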
4509 static unsigned
4510 radv_tex_aniso_filter(unsigned filter)
4511 {
4512 if (filter < 2)
4513 return 0;
4514 if (filter < 4)
4515 return 1;
4516 if (filter < 8)
4517 return 2;
4518 if (filter < 16)
4519 return 3;
4520 return 4;
4521 }
4522
4523 static unsigned
4524 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
4525 {
4526 switch (mode) {
4527 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
4528 return SQ_IMG_FILTER_MODE_BLEND;
4529 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
4530 return SQ_IMG_FILTER_MODE_MIN;
4531 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
4532 return SQ_IMG_FILTER_MODE_MAX;
4533 default:
4534 break;
4535 }
4536 return 0;
4537 }
4538
4539 static uint32_t
4540 radv_get_max_anisotropy(struct radv_device *device,
4541 const VkSamplerCreateInfo *pCreateInfo)
4542 {
4543 if (device->force_aniso >= 0)
4544 return device->force_aniso;
4545
4546 if (pCreateInfo->anisotropyEnable &&
4547 pCreateInfo->maxAnisotropy > 1.0f)
4548 return (uint32_t)pCreateInfo->maxAnisotropy;
4549
4550 return 0;
4551 }
4552
4553 static void
4554 radv_init_sampler(struct radv_device *device,
4555 struct radv_sampler *sampler,
4556 const VkSamplerCreateInfo *pCreateInfo)
4557 {
4558 uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
4559 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
4560 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
4561 unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND;
4562
4563 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
4564 vk_find_struct_const(pCreateInfo->pNext,
4565 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
4566 if (sampler_reduction)
4567 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
4568
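	/* Pack the four dwords of the hardware sampler descriptor. */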
4569 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
4570 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
4571 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
4572 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4573 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
4574 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
4575 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4576 S_008F30_ANISO_BIAS(max_aniso_ratio) |
4577 S_008F30_DISABLE_CUBE_WRAP(0) |
4578 S_008F30_COMPAT_MODE(is_vi) |
4579 S_008F30_FILTER_MODE(filter_mode));
4580 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
4581 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
4582 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4583 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
4584 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
4585 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
4586 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
4587 S_008F38_MIP_POINT_PRECLAMP(0) |
4588 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
4589 S_008F38_FILTER_PREC_FIX(1) |
4590 S_008F38_ANISO_OVERRIDE(is_vi));
4591 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4592 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4593 }
4594
4595 VkResult radv_CreateSampler(
4596 VkDevice _device,
4597 const VkSamplerCreateInfo* pCreateInfo,
4598 const VkAllocationCallbacks* pAllocator,
4599 VkSampler* pSampler)
4600 {
4601 RADV_FROM_HANDLE(radv_device, device, _device);
4602 struct radv_sampler *sampler;
4603
4604 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4605
4606 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4607 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4608 if (!sampler)
4609 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4610
4611 radv_init_sampler(device, sampler, pCreateInfo);
4612 *pSampler = radv_sampler_to_handle(sampler);
4613
4614 return VK_SUCCESS;
4615 }
4616
4617 void radv_DestroySampler(
4618 VkDevice _device,
4619 VkSampler _sampler,
4620 const VkAllocationCallbacks* pAllocator)
4621 {
4622 RADV_FROM_HANDLE(radv_device, device, _device);
4623 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4624
4625 if (!sampler)
4626 return;
4627 vk_free2(&device->alloc, pAllocator, sampler);
4628 }
4629
4630 /* vk_icd.h does not declare this function, so we declare it here to
4631 * suppress Wmissing-prototypes.
4632 */
4633 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4634 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4635
4636 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4637 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4638 {
4639 /* For the full details on loader interface versioning, see
4640 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4641 * What follows is a condensed summary, to help you navigate the large and
4642 * confusing official doc.
4643 *
4644 * - Loader interface v0 is incompatible with later versions. We don't
4645 * support it.
4646 *
4647 * - In loader interface v1:
4648 * - The first ICD entrypoint called by the loader is
4649 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4650 * entrypoint.
4651 * - The ICD must statically expose no other Vulkan symbol unless it is
4652 * linked with -Bsymbolic.
4653 * - Each dispatchable Vulkan handle created by the ICD must be
4654 * a pointer to a struct whose first member is VK_LOADER_DATA. The
4655 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4656 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4657 * vkDestroySurfaceKHR(). The ICD must be capable of working with
4658 * such loader-managed surfaces.
4659 *
4660 * - Loader interface v2 differs from v1 in:
4661 * - The first ICD entrypoint called by the loader is
4662 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4663 * statically expose this entrypoint.
4664 *
4665 * - Loader interface v3 differs from v2 in:
4666 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4667 	 *      vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
4668 * because the loader no longer does so.
4669 */
4670 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4671 return VK_SUCCESS;
4672 }
4673
4674 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4675 const VkMemoryGetFdInfoKHR *pGetFdInfo,
4676 int *pFD)
4677 {
4678 RADV_FROM_HANDLE(radv_device, device, _device);
4679 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4680
4681 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4682
4683 /* At the moment, we support only the below handle types. */
4684 assert(pGetFdInfo->handleType ==
4685 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4686 pGetFdInfo->handleType ==
4687 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4688
4689 bool ret = radv_get_memory_fd(device, memory, pFD);
4690 	if (!ret)
4691 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4692 return VK_SUCCESS;
4693 }
4694
4695 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4696 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4697 int fd,
4698 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4699 {
4700 RADV_FROM_HANDLE(radv_device, device, _device);
4701
4702 switch (handleType) {
4703 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
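		/* dma-bufs can be imported into any of our memory types. */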
4704 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4705 return VK_SUCCESS;
4706
4707 default:
4708 /* The valid usage section for this function says:
4709 *
4710 * "handleType must not be one of the handle types defined as
4711 * opaque."
4712 *
4713 * So opaque handle types fall into the default "unsupported" case.
4714 */
4715 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4716 }
4717 }
4718
4719 static VkResult radv_import_opaque_fd(struct radv_device *device,
4720 int fd,
4721 uint32_t *syncobj)
4722 {
4723 uint32_t syncobj_handle = 0;
4724 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4725 if (ret != 0)
4726 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4727
4728 if (*syncobj)
4729 device->ws->destroy_syncobj(device->ws, *syncobj);
4730
4731 *syncobj = syncobj_handle;
4732 close(fd);
4733
4734 return VK_SUCCESS;
4735 }
4736
4737 static VkResult radv_import_sync_fd(struct radv_device *device,
4738 int fd,
4739 uint32_t *syncobj)
4740 {
4741 	/* Create the syncobj locally first, so that on error we don't
4742 	 * leave the caller's syncobj in an undetermined state. */
4743 uint32_t syncobj_handle = *syncobj;
4744 if (!syncobj_handle) {
4745 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4746 if (ret) {
4747 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4748 }
4749 }
4750
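	/* Per the external semaphore/fence fd specs, fd == -1 denotes an
	 * already-signaled payload, so just signal the syncobj. */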
4751 if (fd == -1) {
4752 device->ws->signal_syncobj(device->ws, syncobj_handle);
4753 } else {
4754 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4755 if (ret != 0)
4756 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4757 }
4758
4759 *syncobj = syncobj_handle;
4760 if (fd != -1)
4761 close(fd);
4762
4763 return VK_SUCCESS;
4764 }
4765
4766 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4767 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4768 {
4769 RADV_FROM_HANDLE(radv_device, device, _device);
4770 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4771 uint32_t *syncobj_dst = NULL;
4772
4773 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4774 syncobj_dst = &sem->temp_syncobj;
4775 } else {
4776 syncobj_dst = &sem->syncobj;
4777 }
4778
4779 switch(pImportSemaphoreFdInfo->handleType) {
4780 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4781 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4782 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4783 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4784 default:
4785 unreachable("Unhandled semaphore handle type");
4786 }
4787 }
4788
4789 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4790 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4791 int *pFd)
4792 {
4793 RADV_FROM_HANDLE(radv_device, device, _device);
4794 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4795 int ret;
4796 uint32_t syncobj_handle;
4797
4798 if (sem->temp_syncobj)
4799 syncobj_handle = sem->temp_syncobj;
4800 else
4801 syncobj_handle = sem->syncobj;
4802
4803 switch(pGetFdInfo->handleType) {
4804 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4805 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4806 break;
4807 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4808 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4809 if (!ret) {
4810 if (sem->temp_syncobj) {
4811 				device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
4812 sem->temp_syncobj = 0;
4813 } else {
4814 device->ws->reset_syncobj(device->ws, syncobj_handle);
4815 }
4816 }
4817 break;
4818 default:
4819 unreachable("Unhandled semaphore handle type");
4820 }
4821
4822 if (ret)
4823 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4824 return VK_SUCCESS;
4825 }
4826
4827 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4828 VkPhysicalDevice physicalDevice,
4829 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4830 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
4831 {
4832 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4833
4834 	/* Require has_syncobj_wait_for_submit, as the syncobj signal ioctl was introduced at virtually the same time. */
4835 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4836 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4837 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4838 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4839 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4840 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4841 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4842 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4843 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4844 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4845 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4846 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4847 } else {
4848 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4849 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4850 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4851 }
4852 }

VkResult radv_ImportFenceFdKHR(VkDevice _device,
			       const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
	uint32_t *syncobj_dst = NULL;

	if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
		syncobj_dst = &fence->temp_syncobj;
	} else {
		syncobj_dst = &fence->syncobj;
	}

	switch(pImportFenceFdInfo->handleType) {
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
		return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
		return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
	default:
		unreachable("Unhandled fence handle type");
	}
}

VkResult radv_GetFenceFdKHR(VkDevice _device,
			    const VkFenceGetFdInfoKHR *pGetFdInfo,
			    int *pFd)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
	int ret;
	uint32_t syncobj_handle;

	if (fence->temp_syncobj)
		syncobj_handle = fence->temp_syncobj;
	else
		syncobj_handle = fence->syncobj;

	switch(pGetFdInfo->handleType) {
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
		break;
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
		if (!ret) {
			if (fence->temp_syncobj) {
				/* As with semaphores, temp_syncobj is a
				 * syncobj handle rather than an fd, so
				 * destroy it through the winsys instead of
				 * close()ing it. */
				device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
				fence->temp_syncobj = 0;
			} else {
				device->ws->reset_syncobj(device->ws, syncobj_handle);
			}
		}
		break;
	default:
		unreachable("Unhandled fence handle type");
	}

	if (ret)
		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
	return VK_SUCCESS;
}
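
/* Illustrative sketch, not driver code: a typical fence round-trip through
 * the entry point above.  "app_device", "queue", "submit_info" and "fence"
 * are hypothetical; the fence must be signaled or have a pending signal
 * operation before a sync_fd export is valid.
 */
#if 0
	vkQueueSubmit(queue, 1, &submit_info, fence);

	VkFenceGetFdInfoKHR get_info = {
		.sType = VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR,
		.fence = fence,
		.handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR,
	};
	int fd = -1;
	VkResult result = vkGetFenceFdKHR(app_device, &get_info, &fd);
	/* fd can now be handed to another API or process and waited on there. */
#endif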

void radv_GetPhysicalDeviceExternalFenceProperties(
	VkPhysicalDevice                            physicalDevice,
	const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
	VkExternalFencePropertiesKHR*               pExternalFenceProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
	    (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
	     pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
		pExternalFenceProperties->exportFromImportedHandleTypes =
			VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR |
			VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
		pExternalFenceProperties->compatibleHandleTypes =
			VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR |
			VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
		/* Use the fence feature bits here; the semaphore importable
		 * bit was a copy-paste slip (same value, wrong enum). */
		pExternalFenceProperties->externalFenceFeatures =
			VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
	} else {
		pExternalFenceProperties->exportFromImportedHandleTypes = 0;
		pExternalFenceProperties->compatibleHandleTypes = 0;
		pExternalFenceProperties->externalFenceFeatures = 0;
	}
}

VkResult
radv_CreateDebugReportCallbackEXT(VkInstance _instance,
				  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
				  const VkAllocationCallbacks* pAllocator,
				  VkDebugReportCallbackEXT* pCallback)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
					       pCreateInfo, pAllocator, &instance->alloc,
					       pCallback);
}

void
radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
				   VkDebugReportCallbackEXT _callback,
				   const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
					 _callback, pAllocator, &instance->alloc);
}

void
radv_DebugReportMessageEXT(VkInstance _instance,
			   VkDebugReportFlagsEXT flags,
			   VkDebugReportObjectTypeEXT objectType,
			   uint64_t object,
			   size_t location,
			   int32_t messageCode,
			   const char* pLayerPrefix,
			   const char* pMessage)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
			object, location, messageCode, pLayerPrefix, pMessage);
}
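
/* Illustrative sketch, not driver code: installing a callback that the three
 * thin wrappers above ultimately service.  This uses only the standard
 * VK_EXT_debug_report API; "app_instance" is a hypothetical VkInstance.
 */
#if 0
	static VKAPI_ATTR VkBool32 VKAPI_CALL
	debug_cb(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT type,
		 uint64_t object, size_t location, int32_t code,
		 const char *layer_prefix, const char *message, void *user_data)
	{
		fprintf(stderr, "[%s] %s\n", layer_prefix, message);
		return VK_FALSE; /* don't abort the call that triggered this */
	}

	VkDebugReportCallbackCreateInfoEXT create_info = {
		.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
		.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT |
			 VK_DEBUG_REPORT_WARNING_BIT_EXT,
		.pfnCallback = debug_cb,
	};
	VkDebugReportCallbackEXT callback;
	vkCreateDebugReportCallbackEXT(app_instance, &create_info, NULL, &callback);
#endif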

void
radv_GetDeviceGroupPeerMemoryFeatures(
	VkDevice                                    device,
	uint32_t                                    heapIndex,
	uint32_t                                    localDeviceIndex,
	uint32_t                                    remoteDeviceIndex,
	VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
{
	/* radv only exposes single-device groups, so "peer" memory is just
	 * local memory and supports every access type. */
	assert(localDeviceIndex == remoteDeviceIndex);

	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
	                       VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
	                       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
	                       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
	VK_TIME_DOMAIN_DEVICE_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
};

VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
	VkPhysicalDevice                             physicalDevice,
	uint32_t                                     *pTimeDomainCount,
	VkTimeDomainEXT                              *pTimeDomains)
{
	int d;
	VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);

	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
		vk_outarray_append(&out, i) {
			*i = radv_time_domains[d];
		}
	}

	return vk_outarray_status(&out);
}
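
/* Illustrative sketch, not driver code: the standard two-call enumeration
 * pattern against the function above.  "app_physical_device" is a
 * hypothetical VkPhysicalDevice.
 */
#if 0
	uint32_t count = 0;
	vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(app_physical_device,
						       &count, NULL);
	VkTimeDomainEXT domains[8];
	count = MIN2(count, 8);
	vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(app_physical_device,
						       &count, domains);
	/* For radv this yields DEVICE, CLOCK_MONOTONIC and
	 * CLOCK_MONOTONIC_RAW, per radv_time_domains above. */
#endif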

static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
	struct timespec current;
	int ret;

	ret = clock_gettime(clock_id, &current);
	/* CLOCK_MONOTONIC_RAW may be unavailable on older kernels; fall back
	 * to CLOCK_MONOTONIC rather than failing outright. */
	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
		ret = clock_gettime(CLOCK_MONOTONIC, &current);
	if (ret < 0)
		return 0;

	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult radv_GetCalibratedTimestampsEXT(
	VkDevice _device,
	uint32_t timestampCount,
	const VkCalibratedTimestampInfoEXT *pTimestampInfos,
	uint64_t *pTimestamps,
	uint64_t *pMaxDeviation)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
	int d;
	uint64_t begin, end;
	uint64_t max_clock_period = 0;

	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);

	for (d = 0; d < timestampCount; d++) {
		switch (pTimestampInfos[d].timeDomain) {
		case VK_TIME_DOMAIN_DEVICE_EXT:
			pTimestamps[d] = device->ws->query_value(device->ws,
								 RADEON_TIMESTAMP);
			/* clock_crystal_freq is in kHz, so the GPU tick
			 * period in ns is 1000000 / freq, rounded up. */
			uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
			max_clock_period = MAX2(max_clock_period, device_period);
			break;
		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
			max_clock_period = MAX2(max_clock_period, 1);
			break;
		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
			pTimestamps[d] = begin;
			break;
		default:
			pTimestamps[d] = 0;
			break;
		}
	}

	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);

	/*
	 * The maximum deviation is the sum of the interval over which we
	 * perform the sampling and the maximum period of any sampled
	 * clock. That's because the maximum skew between any two sampled
	 * clock edges is when the sampled clock with the largest period is
	 * sampled at the end of that period but right at the beginning of the
	 * sampling interval and some other clock is sampled right at the
	 * beginning of its sampling period and right at the end of the
	 * sampling interval. Let's assume the GPU has the longest clock
	 * period and that the application is sampling GPU and monotonic:
	 *
	 *                               s                 e
	 *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
	 *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
	 *
	 *                               g
	 *		  0         1         2         3
	 *	GPU       -----_____-----_____-----_____-----_____
	 *
	 *                                                m
	 *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
	 *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
	 *
	 *	Interval                     <----------------->
	 *	Deviation           <-------------------------->
	 *
	 *	s  = read(raw)       2
	 *	g  = read(GPU)       1
	 *	m  = read(monotonic) 2
	 *	e  = read(raw)       b
	 *
	 * We round the sample interval up by one tick to cover sampling error
	 * in the interval clock.
	 */

	uint64_t sample_interval = end - begin + 1;

	*pMaxDeviation = sample_interval + max_clock_period;

	return VK_SUCCESS;
}
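
/* Illustrative sketch, not driver code: correlating the GPU clock with
 * CLOCK_MONOTONIC through the entry point above.  "app_device" is a
 * hypothetical VkDevice.
 */
#if 0
	VkCalibratedTimestampInfoEXT infos[2] = {
		{ .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
		  .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT },
		{ .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
		  .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT },
	};
	uint64_t ts[2], max_deviation;
	vkGetCalibratedTimestampsEXT(app_device, 2, infos, ts, &max_deviation);
	/* ts[0] is in GPU ticks, ts[1] in nanoseconds; the two samples match
	 * to within max_deviation ns once ts[0] is converted using the
	 * device's timestampPeriod limit. */
#endif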