radv,vulkan: add a new x11 wsi drirc workaround for DOOM Eternal
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "dirent.h"
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <linux/audit.h>
32 #include <linux/bpf.h>
33 #include <linux/filter.h>
34 #include <linux/seccomp.h>
35 #include <linux/unistd.h>
36 #include <stdbool.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <sys/prctl.h>
41 #include <sys/wait.h>
42 #include <unistd.h>
43 #include <fcntl.h>
44
45 #include "radv_debug.h"
46 #include "radv_private.h"
47 #include "radv_shader.h"
48 #include "radv_cs.h"
49 #include "util/disk_cache.h"
50 #include "vk_util.h"
51 #include <xf86drm.h>
52 #include <amdgpu.h>
53 #include "drm-uapi/amdgpu_drm.h"
54 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
55 #include "winsys/null/radv_null_winsys_public.h"
56 #include "ac_llvm_util.h"
57 #include "vk_format.h"
58 #include "sid.h"
59 #include "git_sha1.h"
60 #include "util/build_id.h"
61 #include "util/debug.h"
62 #include "util/mesa-sha1.h"
63 #include "util/timespec.h"
64 #include "util/u_atomic.h"
65 #include "compiler/glsl_types.h"
66 #include "util/driconf.h"
67
68 static struct radv_timeline_point *
69 radv_timeline_find_point_at_least_locked(struct radv_device *device,
70 struct radv_timeline *timeline,
71 uint64_t p);
72
73 static struct radv_timeline_point *
74 radv_timeline_add_point_locked(struct radv_device *device,
75 struct radv_timeline *timeline,
76 uint64_t p);
77
78 static void
79 radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
80 struct list_head *processing_list);
81
82 static
83 void radv_destroy_semaphore_part(struct radv_device *device,
84 struct radv_semaphore_part *part);
85
86 static int
87 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
88 {
89 struct mesa_sha1 ctx;
90 unsigned char sha1[20];
91 unsigned ptr_size = sizeof(void*);
92
93 memset(uuid, 0, VK_UUID_SIZE);
94 _mesa_sha1_init(&ctx);
95
96 if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
97 !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
98 return -1;
99
100 _mesa_sha1_update(&ctx, &family, sizeof(family));
101 _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
102 _mesa_sha1_final(&ctx, sha1);
103
104 memcpy(uuid, sha1, VK_UUID_SIZE);
105 return 0;
106 }
107
/* Fill 'uuid' (VK_UUID_SIZE bytes) with the driver UUID shared by all
 * devices of this driver build; delegates to the common amd/common helper. */
static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
113
/* Fill 'uuid' (VK_UUID_SIZE bytes) with a per-device UUID derived from
 * the queried GPU info; delegates to the common amd/common helper. */
static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
119
/* Size in bytes of the CPU-visible portion of VRAM (the BAR aperture),
 * clamped to the total VRAM size. */
static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
	return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
}
125
/* Size in bytes of the VRAM that is NOT CPU-visible (total minus the
 * visible aperture).  Despite the name this is only the invisible part;
 * it is used as the size of the device-local-only heap. */
static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
	return device->rad_info.vram_size - radv_get_visible_vram_size(device);
}
131
/* Build the Vulkan memory heap and memory type tables for the device.
 *
 * Up to three heaps are exposed: non-CPU-visible VRAM, GTT (GPU-accessible
 * system memory) and the CPU-visible VRAM aperture.  Memory types are laid
 * over those heaps; device->memory_domains[] / device->memory_flags[] are
 * filled in lockstep (indexed by type) so the winsys knows how to allocate
 * each type.  Statement order matters: type indices are handed out
 * sequentially via type_count++.
 */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	/* Heap: VRAM the CPU cannot map (may be absent, e.g. small-BAR-less
	 * parts where all VRAM is visible). */
	if (vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	/* Heap: GTT (system memory accessible by the GPU). */
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	/* Heap: CPU-visible VRAM aperture. */
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	unsigned type_count = 0;

	/* Type: DEVICE_LOCAL only, no CPU access.  Prefer the invisible VRAM
	 * heap; fall back to the visible one when all VRAM is visible. */
	if (vram_index >= 0 || visible_vram_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
		device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
		};
	}

	/* Type: write-combined GTT (HOST_VISIBLE | HOST_COHERENT). */
	if (gart_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
		device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	/* Type: CPU-mappable VRAM (DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT). */
	if (visible_vram_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
		device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}

	/* Type: cached GTT (adds HOST_CACHED), suited for CPU readback. */
	if (gart_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
		device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;

	/* On GPUs that support L2-uncached access, duplicate the eligible
	 * types with DEVICE_COHERENT/DEVICE_UNCACHED (AMD extension) set and
	 * the matching uncached-VA winsys flag. */
	if (device->rad_info.has_l2_uncached) {
		for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
			VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

			if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
						       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
			    mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

				VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
					VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
					VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

				device->memory_domains[type_count] = device->memory_domains[i];
				device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
				device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
					.propertyFlags = property_flags,
					.heapIndex = mem_type.heapIndex,
				};
			}
		}
		device->memory_properties.memoryTypeCount = type_count;
	}
}
229
230 static const char *
231 radv_get_compiler_string(struct radv_physical_device *pdevice)
232 {
233 if (!pdevice->use_llvm) {
234 /* Some games like SotTR apply shader workarounds if the LLVM
235 * version is too old or if the LLVM version string is
236 * missing. This gives 2-5% performance with SotTR and ACO.
237 */
238 if (driQueryOptionb(&pdevice->instance->dri_options,
239 "radv_report_llvm9_version_string")) {
240 return "ACO/LLVM 9.0.1";
241 }
242
243 return "ACO";
244 }
245
246 return "LLVM " MESA_LLVM_VERSION_STRING;
247 }
248
/* Create and initialize a radv_physical_device for one DRM device.
 *
 * When 'drm_device' is NULL a null-winsys device is created instead
 * (compiler testing via RADV_FORCE_FAMILY); in that case local_fd stays -1.
 * Returns VK_ERROR_INCOMPATIBLE_DRIVER for nodes that are not amdgpu so the
 * caller can skip them.  On success *device_out owns fd/master_fd.
 */
static VkResult
radv_physical_device_try_create(struct radv_instance *instance,
				drmDevicePtr drm_device,
				struct radv_physical_device **device_out)
{
	VkResult result;
	int fd = -1;
	int master_fd = -1;

	if (drm_device) {
		const char *path = drm_device->nodes[DRM_NODE_RENDER];
		drmVersionPtr version;

		fd = open(path, O_RDWR | O_CLOEXEC);
		if (fd < 0) {
			if (instance->debug_flags & RADV_DEBUG_STARTUP)
				radv_logi("Could not open device '%s'", path);

			return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		}

		version = drmGetVersion(fd);
		if (!version) {
			close(fd);

			if (instance->debug_flags & RADV_DEBUG_STARTUP)
				radv_logi("Could not get the kernel driver version for device '%s'", path);

			return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
					 "failed to get version %s: %m", path);
		}

		/* Only render nodes driven by the amdgpu kernel driver are usable. */
		if (strcmp(version->name, "amdgpu")) {
			drmFreeVersion(version);
			close(fd);

			if (instance->debug_flags & RADV_DEBUG_STARTUP)
				radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

			/* NOTE(review): not wrapped in vk_error() unlike the
			 * other failure returns above — confirm intentional. */
			return VK_ERROR_INCOMPATIBLE_DRIVER;
		}
		drmFreeVersion(version);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Found compatible device '%s'.", path);
	}

	struct radv_physical_device *device =
		vk_zalloc2(&instance->alloc, NULL, sizeof(*device), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!device) {
		result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		goto fail_fd;
	}

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;

	/* Null winsys when no DRM device: lets the compiler be exercised
	 * without AMD hardware. */
	if (drm_device) {
		device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
						       instance->perftest_flags);
	} else {
		device->ws = radv_null_winsys_create();
	}

	if (!device->ws) {
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "failed to initialize winsys");
		goto fail_alloc;
	}

	/* VK_KHR_display needs the primary node; probe that acceleration
	 * works on it, otherwise fall back to having no master fd. */
	if (drm_device && instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;

	snprintf(device->name, sizeof(device->name),
		 "AMD RADV %s (%s)",
		 device->rad_info.name, radv_get_compiler_string(device));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail_wsi;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);

	/* The gpu id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	/* Conformance was only submitted for GFX8+ with LLVM at this point. */
	if (device->rad_info.chip_class < GFX8 || !device->use_llvm)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	device->out_of_order_rast_allowed = device->rad_info.has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	/* NGG on GFX10+, except Navi14 where it is disabled. */
	device->use_ngg = device->rad_info.chip_class >= GFX10 &&
			  device->rad_info.family != CHIP_NAVI14 &&
			  !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);

	/* TODO: Implement NGG GS with ACO. */
	device->use_ngg_gs = device->use_ngg && device->use_llvm;
	device->use_ngg_streamout = false;

	/* Determine the number of threads per wave for all stages. */
	device->cs_wave_size = 64;
	device->ps_wave_size = 64;
	device->ge_wave_size = 64;

	/* Wave32 is opt-in per stage via RADV_PERFTEST on GFX10+. */
	if (device->rad_info.chip_class >= GFX10) {
		if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
			device->cs_wave_size = 32;

		/* For pixel shaders, wave64 is recommanded. */
		if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
			device->ps_wave_size = 32;

		if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
			device->ge_wave_size = 32;
	}

	radv_physical_device_init_mem_types(device);

	radv_physical_device_get_supported_extensions(device,
						      &device->supported_extensions);

	if (drm_device)
		device->bus_info = *drm_device->businfo.pci;

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	/* The WSI is structured as a layer on top of the driver, so this has
	 * to be the last part of initialization (at least until we get other
	 * semi-layers).
	 */
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		vk_error(instance, result);
		goto fail_disk_cache;
	}

	*device_out = device;

	return VK_SUCCESS;

	/* Unwind in reverse order of acquisition; the labels fall through. */
fail_disk_cache:
	disk_cache_destroy(device->disk_cache);
fail_wsi:
	device->ws->destroy(device->ws);
fail_alloc:
	vk_free(&instance->alloc, device);
fail_fd:
	if (fd != -1)
		close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}
438
439 static void
440 radv_physical_device_destroy(struct radv_physical_device *device)
441 {
442 radv_finish_wsi(device);
443 device->ws->destroy(device->ws);
444 disk_cache_destroy(device->disk_cache);
445 close(device->local_fd);
446 if (device->master_fd != -1)
447 close(device->master_fd);
448 vk_free(&device->instance->alloc, device);
449 }
450
451 static void *
452 default_alloc_func(void *pUserData, size_t size, size_t align,
453 VkSystemAllocationScope allocationScope)
454 {
455 return malloc(size);
456 }
457
458 static void *
459 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
460 size_t align, VkSystemAllocationScope allocationScope)
461 {
462 return realloc(pOriginal, size);
463 }
464
/* Default VkAllocationCallbacks free hook: plain free. */
static void
default_free_func(void *pUserData, void *pMemory)
{
	(void)pUserData;
	free(pMemory);
}
470
/* Allocator used when the application passes no VkAllocationCallbacks;
 * simply forwards to malloc/realloc/free. */
static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};
477
/* Mapping of RADV_DEBUG environment-variable tokens to debug flags;
 * parsed by parse_debug_string() at instance creation.  The array is
 * NULL-terminated; radv_get_debug_option_name() indexes it by flag id. */
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{"nongg", RADV_DEBUG_NO_NGG},
	{"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
	{"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
	{"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
	{"llvm", RADV_DEBUG_LLVM},
	{NULL, 0}
};
508
/* Return the RADV_DEBUG token string for debug-flag index 'id'.
 * 'id' must be a valid index into radv_debug_options (excluding the
 * NULL terminator, hence the -1 in the assert). */
const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}
515
/* Mapping of RADV_PERFTEST environment-variable tokens to perftest flags;
 * NULL-terminated, parsed at instance creation. */
static const struct debug_control radv_perftest_options[] = {
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
	{"cswave32", RADV_PERFTEST_CS_WAVE_32},
	{"pswave32", RADV_PERFTEST_PS_WAVE_32},
	{"gewave32", RADV_PERFTEST_GE_WAVE_32},
	{"dfsm", RADV_PERFTEST_DFSM},
	{NULL, 0}
};
527
/* Return the RADV_PERFTEST token string for perftest-flag index 'id'.
 * 'id' must be a valid index into radv_perftest_options (excluding the
 * NULL terminator). */
const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}
534
535 static void
536 radv_handle_per_app_options(struct radv_instance *instance,
537 const VkApplicationInfo *info)
538 {
539 const char *name = info ? info->pApplicationName : NULL;
540 const char *engine_name = info ? info->pEngineName : NULL;
541
542 if (name) {
543 if (!strcmp(name, "DOOM_VFR")) {
544 /* Work around a Doom VFR game bug */
545 instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
546 } else if (!strcmp(name, "Fledge")) {
547 /*
548 * Zero VRAM for "The Surge 2"
549 *
550 * This avoid a hang when when rendering any level. Likely
551 * uninitialized data in an indirect draw.
552 */
553 instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
554 } else if (!strcmp(name, "No Man's Sky")) {
555 /* Work around a NMS game bug */
556 instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
557 } else if (!strcmp(name, "DOOMEternal")) {
558 /* Zero VRAM for Doom Eternal to fix rendering issues. */
559 instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
560 } else if (!strcmp(name, "Red Dead Redemption 2")) {
561 /* Work around a RDR2 game bug */
562 instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
563 }
564 }
565
566 if (engine_name) {
567 if (!strcmp(engine_name, "vkd3d")) {
568 /* Zero VRAM for all VKD3D (DX12->VK) games to fix
569 * rendering issues.
570 */
571 instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
572 }
573 }
574
575 instance->enable_mrt_output_nan_fixup =
576 driQueryOptionb(&instance->dri_options,
577 "radv_enable_mrt_output_nan_fixup");
578
579 if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
580 instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
581 }
582
/* drirc option schema for radv, built from driconf macros; consumed by
 * driParseOptionInfo() in radv_init_dri_options().  Defaults are the
 * string arguments given to each option macro. */
static const char radv_dri_options_xml[] =
DRI_CONF_BEGIN
	DRI_CONF_SECTION_PERFORMANCE
		DRI_CONF_ADAPTIVE_SYNC("true")
		DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
		DRI_CONF_VK_X11_STRICT_IMAGE_COUNT("false")
		DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT("false")
		DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING("false")
		DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP("false")
		DRI_CONF_RADV_NO_DYNAMIC_BOUNDS("false")
	DRI_CONF_SECTION_END

	DRI_CONF_SECTION_DEBUG
		DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST("false")
	DRI_CONF_SECTION_END
DRI_CONF_END;
599
/* Parse the drirc option schema and the user/system drirc config files
 * for the "radv" driver, keyed by the application's engine name/version.
 * Must run before any driQueryOptionb() lookups on instance->dri_options. */
static void radv_init_dri_options(struct radv_instance *instance)
{
	driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml);
	driParseConfigFiles(&instance->dri_options,
			    &instance->available_dri_options,
			    0, "radv", NULL,
			    instance->engineName,
			    instance->engineVersion);
}
609
610 VkResult radv_CreateInstance(
611 const VkInstanceCreateInfo* pCreateInfo,
612 const VkAllocationCallbacks* pAllocator,
613 VkInstance* pInstance)
614 {
615 struct radv_instance *instance;
616 VkResult result;
617
618 instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
619 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
620 if (!instance)
621 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
622
623 vk_object_base_init(NULL, &instance->base, VK_OBJECT_TYPE_INSTANCE);
624
625 if (pAllocator)
626 instance->alloc = *pAllocator;
627 else
628 instance->alloc = default_alloc;
629
630 if (pCreateInfo->pApplicationInfo) {
631 const VkApplicationInfo *app = pCreateInfo->pApplicationInfo;
632
633 instance->engineName =
634 vk_strdup(&instance->alloc, app->pEngineName,
635 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
636 instance->engineVersion = app->engineVersion;
637 instance->apiVersion = app->apiVersion;
638 }
639
640 if (instance->apiVersion == 0)
641 instance->apiVersion = VK_API_VERSION_1_0;
642
643 /* Get secure compile thread count. NOTE: We cap this at 32 */
644 #define MAX_SC_PROCS 32
645 char *num_sc_threads = getenv("RADV_SECURE_COMPILE_THREADS");
646 if (num_sc_threads)
647 instance->num_sc_threads = MIN2(strtoul(num_sc_threads, NULL, 10), MAX_SC_PROCS);
648
649 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
650 radv_debug_options);
651
652 /* Disable memory cache when secure compile is set */
653 if (radv_device_use_secure_compile(instance))
654 instance->debug_flags |= RADV_DEBUG_NO_MEMORY_CACHE;
655
656 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
657 radv_perftest_options);
658
659 if (instance->debug_flags & RADV_DEBUG_STARTUP)
660 radv_logi("Created an instance");
661
662 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
663 int idx;
664 for (idx = 0; idx < RADV_INSTANCE_EXTENSION_COUNT; idx++) {
665 if (!strcmp(pCreateInfo->ppEnabledExtensionNames[i],
666 radv_instance_extensions[idx].extensionName))
667 break;
668 }
669
670 if (idx >= RADV_INSTANCE_EXTENSION_COUNT ||
671 !radv_instance_extensions_supported.extensions[idx]) {
672 vk_free2(&default_alloc, pAllocator, instance);
673 return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
674 }
675
676 instance->enabled_extensions.extensions[idx] = true;
677 }
678
679 bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
680
681 for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
682 /* Vulkan requires that entrypoints for extensions which have
683 * not been enabled must not be advertised.
684 */
685 if (!unchecked &&
686 !radv_instance_entrypoint_is_enabled(i, instance->apiVersion,
687 &instance->enabled_extensions)) {
688 instance->dispatch.entrypoints[i] = NULL;
689 } else {
690 instance->dispatch.entrypoints[i] =
691 radv_instance_dispatch_table.entrypoints[i];
692 }
693 }
694
695 for (unsigned i = 0; i < ARRAY_SIZE(instance->physical_device_dispatch.entrypoints); i++) {
696 /* Vulkan requires that entrypoints for extensions which have
697 * not been enabled must not be advertised.
698 */
699 if (!unchecked &&
700 !radv_physical_device_entrypoint_is_enabled(i, instance->apiVersion,
701 &instance->enabled_extensions)) {
702 instance->physical_device_dispatch.entrypoints[i] = NULL;
703 } else {
704 instance->physical_device_dispatch.entrypoints[i] =
705 radv_physical_device_dispatch_table.entrypoints[i];
706 }
707 }
708
709 for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) {
710 /* Vulkan requires that entrypoints for extensions which have
711 * not been enabled must not be advertised.
712 */
713 if (!unchecked &&
714 !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
715 &instance->enabled_extensions, NULL)) {
716 instance->device_dispatch.entrypoints[i] = NULL;
717 } else {
718 instance->device_dispatch.entrypoints[i] =
719 radv_device_dispatch_table.entrypoints[i];
720 }
721 }
722
723 instance->physical_devices_enumerated = false;
724 list_inithead(&instance->physical_devices);
725
726 result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
727 if (result != VK_SUCCESS) {
728 vk_free2(&default_alloc, pAllocator, instance);
729 return vk_error(instance, result);
730 }
731
732 glsl_type_singleton_init_or_ref();
733
734 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
735
736 radv_init_dri_options(instance);
737 radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
738
739 *pInstance = radv_instance_to_handle(instance);
740
741 return VK_SUCCESS;
742 }
743
/* vkDestroyInstance entry point: tears down everything radv_CreateInstance
 * set up, in reverse order — physical devices, engineName, valgrind pool,
 * glsl type singleton ref, drirc caches, debug-report state, base object,
 * and finally the instance allocation itself.  NULL handle is a no-op. */
void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	list_for_each_entry_safe(struct radv_physical_device, pdevice,
				 &instance->physical_devices, link) {
		radv_physical_device_destroy(pdevice);
	}

	vk_free(&instance->alloc, instance->engineName);

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	glsl_type_singleton_decref();

	driDestroyOptionCache(&instance->dri_options);
	driDestroyOptionInfo(&instance->available_dri_options);

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_object_base_finish(&instance->base);
	vk_free(&instance->alloc, instance);
}
772
/* Populate instance->physical_devices on first call (idempotent; the
 * 'physical_devices_enumerated' flag makes later calls a no-op even if
 * this attempt fails, matching Vulkan's one-shot enumeration semantics).
 *
 * With RADV_FORCE_FAMILY set, a single null-winsys device is created so
 * the compiler can be tested without AMD hardware.  Otherwise all amdgpu
 * render nodes with the AMD PCI vendor id are probed; incompatible DRM
 * devices are skipped, other per-device errors abort the loop.
 */
static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
	if (instance->physical_devices_enumerated)
		return VK_SUCCESS;

	instance->physical_devices_enumerated = true;

	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_SUCCESS;
	int max_devices;

	if (getenv("RADV_FORCE_FAMILY")) {
		/* When RADV_FORCE_FAMILY is set, the driver creates a nul
		 * device that allows to test the compiler without having an
		 * AMDGPU instance.
		 */
		struct radv_physical_device *pdevice;

		result = radv_physical_device_try_create(instance, NULL, &pdevice);
		if (result != VK_SUCCESS)
			return result;

		list_addtail(&pdevice->link, &instance->physical_devices);
		return VK_SUCCESS;
	}

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	/* No nodes at all is not an error: report zero devices. */
	if (max_devices < 1)
		return vk_error(instance, VK_SUCCESS);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			struct radv_physical_device *pdevice;
			result = radv_physical_device_try_create(instance, devices[i],
								 &pdevice);
			/* Incompatible DRM device, skip. */
			if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
				result = VK_SUCCESS;
				continue;
			}

			/* Error creating the physical device, report the error. */
			if (result != VK_SUCCESS)
				break;

			list_addtail(&pdevice->link, &instance->physical_devices);
		}
	}
	drmFreeDevices(devices, max_devices);

	/* If we successfully enumerated any devices, call it success */
	return result;
}
835
/* vkEnumeratePhysicalDevices entry point: lazily enumerates devices, then
 * fills the caller's array via the outarray helpers (which implement the
 * standard Vulkan two-call count/fill idiom, returning VK_INCOMPLETE when
 * the provided array is too small). */
VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);

	VkResult result = radv_enumerate_physical_devices(instance);
	if (result != VK_SUCCESS)
		return result;

	list_for_each_entry(struct radv_physical_device, pdevice,
			    &instance->physical_devices, link) {
		vk_outarray_append(&out, i) {
			*i = radv_physical_device_to_handle(pdevice);
		}
	}

	return vk_outarray_status(&out);
}
857
/* vkEnumeratePhysicalDeviceGroups entry point: radv exposes each physical
 * device as its own single-member group (no multi-GPU linking), using the
 * same two-call outarray idiom as device enumeration. */
VkResult radv_EnumeratePhysicalDeviceGroups(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceGroupCount,
	VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
			 pPhysicalDeviceGroupCount);

	VkResult result = radv_enumerate_physical_devices(instance);
	if (result != VK_SUCCESS)
		return result;

	list_for_each_entry(struct radv_physical_device, pdevice,
			    &instance->physical_devices, link) {
		vk_outarray_append(&out, p) {
			p->physicalDeviceCount = 1;
			memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
			p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
			p->subsetAllocation = false;
		}
	}

	return vk_outarray_status(&out);
}
883
884 void radv_GetPhysicalDeviceFeatures(
885 VkPhysicalDevice physicalDevice,
886 VkPhysicalDeviceFeatures* pFeatures)
887 {
888 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
889 memset(pFeatures, 0, sizeof(*pFeatures));
890
891 *pFeatures = (VkPhysicalDeviceFeatures) {
892 .robustBufferAccess = true,
893 .fullDrawIndexUint32 = true,
894 .imageCubeArray = true,
895 .independentBlend = true,
896 .geometryShader = true,
897 .tessellationShader = true,
898 .sampleRateShading = true,
899 .dualSrcBlend = true,
900 .logicOp = true,
901 .multiDrawIndirect = true,
902 .drawIndirectFirstInstance = true,
903 .depthClamp = true,
904 .depthBiasClamp = true,
905 .fillModeNonSolid = true,
906 .depthBounds = true,
907 .wideLines = true,
908 .largePoints = true,
909 .alphaToOne = true,
910 .multiViewport = true,
911 .samplerAnisotropy = true,
912 .textureCompressionETC2 = radv_device_supports_etc(pdevice),
913 .textureCompressionASTC_LDR = false,
914 .textureCompressionBC = true,
915 .occlusionQueryPrecise = true,
916 .pipelineStatisticsQuery = true,
917 .vertexPipelineStoresAndAtomics = true,
918 .fragmentStoresAndAtomics = true,
919 .shaderTessellationAndGeometryPointSize = true,
920 .shaderImageGatherExtended = true,
921 .shaderStorageImageExtendedFormats = true,
922 .shaderStorageImageMultisample = true,
923 .shaderUniformBufferArrayDynamicIndexing = true,
924 .shaderSampledImageArrayDynamicIndexing = true,
925 .shaderStorageBufferArrayDynamicIndexing = true,
926 .shaderStorageImageArrayDynamicIndexing = true,
927 .shaderStorageImageReadWithoutFormat = true,
928 .shaderStorageImageWriteWithoutFormat = true,
929 .shaderClipDistance = true,
930 .shaderCullDistance = true,
931 .shaderFloat64 = true,
932 .shaderInt64 = true,
933 .shaderInt16 = true,
934 .sparseBinding = true,
935 .variableMultisampleRate = true,
936 .shaderResourceMinLod = true,
937 .inheritedQueries = true,
938 };
939 }
940
/* Fill the Vulkan 1.1 core feature struct.  Everything is supported except
 * protected memory; 16-bit storage I/O additionally requires packed 16-bit
 * math on the chip and (for LLVM) LLVM >= 9. */
static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
				      VkPhysicalDeviceVulkan11Features *f)
{
	assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

	f->storageBuffer16BitAccess            = true;
	f->uniformAndStorageBuffer16BitAccess  = true;
	f->storagePushConstant16               = true;
	f->storageInputOutput16                = pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
	f->multiview                           = true;
	f->multiviewGeometryShader             = true;
	f->multiviewTessellationShader         = true;
	f->variablePointersStorageBuffer       = true;
	f->variablePointers                    = true;
	f->protectedMemory                     = false;
	f->samplerYcbcrConversion              = true;
	f->shaderDrawParameters                = true;
}
960
961 static void
962 radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
963 VkPhysicalDeviceVulkan12Features *f)
964 {
965 assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
966
967 f->samplerMirrorClampToEdge = true;
968 f->drawIndirectCount = true;
969 f->storageBuffer8BitAccess = true;
970 f->uniformAndStorageBuffer8BitAccess = true;
971 f->storagePushConstant8 = true;
972 f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
973 f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
974 f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
975 f->shaderInt8 = true;
976
977 f->descriptorIndexing = true;
978 f->shaderInputAttachmentArrayDynamicIndexing = true;
979 f->shaderUniformTexelBufferArrayDynamicIndexing = true;
980 f->shaderStorageTexelBufferArrayDynamicIndexing = true;
981 f->shaderUniformBufferArrayNonUniformIndexing = true;
982 f->shaderSampledImageArrayNonUniformIndexing = true;
983 f->shaderStorageBufferArrayNonUniformIndexing = true;
984 f->shaderStorageImageArrayNonUniformIndexing = true;
985 f->shaderInputAttachmentArrayNonUniformIndexing = true;
986 f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
987 f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
988 f->descriptorBindingUniformBufferUpdateAfterBind = true;
989 f->descriptorBindingSampledImageUpdateAfterBind = true;
990 f->descriptorBindingStorageImageUpdateAfterBind = true;
991 f->descriptorBindingStorageBufferUpdateAfterBind = true;
992 f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
993 f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
994 f->descriptorBindingUpdateUnusedWhilePending = true;
995 f->descriptorBindingPartiallyBound = true;
996 f->descriptorBindingVariableDescriptorCount = true;
997 f->runtimeDescriptorArray = true;
998
999 f->samplerFilterMinmax = true;
1000 f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
1001 f->imagelessFramebuffer = true;
1002 f->uniformBufferStandardLayout = true;
1003 f->shaderSubgroupExtendedTypes = true;
1004 f->separateDepthStencilLayouts = true;
1005 f->hostQueryReset = true;
1006 f->timelineSemaphore = pdevice->rad_info.has_syncobj_wait_for_submit;
1007 f->bufferDeviceAddress = true;
1008 f->bufferDeviceAddressCaptureReplay = false;
1009 f->bufferDeviceAddressMultiDevice = false;
1010 f->vulkanMemoryModel = false;
1011 f->vulkanMemoryModelDeviceScope = false;
1012 f->vulkanMemoryModelAvailabilityVisibilityChains = false;
1013 f->shaderOutputViewportIndex = true;
1014 f->shaderOutputLayer = true;
1015 f->subgroupBroadcastDynamicId = true;
1016 }
1017
/* vkGetPhysicalDeviceFeatures2 entry point.
 *
 * Fills the core 1.0 features via radv_GetPhysicalDeviceFeatures(), then
 * walks the pFeatures->pNext chain and fills every recognized extension
 * feature struct. Features that were promoted to core 1.1/1.2 are copied
 * from the core_1_1/core_1_2 structs via CORE_FEATURE so the extension
 * structs and the core structs can never disagree. Unrecognized structs
 * are left untouched, as the spec requires. */
void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2                  *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

	VkPhysicalDeviceVulkan11Features core_1_1 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
	};
	radv_get_physical_device_features_1_1(pdevice, &core_1_1);

	VkPhysicalDeviceVulkan12Features core_1_2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
	};
	radv_get_physical_device_features_1_2(pdevice, &core_1_2);

/* Copy a promoted feature from the corresponding core struct; `features`
 * must be the local variable of the enclosing switch case. */
#define CORE_FEATURE(major, minor, feature) \
   features->feature = core_##major##_##minor.feature

	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
			VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
			CORE_FEATURE(1, 1, variablePointersStorageBuffer);
			CORE_FEATURE(1, 1, variablePointers);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
			VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
			CORE_FEATURE(1, 1, multiview);
			CORE_FEATURE(1, 1, multiviewGeometryShader);
			CORE_FEATURE(1, 1, multiviewTessellationShader);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
			VkPhysicalDeviceShaderDrawParametersFeatures *features =
			    (VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
			CORE_FEATURE(1, 1, shaderDrawParameters);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
			    (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			CORE_FEATURE(1, 1, protectedMemory);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
			    (VkPhysicalDevice16BitStorageFeatures*)ext;
			CORE_FEATURE(1, 1, storageBuffer16BitAccess);
			CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
			CORE_FEATURE(1, 1, storagePushConstant16);
			CORE_FEATURE(1, 1, storageInputOutput16);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
			    (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			CORE_FEATURE(1, 1, samplerYcbcrConversion);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
			VkPhysicalDeviceDescriptorIndexingFeatures *features =
				(VkPhysicalDeviceDescriptorIndexingFeatures*)ext;
			CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
			CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
			CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
			CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
			CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
			CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
			CORE_FEATURE(1, 2, runtimeDescriptorArray);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = true;
			features->vertexAttributeInstanceRateZeroDivisor = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
			features->transformFeedback = true;
			/* Multiple geometry streams are not supported with the
			 * NGG streamout path. */
			features->geometryStreams = !pdevice->use_ngg_streamout;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
			VkPhysicalDeviceScalarBlockLayoutFeatures *features =
				(VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
			CORE_FEATURE(1, 2, scalarBlockLayout);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
			features->memoryPriority = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
				(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
			features->bufferDeviceAddress = true;
			features->bufferDeviceAddressCaptureReplay = false;
			features->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
			VkPhysicalDeviceBufferDeviceAddressFeatures *features =
				(VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
			CORE_FEATURE(1, 2, bufferDeviceAddress);
			CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
			CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
			features->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
			VkPhysicalDeviceHostQueryResetFeatures *features =
				(VkPhysicalDeviceHostQueryResetFeatures *)ext;
			CORE_FEATURE(1, 2, hostQueryReset);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
			VkPhysicalDevice8BitStorageFeatures *features =
			    (VkPhysicalDevice8BitStorageFeatures *)ext;
			CORE_FEATURE(1, 2, storageBuffer8BitAccess);
			CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
			CORE_FEATURE(1, 2, storagePushConstant8);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
			VkPhysicalDeviceShaderFloat16Int8Features *features =
				(VkPhysicalDeviceShaderFloat16Int8Features*)ext;
			CORE_FEATURE(1, 2, shaderFloat16);
			CORE_FEATURE(1, 2, shaderInt8);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
			VkPhysicalDeviceShaderAtomicInt64Features *features =
				(VkPhysicalDeviceShaderAtomicInt64Features *)ext;
			CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
			CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
			VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
				(VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
			/* Demote needs LLVM >= 9 or the ACO compiler. */
			features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
				(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

			features->inlineUniformBlock = true;
			features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
				(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
			features->computeDerivativeGroupQuads = false;
			features->computeDerivativeGroupLinear = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
				(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
			features->ycbcrImageArrays = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
			VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
				(VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
			CORE_FEATURE(1, 2, uniformBufferStandardLayout);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
			VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
				(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
			features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
			VkPhysicalDeviceImagelessFramebufferFeatures *features =
				(VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
			CORE_FEATURE(1, 2, imagelessFramebuffer);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
			VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
				(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
			features->pipelineExecutableInfo = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
			VkPhysicalDeviceShaderClockFeaturesKHR *features =
				(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
			features->shaderSubgroupClock = true;
			features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
			VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
				(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
			features->texelBufferAlignment = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
			VkPhysicalDeviceTimelineSemaphoreFeatures *features =
				(VkPhysicalDeviceTimelineSemaphoreFeatures *) ext;
			CORE_FEATURE(1, 2, timelineSemaphore);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
			VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
				(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
			features->subgroupSizeControl = true;
			features->computeFullSubgroups = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
			VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
				(VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
			features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
			VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
				(VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
			CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
			VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
				(VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
			CORE_FEATURE(1, 2, separateDepthStencilLayouts);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
			radv_get_physical_device_features_1_1(pdevice, (void *)ext);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
			radv_get_physical_device_features_1_2(pdevice, (void *)ext);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
			VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
				(VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
			features->rectangularLines = false;
			features->bresenhamLines = true;
			features->smoothLines = false;
			features->stippledRectangularLines = false;
			features->stippledBresenhamLines = true;
			features->stippledSmoothLines = false;
			break;
		}
		case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
			/* NOTE(review): VkDeviceMemoryOverallocationCreateInfoAMD is a
			 * VkDeviceCreateInfo input struct and overallocationBehavior is an
			 * enum (VkMemoryOverallocationBehaviorAMD), not a VkBool32 —
			 * writing `true` here looks suspicious; confirm against the
			 * VK_AMD_memory_overallocation_behavior spec. */
			VkDeviceMemoryOverallocationCreateInfoAMD *features =
				(VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
			features->overallocationBehavior = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
			VkPhysicalDeviceRobustness2FeaturesEXT *features =
				(VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
			features->robustBufferAccess2 = true;
			features->robustImageAccess2 = true;
			features->nullDescriptor = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
			VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
				(VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
			features->customBorderColors = true;
			features->customBorderColorWithoutFormat = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
			VkPhysicalDevicePrivateDataFeaturesEXT *features =
				(VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
			features->privateData = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
			VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
				(VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
			features-> pipelineCreationCacheControl = true;
			break;
		}
		default:
			/* Unknown struct in the chain: leave it for the loader/layers. */
			break;
		}
	}
#undef CORE_FEATURE
}
1341
1342 static size_t
1343 radv_max_descriptor_set_size()
1344 {
1345 /* make sure that the entire descriptor set is addressable with a signed
1346 * 32-bit int. So the sum of all limits scaled by descriptor size has to
1347 * be at most 2 GiB. the combined image & samples object count as one of
1348 * both. This limit is for the pipeline layout, not for the set layout, but
1349 * there is no set limit, so we just set a pipeline limit. I don't think
1350 * any app is going to hit this soon. */
1351 return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
1352 - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1353 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1354 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1355 32 /* sampler, largest when combined with image */ +
1356 64 /* sampled image */ +
1357 64 /* storage image */);
1358 }
1359
1360 void radv_GetPhysicalDeviceProperties(
1361 VkPhysicalDevice physicalDevice,
1362 VkPhysicalDeviceProperties* pProperties)
1363 {
1364 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1365 VkSampleCountFlags sample_counts = 0xf;
1366
1367 size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1368
1369 VkPhysicalDeviceLimits limits = {
1370 .maxImageDimension1D = (1 << 14),
1371 .maxImageDimension2D = (1 << 14),
1372 .maxImageDimension3D = (1 << 11),
1373 .maxImageDimensionCube = (1 << 14),
1374 .maxImageArrayLayers = (1 << 11),
1375 .maxTexelBufferElements = UINT32_MAX,
1376 .maxUniformBufferRange = UINT32_MAX,
1377 .maxStorageBufferRange = UINT32_MAX,
1378 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1379 .maxMemoryAllocationCount = UINT32_MAX,
1380 .maxSamplerAllocationCount = 64 * 1024,
1381 .bufferImageGranularity = 64, /* A cache line */
1382 .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1383 .maxBoundDescriptorSets = MAX_SETS,
1384 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1385 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1386 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1387 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1388 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1389 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1390 .maxPerStageResources = max_descriptor_set_size,
1391 .maxDescriptorSetSamplers = max_descriptor_set_size,
1392 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1393 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1394 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1395 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1396 .maxDescriptorSetSampledImages = max_descriptor_set_size,
1397 .maxDescriptorSetStorageImages = max_descriptor_set_size,
1398 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1399 .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1400 .maxVertexInputBindings = MAX_VBS,
1401 .maxVertexInputAttributeOffset = 2047,
1402 .maxVertexInputBindingStride = 2048,
1403 .maxVertexOutputComponents = 128,
1404 .maxTessellationGenerationLevel = 64,
1405 .maxTessellationPatchSize = 32,
1406 .maxTessellationControlPerVertexInputComponents = 128,
1407 .maxTessellationControlPerVertexOutputComponents = 128,
1408 .maxTessellationControlPerPatchOutputComponents = 120,
1409 .maxTessellationControlTotalOutputComponents = 4096,
1410 .maxTessellationEvaluationInputComponents = 128,
1411 .maxTessellationEvaluationOutputComponents = 128,
1412 .maxGeometryShaderInvocations = 127,
1413 .maxGeometryInputComponents = 64,
1414 .maxGeometryOutputComponents = 128,
1415 .maxGeometryOutputVertices = 256,
1416 .maxGeometryTotalOutputComponents = 1024,
1417 .maxFragmentInputComponents = 128,
1418 .maxFragmentOutputAttachments = 8,
1419 .maxFragmentDualSrcAttachments = 1,
1420 .maxFragmentCombinedOutputResources = 8,
1421 .maxComputeSharedMemorySize = 32768,
1422 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1423 .maxComputeWorkGroupInvocations = 1024,
1424 .maxComputeWorkGroupSize = {
1425 1024,
1426 1024,
1427 1024
1428 },
1429 .subPixelPrecisionBits = 8,
1430 .subTexelPrecisionBits = 8,
1431 .mipmapPrecisionBits = 8,
1432 .maxDrawIndexedIndexValue = UINT32_MAX,
1433 .maxDrawIndirectCount = UINT32_MAX,
1434 .maxSamplerLodBias = 16,
1435 .maxSamplerAnisotropy = 16,
1436 .maxViewports = MAX_VIEWPORTS,
1437 .maxViewportDimensions = { (1 << 14), (1 << 14) },
1438 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
1439 .viewportSubPixelBits = 8,
1440 .minMemoryMapAlignment = 4096, /* A page */
1441 .minTexelBufferOffsetAlignment = 4,
1442 .minUniformBufferOffsetAlignment = 4,
1443 .minStorageBufferOffsetAlignment = 4,
1444 .minTexelOffset = -32,
1445 .maxTexelOffset = 31,
1446 .minTexelGatherOffset = -32,
1447 .maxTexelGatherOffset = 31,
1448 .minInterpolationOffset = -2,
1449 .maxInterpolationOffset = 2,
1450 .subPixelInterpolationOffsetBits = 8,
1451 .maxFramebufferWidth = (1 << 14),
1452 .maxFramebufferHeight = (1 << 14),
1453 .maxFramebufferLayers = (1 << 10),
1454 .framebufferColorSampleCounts = sample_counts,
1455 .framebufferDepthSampleCounts = sample_counts,
1456 .framebufferStencilSampleCounts = sample_counts,
1457 .framebufferNoAttachmentsSampleCounts = sample_counts,
1458 .maxColorAttachments = MAX_RTS,
1459 .sampledImageColorSampleCounts = sample_counts,
1460 .sampledImageIntegerSampleCounts = sample_counts,
1461 .sampledImageDepthSampleCounts = sample_counts,
1462 .sampledImageStencilSampleCounts = sample_counts,
1463 .storageImageSampleCounts = sample_counts,
1464 .maxSampleMaskWords = 1,
1465 .timestampComputeAndGraphics = true,
1466 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1467 .maxClipDistances = 8,
1468 .maxCullDistances = 8,
1469 .maxCombinedClipAndCullDistances = 8,
1470 .discreteQueuePriorities = 2,
1471 .pointSizeRange = { 0.0, 8191.875 },
1472 .lineWidthRange = { 0.0, 8191.875 },
1473 .pointSizeGranularity = (1.0 / 8.0),
1474 .lineWidthGranularity = (1.0 / 8.0),
1475 .strictLines = false, /* FINISHME */
1476 .standardSampleLocations = true,
1477 .optimalBufferCopyOffsetAlignment = 128,
1478 .optimalBufferCopyRowPitchAlignment = 128,
1479 .nonCoherentAtomSize = 64,
1480 };
1481
1482 *pProperties = (VkPhysicalDeviceProperties) {
1483 .apiVersion = radv_physical_device_api_version(pdevice),
1484 .driverVersion = vk_get_driver_version(),
1485 .vendorID = ATI_VENDOR_ID,
1486 .deviceID = pdevice->rad_info.pci_id,
1487 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1488 .limits = limits,
1489 .sparseProperties = {0},
1490 };
1491
1492 strcpy(pProperties->deviceName, pdevice->name);
1493 memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1494 }
1495
1496 static void
1497 radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
1498 VkPhysicalDeviceVulkan11Properties *p)
1499 {
1500 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1501
1502 memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1503 memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1504 memset(p->deviceLUID, 0, VK_LUID_SIZE);
1505 /* The LUID is for Windows. */
1506 p->deviceLUIDValid = false;
1507 p->deviceNodeMask = 0;
1508
1509 p->subgroupSize = RADV_SUBGROUP_SIZE;
1510 p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS |
1511 VK_SHADER_STAGE_COMPUTE_BIT;
1512 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
1513 VK_SUBGROUP_FEATURE_VOTE_BIT |
1514 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
1515 VK_SUBGROUP_FEATURE_BALLOT_BIT |
1516 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
1517 VK_SUBGROUP_FEATURE_QUAD_BIT |
1518 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1519 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1520 p->subgroupQuadOperationsInAllStages = true;
1521
1522 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1523 p->maxMultiviewViewCount = MAX_VIEWS;
1524 p->maxMultiviewInstanceIndex = INT_MAX;
1525 p->protectedNoFault = false;
1526 p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1527 p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1528 }
1529
1530 static void
1531 radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
1532 VkPhysicalDeviceVulkan12Properties *p)
1533 {
1534 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
1535
1536 p->driverID = VK_DRIVER_ID_MESA_RADV;
1537 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1538 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1539 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
1540 radv_get_compiler_string(pdevice));
1541 p->conformanceVersion = (VkConformanceVersion) {
1542 .major = 1,
1543 .minor = 2,
1544 .subminor = 0,
1545 .patch = 0,
1546 };
1547
1548 /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1549 * controlled by the same config register.
1550 */
1551 if (pdevice->rad_info.has_packed_math_16bit) {
1552 p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1553 p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1554 } else {
1555 p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1556 p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1557 }
1558
1559 /* Do not allow both preserving and flushing denorms because different
1560 * shaders in the same pipeline can have different settings and this
1561 * won't work for merged shaders. To make it work, this requires LLVM
1562 * support for changing the register. The same logic applies for the
1563 * rounding modes because they are configured with the same config
1564 * register. TODO: we can enable a lot of these for ACO when it
1565 * supports all stages.
1566 */
1567 p->shaderDenormFlushToZeroFloat32 = true;
1568 p->shaderDenormPreserveFloat32 = false;
1569 p->shaderRoundingModeRTEFloat32 = true;
1570 p->shaderRoundingModeRTZFloat32 = false;
1571 p->shaderSignedZeroInfNanPreserveFloat32 = true;
1572
1573 p->shaderDenormFlushToZeroFloat16 = false;
1574 p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1575 p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
1576 p->shaderRoundingModeRTZFloat16 = false;
1577 p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1578
1579 p->shaderDenormFlushToZeroFloat64 = false;
1580 p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1581 p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
1582 p->shaderRoundingModeRTZFloat64 = false;
1583 p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1584
1585 p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1586 p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1587 p->shaderSampledImageArrayNonUniformIndexingNative = false;
1588 p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1589 p->shaderStorageImageArrayNonUniformIndexingNative = false;
1590 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1591 p->robustBufferAccessUpdateAfterBind = false;
1592 p->quadDivergentImplicitLod = false;
1593
1594 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1595 MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1596 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1597 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1598 32 /* sampler, largest when combined with image */ +
1599 64 /* sampled image */ +
1600 64 /* storage image */);
1601 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1602 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1603 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1604 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1605 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1606 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1607 p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1608 p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1609 p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1610 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1611 p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1612 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1613 p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1614 p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1615 p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1616
1617 /* We support all of the depth resolve modes */
1618 p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1619 VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
1620 VK_RESOLVE_MODE_MIN_BIT_KHR |
1621 VK_RESOLVE_MODE_MAX_BIT_KHR;
1622
1623 /* Average doesn't make sense for stencil so we don't support that */
1624 p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1625 VK_RESOLVE_MODE_MIN_BIT_KHR |
1626 VK_RESOLVE_MODE_MAX_BIT_KHR;
1627
1628 p->independentResolveNone = true;
1629 p->independentResolve = true;
1630
1631 /* GFX6-8 only support single channel min/max filter. */
1632 p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1633 p->filterMinmaxSingleComponentFormats = true;
1634
1635 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1636
1637 p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1638 }
1639
/* Fill the core VkPhysicalDeviceProperties plus every property struct
 * chained on pProperties->pNext that radv recognizes.
 *
 * The Vulkan 1.1/1.2 core property structs are populated once up front;
 * the CORE_PROPERTY/CORE_RENAMED_PROPERTY macros then copy individual
 * members out of those structs into the older per-extension property
 * structs, so both query paths always report identical values.
 */
void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2                *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	VkPhysicalDeviceVulkan11Properties core_1_1 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
	};
	radv_get_physical_device_properties_1_1(pdevice, &core_1_1);

	VkPhysicalDeviceVulkan12Properties core_1_2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
	};
	radv_get_physical_device_properties_1_2(pdevice, &core_1_2);

	/* Copy one member from the core_1_1/core_1_2 struct into the
	 * extension struct currently aliased as "properties", allowing for
	 * a member rename between the two (e.g. subgroupSupportedStages vs
	 * supportedStages). */
#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
	memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
	       sizeof(core_##major##_##minor.core_property))

#define CORE_PROPERTY(major, minor, property) \
	CORE_RENAMED_PROPERTY(major, minor, property, property)

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
			VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
			CORE_PROPERTY(1, 1, deviceUUID);
			CORE_PROPERTY(1, 1, driverUUID);
			CORE_PROPERTY(1, 1, deviceLUID);
			CORE_PROPERTY(1, 1, deviceLUIDValid);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
			VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
			CORE_PROPERTY(1, 1, maxMultiviewViewCount);
			CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
			VkPhysicalDevicePointClippingProperties *properties =
				(VkPhysicalDevicePointClippingProperties*)ext;
			CORE_PROPERTY(1, 1, pointClippingBehavior);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
				(VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
				(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			/* Host pointers must be page-aligned for import. */
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
				(VkPhysicalDeviceSubgroupProperties*)ext;
			CORE_PROPERTY(1, 1, subgroupSize);
			CORE_RENAMED_PROPERTY(1, 1, supportedStages,
					      subgroupSupportedStages);
			CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
					      subgroupSupportedOperations);
			CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
					      subgroupQuadOperationsInAllStages);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
				(VkPhysicalDeviceMaintenance3Properties*)ext;
			CORE_PROPERTY(1, 1, maxPerSetDescriptors);
			CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
			VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
			CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
			CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.min_good_cu_per_sa;
			properties->simdPerComputeUnit =
				pdevice->rad_info.num_simd_per_compute_unit;
			properties->wavefrontsPerSimd =
				pdevice->rad_info.max_wave64_per_simd;
			/* This extension always reports the wave64 view of the HW. */
			properties->wavefrontSize = 64;

			/* SGPR. */
			properties->sgprsPerSimd =
				pdevice->rad_info.num_physical_sgprs_per_simd;
			properties->minSgprAllocation =
				pdevice->rad_info.min_sgpr_alloc;
			properties->maxSgprAllocation =
				pdevice->rad_info.max_sgpr_alloc;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.sgpr_alloc_granularity;

			/* VGPR. */
			properties->vgprsPerSimd =
				pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
			properties->minVgprAllocation =
				pdevice->rad_info.min_wave64_vgpr_alloc;
			properties->maxVgprAllocation =
				pdevice->rad_info.max_vgpr_alloc;
			properties->vgprAllocationGranularity =
				pdevice->rad_info.wave64_vgpr_alloc_granularity;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
			VkPhysicalDeviceShaderCoreProperties2AMD *properties =
				(VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

			properties->shaderCoreFeatures = 0;
			properties->activeComputeUnitCount =
				pdevice->rad_info.num_good_compute_units;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
			VkPhysicalDeviceDescriptorIndexingProperties *properties =
				(VkPhysicalDeviceDescriptorIndexingProperties*)ext;
			CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
			CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
			CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
			CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
			CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			CORE_PROPERTY(1, 1, protectedNoFault);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			/* Only the bare-minimum conservative raster feature set is
			 * reported: no overestimation control, no underestimation. */
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = false;
			properties->conservativePointAndLineRasterization = false;
			properties->degenerateTrianglesRasterized = false;
			properties->degenerateLinesRasterized = false;
			properties->fullyCoveredFragmentShaderInputVariable = false;
			properties->conservativeRasterizationPostDepthCoverage = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
			properties->pciDomain = pdevice->bus_info.domain;
			properties->pciBus = pdevice->bus_info.bus;
			properties->pciDevice = pdevice->bus_info.dev;
			properties->pciFunction = pdevice->bus_info.func;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
			VkPhysicalDeviceDriverProperties *properties =
				(VkPhysicalDeviceDriverProperties *) ext;
			CORE_PROPERTY(1, 2, driverID);
			CORE_PROPERTY(1, 2, driverName);
			CORE_PROPERTY(1, 2, driverInfo);
			CORE_PROPERTY(1, 2, conformanceVersion);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
			properties->maxTransformFeedbackStreamDataSize = 512;
			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
			properties->maxTransformFeedbackBufferDataStride = 512;
			/* Streamout queries and lines/triangles streams are not
			 * implemented with NGG streamout. */
			properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
			properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
			properties->transformFeedbackRasterizationStreamSelect = false;
			properties->transformFeedbackDraw = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
			VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
				(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

			props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
			props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
			props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
			VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
				(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
			properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
								 VK_SAMPLE_COUNT_4_BIT |
								 VK_SAMPLE_COUNT_8_BIT;
			properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
			properties->sampleLocationCoordinateRange[0] = 0.0f;
			/* 0.9375 = 15/16, i.e. the max with 4 sub-pixel bits. */
			properties->sampleLocationCoordinateRange[1] = 0.9375f;
			properties->sampleLocationSubPixelBits = 4;
			properties->variableSampleLocations = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
			VkPhysicalDeviceDepthStencilResolveProperties *properties =
				(VkPhysicalDeviceDepthStencilResolveProperties *)ext;
			CORE_PROPERTY(1, 2, supportedDepthResolveModes);
			CORE_PROPERTY(1, 2, supportedStencilResolveModes);
			CORE_PROPERTY(1, 2, independentResolveNone);
			CORE_PROPERTY(1, 2, independentResolve);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
			VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
				(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
			properties->storageTexelBufferOffsetAlignmentBytes = 4;
			properties->storageTexelBufferOffsetSingleTexelAlignment = true;
			properties->uniformTexelBufferOffsetAlignmentBytes = 4;
			properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES : {
			VkPhysicalDeviceFloatControlsProperties *properties =
				(VkPhysicalDeviceFloatControlsProperties *)ext;
			CORE_PROPERTY(1, 2, denormBehaviorIndependence);
			CORE_PROPERTY(1, 2, roundingModeIndependence);
			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
			VkPhysicalDeviceTimelineSemaphoreProperties *properties =
				(VkPhysicalDeviceTimelineSemaphoreProperties *) ext;
			CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
			VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
				(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
			props->minSubgroupSize = 64;
			props->maxSubgroupSize = 64;
			props->maxComputeWorkgroupSubgroups = UINT32_MAX;
			props->requiredSubgroupSizeStages = 0;

			if (pdevice->rad_info.chip_class >= GFX10) {
				/* Only GFX10+ supports wave32. */
				props->minSubgroupSize = 32;
				props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
			}
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
			radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
			break;
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
			radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
			break;
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
				(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
			props->lineSubPixelPrecisionBits = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
			VkPhysicalDeviceRobustness2PropertiesEXT *properties =
				(VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
			properties->robustStorageBufferAccessSizeAlignment = 4;
			properties->robustUniformBufferAccessSizeAlignment = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
			VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
				(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
			props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
			break;
		}
		default:
			/* Unknown chained structs are left untouched, per spec. */
			break;
		}
	}
}
1979
1980 static void radv_get_physical_device_queue_family_properties(
1981 struct radv_physical_device* pdevice,
1982 uint32_t* pCount,
1983 VkQueueFamilyProperties** pQueueFamilyProperties)
1984 {
1985 int num_queue_families = 1;
1986 int idx;
1987 if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
1988 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
1989 num_queue_families++;
1990
1991 if (pQueueFamilyProperties == NULL) {
1992 *pCount = num_queue_families;
1993 return;
1994 }
1995
1996 if (!*pCount)
1997 return;
1998
1999 idx = 0;
2000 if (*pCount >= 1) {
2001 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
2002 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
2003 VK_QUEUE_COMPUTE_BIT |
2004 VK_QUEUE_TRANSFER_BIT |
2005 VK_QUEUE_SPARSE_BINDING_BIT,
2006 .queueCount = 1,
2007 .timestampValidBits = 64,
2008 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
2009 };
2010 idx++;
2011 }
2012
2013 if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
2014 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2015 if (*pCount > idx) {
2016 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
2017 .queueFlags = VK_QUEUE_COMPUTE_BIT |
2018 VK_QUEUE_TRANSFER_BIT |
2019 VK_QUEUE_SPARSE_BINDING_BIT,
2020 .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
2021 .timestampValidBits = 64,
2022 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
2023 };
2024 idx++;
2025 }
2026 }
2027 *pCount = idx;
2028 }
2029
2030 void radv_GetPhysicalDeviceQueueFamilyProperties(
2031 VkPhysicalDevice physicalDevice,
2032 uint32_t* pCount,
2033 VkQueueFamilyProperties* pQueueFamilyProperties)
2034 {
2035 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2036 if (!pQueueFamilyProperties) {
2037 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2038 return;
2039 }
2040 VkQueueFamilyProperties *properties[] = {
2041 pQueueFamilyProperties + 0,
2042 pQueueFamilyProperties + 1,
2043 pQueueFamilyProperties + 2,
2044 };
2045 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2046 assert(*pCount <= 3);
2047 }
2048
2049 void radv_GetPhysicalDeviceQueueFamilyProperties2(
2050 VkPhysicalDevice physicalDevice,
2051 uint32_t* pCount,
2052 VkQueueFamilyProperties2 *pQueueFamilyProperties)
2053 {
2054 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2055 if (!pQueueFamilyProperties) {
2056 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2057 return;
2058 }
2059 VkQueueFamilyProperties *properties[] = {
2060 &pQueueFamilyProperties[0].queueFamilyProperties,
2061 &pQueueFamilyProperties[1].queueFamilyProperties,
2062 &pQueueFamilyProperties[2].queueFamilyProperties,
2063 };
2064 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2065 assert(*pCount <= 3);
2066 }
2067
2068 void radv_GetPhysicalDeviceMemoryProperties(
2069 VkPhysicalDevice physicalDevice,
2070 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
2071 {
2072 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2073
2074 *pMemoryProperties = physical_device->memory_properties;
2075 }
2076
/* Fill VK_EXT_memory_budget data for every memory heap.
 *
 * For each heap: budget = heap_size - global_usage + app_usage, where
 * the global usage/allocated numbers come from the winsys query_value()
 * interface.  Entries past memoryHeapCount are zeroed as the spec
 * requires.
 */
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
				  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
	VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	uint64_t gtt_size = device->rad_info.gart_size;
	uint64_t heap_budget, heap_usage;

	/* For all memory heaps, the computation of budget is as follow:
	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
	 *
	 * The Vulkan spec 1.1.97 says that the budget should include any
	 * currently allocated device memory.
	 *
	 * Note that the application heap usages are not really accurate (eg.
	 * in presence of shared buffers).
	 */
	for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
		uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;

		/* Three classes of memory type: invisible VRAM, CPU-visible
		 * VRAM, and GTT — each with its own usage counters. */
		if ((device->memory_domains[i] & RADEON_DOMAIN_VRAM) && (device->memory_flags[i] & RADEON_FLAG_NO_CPU_ACCESS)) {
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_VRAM);

			heap_budget = vram_size -
				device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
		} else if (device->memory_domains[i] & RADEON_DOMAIN_VRAM) {
			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_VRAM_VIS);

			heap_budget = visible_vram_size -
				device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
		} else {
			assert(device->memory_domains[i] & RADEON_DOMAIN_GTT);

			heap_usage = device->ws->query_value(device->ws,
							     RADEON_ALLOCATED_GTT);

			heap_budget = gtt_size -
				device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
				heap_usage;

			memoryBudget->heapBudget[heap_index] = heap_budget;
			memoryBudget->heapUsage[heap_index] = heap_usage;
		}
	}

	/* The heapBudget and heapUsage values must be zero for array elements
	 * greater than or equal to
	 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
	 */
	for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
		memoryBudget->heapBudget[i] = 0;
		memoryBudget->heapUsage[i] = 0;
	}
}
2144
2145 void radv_GetPhysicalDeviceMemoryProperties2(
2146 VkPhysicalDevice physicalDevice,
2147 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2148 {
2149 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2150 &pMemoryProperties->memoryProperties);
2151
2152 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2153 vk_find_struct(pMemoryProperties->pNext,
2154 PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2155 if (memory_budget)
2156 radv_get_memory_budget_properties(physicalDevice, memory_budget);
2157 }
2158
2159 VkResult radv_GetMemoryHostPointerPropertiesEXT(
2160 VkDevice _device,
2161 VkExternalMemoryHandleTypeFlagBits handleType,
2162 const void *pHostPointer,
2163 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
2164 {
2165 RADV_FROM_HANDLE(radv_device, device, _device);
2166
2167 switch (handleType)
2168 {
2169 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2170 const struct radv_physical_device *physical_device = device->physical_device;
2171 uint32_t memoryTypeBits = 0;
2172 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2173 if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2174 !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2175 memoryTypeBits = (1 << i);
2176 break;
2177 }
2178 }
2179 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2180 return VK_SUCCESS;
2181 }
2182 default:
2183 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2184 }
2185 }
2186
2187 static enum radeon_ctx_priority
2188 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2189 {
2190 /* Default to MEDIUM when a specific global priority isn't requested */
2191 if (!pObj)
2192 return RADEON_CTX_PRIORITY_MEDIUM;
2193
2194 switch(pObj->globalPriority) {
2195 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2196 return RADEON_CTX_PRIORITY_REALTIME;
2197 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2198 return RADEON_CTX_PRIORITY_HIGH;
2199 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2200 return RADEON_CTX_PRIORITY_MEDIUM;
2201 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2202 return RADEON_CTX_PRIORITY_LOW;
2203 default:
2204 unreachable("Illegal global priority value");
2205 return RADEON_CTX_PRIORITY_INVALID;
2206 }
2207 }
2208
/* Initialize one radv_queue: record its identity, create the winsys HW
 * context at the requested priority, and set up the pending-submission
 * list used for timeline-semaphore deferred submits.
 *
 * NOTE(review): declared as returning int but all return values are
 * VkResult (VK_SUCCESS / vk_error(...)) — consider changing the return
 * type to VkResult for clarity; confirm against callers.
 */
static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		VkDeviceQueueCreateFlags flags,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);
	queue->flags = flags;
	/* Cleared first so radv_queue_finish() is safe if ctx_create fails. */
	queue->hw_ctx = NULL;

	VkResult result = device->ws->ctx_create(device->ws, queue->priority, &queue->hw_ctx);
	if (result != VK_SUCCESS)
		return vk_error(device->instance, result);

	list_inithead(&queue->pending_submissions);
	pthread_mutex_init(&queue->pending_mutex, NULL);

	return VK_SUCCESS;
}
2232
/* Tear down a radv_queue: destroy its mutex, HW context, preamble
 * command streams and all per-queue ring/scratch buffer objects.  Every
 * resource is guarded by a NULL/ownership check, so this is safe to call
 * on a partially initialized queue. */
static void
radv_queue_finish(struct radv_queue *queue)
{
	pthread_mutex_destroy(&queue->pending_mutex);

	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	/* Preamble command streams. */
	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	/* Per-queue buffer objects (descriptor, scratch and shader rings). */
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
	if (queue->gds_bo)
		queue->device->ws->buffer_destroy(queue->gds_bo);
	if (queue->gds_oa_bo)
		queue->device->ws->buffer_destroy(queue->gds_oa_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}
2264
2265 static void
2266 radv_bo_list_init(struct radv_bo_list *bo_list)
2267 {
2268 pthread_mutex_init(&bo_list->mutex, NULL);
2269 bo_list->list.count = bo_list->capacity = 0;
2270 bo_list->list.bos = NULL;
2271 }
2272
2273 static void
2274 radv_bo_list_finish(struct radv_bo_list *bo_list)
2275 {
2276 free(bo_list->list.bos);
2277 pthread_mutex_destroy(&bo_list->mutex);
2278 }
2279
2280 VkResult radv_bo_list_add(struct radv_device *device,
2281 struct radeon_winsys_bo *bo)
2282 {
2283 struct radv_bo_list *bo_list = &device->bo_list;
2284
2285 if (bo->is_local)
2286 return VK_SUCCESS;
2287
2288 if (unlikely(!device->use_global_bo_list))
2289 return VK_SUCCESS;
2290
2291 pthread_mutex_lock(&bo_list->mutex);
2292 if (bo_list->list.count == bo_list->capacity) {
2293 unsigned capacity = MAX2(4, bo_list->capacity * 2);
2294 void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
2295
2296 if (!data) {
2297 pthread_mutex_unlock(&bo_list->mutex);
2298 return VK_ERROR_OUT_OF_HOST_MEMORY;
2299 }
2300
2301 bo_list->list.bos = (struct radeon_winsys_bo**)data;
2302 bo_list->capacity = capacity;
2303 }
2304
2305 bo_list->list.bos[bo_list->list.count++] = bo;
2306 pthread_mutex_unlock(&bo_list->mutex);
2307 return VK_SUCCESS;
2308 }
2309
2310 void radv_bo_list_remove(struct radv_device *device,
2311 struct radeon_winsys_bo *bo)
2312 {
2313 struct radv_bo_list *bo_list = &device->bo_list;
2314
2315 if (bo->is_local)
2316 return;
2317
2318 if (unlikely(!device->use_global_bo_list))
2319 return;
2320
2321 pthread_mutex_lock(&bo_list->mutex);
2322 /* Loop the list backwards so we find the most recently added
2323 * memory first. */
2324 for(unsigned i = bo_list->list.count; i-- > 0;) {
2325 if (bo_list->list.bos[i] == bo) {
2326 bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
2327 --bo_list->list.count;
2328 break;
2329 }
2330 }
2331 pthread_mutex_unlock(&bo_list->mutex);
2332 }
2333
/* Cache the GS copy-shader table depth for this GPU generation/family;
 * the value comes from the shared ac helper. */
static void
radv_device_init_gs_info(struct radv_device *device)
{
	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
						       device->physical_device->rad_info.family);
}
2340
2341 static int radv_get_device_extension_index(const char *name)
2342 {
2343 for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
2344 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
2345 return i;
2346 }
2347 return -1;
2348 }
2349
/* Read an integer debug option from the environment.
 *
 * Returns the value of the environment variable `name` parsed with
 * strtol base 0 (so decimal, hex "0x..." and octal "0..." all work),
 * or `default_value` when the variable is unset or contains no digits.
 * Trailing garbage after a valid number is ignored ("12abc" -> 12),
 * matching the historical behavior.
 */
static int
radv_get_int_debug_option(const char *name, int default_value)
{
	const char *str;
	int result;

	str = getenv(name);
	if (!str) {
		result = default_value;
	} else {
		char *endptr;

		result = strtol(str, &endptr, 0);
		if (str == endptr) {
			/* No digits found. */
			result = default_value;
		}
	}

	return result;
}
2371
/* Install a seccomp-BPF syscall whitelist in the current (forked secure
 * compile) process.  Only select/pselect, exit_group, read, write, brk
 * and futex are allowed; everything else fails with EPERM, and non-x86_64
 * callers fall through to the EPERM return as well.
 *
 * NOTE: the numeric jump offsets below are relative instruction counts
 * into this array — they must be updated together if instructions are
 * added or removed.  Returns 0 on success, -1 on prctl failure. */
static int install_seccomp_filter() {

	struct sock_filter filter[] = {
		/* Check arch is 64bit x86 */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, arch))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 0, 12),

		/* Futex is required for mutex locks */
		#if defined __NR__newselect
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR__newselect, 11, 0),
		#elif defined __NR_select
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_select, 11, 0),
		#else
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_pselect6, 11, 0),
		#endif

		/* Allow system exit calls for the forked process */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_exit_group, 9, 0),

		/* Allow system read calls */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_read, 7, 0),

		/* Allow system write calls */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_write, 5, 0),

		/* Allow system brk calls (we need this for malloc) */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_brk, 3, 0),

		/* Futex is required for mutex locks */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_futex, 1, 0),

		/* Return error if we hit a system call not on the whitelist */
		BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA)),

		/* Allow whitelisted system calls */
		BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW),
	};

	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
		.filter = filter,
	};

	/* Required before installing a filter without CAP_SYS_ADMIN. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;

	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		return -1;

	return 0;
}
2431
/* Helper function with timeout support for reading from the pipe between
 * processes used for secure compile.
 *
 * Reads exactly `size` bytes from `fd` into `buf`, retrying short reads.
 * When `timeout` is true each select() waits at most 5 seconds.
 *
 * Returns true when the full `size` bytes were read; false on select()
 * error, read() error, timeout, or EOF before `size` bytes arrived.
 * (Treating EOF as failure is essential: the old code looped forever if
 * the peer closed the pipe, since read() kept returning 0 and select()
 * kept reporting the fd as readable.)
 */
bool radv_sc_read(int fd, void *buf, size_t size, bool timeout)
{
	fd_set fds;
	struct timeval tv;
	/* void* arithmetic is a GNU extension; step through a char*. */
	char *p = buf;

	FD_ZERO(&fds);
	FD_SET(fd, &fds);

	while (true) {
		/* We can't rely on the value of tv after calling select() so
		 * we must reset it on each iteration of the loop.
		 */
		tv.tv_sec = 5;
		tv.tv_usec = 0;

		int rval = select(fd + 1, &fds, NULL, NULL, timeout ? &tv : NULL);

		if (rval == -1) {
			/* select error */
			return false;
		} else if (rval) {
			ssize_t bytes_read = read(fd, p, size);
			if (bytes_read < 0)
				return false;
			if (bytes_read == 0) {
				/* EOF: the writer closed the pipe before
				 * sending everything we expected. */
				return false;
			}

			p += bytes_read;
			size -= bytes_read;
			if (size == 0)
				return true;
		} else {
			/* select timeout */
			return false;
		}
	}
}
2470
/* Close every file descriptor of the current process except those listed
 * in keep_fds (used after fork for the secure-compile child).  Walks
 * /proc/self/fd, skipping "." / ".." (entries in this directory are
 * otherwise purely numeric) and the directory's own fd.
 *
 * Returns false only if /proc/self/fd could not be opened; individual
 * close() failures are ignored. */
static bool radv_close_all_fds(const int *keep_fds, int keep_fd_count)
{
	DIR *d;
	struct dirent *dir;
	d = opendir("/proc/self/fd");
	if (!d)
		return false;
	int dir_fd = dirfd(d);

	while ((dir = readdir(d)) != NULL) {
		if (dir->d_name[0] == '.')
			continue;

		int fd = atoi(dir->d_name);
		/* Don't close the fd backing this directory iteration. */
		if (fd == dir_fd)
			continue;

		bool keep = false;
		for (int i = 0; !keep && i < keep_fd_count; ++i)
			if (keep_fds[i] == fd)
				keep = true;

		if (keep)
			continue;

		close(fd);
	}
	closedir(d);
	return true;
}
2501
2502 static bool secure_compile_open_fifo_fds(struct radv_secure_compile_state *sc,
2503 int *fd_server, int *fd_client,
2504 unsigned process, bool make_fifo)
2505 {
2506 bool result = false;
2507 char *fifo_server_path = NULL;
2508 char *fifo_client_path = NULL;
2509
2510 if (asprintf(&fifo_server_path, "/tmp/radv_server_%s_%u", sc->uid, process) == -1)
2511 goto open_fifo_exit;
2512
2513 if (asprintf(&fifo_client_path, "/tmp/radv_client_%s_%u", sc->uid, process) == -1)
2514 goto open_fifo_exit;
2515
2516 if (make_fifo) {
2517 int file1 = mkfifo(fifo_server_path, 0666);
2518 if(file1 < 0)
2519 goto open_fifo_exit;
2520
2521 int file2 = mkfifo(fifo_client_path, 0666);
2522 if(file2 < 0)
2523 goto open_fifo_exit;
2524 }
2525
2526 *fd_server = open(fifo_server_path, O_RDWR);
2527 if(*fd_server < 1)
2528 goto open_fifo_exit;
2529
2530 *fd_client = open(fifo_client_path, O_RDWR);
2531 if(*fd_client < 1) {
2532 close(*fd_server);
2533 goto open_fifo_exit;
2534 }
2535
2536 result = true;
2537
2538 open_fifo_exit:
2539 free(fifo_server_path);
2540 free(fifo_client_path);
2541
2542 return result;
2543 }
2544
/* Body of a forked secure compile process. After switching to FIFO-based
 * communication, closing every inherited fd and installing the seccomp
 * filter, it loops reading compile commands from fd_secure_input,
 * deserializing a pipeline description, compiling its shaders and
 * acknowledging on fd_secure_output. This function never returns: it
 * always terminates the process via _exit(0).
 */
static void run_secure_compile_device(struct radv_device *device, unsigned process,
				      int fd_idle_device_output)
{
	int fd_secure_input;
	int fd_secure_output;
	bool fifo_result = secure_compile_open_fifo_fds(device->sc_state,
							&fd_secure_input,
							&fd_secure_output,
							process, false);

	enum radv_secure_compile_type sc_type;

	/* The only descriptors this process must keep open; everything else
	 * inherited from the parent is closed below.
	 */
	const int needed_fds[] = {
		fd_secure_input,
		fd_secure_output,
		fd_idle_device_output,
	};

	if (!fifo_result || !radv_close_all_fds(needed_fds, ARRAY_SIZE(needed_fds)) ||
	    install_seccomp_filter() == -1) {
		sc_type = RADV_SC_TYPE_INIT_FAILURE;
	} else {
		sc_type = RADV_SC_TYPE_INIT_SUCCESS;
		device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input;
		device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output;
	}

	/* Handshake: report init success/failure back to the idle device. */
	write(fd_idle_device_output, &sc_type, sizeof(sc_type));

	if (sc_type == RADV_SC_TYPE_INIT_FAILURE)
		goto secure_compile_exit;

	while (true) {
		/* Block (no timeout) waiting for the next command. */
		radv_sc_read(fd_secure_input, &sc_type, sizeof(sc_type), false);

		if (sc_type == RADV_SC_TYPE_COMPILE_PIPELINE) {
			struct radv_pipeline *pipeline;
			bool sc_read = true;

			/* NOTE(review): vk_zalloc2 result is not checked
			 * before the dereference below — confirm OOM here is
			 * considered acceptable to crash the sandbox child. */
			pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(*pipeline), 8,
					      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

			pipeline->device = device;

			/* Read pipeline layout */
			struct radv_pipeline_layout layout;
			sc_read = radv_sc_read(fd_secure_input, &layout, sizeof(struct radv_pipeline_layout), true);
			sc_read &= radv_sc_read(fd_secure_input, &layout.num_sets, sizeof(uint32_t), true);
			if (!sc_read)
				goto secure_compile_exit;

			for (uint32_t set = 0; set < layout.num_sets; set++) {
				uint32_t layout_size;
				sc_read &= radv_sc_read(fd_secure_input, &layout_size, sizeof(uint32_t), true);
				if (!sc_read)
					goto secure_compile_exit;

				/* NOTE(review): layout_size comes off the
				 * pipe and malloc is unchecked — presumably
				 * the parent is trusted; verify. */
				layout.set[set].layout = malloc(layout_size);
				layout.set[set].layout->layout_size = layout_size;
				sc_read &= radv_sc_read(fd_secure_input, layout.set[set].layout,
							layout.set[set].layout->layout_size, true);
			}

			pipeline->layout = &layout;

			/* Read pipeline key */
			struct radv_pipeline_key key;
			sc_read &= radv_sc_read(fd_secure_input, &key, sizeof(struct radv_pipeline_key), true);

			/* Read pipeline create flags */
			VkPipelineCreateFlags flags;
			sc_read &= radv_sc_read(fd_secure_input, &flags, sizeof(VkPipelineCreateFlags), true);

			/* Read stage and shader information */
			uint32_t num_stages;
			const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
			sc_read &= radv_sc_read(fd_secure_input, &num_stages, sizeof(uint32_t), true);
			if (!sc_read)
				goto secure_compile_exit;

			for (uint32_t i = 0; i < num_stages; i++) {

				/* Read stage */
				gl_shader_stage stage;
				sc_read &= radv_sc_read(fd_secure_input, &stage, sizeof(gl_shader_stage), true);

				VkPipelineShaderStageCreateInfo *pStage = calloc(1, sizeof(VkPipelineShaderStageCreateInfo));

				/* Read entry point name (length-prefixed, includes NUL) */
				size_t name_size;
				sc_read &= radv_sc_read(fd_secure_input, &name_size, sizeof(size_t), true);
				if (!sc_read)
					goto secure_compile_exit;

				char *ep_name = malloc(name_size);
				sc_read &= radv_sc_read(fd_secure_input, ep_name, name_size, true);
				pStage->pName = ep_name;

				/* Read shader module */
				size_t module_size;
				sc_read &= radv_sc_read(fd_secure_input, &module_size, sizeof(size_t), true);
				if (!sc_read)
					goto secure_compile_exit;

				struct radv_shader_module *module = malloc(module_size);
				sc_read &= radv_sc_read(fd_secure_input, module, module_size, true);
				pStage->module = radv_shader_module_to_handle(module);

				/* Read specialization info */
				bool has_spec_info;
				sc_read &= radv_sc_read(fd_secure_input, &has_spec_info, sizeof(bool), true);
				if (!sc_read)
					goto secure_compile_exit;

				if (has_spec_info) {
					VkSpecializationInfo *specInfo = malloc(sizeof(VkSpecializationInfo));
					pStage->pSpecializationInfo = specInfo;

					sc_read &= radv_sc_read(fd_secure_input, &specInfo->dataSize, sizeof(size_t), true);
					if (!sc_read)
						goto secure_compile_exit;

					void *si_data = malloc(specInfo->dataSize);
					sc_read &= radv_sc_read(fd_secure_input, si_data, specInfo->dataSize, true);
					specInfo->pData = si_data;

					sc_read &= radv_sc_read(fd_secure_input, &specInfo->mapEntryCount, sizeof(uint32_t), true);
					if (!sc_read)
						goto secure_compile_exit;

					VkSpecializationMapEntry *mapEntries = malloc(sizeof(VkSpecializationMapEntry) * specInfo->mapEntryCount);
					for (uint32_t j = 0; j < specInfo->mapEntryCount; j++) {
						sc_read &= radv_sc_read(fd_secure_input, &mapEntries[j], sizeof(VkSpecializationMapEntry), true);
						if (!sc_read)
							goto secure_compile_exit;
					}

					specInfo->pMapEntries = mapEntries;
				}

				pStages[stage] = pStage;
			}

			/* Compile the shaders */
			VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };

			/* Not fully to spec but if we're doing sandboxed compilations already this doesn't matter. */
			flags &= ~VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT;

			radv_create_shaders(pipeline, device, NULL, &key, pStages, flags, NULL, stage_feedbacks);

			/* free memory allocated above */
			for (uint32_t set = 0; set < layout.num_sets; set++)
				free(layout.set[set].layout);

			for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
				if (!pStages[i])
					continue;

				free((void *) pStages[i]->pName);
				free(radv_shader_module_from_handle(pStages[i]->module));
				if (pStages[i]->pSpecializationInfo) {
					free((void *) pStages[i]->pSpecializationInfo->pData);
					free((void *) pStages[i]->pSpecializationInfo->pMapEntries);
					free((void *) pStages[i]->pSpecializationInfo);
				}
				free((void *) pStages[i]);
			}

			vk_free(&device->vk.alloc, pipeline);

			/* Tell the parent the compile finished; the parent
			 * reads the results from the shader cache. */
			sc_type = RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED;
			write(fd_secure_output, &sc_type, sizeof(sc_type));

		} else if (sc_type == RADV_SC_TYPE_DESTROY_DEVICE) {
			goto secure_compile_exit;
		}
	}

secure_compile_exit:
	close(fd_secure_input);
	close(fd_secure_output);
	close(fd_idle_device_output);
	_exit(0);
}
2730
2731 static enum radv_secure_compile_type fork_secure_compile_device(struct radv_device *device, unsigned process)
2732 {
2733 int fd_secure_input[2];
2734 int fd_secure_output[2];
2735
2736 /* create pipe descriptors (used to communicate between processes) */
2737 if (pipe(fd_secure_input) == -1 || pipe(fd_secure_output) == -1)
2738 return RADV_SC_TYPE_INIT_FAILURE;
2739
2740
2741 int sc_pid;
2742 if ((sc_pid = fork()) == 0) {
2743 device->sc_state->secure_compile_thread_counter = process;
2744 run_secure_compile_device(device, process, fd_secure_output[1]);
2745 } else {
2746 if (sc_pid == -1)
2747 return RADV_SC_TYPE_INIT_FAILURE;
2748
2749 /* Read the init result returned from the secure process */
2750 enum radv_secure_compile_type sc_type;
2751 bool sc_read = radv_sc_read(fd_secure_output[0], &sc_type, sizeof(sc_type), true);
2752
2753 if (sc_type == RADV_SC_TYPE_INIT_FAILURE || !sc_read) {
2754 close(fd_secure_input[0]);
2755 close(fd_secure_input[1]);
2756 close(fd_secure_output[1]);
2757 close(fd_secure_output[0]);
2758 int status;
2759 waitpid(sc_pid, &status, 0);
2760
2761 return RADV_SC_TYPE_INIT_FAILURE;
2762 } else {
2763 assert(sc_type == RADV_SC_TYPE_INIT_SUCCESS);
2764 write(device->sc_state->secure_compile_processes[process].fd_secure_output, &sc_type, sizeof(sc_type));
2765
2766 close(fd_secure_input[0]);
2767 close(fd_secure_input[1]);
2768 close(fd_secure_output[1]);
2769 close(fd_secure_output[0]);
2770
2771 int status;
2772 waitpid(sc_pid, &status, 0);
2773 }
2774 }
2775
2776 return RADV_SC_TYPE_INIT_SUCCESS;
2777 }
2778
2779 /* Run a bare bones fork of a device that was forked right after its creation.
2780 * This device will have low overhead when it is forked again before each
2781 * pipeline compilation. This device sits idle and its only job is to fork
2782 * itself.
2783 */
2784 static void run_secure_compile_idle_device(struct radv_device *device, unsigned process,
2785 int fd_secure_input, int fd_secure_output)
2786 {
2787 enum radv_secure_compile_type sc_type = RADV_SC_TYPE_INIT_SUCCESS;
2788 device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input;
2789 device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output;
2790
2791 write(fd_secure_output, &sc_type, sizeof(sc_type));
2792
2793 while (true) {
2794 radv_sc_read(fd_secure_input, &sc_type, sizeof(sc_type), false);
2795
2796 if (sc_type == RADV_SC_TYPE_FORK_DEVICE) {
2797 sc_type = fork_secure_compile_device(device, process);
2798
2799 if (sc_type == RADV_SC_TYPE_INIT_FAILURE)
2800 goto secure_compile_exit;
2801
2802 } else if (sc_type == RADV_SC_TYPE_DESTROY_DEVICE) {
2803 goto secure_compile_exit;
2804 }
2805 }
2806
2807 secure_compile_exit:
2808 close(fd_secure_input);
2809 close(fd_secure_output);
2810 _exit(0);
2811 }
2812
2813 static void destroy_secure_compile_device(struct radv_device *device, unsigned process)
2814 {
2815 int fd_secure_input = device->sc_state->secure_compile_processes[process].fd_secure_input;
2816
2817 enum radv_secure_compile_type sc_type = RADV_SC_TYPE_DESTROY_DEVICE;
2818 write(fd_secure_input, &sc_type, sizeof(sc_type));
2819
2820 close(device->sc_state->secure_compile_processes[process].fd_secure_input);
2821 close(device->sc_state->secure_compile_processes[process].fd_secure_output);
2822
2823 int status;
2824 waitpid(device->sc_state->secure_compile_processes[process].sc_pid, &status, 0);
2825 }
2826
2827 static VkResult fork_secure_compile_idle_device(struct radv_device *device)
2828 {
2829 device->sc_state = vk_zalloc(&device->vk.alloc,
2830 sizeof(struct radv_secure_compile_state),
2831 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2832
2833 mtx_init(&device->sc_state->secure_compile_mutex, mtx_plain);
2834
2835 pid_t upid = getpid();
2836 time_t seconds = time(NULL);
2837
2838 char *uid;
2839 if (asprintf(&uid, "%ld_%ld", (long) upid, (long) seconds) == -1)
2840 return VK_ERROR_INITIALIZATION_FAILED;
2841
2842 device->sc_state->uid = uid;
2843
2844 uint8_t sc_threads = device->instance->num_sc_threads;
2845 int fd_secure_input[MAX_SC_PROCS][2];
2846 int fd_secure_output[MAX_SC_PROCS][2];
2847
2848 /* create pipe descriptors (used to communicate between processes) */
2849 for (unsigned i = 0; i < sc_threads; i++) {
2850 if (pipe(fd_secure_input[i]) == -1 ||
2851 pipe(fd_secure_output[i]) == -1) {
2852 return VK_ERROR_INITIALIZATION_FAILED;
2853 }
2854 }
2855
2856 device->sc_state->secure_compile_processes = vk_zalloc(&device->vk.alloc,
2857 sizeof(struct radv_secure_compile_process) * sc_threads, 8,
2858 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2859
2860 for (unsigned process = 0; process < sc_threads; process++) {
2861 if ((device->sc_state->secure_compile_processes[process].sc_pid = fork()) == 0) {
2862 device->sc_state->secure_compile_thread_counter = process;
2863 run_secure_compile_idle_device(device, process, fd_secure_input[process][0], fd_secure_output[process][1]);
2864 } else {
2865 if (device->sc_state->secure_compile_processes[process].sc_pid == -1)
2866 return VK_ERROR_INITIALIZATION_FAILED;
2867
2868 /* Read the init result returned from the secure process */
2869 enum radv_secure_compile_type sc_type;
2870 bool sc_read = radv_sc_read(fd_secure_output[process][0], &sc_type, sizeof(sc_type), true);
2871
2872 bool fifo_result;
2873 if (sc_read && sc_type == RADV_SC_TYPE_INIT_SUCCESS) {
2874 fifo_result = secure_compile_open_fifo_fds(device->sc_state,
2875 &device->sc_state->secure_compile_processes[process].fd_server,
2876 &device->sc_state->secure_compile_processes[process].fd_client,
2877 process, true);
2878
2879 device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input[process][1];
2880 device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output[process][0];
2881 }
2882
2883 if (sc_type == RADV_SC_TYPE_INIT_FAILURE || !sc_read || !fifo_result) {
2884 close(fd_secure_input[process][0]);
2885 close(fd_secure_input[process][1]);
2886 close(fd_secure_output[process][1]);
2887 close(fd_secure_output[process][0]);
2888 int status;
2889 waitpid(device->sc_state->secure_compile_processes[process].sc_pid, &status, 0);
2890
2891 /* Destroy any forks that were created sucessfully */
2892 for (unsigned i = 0; i < process; i++) {
2893 destroy_secure_compile_device(device, i);
2894 }
2895
2896 return VK_ERROR_INITIALIZATION_FAILED;
2897 }
2898 }
2899 }
2900 return VK_SUCCESS;
2901 }
2902
2903 static void
2904 radv_device_init_dispatch(struct radv_device *device)
2905 {
2906 const struct radv_instance *instance = device->physical_device->instance;
2907 const struct radv_device_dispatch_table *dispatch_table_layer = NULL;
2908 bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
2909 int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
2910
2911 if (radv_thread_trace >= 0) {
2912 /* Use device entrypoints from the SQTT layer if enabled. */
2913 dispatch_table_layer = &sqtt_device_dispatch_table;
2914 }
2915
2916 for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) {
2917 /* Vulkan requires that entrypoints for extensions which have not been
2918 * enabled must not be advertised.
2919 */
2920 if (!unchecked &&
2921 !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
2922 &instance->enabled_extensions,
2923 &device->enabled_extensions)) {
2924 device->dispatch.entrypoints[i] = NULL;
2925 } else if (dispatch_table_layer &&
2926 dispatch_table_layer->entrypoints[i]) {
2927 device->dispatch.entrypoints[i] =
2928 dispatch_table_layer->entrypoints[i];
2929 } else {
2930 device->dispatch.entrypoints[i] =
2931 radv_device_dispatch_table.entrypoints[i];
2932 }
2933 }
2934 }
2935
2936 static VkResult
2937 radv_create_pthread_cond(pthread_cond_t *cond)
2938 {
2939 pthread_condattr_t condattr;
2940 if (pthread_condattr_init(&condattr)) {
2941 return VK_ERROR_INITIALIZATION_FAILED;
2942 }
2943
2944 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) {
2945 pthread_condattr_destroy(&condattr);
2946 return VK_ERROR_INITIALIZATION_FAILED;
2947 }
2948 if (pthread_cond_init(cond, &condattr)) {
2949 pthread_condattr_destroy(&condattr);
2950 return VK_ERROR_INITIALIZATION_FAILED;
2951 }
2952 pthread_condattr_destroy(&condattr);
2953 return VK_SUCCESS;
2954 }
2955
2956 static VkResult
2957 check_physical_device_features(VkPhysicalDevice physicalDevice,
2958 const VkPhysicalDeviceFeatures *features)
2959 {
2960 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2961 VkPhysicalDeviceFeatures supported_features;
2962 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
2963 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
2964 VkBool32 *enabled_feature = (VkBool32 *)features;
2965 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
2966 for (uint32_t i = 0; i < num_features; i++) {
2967 if (enabled_feature[i] && !supported_feature[i])
2968 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
2969 }
2970
2971 return VK_SUCCESS;
2972 }
2973
2974 static VkResult radv_device_init_border_color(struct radv_device *device)
2975 {
2976 device->border_color_data.bo =
2977 device->ws->buffer_create(device->ws,
2978 RADV_BORDER_COLOR_BUFFER_SIZE,
2979 4096,
2980 RADEON_DOMAIN_VRAM,
2981 RADEON_FLAG_CPU_ACCESS |
2982 RADEON_FLAG_READ_ONLY |
2983 RADEON_FLAG_NO_INTERPROCESS_SHARING,
2984 RADV_BO_PRIORITY_SHADER);
2985
2986 if (device->border_color_data.bo == NULL)
2987 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2988
2989 device->border_color_data.colors_gpu_ptr =
2990 device->ws->buffer_map(device->border_color_data.bo);
2991 if (!device->border_color_data.colors_gpu_ptr)
2992 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2993 pthread_mutex_init(&device->border_color_data.mutex, NULL);
2994
2995 return VK_SUCCESS;
2996 }
2997
2998 static void radv_device_finish_border_color(struct radv_device *device)
2999 {
3000 if (device->border_color_data.bo) {
3001 device->ws->buffer_destroy(device->border_color_data.bo);
3002
3003 pthread_mutex_destroy(&device->border_color_data.mutex);
3004 }
3005 }
3006
3007 VkResult radv_CreateDevice(
3008 VkPhysicalDevice physicalDevice,
3009 const VkDeviceCreateInfo* pCreateInfo,
3010 const VkAllocationCallbacks* pAllocator,
3011 VkDevice* pDevice)
3012 {
3013 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
3014 VkResult result;
3015 struct radv_device *device;
3016
3017 bool keep_shader_info = false;
3018 bool robust_buffer_access = false;
3019 bool overallocation_disallowed = false;
3020 bool custom_border_colors = false;
3021
3022 /* Check enabled features */
3023 if (pCreateInfo->pEnabledFeatures) {
3024 result = check_physical_device_features(physicalDevice,
3025 pCreateInfo->pEnabledFeatures);
3026 if (result != VK_SUCCESS)
3027 return result;
3028
3029 if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3030 robust_buffer_access = true;
3031 }
3032
3033 vk_foreach_struct_const(ext, pCreateInfo->pNext) {
3034 switch (ext->sType) {
3035 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3036 const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3037 result = check_physical_device_features(physicalDevice,
3038 &features->features);
3039 if (result != VK_SUCCESS)
3040 return result;
3041
3042 if (features->features.robustBufferAccess)
3043 robust_buffer_access = true;
3044 break;
3045 }
3046 case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
3047 const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
3048 if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
3049 overallocation_disallowed = true;
3050 break;
3051 }
3052 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
3053 const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
3054 custom_border_colors = border_color_features->customBorderColors;
3055 break;
3056 }
3057 default:
3058 break;
3059 }
3060 }
3061
3062 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
3063 sizeof(*device), 8,
3064 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3065 if (!device)
3066 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3067
3068 vk_device_init(&device->vk, pCreateInfo,
3069 &physical_device->instance->alloc, pAllocator);
3070
3071 device->instance = physical_device->instance;
3072 device->physical_device = physical_device;
3073
3074 device->ws = physical_device->ws;
3075
3076 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
3077 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
3078 int index = radv_get_device_extension_index(ext_name);
3079 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
3080 vk_free(&device->vk.alloc, device);
3081 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
3082 }
3083
3084 device->enabled_extensions.extensions[index] = true;
3085 }
3086
3087 radv_device_init_dispatch(device);
3088
3089 keep_shader_info = device->enabled_extensions.AMD_shader_info;
3090
3091 /* With update after bind we can't attach bo's to the command buffer
3092 * from the descriptor set anymore, so we have to use a global BO list.
3093 */
3094 device->use_global_bo_list =
3095 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
3096 device->enabled_extensions.EXT_descriptor_indexing ||
3097 device->enabled_extensions.EXT_buffer_device_address ||
3098 device->enabled_extensions.KHR_buffer_device_address;
3099
3100 device->robust_buffer_access = robust_buffer_access;
3101
3102 mtx_init(&device->shader_slab_mutex, mtx_plain);
3103 list_inithead(&device->shader_slabs);
3104
3105 device->overallocation_disallowed = overallocation_disallowed;
3106 mtx_init(&device->overallocation_mutex, mtx_plain);
3107
3108 radv_bo_list_init(&device->bo_list);
3109
3110 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3111 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3112 uint32_t qfi = queue_create->queueFamilyIndex;
3113 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3114 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3115
3116 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
3117
3118 device->queues[qfi] = vk_alloc(&device->vk.alloc,
3119 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3120 if (!device->queues[qfi]) {
3121 result = VK_ERROR_OUT_OF_HOST_MEMORY;
3122 goto fail;
3123 }
3124
3125 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
3126
3127 device->queue_count[qfi] = queue_create->queueCount;
3128
3129 for (unsigned q = 0; q < queue_create->queueCount; q++) {
3130 result = radv_queue_init(device, &device->queues[qfi][q],
3131 qfi, q, queue_create->flags,
3132 global_priority);
3133 if (result != VK_SUCCESS)
3134 goto fail;
3135 }
3136 }
3137
3138 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
3139 !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
3140
3141 /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
3142 device->dfsm_allowed = device->pbb_allowed &&
3143 (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
3144
3145 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
3146
3147 /* The maximum number of scratch waves. Scratch space isn't divided
3148 * evenly between CUs. The number is only a function of the number of CUs.
3149 * We can decrease the constant to decrease the scratch buffer size.
3150 *
3151 * sctx->scratch_waves must be >= the maximum possible size of
3152 * 1 threadgroup, so that the hw doesn't hang from being unable
3153 * to start any.
3154 *
3155 * The recommended value is 4 per CU at most. Higher numbers don't
3156 * bring much benefit, but they still occupy chip resources (think
3157 * async compute). I've seen ~2% performance difference between 4 and 32.
3158 */
3159 uint32_t max_threads_per_block = 2048;
3160 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
3161 max_threads_per_block / 64);
3162
3163 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
3164
3165 if (device->physical_device->rad_info.chip_class >= GFX7) {
3166 /* If the KMD allows it (there is a KMD hw register for it),
3167 * allow launching waves out-of-order.
3168 */
3169 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
3170 }
3171
3172 radv_device_init_gs_info(device);
3173
3174 device->tess_offchip_block_dw_size =
3175 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
3176
3177 if (getenv("RADV_TRACE_FILE")) {
3178 const char *filename = getenv("RADV_TRACE_FILE");
3179
3180 keep_shader_info = true;
3181
3182 if (!radv_init_trace(device))
3183 goto fail;
3184
3185 fprintf(stderr, "*****************************************************************************\n");
3186 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
3187 fprintf(stderr, "*****************************************************************************\n");
3188
3189 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
3190 radv_dump_enabled_options(device, stderr);
3191 }
3192
3193 int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
3194 if (radv_thread_trace >= 0) {
3195 fprintf(stderr, "*************************************************\n");
3196 fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
3197 fprintf(stderr, "*************************************************\n");
3198
3199 if (device->physical_device->rad_info.chip_class < GFX8) {
3200 fprintf(stderr, "GPU hardware not supported: refer to "
3201 "the RGP documentation for the list of "
3202 "supported GPUs!\n");
3203 abort();
3204 }
3205
3206 /* Default buffer size set to 1MB per SE. */
3207 device->thread_trace_buffer_size =
3208 radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
3209 device->thread_trace_start_frame = radv_thread_trace;
3210
3211 if (!radv_thread_trace_init(device))
3212 goto fail;
3213 }
3214
3215 /* Temporarily disable secure compile while we create meta shaders, etc */
3216 uint8_t sc_threads = device->instance->num_sc_threads;
3217 if (sc_threads)
3218 device->instance->num_sc_threads = 0;
3219
3220 device->keep_shader_info = keep_shader_info;
3221 result = radv_device_init_meta(device);
3222 if (result != VK_SUCCESS)
3223 goto fail;
3224
3225 radv_device_init_msaa(device);
3226
3227 /* If the border color extension is enabled, let's create the buffer we need. */
3228 if (custom_border_colors) {
3229 result = radv_device_init_border_color(device);
3230 if (result != VK_SUCCESS)
3231 goto fail;
3232 }
3233
3234 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
3235 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
3236 switch (family) {
3237 case RADV_QUEUE_GENERAL:
3238 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
3239 radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
3240 radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
3241 break;
3242 case RADV_QUEUE_COMPUTE:
3243 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
3244 radeon_emit(device->empty_cs[family], 0);
3245 break;
3246 }
3247 device->ws->cs_finalize(device->empty_cs[family]);
3248 }
3249
3250 if (device->physical_device->rad_info.chip_class >= GFX7)
3251 cik_create_gfx_config(device);
3252
3253 VkPipelineCacheCreateInfo ci;
3254 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
3255 ci.pNext = NULL;
3256 ci.flags = 0;
3257 ci.pInitialData = NULL;
3258 ci.initialDataSize = 0;
3259 VkPipelineCache pc;
3260 result = radv_CreatePipelineCache(radv_device_to_handle(device),
3261 &ci, NULL, &pc);
3262 if (result != VK_SUCCESS)
3263 goto fail_meta;
3264
3265 device->mem_cache = radv_pipeline_cache_from_handle(pc);
3266
3267 result = radv_create_pthread_cond(&device->timeline_cond);
3268 if (result != VK_SUCCESS)
3269 goto fail_mem_cache;
3270
3271 device->force_aniso =
3272 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
3273 if (device->force_aniso >= 0) {
3274 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
3275 1 << util_logbase2(device->force_aniso));
3276 }
3277
3278 /* Fork device for secure compile as required */
3279 device->instance->num_sc_threads = sc_threads;
3280 if (radv_device_use_secure_compile(device->instance)) {
3281
3282 result = fork_secure_compile_idle_device(device);
3283 if (result != VK_SUCCESS)
3284 goto fail_meta;
3285 }
3286
3287 *pDevice = radv_device_to_handle(device);
3288 return VK_SUCCESS;
3289
3290 fail_mem_cache:
3291 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3292 fail_meta:
3293 radv_device_finish_meta(device);
3294 fail:
3295 radv_bo_list_finish(&device->bo_list);
3296
3297 radv_thread_trace_finish(device);
3298
3299 if (device->trace_bo)
3300 device->ws->buffer_destroy(device->trace_bo);
3301
3302 if (device->gfx_init)
3303 device->ws->buffer_destroy(device->gfx_init);
3304
3305 radv_device_finish_border_color(device);
3306
3307 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3308 for (unsigned q = 0; q < device->queue_count[i]; q++)
3309 radv_queue_finish(&device->queues[i][q]);
3310 if (device->queue_count[i])
3311 vk_free(&device->vk.alloc, device->queues[i]);
3312 }
3313
3314 vk_free(&device->vk.alloc, device);
3315 return result;
3316 }
3317
/* Destroy a logical device, releasing resources in the reverse order of
 * creation: BOs, queues/CS objects, meta, pipeline cache, shader slabs,
 * tracing and finally the secure compile state.
 */
void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	/* Destroying VK_NULL_HANDLE is a no-op per the Vulkan spec. */
	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	radv_device_finish_border_color(device);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->vk.alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	pthread_cond_destroy(&device->timeline_cond);
	radv_bo_list_finish(&device->bo_list);

	radv_thread_trace_finish(device);

	/* Shut down and reap every secure compile child process. */
	if (radv_device_use_secure_compile(device->instance)) {
		for (unsigned i = 0; i < device->instance->num_sc_threads; i++ ) {
			destroy_secure_compile_device(device, i);
		}
	}

	if (device->sc_state) {
		/* uid was allocated by asprintf(), hence free() not vk_free(). */
		free(device->sc_state->uid);
		vk_free(&device->vk.alloc, device->sc_state->secure_compile_processes);
	}
	/* vk_free(NULL) is a no-op, so this is safe when sc_state was never
	 * allocated. */
	vk_free(&device->vk.alloc, device->sc_state);
	vk_free(&device->vk.alloc, device);
}
3368
3369 VkResult radv_EnumerateInstanceLayerProperties(
3370 uint32_t* pPropertyCount,
3371 VkLayerProperties* pProperties)
3372 {
3373 if (pProperties == NULL) {
3374 *pPropertyCount = 0;
3375 return VK_SUCCESS;
3376 }
3377
3378 /* None supported at this time */
3379 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3380 }
3381
3382 VkResult radv_EnumerateDeviceLayerProperties(
3383 VkPhysicalDevice physicalDevice,
3384 uint32_t* pPropertyCount,
3385 VkLayerProperties* pProperties)
3386 {
3387 if (pProperties == NULL) {
3388 *pPropertyCount = 0;
3389 return VK_SUCCESS;
3390 }
3391
3392 /* None supported at this time */
3393 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3394 }
3395
3396 void radv_GetDeviceQueue2(
3397 VkDevice _device,
3398 const VkDeviceQueueInfo2* pQueueInfo,
3399 VkQueue* pQueue)
3400 {
3401 RADV_FROM_HANDLE(radv_device, device, _device);
3402 struct radv_queue *queue;
3403
3404 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
3405 if (pQueueInfo->flags != queue->flags) {
3406 /* From the Vulkan 1.1.70 spec:
3407 *
3408 * "The queue returned by vkGetDeviceQueue2 must have the same
3409 * flags value from this structure as that used at device
3410 * creation time in a VkDeviceQueueCreateInfo instance. If no
3411 * matching flags were specified at device creation time then
3412 * pQueue will return VK_NULL_HANDLE."
3413 */
3414 *pQueue = VK_NULL_HANDLE;
3415 return;
3416 }
3417
3418 *pQueue = radv_queue_to_handle(queue);
3419 }
3420
3421 void radv_GetDeviceQueue(
3422 VkDevice _device,
3423 uint32_t queueFamilyIndex,
3424 uint32_t queueIndex,
3425 VkQueue* pQueue)
3426 {
3427 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
3428 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
3429 .queueFamilyIndex = queueFamilyIndex,
3430 .queueIndex = queueIndex
3431 };
3432
3433 radv_GetDeviceQueue2(_device, &info, pQueue);
3434 }
3435
/* Fill the ring-descriptor region of the queue's descriptor BO.
 *
 * map points at the CPU mapping of the descriptor BO; the first two dwords
 * (plus two of padding) hold the scratch descriptor, so ring descriptors
 * start at map[4].  Each ring contributes two 4-dword buffer descriptors
 * (a producer view and a consumer view), laid out in a fixed order:
 * ES->GS ring, GS->VS ring, tess rings.  When add_sample_positions is set,
 * the 1x/2x/4x/8x sample-location tables are appended after the rings.
 * A NULL ring BO leaves its descriptor slots untouched (but still skips
 * over them so later rings land at their fixed offsets).
 */
static void
fill_geom_tess_rings(struct radv_queue *queue,
		     uint32_t *map,
		     bool add_sample_positions,
		     uint32_t esgs_ring_size,
		     struct radeon_winsys_bo *esgs_ring_bo,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_bo *gsvs_ring_bo,
		     uint32_t tess_factor_ring_size,
		     uint32_t tess_offchip_ring_offset,
		     uint32_t tess_offchip_ring_size,
		     struct radeon_winsys_bo *tess_rings_bo)
{
	uint32_t *desc = &map[4];

	if (esgs_ring_bo) {
		uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);

		/* ES entry for the ES->GS ring. */
		/* stride 0, num records - size, add tid, swizzle, elsize4,
		   index stride 64 */
		desc[0] = esgs_va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
			  S_008F04_SWIZZLE_ENABLE(true);
		desc[2] = esgs_ring_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_INDEX_STRIDE(3) |
			  S_008F0C_ADD_TID_ENABLE(1);

		/* GFX10 re-encoded the descriptor format/OOB fields. */
		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
				   S_008F0C_RESOURCE_LEVEL(1);
		} else {
			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
				   S_008F0C_ELEMENT_SIZE(1);
		}

		/* GS entry for ES->GS ring */
		/* stride 0, num records - size, elsize0,
		   index stride 0 */
		desc[4] = esgs_va;
		desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
		desc[6] = esgs_ring_size;
		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
				   S_008F0C_RESOURCE_LEVEL(1);
		} else {
			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
		}
	}

	desc += 8;

	if (gsvs_ring_bo) {
		uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);

		/* VS entry for GS->VS ring */
		/* stride 0, num records - size, elsize0,
		   index stride 0 */
		desc[0] = gsvs_va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
		desc[2] = gsvs_ring_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
				   S_008F0C_RESOURCE_LEVEL(1);
		} else {
			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
		}

		/* GS entry for the GS->VS ring. */
		/* stride gsvs_itemsize, num records 64
		   elsize 4, index stride 16 */
		/* shader will patch stride and desc[2] */
		desc[4] = gsvs_va;
		desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
			  S_008F04_SWIZZLE_ENABLE(1);
		desc[6] = 0;
		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
			  S_008F0C_INDEX_STRIDE(1) |
			  S_008F0C_ADD_TID_ENABLE(true);

		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
				   S_008F0C_RESOURCE_LEVEL(1);
		} else {
			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
				   S_008F0C_ELEMENT_SIZE(1);
		}

	}

	desc += 8;

	if (tess_rings_bo) {
		/* Tess factor ring lives at the start of the BO; the offchip
		 * ring follows at tess_offchip_ring_offset. */
		uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
		uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;

		desc[0] = tess_va;
		desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
		desc[2] = tess_factor_ring_size;
		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
				   S_008F0C_RESOURCE_LEVEL(1);
		} else {
			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
		}

		desc[4] = tess_offchip_va;
		desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
		desc[6] = tess_offchip_ring_size;
		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
				   S_008F0C_RESOURCE_LEVEL(1);
		} else {
			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
		}
	}

	desc += 8;

	if (add_sample_positions) {
		/* add sample positions after all rings */
		memcpy(desc, queue->device->sample_locations_1x, 8);
		desc += 2;
		memcpy(desc, queue->device->sample_locations_2x, 16);
		desc += 4;
		memcpy(desc, queue->device->sample_locations_4x, 32);
		desc += 8;
		memcpy(desc, queue->device->sample_locations_8x, 64);
	}
}
3603
/* Compute the packed VGT_HS_OFFCHIP_PARAM register value for this device
 * and store the number of offchip tessellation buffers in
 * *max_offchip_buffers_p.  Note the stored count is taken before the
 * GFX8+ register-encoding decrement applied below.
 */
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
		device->physical_device->rad_info.family != CHIP_CARRIZO &&
		device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;

	/*
	 * Per RadeonSI:
	 * This must be one less than the maximum number due to a hw limitation.
	 * Various hardware bugs need this.
	 *
	 * Per AMDVLK:
	 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
	 * Gfx7 should limit max_offchip_buffers to 508
	 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
	 *
	 * Follow AMDVLK here.
	 */
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		max_offchip_buffers_per_se = 256;
	} else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
		   device->physical_device->rad_info.chip_class == GFX7 ||
		   device->physical_device->rad_info.chip_class == GFX6)
		--max_offchip_buffers_per_se;

	max_offchip_buffers = max_offchip_buffers_per_se *
		device->physical_device->rad_info.max_se;

	/* Hawaii has a bug with offchip buffers > 256 that can be worked
	 * around by setting 4K granularity.
	 */
	if (device->tess_offchip_block_dw_size == 4096) {
		assert(device->physical_device->rad_info.family == CHIP_HAWAII);
		offchip_granularity = V_03093C_X_4K_DWORDS;
	} else {
		assert(device->tess_offchip_block_dw_size == 8192);
		offchip_granularity = V_03093C_X_8K_DWORDS;
	}

	/* Clamp to the per-generation limits quoted from AMDVLK above. */
	switch (device->physical_device->rad_info.chip_class) {
	case GFX6:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case GFX7:
	case GFX8:
	case GFX9:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	case GFX10:
		break;
	default:
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	/* The register field layout differs per generation. */
	if (device->physical_device->rad_info.chip_class >= GFX10_3) {
		hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
				   S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
	} else if (device->physical_device->rad_info.chip_class >= GFX7) {
		/* GFX8+ encodes the count biased by one. */
		if (device->physical_device->rad_info.chip_class >= GFX8)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}
3679
3680 static void
3681 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3682 struct radeon_winsys_bo *esgs_ring_bo,
3683 uint32_t esgs_ring_size,
3684 struct radeon_winsys_bo *gsvs_ring_bo,
3685 uint32_t gsvs_ring_size)
3686 {
3687 if (!esgs_ring_bo && !gsvs_ring_bo)
3688 return;
3689
3690 if (esgs_ring_bo)
3691 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
3692
3693 if (gsvs_ring_bo)
3694 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
3695
3696 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3697 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3698 radeon_emit(cs, esgs_ring_size >> 8);
3699 radeon_emit(cs, gsvs_ring_size >> 8);
3700 } else {
3701 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3702 radeon_emit(cs, esgs_ring_size >> 8);
3703 radeon_emit(cs, gsvs_ring_size >> 8);
3704 }
3705 }
3706
/* Emit the tessellation-factor ring registers (size, base address, and
 * the HS offchip parameter computed by radv_get_hs_offchip_param) into
 * the preamble, and reference the ring BO.  No-op without a ring BO.
 */
static void
radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
			   unsigned hs_offchip_param, unsigned tf_ring_size,
			   struct radeon_winsys_bo *tess_rings_bo)
{
	uint64_t tf_va;

	if (!tess_rings_bo)
		return;

	tf_va = radv_buffer_get_va(tess_rings_bo);

	radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);

	if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
		/* GFX7+: uconfig registers; size is in units of 4 bytes,
		 * base address in units of 256 bytes. */
		radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
				       S_030938_SIZE(tf_ring_size / 4));
		radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
				       tf_va >> 8);

		/* Bits above 40 of the base address need an extra register
		 * whose location differs between GFX9 and GFX10. */
		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
			radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
					       S_030984_BASE_HI(tf_va >> 40));
		} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
			radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
					       S_030944_BASE_HI(tf_va >> 40));
		}
		radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
				       hs_offchip_param);
	} else {
		/* GFX6: legacy config-register path. */
		radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
				      S_008988_SIZE(tf_ring_size / 4));
		radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
				      tf_va >> 8);
		radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
				      hs_offchip_param);
	}
}
3745
3746 static void
3747 radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3748 uint32_t size_per_wave, uint32_t waves,
3749 struct radeon_winsys_bo *scratch_bo)
3750 {
3751 if (queue->queue_family_index != RADV_QUEUE_GENERAL)
3752 return;
3753
3754 if (!scratch_bo)
3755 return;
3756
3757 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3758
3759 radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
3760 S_0286E8_WAVES(waves) |
3761 S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3762 }
3763
3764 static void
3765 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3766 uint32_t size_per_wave, uint32_t waves,
3767 struct radeon_winsys_bo *compute_scratch_bo)
3768 {
3769 uint64_t scratch_va;
3770
3771 if (!compute_scratch_bo)
3772 return;
3773
3774 scratch_va = radv_buffer_get_va(compute_scratch_bo);
3775
3776 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
3777
3778 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
3779 radeon_emit(cs, scratch_va);
3780 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3781 S_008F04_SWIZZLE_ENABLE(1));
3782
3783 radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
3784 S_00B860_WAVES(waves) |
3785 S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3786 }
3787
3788 static void
3789 radv_emit_global_shader_pointers(struct radv_queue *queue,
3790 struct radeon_cmdbuf *cs,
3791 struct radeon_winsys_bo *descriptor_bo)
3792 {
3793 uint64_t va;
3794
3795 if (!descriptor_bo)
3796 return;
3797
3798 va = radv_buffer_get_va(descriptor_bo);
3799
3800 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
3801
3802 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3803 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3804 R_00B130_SPI_SHADER_USER_DATA_VS_0,
3805 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3806 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3807
3808 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3809 radv_emit_shader_pointer(queue->device, cs, regs[i],
3810 va, true);
3811 }
3812 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3813 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3814 R_00B130_SPI_SHADER_USER_DATA_VS_0,
3815 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3816 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3817
3818 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3819 radv_emit_shader_pointer(queue->device, cs, regs[i],
3820 va, true);
3821 }
3822 } else {
3823 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3824 R_00B130_SPI_SHADER_USER_DATA_VS_0,
3825 R_00B230_SPI_SHADER_USER_DATA_GS_0,
3826 R_00B330_SPI_SHADER_USER_DATA_ES_0,
3827 R_00B430_SPI_SHADER_USER_DATA_HS_0,
3828 R_00B530_SPI_SHADER_USER_DATA_LS_0};
3829
3830 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3831 radv_emit_shader_pointer(queue->device, cs, regs[i],
3832 va, true);
3833 }
3834 }
3835 }
3836
3837 static void
3838 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3839 {
3840 struct radv_device *device = queue->device;
3841
3842 if (device->gfx_init) {
3843 uint64_t va = radv_buffer_get_va(device->gfx_init);
3844
3845 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
3846 radeon_emit(cs, va);
3847 radeon_emit(cs, va >> 32);
3848 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
3849
3850 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
3851 } else {
3852 si_emit_graphics(device, cs);
3853 }
3854 }
3855
3856 static void
3857 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3858 {
3859 struct radv_physical_device *physical_device = queue->device->physical_device;
3860 si_emit_compute(physical_device, cs);
3861 }
3862
/* Build (or reuse) the three per-queue preamble command buffers:
 *   dest_cs[0] - initial preamble with a full cache flush,
 *   dest_cs[1] - initial preamble without the flush,
 *   dest_cs[2] - "continue" preamble for chained submissions (no flush).
 * The preambles bind scratch, GS/tess rings, GDS and the global descriptor
 * BO.  Sizes only ever grow: if the cached queue state already satisfies
 * the request, the existing preambles are returned unchanged.  On failure
 * every newly created object is destroyed; queue state is untouched.
 */
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
		     uint32_t scratch_size_per_wave,
		     uint32_t scratch_waves,
		     uint32_t compute_scratch_size_per_wave,
		     uint32_t compute_scratch_waves,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     bool needs_tess_rings,
		     bool needs_gds,
		     bool needs_gds_oa,
		     bool needs_sample_positions,
		     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
		     struct radeon_cmdbuf **initial_preamble_cs,
		     struct radeon_cmdbuf **continue_preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_bo *tess_rings_bo = NULL;
	struct radeon_winsys_bo *gds_bo = NULL;
	struct radeon_winsys_bo *gds_oa_bo = NULL;
	struct radeon_cmdbuf *dest_cs[3] = {0};
	bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
	unsigned max_offchip_buffers;
	unsigned hs_offchip_param = 0;
	unsigned tess_offchip_ring_offset;
	uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
	/* "add_*" means the resource is newly needed this call; once a queue
	 * has a resource it keeps it for its lifetime. */
	if (!queue->has_tess_rings) {
		if (needs_tess_rings)
			add_tess_rings = true;
	}
	if (!queue->has_gds) {
		if (needs_gds)
			add_gds = true;
	}
	if (!queue->has_gds_oa) {
		if (needs_gds_oa)
			add_gds_oa = true;
	}
	if (!queue->has_sample_positions) {
		if (needs_sample_positions)
			add_sample_positions = true;
	}
	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
						     &max_offchip_buffers);
	tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
	tess_offchip_ring_size = max_offchip_buffers *
		queue->device->tess_offchip_block_dw_size * 4;

	/* Never shrink scratch; clamp wave counts so size*waves cannot
	 * overflow 32 bits. */
	scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
	if (scratch_size_per_wave)
		scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
	else
		scratch_waves = 0;

	compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
	if (compute_scratch_size_per_wave)
		compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
	else
		compute_scratch_waves = 0;

	/* Fast path: cached preambles already cover this request. */
	if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
	    scratch_waves <= queue->scratch_waves &&
	    compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
	    compute_scratch_waves <= queue->compute_scratch_waves &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size &&
	    !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
	    queue->initial_preamble_cs) {
		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
		*initial_preamble_cs = queue->initial_preamble_cs;
		*continue_preamble_cs = queue->continue_preamble_cs;
		/* A submission needing none of these resources can skip the
		 * continue preamble entirely. */
		if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
		    !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
		    !needs_gds && !needs_gds_oa && !needs_sample_positions)
			*continue_preamble_cs = NULL;
		return VK_SUCCESS;
	}

	/* Allocate grown resources; otherwise reuse the queue's BOs. */
	uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
	uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
	if (scratch_size > queue_scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
							      scratch_size,
							      4096,
							      RADEON_DOMAIN_VRAM,
							      ring_bo_flags,
							      RADV_BO_PRIORITY_SCRATCH);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
	uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
	if (compute_scratch_size > compute_queue_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
								      compute_scratch_size,
								      4096,
								      RADEON_DOMAIN_VRAM,
								      ring_bo_flags,
								      RADV_BO_PRIORITY_SCRATCH);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags,
								RADV_BO_PRIORITY_SCRATCH);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags,
								RADV_BO_PRIORITY_SCRATCH);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (add_tess_rings) {
		/* One BO holds both the tess factor ring (at offset 0) and
		 * the offchip ring (at tess_offchip_ring_offset). */
		tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
								 tess_offchip_ring_offset + tess_offchip_ring_size,
								 256,
								 RADEON_DOMAIN_VRAM,
								 ring_bo_flags,
								 RADV_BO_PRIORITY_SCRATCH);
		if (!tess_rings_bo)
			goto fail;
	} else {
		tess_rings_bo = queue->tess_rings_bo;
	}

	if (add_gds) {
		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);

		/* 4 streamout GDS counters.
		 * We need 256B (64 dw) of GDS, otherwise streamout hangs.
		 */
		gds_bo = queue->device->ws->buffer_create(queue->device->ws,
							  256, 4,
							  RADEON_DOMAIN_GDS,
							  ring_bo_flags,
							  RADV_BO_PRIORITY_SCRATCH);
		if (!gds_bo)
			goto fail;
	} else {
		gds_bo = queue->gds_bo;
	}

	if (add_gds_oa) {
		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);

		gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
							     4, 1,
							     RADEON_DOMAIN_OA,
							     ring_bo_flags,
							     RADV_BO_PRIORITY_SCRATCH);
		if (!gds_oa_bo)
			goto fail;
	} else {
		gds_oa_bo = queue->gds_oa_bo;
	}

	/* The descriptor BO must be rebuilt whenever any descriptor it
	 * contains changed. */
	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo ||
	    tess_rings_bo != queue->tess_rings_bo ||
	    add_sample_positions) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo ||
		    tess_rings_bo || add_sample_positions) {
			size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
			if (add_sample_positions)
				size += 128; /* 64+32+16+8 = 120 bytes */
		}
		else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
		                                                 size,
		                                                 4096,
		                                                 RADEON_DOMAIN_VRAM,
		                                                 RADEON_FLAG_CPU_ACCESS |
								 RADEON_FLAG_NO_INTERPROCESS_SHARING |
								 RADEON_FLAG_READ_ONLY,
								 RADV_BO_PRIORITY_DESCRIPTOR);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

	if (descriptor_bo != queue->descriptor_bo) {
		/* Fill the fresh descriptor BO: scratch V# in the first two
		 * dwords, ring descriptors from dword 4 onward. */
		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
		if (!map)
			goto fail;

		if (scratch_bo) {
			uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
			                 S_008F04_SWIZZLE_ENABLE(1);
			map[0] = scratch_va;
			map[1] = rsrc1;
		}

		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
			fill_geom_tess_rings(queue, map, add_sample_positions,
					     esgs_ring_size, esgs_ring_bo,
					     gsvs_ring_size, gsvs_ring_bo,
					     tess_factor_ring_size,
					     tess_offchip_ring_offset,
					     tess_offchip_ring_size,
					     tess_rings_bo);

		queue->device->ws->buffer_unmap(descriptor_bo);
	}

	/* Record the three preamble variants; they differ only in the cache
	 * flush emitted at the end (full / partial / none). */
	for(int i = 0; i < 3; ++i) {
		struct radeon_cmdbuf *cs = NULL;
		cs = queue->device->ws->cs_create(queue->device->ws,
						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
		if (!cs)
			goto fail;

		dest_cs[i] = cs;

		if (scratch_bo)
			radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

		/* Emit initial configuration. */
		switch (queue->queue_family_index) {
		case RADV_QUEUE_GENERAL:
			radv_init_graphics_state(cs, queue);
			break;
		case RADV_QUEUE_COMPUTE:
			radv_init_compute_state(cs, queue);
			break;
		case RADV_QUEUE_TRANSFER:
			break;
		}

		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
			/* Flush the VGT before reprogramming ring state. */
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));

			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
		}

		radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
					gsvs_ring_bo, gsvs_ring_size);
		radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
					   tess_factor_ring_size, tess_rings_bo);
		radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
		radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
					  compute_scratch_waves, compute_scratch_bo);
		radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
		                           scratch_waves, scratch_bo);

		if (gds_bo)
			radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
		if (gds_oa_bo)
			radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);

		if (queue->device->trace_bo)
			radv_cs_add_buffer(queue->device->ws, cs, queue->device->trace_bo);

		if (i == 0) {
			/* Full-flush variant: also waits for prior work. */
			si_cs_emit_cache_flush(cs,
			                       queue->device->physical_device->rad_info.chip_class,
					       NULL, 0,
			                       queue->queue_family_index == RING_COMPUTE &&
			                         queue->device->physical_device->rad_info.chip_class >= GFX7,
			                       (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SCACHE |
			                       RADV_CMD_FLAG_INV_VCACHE |
			                       RADV_CMD_FLAG_INV_L2 |
					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
		} else if (i == 1) {
			/* Initial variant: invalidate caches only. */
			si_cs_emit_cache_flush(cs,
			                       queue->device->physical_device->rad_info.chip_class,
					       NULL, 0,
			                       queue->queue_family_index == RING_COMPUTE &&
			                         queue->device->physical_device->rad_info.chip_class >= GFX7,
			                       RADV_CMD_FLAG_INV_ICACHE |
			                       RADV_CMD_FLAG_INV_SCACHE |
			                       RADV_CMD_FLAG_INV_VCACHE |
			                       RADV_CMD_FLAG_INV_L2 |
					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
		}

		if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
			goto fail;
	}

	/* Commit: destroy old preambles, install the new ones. */
	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);

	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);

	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);

	queue->initial_full_flush_preamble_cs = dest_cs[0];
	queue->initial_preamble_cs = dest_cs[1];
	queue->continue_preamble_cs = dest_cs[2];

	/* Swap in the grown BOs, destroying the ones they replace. */
	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
	}
	queue->scratch_size_per_wave = scratch_size_per_wave;
	queue->scratch_waves = scratch_waves;

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
	}
	queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
	queue->compute_scratch_waves = compute_scratch_waves;

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	if (tess_rings_bo != queue->tess_rings_bo) {
		queue->tess_rings_bo = tess_rings_bo;
		queue->has_tess_rings = true;
	}

	if (gds_bo != queue->gds_bo) {
		queue->gds_bo = gds_bo;
		queue->has_gds = true;
	}

	if (gds_oa_bo != queue->gds_oa_bo) {
		queue->gds_oa_bo = gds_oa_bo;
		queue->has_gds_oa = true;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	if (add_sample_positions)
		queue->has_sample_positions = true;

	*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
	*initial_preamble_cs = queue->initial_preamble_cs;
	*continue_preamble_cs = queue->continue_preamble_cs;
	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
		*continue_preamble_cs = NULL;
	return VK_SUCCESS;
fail:
	/* Destroy everything created this call; anything taken from the
	 * queue is left alone. */
	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
		if (dest_cs[i])
			queue->device->ws->cs_destroy(dest_cs[i]);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(tess_rings_bo);
	if (gds_bo && gds_bo != queue->gds_bo)
		queue->device->ws->buffer_destroy(gds_bo);
	if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
		queue->device->ws->buffer_destroy(gds_oa_bo);

	return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
4277
/* Populate a radv_winsys_sem_counts structure from a list of semaphore
 * parts plus an optional fence: first count how many syncobjs and legacy
 * winsys semaphores are needed, allocate the arrays, then fill them.
 * For timeline semaphores, resolves the wait/signal value to a per-point
 * syncobj under the timeline mutex; an unresolvable wait point clears the
 * caller's sems[i] slot (the caller is expected to handle the NULL).
 * The final syncobj_count is trimmed to the number actually filled.
 * Arrays are owned by the caller and released via radv_free_sem_info().
 */
static VkResult radv_alloc_sem_counts(struct radv_device *device,
				      struct radv_winsys_sem_counts *counts,
				      int num_sems,
				      struct radv_semaphore_part **sems,
				      const uint64_t *timeline_values,
				      VkFence _fence,
				      bool is_signal)
{
	int syncobj_idx = 0, sem_idx = 0;

	if (num_sems == 0 && _fence == VK_NULL_HANDLE)
		return VK_SUCCESS;

	/* Pass 1: count required array slots per semaphore kind. */
	for (uint32_t i = 0; i < num_sems; i++) {
		switch(sems[i]->kind) {
		case RADV_SEMAPHORE_SYNCOBJ:
			counts->syncobj_count++;
			break;
		case RADV_SEMAPHORE_WINSYS:
			counts->sem_count++;
			break;
		case RADV_SEMAPHORE_NONE:
			break;
		case RADV_SEMAPHORE_TIMELINE:
			counts->syncobj_count++;
			break;
		}
	}

	/* The fence, if backed by a syncobj, takes one more slot. */
	if (_fence != VK_NULL_HANDLE) {
		RADV_FROM_HANDLE(radv_fence, fence, _fence);
		if (fence->temp_syncobj || fence->syncobj)
			counts->syncobj_count++;
	}

	if (counts->syncobj_count) {
		counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
		if (!counts->syncobj)
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
	}

	if (counts->sem_count) {
		counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
		if (!counts->sem) {
			free(counts->syncobj);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
	}

	/* Pass 2: fill the arrays. */
	for (uint32_t i = 0; i < num_sems; i++) {
		switch(sems[i]->kind) {
		case RADV_SEMAPHORE_NONE:
			unreachable("Empty semaphore");
			break;
		case RADV_SEMAPHORE_SYNCOBJ:
			counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
			break;
		case RADV_SEMAPHORE_WINSYS:
			counts->sem[sem_idx++] = sems[i]->ws_sem;
			break;
		case RADV_SEMAPHORE_TIMELINE: {
			/* Signals create a new timeline point; waits look up
			 * an existing point at or past the value. */
			pthread_mutex_lock(&sems[i]->timeline.mutex);
			struct radv_timeline_point *point = NULL;
			if (is_signal) {
				point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
			} else {
				point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
			}

			pthread_mutex_unlock(&sems[i]->timeline.mutex);

			if (point) {
				counts->syncobj[syncobj_idx++] = point->syncobj;
			} else {
				/* Explicitly remove the semaphore so we might not find
				 * a point later post-submit. */
				sems[i] = NULL;
			}
			break;
		}
		}
	}

	if (_fence != VK_NULL_HANDLE) {
		RADV_FROM_HANDLE(radv_fence, fence, _fence);
		/* The temporary syncobj, when present, takes precedence. */
		if (fence->temp_syncobj)
			counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
		else if (fence->syncobj)
			counts->syncobj[syncobj_idx++] = fence->syncobj;
	}

	/* Trim: unresolved timeline waits leave unused slots. */
	assert(syncobj_idx <= counts->syncobj_count);
	counts->syncobj_count = syncobj_idx;

	return VK_SUCCESS;
}
4374
4375 static void
4376 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
4377 {
4378 free(sem_info->wait.syncobj);
4379 free(sem_info->wait.sem);
4380 free(sem_info->signal.syncobj);
4381 free(sem_info->signal.sem);
4382 }
4383
4384
4385 static void radv_free_temp_syncobjs(struct radv_device *device,
4386 int num_sems,
4387 struct radv_semaphore_part *sems)
4388 {
4389 for (uint32_t i = 0; i < num_sems; i++) {
4390 radv_destroy_semaphore_part(device, sems + i);
4391 }
4392 }
4393
/* Build the winsys semaphore info for one submission: converts the wait
 * and signal semaphore parts (plus the optional fence, attached to the
 * signal side) into flat arrays via radv_alloc_sem_counts().
 *
 * On success the caller must eventually call radv_free_sem_info().
 */
static VkResult
radv_alloc_sem_info(struct radv_device *device,
		    struct radv_winsys_sem_info *sem_info,
		    int num_wait_sems,
		    struct radv_semaphore_part **wait_sems,
		    const uint64_t *wait_values,
		    int num_signal_sems,
		    struct radv_semaphore_part **signal_sems,
		    const uint64_t *signal_values,
		    VkFence fence)
{
	VkResult ret;
	memset(sem_info, 0, sizeof(*sem_info));

	ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
	if (ret)
		return ret;
	ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
	if (ret)
		/* Release whatever the successful wait-side pass allocated. */
		radv_free_sem_info(sem_info);

	/* caller can override these */
	sem_info->cs_emit_wait = true;
	sem_info->cs_emit_signal = true;
	return ret;
}
4420
/* Post-submit bookkeeping for timeline semaphores.
 *
 * Drops the references taken on each timeline point when the submission
 * was built (the -= 2 presumably matches two refs taken at point
 * lookup/creation — TODO confirm against the radv_timeline_* helpers),
 * advances highest_submitted for signalled timelines, and wakes any
 * submissions waiting on the newly reachable values by appending them to
 * processing_list.
 *
 * NULL entries are timeline waits whose point did not exist at submit
 * time (cleared in radv_alloc_sem_counts) and are skipped.
 */
static void
radv_finalize_timelines(struct radv_device *device,
                        uint32_t num_wait_sems,
                        struct radv_semaphore_part **wait_sems,
                        const uint64_t *wait_values,
                        uint32_t num_signal_sems,
                        struct radv_semaphore_part **signal_sems,
                        const uint64_t *signal_values,
                        struct list_head *processing_list)
{
	for (uint32_t i = 0; i < num_wait_sems; ++i) {
		if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
			pthread_mutex_lock(&wait_sems[i]->timeline.mutex);
			struct radv_timeline_point *point =
				radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
			point->wait_count -= 2;
			pthread_mutex_unlock(&wait_sems[i]->timeline.mutex);
		}
	}
	for (uint32_t i = 0; i < num_signal_sems; ++i) {
		if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
			pthread_mutex_lock(&signal_sems[i]->timeline.mutex);
			struct radv_timeline_point *point =
				radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
			/* MAX2: another thread may already have submitted a
			 * higher value. */
			signal_sems[i]->timeline.highest_submitted =
				MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
			point->wait_count -= 2;
			/* Unblock deferred submissions waiting on this value. */
			radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
			pthread_mutex_unlock(&signal_sems[i]->timeline.mutex);
		}
	}
}
4453
4454 static void
4455 radv_sparse_buffer_bind_memory(struct radv_device *device,
4456 const VkSparseBufferMemoryBindInfo *bind)
4457 {
4458 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4459
4460 for (uint32_t i = 0; i < bind->bindCount; ++i) {
4461 struct radv_device_memory *mem = NULL;
4462
4463 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4464 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4465
4466 device->ws->buffer_virtual_bind(buffer->bo,
4467 bind->pBinds[i].resourceOffset,
4468 bind->pBinds[i].size,
4469 mem ? mem->bo : NULL,
4470 bind->pBinds[i].memoryOffset);
4471 }
4472 }
4473
4474 static void
4475 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4476 const VkSparseImageOpaqueMemoryBindInfo *bind)
4477 {
4478 RADV_FROM_HANDLE(radv_image, image, bind->image);
4479
4480 for (uint32_t i = 0; i < bind->bindCount; ++i) {
4481 struct radv_device_memory *mem = NULL;
4482
4483 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4484 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4485
4486 device->ws->buffer_virtual_bind(image->bo,
4487 bind->pBinds[i].resourceOffset,
4488 bind->pBinds[i].size,
4489 mem ? mem->bo : NULL,
4490 bind->pBinds[i].memoryOffset);
4491 }
4492 }
4493
/* Aggregate the resource requirements (scratch, rings, GDS, sample
 * positions) of all command buffers in a submission and obtain preamble
 * command streams sized for the combined maximum.
 *
 * Outputs (owned by the queue, not the caller):
 *   initial_full_flush_preamble_cs - preamble that also flushes caches
 *   initial_preamble_cs            - preamble without the flush
 *   continue_preamble_cs           - preamble for chained/continued IBs
 */
static VkResult
radv_get_preambles(struct radv_queue *queue,
                   const VkCommandBuffer *cmd_buffers,
                   uint32_t cmd_buffer_count,
                   struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                   struct radeon_cmdbuf **initial_preamble_cs,
                   struct radeon_cmdbuf **continue_preamble_cs)
{
	uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
	uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
	bool tess_rings_needed = false;
	bool gds_needed = false;
	bool gds_oa_needed = false;
	bool sample_positions_needed = false;

	/* Take the maximum of every per-command-buffer requirement so one
	 * preamble set covers the whole submission. */
	for (uint32_t j = 0; j < cmd_buffer_count; j++) {
		RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
				 cmd_buffers[j]);

		scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
		waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
		compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
		                                     cmd_buffer->compute_scratch_size_per_wave_needed);
		compute_waves_wanted = MAX2(compute_waves_wanted,
		                            cmd_buffer->compute_scratch_waves_wanted);
		esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
		gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
		tess_rings_needed |= cmd_buffer->tess_rings_needed;
		gds_needed |= cmd_buffer->gds_needed;
		gds_oa_needed |= cmd_buffer->gds_oa_needed;
		sample_positions_needed |= cmd_buffer->sample_positions_needed;
	}

	return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
	                            compute_scratch_size_per_wave, compute_waves_wanted,
	                            esgs_ring_size, gsvs_ring_size, tess_rings_needed,
	                            gds_needed, gds_oa_needed, sample_positions_needed,
	                            initial_full_flush_preamble_cs,
	                            initial_preamble_cs, continue_preamble_cs);
}
4535
/* A queue submission captured for deferred execution.  Allocated as one
 * block by radv_create_deferred_submission(); all pointer members point
 * into the trailing storage of that allocation and the whole thing is
 * released with a single free(). */
struct radv_deferred_queue_submission {
	struct radv_queue *queue;
	VkCommandBuffer *cmd_buffers;
	uint32_t cmd_buffer_count;

	/* Sparse bindings that happen on a queue. */
	VkSparseBufferMemoryBindInfo *buffer_binds;
	uint32_t buffer_bind_count;
	VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
	uint32_t image_opaque_bind_count;

	bool flush_caches;
	/* NOTE(review): declared VkShaderStageFlags but populated from a
	 * VkPipelineStageFlags (see struct radv_queue_submission) — only
	 * its truthiness is used, but the type looks wrong; confirm. */
	VkShaderStageFlags wait_dst_stage_mask;
	struct radv_semaphore_part **wait_semaphores;
	uint32_t wait_semaphore_count;
	struct radv_semaphore_part **signal_semaphores;
	uint32_t signal_semaphore_count;
	VkFence fence;

	uint64_t *wait_values;
	uint64_t *signal_values;

	/* Temporary semaphore payloads whose ownership was transferred from
	 * the VkSemaphore objects to this submission. */
	struct radv_semaphore_part *temporary_semaphore_parts;
	uint32_t temporary_semaphore_part_count;

	/* Link in the owning queue's pending_submissions list. */
	struct list_head queue_pending_list;
	/* Number of events (timeline waits + predecessor submission) that
	 * must occur before this submission may run. */
	uint32_t submission_wait_count;
	/* Per-timeline wait nodes, stored in the trailing allocation. */
	struct radv_timeline_waiter *wait_nodes;

	/* Link in the ready-to-process list. */
	struct list_head processing_list;
};
4567
/* Caller-side description of one queue submission; all arrays are borrowed
 * (not owned) and are copied into a radv_deferred_queue_submission before
 * the call returns. */
struct radv_queue_submission {
	const VkCommandBuffer *cmd_buffers;
	uint32_t cmd_buffer_count;

	/* Sparse bindings that happen on a queue. */
	const VkSparseBufferMemoryBindInfo *buffer_binds;
	uint32_t buffer_bind_count;
	const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
	uint32_t image_opaque_bind_count;

	bool flush_caches;
	VkPipelineStageFlags wait_dst_stage_mask;
	const VkSemaphore *wait_semaphores;
	uint32_t wait_semaphore_count;
	const VkSemaphore *signal_semaphores;
	uint32_t signal_semaphore_count;
	VkFence fence;

	/* Timeline semaphore values; counts may be zero when no
	 * VkTimelineSemaphoreSubmitInfo was chained. */
	const uint64_t *wait_values;
	uint32_t wait_value_count;
	const uint64_t *signal_values;
	uint32_t signal_value_count;
};
4591
4592 static VkResult
4593 radv_create_deferred_submission(struct radv_queue *queue,
4594 const struct radv_queue_submission *submission,
4595 struct radv_deferred_queue_submission **out)
4596 {
4597 struct radv_deferred_queue_submission *deferred = NULL;
4598 size_t size = sizeof(struct radv_deferred_queue_submission);
4599
4600 uint32_t temporary_count = 0;
4601 for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4602 RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4603 if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
4604 ++temporary_count;
4605 }
4606
4607 size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
4608 size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
4609 size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
4610 size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
4611 size += temporary_count * sizeof(struct radv_semaphore_part);
4612 size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
4613 size += submission->wait_value_count * sizeof(uint64_t);
4614 size += submission->signal_value_count * sizeof(uint64_t);
4615 size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
4616
4617 deferred = calloc(1, size);
4618 if (!deferred)
4619 return VK_ERROR_OUT_OF_HOST_MEMORY;
4620
4621 deferred->queue = queue;
4622
4623 deferred->cmd_buffers = (void*)(deferred + 1);
4624 deferred->cmd_buffer_count = submission->cmd_buffer_count;
4625 memcpy(deferred->cmd_buffers, submission->cmd_buffers,
4626 submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
4627
4628 deferred->buffer_binds = (void*)(deferred->cmd_buffers + submission->cmd_buffer_count);
4629 deferred->buffer_bind_count = submission->buffer_bind_count;
4630 memcpy(deferred->buffer_binds, submission->buffer_binds,
4631 submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
4632
4633 deferred->image_opaque_binds = (void*)(deferred->buffer_binds + submission->buffer_bind_count);
4634 deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
4635 memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
4636 submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
4637
4638 deferred->flush_caches = submission->flush_caches;
4639 deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
4640
4641 deferred->wait_semaphores = (void*)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
4642 deferred->wait_semaphore_count = submission->wait_semaphore_count;
4643
4644 deferred->signal_semaphores = (void*)(deferred->wait_semaphores + deferred->wait_semaphore_count);
4645 deferred->signal_semaphore_count = submission->signal_semaphore_count;
4646
4647 deferred->fence = submission->fence;
4648
4649 deferred->temporary_semaphore_parts = (void*)(deferred->signal_semaphores + deferred->signal_semaphore_count);
4650 deferred->temporary_semaphore_part_count = temporary_count;
4651
4652 uint32_t temporary_idx = 0;
4653 for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4654 RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4655 if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4656 deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
4657 deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
4658 semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
4659 ++temporary_idx;
4660 } else
4661 deferred->wait_semaphores[i] = &semaphore->permanent;
4662 }
4663
4664 for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
4665 RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
4666 if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4667 deferred->signal_semaphores[i] = &semaphore->temporary;
4668 } else {
4669 deferred->signal_semaphores[i] = &semaphore->permanent;
4670 }
4671 }
4672
4673 deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
4674 memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
4675 deferred->signal_values = deferred->wait_values + submission->wait_value_count;
4676 memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
4677
4678 deferred->wait_nodes = (void*)(deferred->signal_values + submission->signal_value_count);
4679 /* This is worst-case. radv_queue_enqueue_submission will fill in further, but this
4680 * ensure the submission is not accidentally triggered early when adding wait timelines. */
4681 deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
4682
4683 *out = deferred;
4684 return VK_SUCCESS;
4685 }
4686
/* Register a deferred submission with its queue and its timeline waits.
 *
 * submission_wait_count starts at the worst case (1 predecessor +
 * wait_semaphore_count).  Here we subtract everything that is already
 * satisfied: timeline waits whose value was already submitted, and the
 * predecessor slot when the queue was empty.  Whoever drops the count to
 * zero (us, a finishing predecessor, or a timeline signal) appends the
 * submission to processing_list.
 */
static void
radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
                              struct list_head *processing_list)
{
	uint32_t wait_cnt = 0;
	struct radv_timeline_waiter *waiter = submission->wait_nodes;
	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
		if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
			pthread_mutex_lock(&submission->wait_semaphores[i]->timeline.mutex);
			if (submission->wait_semaphores[i]->timeline.highest_submitted < submission->wait_values[i]) {
				/* Not yet submitted: park a waiter node on the
				 * timeline; it keeps one unit of the wait count. */
				++wait_cnt;
				waiter->value = submission->wait_values[i];
				waiter->submission = submission;
				list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
				++waiter;
			}
			pthread_mutex_unlock(&submission->wait_semaphores[i]->timeline.mutex);
		}
	}

	pthread_mutex_lock(&submission->queue->pending_mutex);

	bool is_first = list_is_empty(&submission->queue->pending_submissions);
	list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);

	pthread_mutex_unlock(&submission->queue->pending_mutex);

	/* If there is already a submission in the queue, that will decrement the counter by 1 when
	 * submitted, but if the queue was empty, we decrement ourselves as there is no previous
	 * submission. */
	uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
	if (__atomic_sub_fetch(&submission->submission_wait_count, decrement, __ATOMIC_ACQ_REL) == 0) {
		list_addtail(&submission->processing_list, processing_list);
	}
}
4722
/* Remove a finished submission from its queue's pending list, hand the
 * "predecessor done" credit to the next pending submission (scheduling it
 * if that was its last outstanding dependency), and wake anyone blocked in
 * radv_QueueWaitIdle / timeline waits via the device condition variable.
 */
static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
                                   struct list_head *processing_list)
{
	pthread_mutex_lock(&submission->queue->pending_mutex);
	list_del(&submission->queue_pending_list);

	/* trigger the next submission in the queue. */
	if (!list_is_empty(&submission->queue->pending_submissions)) {
		struct radv_deferred_queue_submission *next_submission =
			list_first_entry(&submission->queue->pending_submissions,
			                 struct radv_deferred_queue_submission,
			                 queue_pending_list);
		if (p_atomic_dec_zero(&next_submission->submission_wait_count)) {
			list_addtail(&next_submission->processing_list, processing_list);
		}
	}
	pthread_mutex_unlock(&submission->queue->pending_mutex);

	pthread_cond_broadcast(&submission->queue->device->timeline_cond);
}
4744
4745 static VkResult
4746 radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
4747 struct list_head *processing_list)
4748 {
4749 RADV_FROM_HANDLE(radv_fence, fence, submission->fence);
4750 struct radv_queue *queue = submission->queue;
4751 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4752 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
4753 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
4754 bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
4755 bool can_patch = true;
4756 uint32_t advance;
4757 struct radv_winsys_sem_info sem_info;
4758 VkResult result;
4759 int ret;
4760 struct radeon_cmdbuf *initial_preamble_cs = NULL;
4761 struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
4762 struct radeon_cmdbuf *continue_preamble_cs = NULL;
4763
4764 result = radv_get_preambles(queue, submission->cmd_buffers,
4765 submission->cmd_buffer_count,
4766 &initial_preamble_cs,
4767 &initial_flush_preamble_cs,
4768 &continue_preamble_cs);
4769 if (result != VK_SUCCESS)
4770 goto fail;
4771
4772 result = radv_alloc_sem_info(queue->device,
4773 &sem_info,
4774 submission->wait_semaphore_count,
4775 submission->wait_semaphores,
4776 submission->wait_values,
4777 submission->signal_semaphore_count,
4778 submission->signal_semaphores,
4779 submission->signal_values,
4780 submission->fence);
4781 if (result != VK_SUCCESS)
4782 goto fail;
4783
4784 for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
4785 radv_sparse_buffer_bind_memory(queue->device,
4786 submission->buffer_binds + i);
4787 }
4788
4789 for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
4790 radv_sparse_image_opaque_bind_memory(queue->device,
4791 submission->image_opaque_binds + i);
4792 }
4793
4794 if (!submission->cmd_buffer_count) {
4795 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
4796 &queue->device->empty_cs[queue->queue_family_index],
4797 1, NULL, NULL,
4798 &sem_info, NULL,
4799 false, base_fence);
4800 if (ret) {
4801 radv_loge("failed to submit CS\n");
4802 abort();
4803 }
4804
4805 goto success;
4806 } else {
4807 struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
4808 (submission->cmd_buffer_count));
4809
4810 for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
4811 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
4812 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4813
4814 cs_array[j] = cmd_buffer->cs;
4815 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4816 can_patch = false;
4817
4818 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4819 }
4820
4821 for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
4822 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
4823 const struct radv_winsys_bo_list *bo_list = NULL;
4824
4825 advance = MIN2(max_cs_submission,
4826 submission->cmd_buffer_count - j);
4827
4828 if (queue->device->trace_bo)
4829 *queue->device->trace_id_ptr = 0;
4830
4831 sem_info.cs_emit_wait = j == 0;
4832 sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
4833
4834 if (unlikely(queue->device->use_global_bo_list)) {
4835 pthread_mutex_lock(&queue->device->bo_list.mutex);
4836 bo_list = &queue->device->bo_list.list;
4837 }
4838
4839 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
4840 advance, initial_preamble, continue_preamble_cs,
4841 &sem_info, bo_list,
4842 can_patch, base_fence);
4843
4844 if (unlikely(queue->device->use_global_bo_list))
4845 pthread_mutex_unlock(&queue->device->bo_list.mutex);
4846
4847 if (ret) {
4848 radv_loge("failed to submit CS\n");
4849 abort();
4850 }
4851 if (queue->device->trace_bo) {
4852 radv_check_gpu_hangs(queue, cs_array[j]);
4853 }
4854 }
4855
4856 free(cs_array);
4857 }
4858
4859 success:
4860 radv_free_temp_syncobjs(queue->device,
4861 submission->temporary_semaphore_part_count,
4862 submission->temporary_semaphore_parts);
4863 radv_finalize_timelines(queue->device,
4864 submission->wait_semaphore_count,
4865 submission->wait_semaphores,
4866 submission->wait_values,
4867 submission->signal_semaphore_count,
4868 submission->signal_semaphores,
4869 submission->signal_values,
4870 processing_list);
4871 /* Has to happen after timeline finalization to make sure the
4872 * condition variable is only triggered when timelines and queue have
4873 * been updated. */
4874 radv_queue_submission_update_queue(submission, processing_list);
4875 radv_free_sem_info(&sem_info);
4876 free(submission);
4877 return VK_SUCCESS;
4878
4879 fail:
4880 radv_free_temp_syncobjs(queue->device,
4881 submission->temporary_semaphore_part_count,
4882 submission->temporary_semaphore_parts);
4883 free(submission);
4884 return VK_ERROR_DEVICE_LOST;
4885 }
4886
4887 static VkResult
4888 radv_process_submissions(struct list_head *processing_list)
4889 {
4890 while(!list_is_empty(processing_list)) {
4891 struct radv_deferred_queue_submission *submission =
4892 list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
4893 list_del(&submission->processing_list);
4894
4895 VkResult result = radv_queue_submit_deferred(submission, processing_list);
4896 if (result != VK_SUCCESS)
4897 return result;
4898 }
4899 return VK_SUCCESS;
4900 }
4901
4902 static VkResult radv_queue_submit(struct radv_queue *queue,
4903 const struct radv_queue_submission *submission)
4904 {
4905 struct radv_deferred_queue_submission *deferred = NULL;
4906
4907 VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
4908 if (result != VK_SUCCESS)
4909 return result;
4910
4911 struct list_head processing_list;
4912 list_inithead(&processing_list);
4913
4914 radv_queue_enqueue_submission(deferred, &processing_list);
4915 return radv_process_submissions(&processing_list);
4916 }
4917
/* Submit a single driver-internal command stream directly to the HW
 * context, bypassing the deferred-submission machinery (no semaphores,
 * no fence).  Returns true on success. */
bool
radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
	struct radv_winsys_sem_info sem_info;
	VkResult result;
	int ret;

	/* Empty sem_info: still needed because cs_submit consumes one. */
	result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
	                             0, NULL, VK_NULL_HANDLE);
	if (result != VK_SUCCESS)
		return false;

	ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL,
	                                   NULL, &sem_info, NULL, false, NULL);
	radv_free_sem_info(&sem_info);
	return !ret;
}
4936
4937 /* Signals fence as soon as all the work currently put on queue is done. */
4938 static VkResult radv_signal_fence(struct radv_queue *queue,
4939 VkFence fence)
4940 {
4941 return radv_queue_submit(queue, &(struct radv_queue_submission) {
4942 .fence = fence
4943 });
4944 }
4945
4946 static bool radv_submit_has_effects(const VkSubmitInfo *info)
4947 {
4948 return info->commandBufferCount ||
4949 info->waitSemaphoreCount ||
4950 info->signalSemaphoreCount;
4951 }
4952
/* vkQueueSubmit implementation.
 *
 * Each VkSubmitInfo becomes one radv_queue_submission.  No-op submits are
 * skipped, except the one that carries the fence; the fence is attached to
 * the last submit that has any effect.  Only the first processed submit
 * flushes caches.
 */
VkResult radv_QueueSubmit(
	VkQueue                                     _queue,
	uint32_t                                    submitCount,
	const VkSubmitInfo*                         pSubmits,
	VkFence                                     fence)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	VkResult result;
	uint32_t fence_idx = 0;
	bool flushed_caches = false;

	/* Pick the last effective submit to carry the fence so it signals
	 * only after all prior work. */
	if (fence != VK_NULL_HANDLE) {
		for (uint32_t i = 0; i < submitCount; ++i)
			if (radv_submit_has_effects(pSubmits + i))
				fence_idx = i;
	} else
		fence_idx = UINT32_MAX;

	for (uint32_t i = 0; i < submitCount; i++) {
		if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
			continue;

		/* OR all dst stage masks together; only its truthiness is used
		 * downstream (to decide whether to flush). */
		VkPipelineStageFlags wait_dst_stage_mask = 0;
		for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
			wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
		}

		/* Optional timeline-semaphore values chained on pNext. */
		const VkTimelineSemaphoreSubmitInfo *timeline_info =
			vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);

		result = radv_queue_submit(queue, &(struct radv_queue_submission) {
				.cmd_buffers = pSubmits[i].pCommandBuffers,
				.cmd_buffer_count = pSubmits[i].commandBufferCount,
				.wait_dst_stage_mask = wait_dst_stage_mask,
				.flush_caches = !flushed_caches,
				.wait_semaphores = pSubmits[i].pWaitSemaphores,
				.wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
				.signal_semaphores = pSubmits[i].pSignalSemaphores,
				.signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
				.fence = i == fence_idx ? fence : VK_NULL_HANDLE,
				.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
				.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
				.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
				.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
			});
		if (result != VK_SUCCESS)
			return result;

		flushed_caches = true;
	}

	/* Fence with zero submits: signal it after current queue work. */
	if (fence != VK_NULL_HANDLE && !submitCount) {
		result = radv_signal_fence(queue, fence);
		if (result != VK_SUCCESS)
			return result;
	}

	return VK_SUCCESS;
}
5012
/* vkQueueWaitIdle: first wait for all deferred submissions to reach the
 * winsys (the condition variable is broadcast by
 * radv_queue_submission_update_queue), then idle the HW context. */
VkResult radv_QueueWaitIdle(
	VkQueue                                     _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	pthread_mutex_lock(&queue->pending_mutex);
	while (!list_is_empty(&queue->pending_submissions)) {
		pthread_cond_wait(&queue->device->timeline_cond, &queue->pending_mutex);
	}
	pthread_mutex_unlock(&queue->pending_mutex);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
	                                 radv_queue_family_to_ring(queue->queue_family_index),
	                                 queue->queue_idx);
	return VK_SUCCESS;
}
5029
5030 VkResult radv_DeviceWaitIdle(
5031 VkDevice _device)
5032 {
5033 RADV_FROM_HANDLE(radv_device, device, _device);
5034
5035 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
5036 for (unsigned q = 0; q < device->queue_count[i]; q++) {
5037 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
5038 }
5039 }
5040 return VK_SUCCESS;
5041 }
5042
/* vkEnumerateInstanceExtensionProperties: report every instance extension
 * the build supports.  VK_OUTARRAY handles the standard two-call
 * (count-query then fill) protocol, including VK_INCOMPLETE. */
VkResult radv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);

	for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
		if (radv_instance_extensions_supported.extensions[i]) {
			vk_outarray_append(&out, prop) {
				*prop = radv_instance_extensions[i];
			}
		}
	}

	return vk_outarray_status(&out);
}
5060
/* vkEnumerateDeviceExtensionProperties: report the extensions supported
 * by this particular physical device (GPU-dependent).  Uses the same
 * VK_OUTARRAY two-call protocol as the instance variant. */
VkResult radv_EnumerateDeviceExtensionProperties(
    VkPhysicalDevice                            physicalDevice,
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);

	for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
		if (device->supported_extensions.extensions[i]) {
			vk_outarray_append(&out, prop) {
				*prop = radv_device_extensions[i];
			}
		}
	}

	return vk_outarray_status(&out);
}
5080
/* vkGetInstanceProcAddr: resolve an entrypoint name to a function pointer.
 * Global entrypoints are resolved even with a NULL instance; everything
 * else goes through the instance's dispatch tables. */
PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance                                  _instance,
	const char*                                 pName)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	/* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
	 * when we have to return valid function pointers, NULL, or it's left
	 * undefined. See the table for exact details.
	 */
	if (pName == NULL)
		return NULL;

#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
	if (strcmp(pName, "vk" #entrypoint) == 0) \
		return (PFN_vkVoidFunction)radv_##entrypoint

	/* Global entrypoints: valid even without an instance. */
	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
	LOOKUP_RADV_ENTRYPOINT(CreateInstance);

	/* GetInstanceProcAddr() can also be called with a NULL instance.
	 * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
	 */
	LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);

#undef LOOKUP_RADV_ENTRYPOINT

	if (instance == NULL)
		return NULL;

	/* Instance, then physical-device, then device dispatch tables;
	 * a negative index means the name is not in that table. */
	int idx = radv_get_instance_entrypoint_index(pName);
	if (idx >= 0)
		return instance->dispatch.entrypoints[idx];

	idx = radv_get_physical_device_entrypoint_index(pName);
	if (idx >= 0)
		return instance->physical_device_dispatch.entrypoints[idx];

	idx = radv_get_device_entrypoint_index(pName);
	if (idx >= 0)
		return instance->device_dispatch.entrypoints[idx];

	return NULL;
}
5127
5128 /* The loader wants us to expose a second GetInstanceProcAddr function
5129 * to work around certain LD_PRELOAD issues seen in apps.
5130 */
/* Exported ICD entrypoint required by the Vulkan loader; simply forwards
 * to radv_GetInstanceProcAddr. */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance                                  instance,
	const char*                                 pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}
5143
/* Exported ICD entrypoint (loader interface v4+): resolve only
 * physical-device-level entrypoints; anything else returns NULL. */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
	VkInstance                                  _instance,
	const char*                                 pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
	VkInstance                                  _instance,
	const char*                                 pName)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!pName || !instance)
		return NULL;

	int idx = radv_get_physical_device_entrypoint_index(pName);
	if (idx < 0)
		return NULL;

	return instance->physical_device_dispatch.entrypoints[idx];
}
5165
5166 PFN_vkVoidFunction radv_GetDeviceProcAddr(
5167 VkDevice _device,
5168 const char* pName)
5169 {
5170 RADV_FROM_HANDLE(radv_device, device, _device);
5171
5172 if (!device || !pName)
5173 return NULL;
5174
5175 int idx = radv_get_device_entrypoint_index(pName);
5176 if (idx < 0)
5177 return NULL;
5178
5179 return device->dispatch.entrypoints[idx];
5180 }
5181
5182 bool radv_get_memory_fd(struct radv_device *device,
5183 struct radv_device_memory *memory,
5184 int *pFD)
5185 {
5186 struct radeon_bo_metadata metadata;
5187
5188 if (memory->image) {
5189 if (memory->image->tiling != VK_IMAGE_TILING_LINEAR)
5190 radv_init_metadata(device, memory->image, &metadata);
5191 device->ws->buffer_set_metadata(memory->bo, &metadata);
5192 }
5193
5194 return device->ws->buffer_get_fd(device->ws, memory->bo,
5195 pFD);
5196 }
5197
5198
/* Release a radv_device_memory object: drop the AHB reference (Android
 * builds), undo overallocation accounting, remove and destroy the BO,
 * and free the wrapper.  Safe to call with mem == NULL. */
static void radv_free_memory(struct radv_device *device,
			     const VkAllocationCallbacks* pAllocator,
			     struct radv_device_memory *mem)
{
	if (mem == NULL)
		return;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
	if (mem->android_hardware_buffer)
		AHardwareBuffer_release(mem->android_hardware_buffer);
#endif

	if (mem->bo) {
		/* Return this allocation's size to the heap budget if
		 * overallocation was disallowed at device creation. */
		if (device->overallocation_disallowed) {
			mtx_lock(&device->overallocation_mutex);
			device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
			mtx_unlock(&device->overallocation_mutex);
		}

		radv_bo_list_remove(device, mem->bo);
		device->ws->buffer_destroy(mem->bo);
		mem->bo = NULL;
	}

	vk_object_base_finish(&mem->base);
	vk_free2(&device->vk.alloc, pAllocator, mem);
}
5226
/* Core of vkAllocateMemory. Depending on the chained pNext structs, the
 * allocation is backed by an imported AHardwareBuffer, an exported AHB, an
 * imported fd (opaque or dma-buf), a host pointer, or a fresh BO. On any
 * failure the partially-built object is released via radv_free_memory(). */
static VkResult radv_alloc_memory(struct radv_device *device,
				  const VkMemoryAllocateInfo* pAllocateInfo,
				  const VkAllocationCallbacks* pAllocator,
				  VkDeviceMemory* pMem)
{
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;

	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	/* Gather all the extension structs that change how we allocate. */
	const VkImportMemoryFdInfoKHR *import_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
	const VkMemoryDedicatedAllocateInfo *dedicate_info =
		vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
	const VkExportMemoryAllocateInfo *export_info =
		vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
	const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
		vk_find_struct_const(pAllocateInfo->pNext,
		                     IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
	const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

	const struct wsi_memory_allocate_info *wsi_info =
		vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

	/* A zero-size allocation yields a NULL handle, except for AHB paths
	 * where the size comes from the buffer itself. */
	if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
	    !(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	/* zalloc: all fields (heap_index, alloc_size, ...) start at zero so
	 * radv_free_memory() is safe on a partially-initialized object. */
	mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &mem->base,
			    VK_OBJECT_TYPE_DEVICE_MEMORY);

	/* WSI (swapchain) memory uses implicit sync with the kernel. */
	if (wsi_info && wsi_info->implicit_sync)
		flags |= RADEON_FLAG_IMPLICIT_SYNC;

	if (dedicate_info) {
		mem->image = radv_image_from_handle(dedicate_info->image);
		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
	} else {
		mem->image = NULL;
		mem->buffer = NULL;
	}

	/* Map the [0,1] app priority onto the winsys priority range;
	 * defaults to the middle (0.5) when no priority struct is chained. */
	float priority_float = 0.5;
	const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
		vk_find_struct_const(pAllocateInfo->pNext,
				     MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
	if (priority_ext)
		priority_float = priority_ext->priority;

	unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
	                         (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));

	mem->user_ptr = NULL;
	mem->bo = NULL;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
	mem->android_hardware_buffer = NULL;
#endif

	if (ahb_import_info) {
		result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
		if (result != VK_SUCCESS)
			goto fail;
	} else if(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
		result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
		if (result != VK_SUCCESS)
			goto fail;
	} else if (import_info) {
		assert(import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
		       import_info->handleType ==
		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
						     priority, NULL);
		if (!mem->bo) {
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
			goto fail;
		} else {
			/* Per spec a successful import takes ownership of
			 * the fd, so close our copy. */
			close(import_info->fd);
		}
	} else if (host_ptr_info) {
		assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
		mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
		                                      pAllocateInfo->allocationSize,
		                                      priority);
		if (!mem->bo) {
			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
			goto fail;
		} else {
			mem->user_ptr = host_ptr_info->pHostPointer;
		}
	} else {
		/* Plain allocation: page-align the size, then create a BO in
		 * the domain/flags that the chosen memory type maps to. */
		uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
		uint32_t heap_index;

		heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
		domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
		flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];

		if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
			flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
			if (device->use_global_bo_list) {
				flags |= RADEON_FLAG_PREFER_LOCAL_BO;
			}
		}

		/* With VK_AMD_memory_overallocation_behavior disallowed,
		 * reserve the size against the heap budget up front and roll
		 * it back if the BO creation fails. */
		if (device->overallocation_disallowed) {
			uint64_t total_size =
				device->physical_device->memory_properties.memoryHeaps[heap_index].size;

			mtx_lock(&device->overallocation_mutex);
			if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
				mtx_unlock(&device->overallocation_mutex);
				result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
				goto fail;
			}
			device->allocated_memory_size[heap_index] += alloc_size;
			mtx_unlock(&device->overallocation_mutex);
		}

		mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
		                                    domain, flags, priority);

		if (!mem->bo) {
			if (device->overallocation_disallowed) {
				mtx_lock(&device->overallocation_mutex);
				device->allocated_memory_size[heap_index] -= alloc_size;
				mtx_unlock(&device->overallocation_mutex);
			}
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}

		mem->heap_index = heap_index;
		mem->alloc_size = alloc_size;
	}

	/* WSI buffers are kept out of the global BO list; everything else is
	 * registered so submissions can reference it. */
	if (!wsi_info) {
		result = radv_bo_list_add(device, mem->bo);
		if (result != VK_SUCCESS)
			goto fail;
	}

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail:
	radv_free_memory(device, pAllocator,mem);

	return result;
}
5390
5391 VkResult radv_AllocateMemory(
5392 VkDevice _device,
5393 const VkMemoryAllocateInfo* pAllocateInfo,
5394 const VkAllocationCallbacks* pAllocator,
5395 VkDeviceMemory* pMem)
5396 {
5397 RADV_FROM_HANDLE(radv_device, device, _device);
5398 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
5399 }
5400
5401 void radv_FreeMemory(
5402 VkDevice _device,
5403 VkDeviceMemory _mem,
5404 const VkAllocationCallbacks* pAllocator)
5405 {
5406 RADV_FROM_HANDLE(radv_device, device, _device);
5407 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
5408
5409 radv_free_memory(device, pAllocator, mem);
5410 }
5411
5412 VkResult radv_MapMemory(
5413 VkDevice _device,
5414 VkDeviceMemory _memory,
5415 VkDeviceSize offset,
5416 VkDeviceSize size,
5417 VkMemoryMapFlags flags,
5418 void** ppData)
5419 {
5420 RADV_FROM_HANDLE(radv_device, device, _device);
5421 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5422
5423 if (mem == NULL) {
5424 *ppData = NULL;
5425 return VK_SUCCESS;
5426 }
5427
5428 if (mem->user_ptr)
5429 *ppData = mem->user_ptr;
5430 else
5431 *ppData = device->ws->buffer_map(mem->bo);
5432
5433 if (*ppData) {
5434 *ppData += offset;
5435 return VK_SUCCESS;
5436 }
5437
5438 return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
5439 }
5440
5441 void radv_UnmapMemory(
5442 VkDevice _device,
5443 VkDeviceMemory _memory)
5444 {
5445 RADV_FROM_HANDLE(radv_device, device, _device);
5446 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5447
5448 if (mem == NULL)
5449 return;
5450
5451 if (mem->user_ptr == NULL)
5452 device->ws->buffer_unmap(mem->bo);
5453 }
5454
5455 VkResult radv_FlushMappedMemoryRanges(
5456 VkDevice _device,
5457 uint32_t memoryRangeCount,
5458 const VkMappedMemoryRange* pMemoryRanges)
5459 {
5460 return VK_SUCCESS;
5461 }
5462
5463 VkResult radv_InvalidateMappedMemoryRanges(
5464 VkDevice _device,
5465 uint32_t memoryRangeCount,
5466 const VkMappedMemoryRange* pMemoryRanges)
5467 {
5468 return VK_SUCCESS;
5469 }
5470
5471 void radv_GetBufferMemoryRequirements(
5472 VkDevice _device,
5473 VkBuffer _buffer,
5474 VkMemoryRequirements* pMemoryRequirements)
5475 {
5476 RADV_FROM_HANDLE(radv_device, device, _device);
5477 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
5478
5479 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5480
5481 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
5482 pMemoryRequirements->alignment = 4096;
5483 else
5484 pMemoryRequirements->alignment = 16;
5485
5486 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
5487 }
5488
5489 void radv_GetBufferMemoryRequirements2(
5490 VkDevice device,
5491 const VkBufferMemoryRequirementsInfo2 *pInfo,
5492 VkMemoryRequirements2 *pMemoryRequirements)
5493 {
5494 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
5495 &pMemoryRequirements->memoryRequirements);
5496 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
5497 switch (ext->sType) {
5498 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5499 VkMemoryDedicatedRequirements *req =
5500 (VkMemoryDedicatedRequirements *) ext;
5501 req->requiresDedicatedAllocation = false;
5502 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5503 break;
5504 }
5505 default:
5506 break;
5507 }
5508 }
5509 }
5510
5511 void radv_GetImageMemoryRequirements(
5512 VkDevice _device,
5513 VkImage _image,
5514 VkMemoryRequirements* pMemoryRequirements)
5515 {
5516 RADV_FROM_HANDLE(radv_device, device, _device);
5517 RADV_FROM_HANDLE(radv_image, image, _image);
5518
5519 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5520
5521 pMemoryRequirements->size = image->size;
5522 pMemoryRequirements->alignment = image->alignment;
5523 }
5524
5525 void radv_GetImageMemoryRequirements2(
5526 VkDevice device,
5527 const VkImageMemoryRequirementsInfo2 *pInfo,
5528 VkMemoryRequirements2 *pMemoryRequirements)
5529 {
5530 radv_GetImageMemoryRequirements(device, pInfo->image,
5531 &pMemoryRequirements->memoryRequirements);
5532
5533 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
5534
5535 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
5536 switch (ext->sType) {
5537 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5538 VkMemoryDedicatedRequirements *req =
5539 (VkMemoryDedicatedRequirements *) ext;
5540 req->requiresDedicatedAllocation = image->shareable &&
5541 image->tiling != VK_IMAGE_TILING_LINEAR;
5542 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5543 break;
5544 }
5545 default:
5546 break;
5547 }
5548 }
5549 }
5550
void radv_GetImageSparseMemoryRequirements(
	VkDevice device,
	VkImage image,
	uint32_t* pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
{
	/* Unimplemented: sparse image requirements are not reported yet;
	 * stub() marks the missing entry point. */
	stub();
}
5559
void radv_GetImageSparseMemoryRequirements2(
	VkDevice device,
	const VkImageSparseMemoryRequirementsInfo2 *pInfo,
	uint32_t* pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
	/* Unimplemented: see radv_GetImageSparseMemoryRequirements. */
	stub();
}
5568
void radv_GetDeviceMemoryCommitment(
	VkDevice device,
	VkDeviceMemory memory,
	VkDeviceSize* pCommittedMemoryInBytes)
{
	/* Always report zero committed bytes. NOTE(review): this is only
	 * spec-valid if no LAZILY_ALLOCATED memory type is exposed — confirm
	 * against the memory-type setup. */
	*pCommittedMemoryInBytes = 0;
}
5576
5577 VkResult radv_BindBufferMemory2(VkDevice device,
5578 uint32_t bindInfoCount,
5579 const VkBindBufferMemoryInfo *pBindInfos)
5580 {
5581 for (uint32_t i = 0; i < bindInfoCount; ++i) {
5582 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5583 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5584
5585 if (mem) {
5586 buffer->bo = mem->bo;
5587 buffer->offset = pBindInfos[i].memoryOffset;
5588 } else {
5589 buffer->bo = NULL;
5590 }
5591 }
5592 return VK_SUCCESS;
5593 }
5594
5595 VkResult radv_BindBufferMemory(
5596 VkDevice device,
5597 VkBuffer buffer,
5598 VkDeviceMemory memory,
5599 VkDeviceSize memoryOffset)
5600 {
5601 const VkBindBufferMemoryInfo info = {
5602 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5603 .buffer = buffer,
5604 .memory = memory,
5605 .memoryOffset = memoryOffset
5606 };
5607
5608 return radv_BindBufferMemory2(device, 1, &info);
5609 }
5610
5611 VkResult radv_BindImageMemory2(VkDevice device,
5612 uint32_t bindInfoCount,
5613 const VkBindImageMemoryInfo *pBindInfos)
5614 {
5615 for (uint32_t i = 0; i < bindInfoCount; ++i) {
5616 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5617 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5618
5619 if (mem) {
5620 image->bo = mem->bo;
5621 image->offset = pBindInfos[i].memoryOffset;
5622 } else {
5623 image->bo = NULL;
5624 image->offset = 0;
5625 }
5626 }
5627 return VK_SUCCESS;
5628 }
5629
5630
5631 VkResult radv_BindImageMemory(
5632 VkDevice device,
5633 VkImage image,
5634 VkDeviceMemory memory,
5635 VkDeviceSize memoryOffset)
5636 {
5637 const VkBindImageMemoryInfo info = {
5638 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5639 .image = image,
5640 .memory = memory,
5641 .memoryOffset = memoryOffset
5642 };
5643
5644 return radv_BindImageMemory2(device, 1, &info);
5645 }
5646
5647 static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
5648 {
5649 return info->bufferBindCount ||
5650 info->imageOpaqueBindCount ||
5651 info->imageBindCount ||
5652 info->waitSemaphoreCount ||
5653 info->signalSemaphoreCount;
5654 }
5655
5656 VkResult radv_QueueBindSparse(
5657 VkQueue _queue,
5658 uint32_t bindInfoCount,
5659 const VkBindSparseInfo* pBindInfo,
5660 VkFence fence)
5661 {
5662 RADV_FROM_HANDLE(radv_queue, queue, _queue);
5663 VkResult result;
5664 uint32_t fence_idx = 0;
5665
5666 if (fence != VK_NULL_HANDLE) {
5667 for (uint32_t i = 0; i < bindInfoCount; ++i)
5668 if (radv_sparse_bind_has_effects(pBindInfo + i))
5669 fence_idx = i;
5670 } else
5671 fence_idx = UINT32_MAX;
5672
5673 for (uint32_t i = 0; i < bindInfoCount; ++i) {
5674 if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
5675 continue;
5676
5677 const VkTimelineSemaphoreSubmitInfo *timeline_info =
5678 vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
5679
5680 VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
5681 .buffer_binds = pBindInfo[i].pBufferBinds,
5682 .buffer_bind_count = pBindInfo[i].bufferBindCount,
5683 .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
5684 .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
5685 .wait_semaphores = pBindInfo[i].pWaitSemaphores,
5686 .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
5687 .signal_semaphores = pBindInfo[i].pSignalSemaphores,
5688 .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
5689 .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
5690 .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
5691 .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
5692 .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
5693 .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
5694 });
5695
5696 if (result != VK_SUCCESS)
5697 return result;
5698 }
5699
5700 if (fence != VK_NULL_HANDLE && !bindInfoCount) {
5701 result = radv_signal_fence(queue, fence);
5702 if (result != VK_SUCCESS)
5703 return result;
5704 }
5705
5706 return VK_SUCCESS;
5707 }
5708
/* vkCreateFence: the backing object depends on the device and the create
 * info — a kernel syncobj when the fence is exportable (or the device
 * always uses syncobjs), otherwise a plain winsys fence. A WSI payload
 * (fence_wsi) and a temporary syncobj start out empty. */
VkResult radv_CreateFence(
	VkDevice _device,
	const VkFenceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkFence* pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkExportFenceCreateInfo *export =
		vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
	VkExternalFenceHandleTypeFlags handleTypes =
		export ? export->handleTypes : 0;

	struct radv_fence *fence = vk_alloc2(&device->vk.alloc, pAllocator,
	                                     sizeof(*fence), 8,
	                                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);

	fence->fence_wsi = NULL;
	fence->temp_syncobj = 0;
	if (device->always_use_syncobj || handleTypes) {
		/* Syncobj-backed fence; honor SIGNALED_BIT at creation. */
		int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
		if (ret) {
			vk_free2(&device->vk.alloc, pAllocator, fence);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
			device->ws->signal_syncobj(device->ws, fence->syncobj);
		}
		fence->fence = NULL;
	} else {
		/* Plain winsys fence; honor SIGNALED_BIT at creation. */
		fence->fence = device->ws->create_fence();
		if (!fence->fence) {
			vk_free2(&device->vk.alloc, pAllocator, fence);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		fence->syncobj = 0;
		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
			device->ws->signal_fence(fence->fence);
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}
5757
5758 void radv_DestroyFence(
5759 VkDevice _device,
5760 VkFence _fence,
5761 const VkAllocationCallbacks* pAllocator)
5762 {
5763 RADV_FROM_HANDLE(radv_device, device, _device);
5764 RADV_FROM_HANDLE(radv_fence, fence, _fence);
5765
5766 if (!fence)
5767 return;
5768
5769 if (fence->temp_syncobj)
5770 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
5771 if (fence->syncobj)
5772 device->ws->destroy_syncobj(device->ws, fence->syncobj);
5773 if (fence->fence)
5774 device->ws->destroy_fence(fence->fence);
5775 if (fence->fence_wsi)
5776 fence->fence_wsi->destroy(fence->fence_wsi);
5777
5778 vk_object_base_finish(&fence->base);
5779 vk_free2(&device->vk.alloc, pAllocator, fence);
5780 }
5781
5782
/* Current CLOCK_MONOTONIC time in nanoseconds; used to build absolute
 * timeouts for fence/semaphore waits. */
uint64_t radv_get_current_time(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}
5789
/* Convert a relative timeout (ns) into an absolute CLOCK_MONOTONIC
 * deadline, clamping so the addition cannot wrap past UINT64_MAX. */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	const uint64_t now = radv_get_current_time();

	if (timeout > UINT64_MAX - now)
		timeout = UINT64_MAX - now;

	return now + timeout;
}
5798
5799
5800 static bool radv_all_fences_plain_and_submitted(struct radv_device *device,
5801 uint32_t fenceCount, const VkFence *pFences)
5802 {
5803 for (uint32_t i = 0; i < fenceCount; ++i) {
5804 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5805 if (fence->fence == NULL || fence->syncobj ||
5806 fence->temp_syncobj || fence->fence_wsi ||
5807 (!device->ws->is_fence_waitable(fence->fence)))
5808 return false;
5809 }
5810 return true;
5811 }
5812
5813 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
5814 {
5815 for (uint32_t i = 0; i < fenceCount; ++i) {
5816 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5817 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
5818 return false;
5819 }
5820 return true;
5821 }
5822
/* vkWaitForFences. Converts the relative timeout to an absolute deadline
 * up front so the several sequential waits below share one budget. Three
 * strategies: a single kernel syncobj wait when every fence is
 * syncobj-backed; a batched winsys wait for the wait-any case on new
 * enough kernels; otherwise per-fence sequential waits. */
VkResult radv_WaitForFences(
	VkDevice _device,
	uint32_t fenceCount,
	const VkFence* pFences,
	VkBool32 waitAll,
	uint64_t timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	timeout = radv_get_absolute_timeout(timeout);

	/* Fast path: the kernel waits on all syncobjs in one ioctl. */
	if (device->always_use_syncobj &&
	    radv_all_fences_syncobj(fenceCount, pFences))
	{
		uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
		if (!handles)
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

		for (uint32_t i = 0; i < fenceCount; ++i) {
			RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
			/* Temporary payload takes precedence over permanent. */
			handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
		}

		bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);

		free(handles);
		return success ? VK_SUCCESS : VK_TIMEOUT;
	}

	if (!waitAll && fenceCount > 1) {
		/* Not doing this by default for waitAll, due to needing to allocate twice. */
		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(device, fenceCount, pFences)) {
			uint32_t wait_count = 0;
			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
			if (!fences)
				return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

			for (uint32_t i = 0; i < fenceCount; ++i) {
				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

				/* Poll first: any already-signaled fence
				 * satisfies wait-any immediately. */
				if (device->ws->fence_wait(device->ws, fence->fence, false, 0)) {
					free(fences);
					return VK_SUCCESS;
				}

				fences[wait_count++] = fence->fence;
			}

			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
							       waitAll, timeout - radv_get_current_time());

			free(fences);
			return success ? VK_SUCCESS : VK_TIMEOUT;
		}

		/* NOTE(review): fallback is a busy-poll loop until the
		 * absolute deadline — burns CPU, but only hit on old kernels
		 * or mixed fence kinds. */
		while(radv_get_current_time() <= timeout) {
			for (uint32_t i = 0; i < fenceCount; ++i) {
				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
					return VK_SUCCESS;
			}
		}
		return VK_TIMEOUT;
	}

	/* wait-all (or single-fence) path: wait for each fence in turn; with
	 * an absolute deadline this is equivalent to waiting in parallel. */
	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		if (fence->temp_syncobj) {
			if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
				return VK_TIMEOUT;
			continue;
		}

		if (fence->syncobj) {
			if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
				return VK_TIMEOUT;
			continue;
		}

		if (fence->fence) {
			if (!device->ws->is_fence_waitable(fence->fence)) {
				/* Busy-wait until the fence has been
				 * submitted (or time runs out): unsubmitted
				 * winsys fences cannot be waited on. */
				while(!device->ws->is_fence_waitable(fence->fence) &&
				      radv_get_current_time() <= timeout)
					/* Do nothing */;
			}

			expired = device->ws->fence_wait(device->ws,
							 fence->fence,
							 true, timeout);
			if (!expired)
				return VK_TIMEOUT;
		}

		if (fence->fence_wsi) {
			VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
			if (result != VK_SUCCESS)
				return result;
		}
	}

	return VK_SUCCESS;
}
5925
5926 VkResult radv_ResetFences(VkDevice _device,
5927 uint32_t fenceCount,
5928 const VkFence *pFences)
5929 {
5930 RADV_FROM_HANDLE(radv_device, device, _device);
5931
5932 for (unsigned i = 0; i < fenceCount; ++i) {
5933 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5934 if (fence->fence)
5935 device->ws->reset_fence(fence->fence);
5936
5937 /* Per spec, we first restore the permanent payload, and then reset, so
5938 * having a temp syncobj should not skip resetting the permanent syncobj. */
5939 if (fence->temp_syncobj) {
5940 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
5941 fence->temp_syncobj = 0;
5942 }
5943
5944 if (fence->syncobj) {
5945 device->ws->reset_syncobj(device->ws, fence->syncobj);
5946 }
5947 }
5948
5949 return VK_SUCCESS;
5950 }
5951
5952 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
5953 {
5954 RADV_FROM_HANDLE(radv_device, device, _device);
5955 RADV_FROM_HANDLE(radv_fence, fence, _fence);
5956
5957 if (fence->temp_syncobj) {
5958 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
5959 return success ? VK_SUCCESS : VK_NOT_READY;
5960 }
5961
5962 if (fence->syncobj) {
5963 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
5964 return success ? VK_SUCCESS : VK_NOT_READY;
5965 }
5966
5967 if (fence->fence) {
5968 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
5969 return VK_NOT_READY;
5970 }
5971 if (fence->fence_wsi) {
5972 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
5973
5974 if (result != VK_SUCCESS) {
5975 if (result == VK_TIMEOUT)
5976 return VK_NOT_READY;
5977 return result;
5978 }
5979 }
5980 return VK_SUCCESS;
5981 }
5982
5983
5984 // Queue semaphore functions
5985
5986 static void
5987 radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
5988 {
5989 timeline->highest_signaled = value;
5990 timeline->highest_submitted = value;
5991 list_inithead(&timeline->points);
5992 list_inithead(&timeline->free_points);
5993 list_inithead(&timeline->waiters);
5994 pthread_mutex_init(&timeline->mutex, NULL);
5995 }
5996
/* Tear down a timeline: free every point (both the recycled free-list and
 * the live list), destroying each point's syncobj, then the mutex. */
static void
radv_destroy_timeline(struct radv_device *device,
                      struct radv_timeline *timeline)
{
	list_for_each_entry_safe(struct radv_timeline_point, point,
	                         &timeline->free_points, list) {
		list_del(&point->list);
		device->ws->destroy_syncobj(device->ws, point->syncobj);
		free(point);
	}
	list_for_each_entry_safe(struct radv_timeline_point, point,
	                         &timeline->points, list) {
		list_del(&point->list);
		device->ws->destroy_syncobj(device->ws, point->syncobj);
		free(point);
	}
	pthread_mutex_destroy(&timeline->mutex);
}
6015
/* Garbage-collect signaled points (caller holds timeline->mutex). Walks
 * the ordered point list from the front, moving each signaled point to the
 * free list and advancing highest_signaled. Stops at the first point that
 * is still referenced by a waiter or not yet submitted. */
static void
radv_timeline_gc_locked(struct radv_device *device,
                        struct radv_timeline *timeline)
{
	list_for_each_entry_safe(struct radv_timeline_point, point,
	                         &timeline->points, list) {
		/* Points are ordered by value, so everything after this one
		 * is also unreclaimable. */
		if (point->wait_count || point->value > timeline->highest_submitted)
			return;

		/* Zero timeout = poll: recycle the point iff its syncobj has
		 * signaled. */
		if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
			timeline->highest_signaled = point->value;
			list_del(&point->list);
			list_add(&point->list, &timeline->free_points);
		}
	}
}
6032
/* Find the first point with value >= p (caller holds timeline->mutex).
 * Returns NULL when p is already signaled or no such point exists; on
 * success the point's wait_count is incremented, so the caller must
 * decrement it after waiting. */
static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device,
                                         struct radv_timeline *timeline,
                                         uint64_t p)
{
	radv_timeline_gc_locked(device, timeline);

	if (p <= timeline->highest_signaled)
		return NULL;

	/* The point list is sorted ascending by value, so the first match
	 * is the smallest point satisfying the wait. */
	list_for_each_entry(struct radv_timeline_point, point,
	                    &timeline->points, list) {
		if (point->value >= p) {
			++point->wait_count;
			return point;
		}
	}
	return NULL;
}
6052
/* Insert a new point with value p into the sorted point list (caller holds
 * timeline->mutex). Returns NULL when p is already signaled or already
 * present; otherwise returns the new point with wait_count = 1. Points are
 * recycled from the free list when possible. */
static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device,
                               struct radv_timeline *timeline,
                               uint64_t p)
{
	radv_timeline_gc_locked(device, timeline);

	struct radv_timeline_point *ret = NULL;
	struct radv_timeline_point *prev = NULL;

	if (p <= timeline->highest_signaled)
		return NULL;

	/* Find the insertion position; remember the last point below p. */
	list_for_each_entry(struct radv_timeline_point, point,
	                    &timeline->points, list) {
		if (point->value == p) {
			return NULL;
		}

		if (point->value < p)
			prev = point;
	}

	if (list_is_empty(&timeline->free_points)) {
		/* NOTE(review): malloc and create_syncobj results are not
		 * checked here — an OOM would dereference NULL. A NULL
		 * return cannot express failure (callers read it as
		 * "already signaled"), so fixing this needs an interface
		 * change; flagging only. */
		ret = malloc(sizeof(struct radv_timeline_point));
		device->ws->create_syncobj(device->ws, &ret->syncobj);
	} else {
		/* Recycle a previously-signaled point. */
		ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
		list_del(&ret->list);

		device->ws->reset_syncobj(device->ws, ret->syncobj);
	}

	ret->value = p;
	ret->wait_count = 1;

	/* Keep the list sorted: after the last smaller point, or at the
	 * tail when p is the largest value. */
	if (prev) {
		list_add(&ret->list, &prev->list);
	} else {
		list_addtail(&ret->list, &timeline->points);
	}
	return ret;
}
6096
6097
/* Wait until the timeline reaches `value` or the absolute deadline passes
 * (caller holds timeline->mutex; it is temporarily released while waiting
 * on the point's syncobj). */
static VkResult
radv_timeline_wait_locked(struct radv_device *device,
                          struct radv_timeline *timeline,
                          uint64_t value,
                          uint64_t abs_timeout)
{
	/* Phase 1: wait (on the device-wide condvar) until the value has at
	 * least been submitted, since only then does a syncobj exist. */
	while(timeline->highest_submitted < value) {
		struct timespec abstime;
		timespec_from_nsec(&abstime, abs_timeout);

		pthread_cond_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);

		/* Recheck after every wakeup (timed wait may expire or wake
		 * spuriously). */
		if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value)
			return VK_TIMEOUT;
	}

	/* Phase 2: wait for the GPU to signal the point's syncobj. A NULL
	 * point means the value already signaled. */
	struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(device, timeline, value);
	if (!point)
		return VK_SUCCESS;

	/* Drop the mutex across the kernel wait so submitters can make
	 * progress; find_point took a wait_count reference keeping the
	 * point alive meanwhile. */
	pthread_mutex_unlock(&timeline->mutex);

	bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);

	pthread_mutex_lock(&timeline->mutex);
	point->wait_count--;
	return success ? VK_SUCCESS : VK_TIMEOUT;
}
6126
/* Wake queued submissions whose wait value has now been submitted (caller
 * holds timeline->mutex). Each satisfied waiter decrements its
 * submission's wait count; when that hits zero the submission becomes
 * runnable and is appended to processing_list. */
static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                     struct list_head *processing_list)
{
	list_for_each_entry_safe(struct radv_timeline_waiter, waiter,
	                         &timeline->waiters, list) {
		if (waiter->value > timeline->highest_submitted)
			continue;

		if (p_atomic_dec_zero(&waiter->submission->submission_wait_count)) {
			list_addtail(&waiter->submission->processing_list, processing_list);
		}
		list_del(&waiter->list);
	}
}
6142
6143 static
6144 void radv_destroy_semaphore_part(struct radv_device *device,
6145 struct radv_semaphore_part *part)
6146 {
6147 switch(part->kind) {
6148 case RADV_SEMAPHORE_NONE:
6149 break;
6150 case RADV_SEMAPHORE_WINSYS:
6151 device->ws->destroy_sem(part->ws_sem);
6152 break;
6153 case RADV_SEMAPHORE_TIMELINE:
6154 radv_destroy_timeline(device, &part->timeline);
6155 break;
6156 case RADV_SEMAPHORE_SYNCOBJ:
6157 device->ws->destroy_syncobj(device->ws, part->syncobj);
6158 break;
6159 }
6160 part->kind = RADV_SEMAPHORE_NONE;
6161 }
6162
6163 static VkSemaphoreTypeKHR
6164 radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
6165 {
6166 const VkSemaphoreTypeCreateInfo *type_info =
6167 vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
6168
6169 if (!type_info)
6170 return VK_SEMAPHORE_TYPE_BINARY;
6171
6172 if (initial_value)
6173 *initial_value = type_info->initialValue;
6174 return type_info->semaphoreType;
6175 }
6176
/* vkCreateSemaphore: the permanent payload's backing depends on the type —
 * timeline semaphores get CPU-side timeline state; exportable binary
 * semaphores (or devices that always use syncobjs) get a kernel syncobj;
 * otherwise a winsys semaphore. The temporary payload starts empty. */
VkResult radv_CreateSemaphore(
	VkDevice _device,
	const VkSemaphoreCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkSemaphore* pSemaphore)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	const VkExportSemaphoreCreateInfo *export =
		vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
	VkExternalSemaphoreHandleTypeFlags handleTypes =
		export ? export->handleTypes : 0;
	uint64_t initial_value = 0;
	VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);

	struct radv_semaphore *sem = vk_alloc2(&device->vk.alloc, pAllocator,
	                                       sizeof(*sem), 8,
	                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sem)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &sem->base,
			    VK_OBJECT_TYPE_SEMAPHORE);

	sem->temporary.kind = RADV_SEMAPHORE_NONE;
	sem->permanent.kind = RADV_SEMAPHORE_NONE;

	if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
		radv_create_timeline(&sem->permanent.timeline, initial_value);
		sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
	} else if (device->always_use_syncobj || handleTypes) {
		assert (device->physical_device->rad_info.has_syncobj);
		int ret = device->ws->create_syncobj(device->ws, &sem->permanent.syncobj);
		if (ret) {
			vk_free2(&device->vk.alloc, pAllocator, sem);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
	} else {
		sem->permanent.ws_sem = device->ws->create_sem(device->ws);
		if (!sem->permanent.ws_sem) {
			vk_free2(&device->vk.alloc, pAllocator, sem);
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
		sem->permanent.kind = RADV_SEMAPHORE_WINSYS;
	}

	*pSemaphore = radv_semaphore_to_handle(sem);
	return VK_SUCCESS;
}
6226
6227 void radv_DestroySemaphore(
6228 VkDevice _device,
6229 VkSemaphore _semaphore,
6230 const VkAllocationCallbacks* pAllocator)
6231 {
6232 RADV_FROM_HANDLE(radv_device, device, _device);
6233 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
6234 if (!_semaphore)
6235 return;
6236
6237 radv_destroy_semaphore_part(device, &sem->temporary);
6238 radv_destroy_semaphore_part(device, &sem->permanent);
6239 vk_object_base_finish(&sem->base);
6240 vk_free2(&device->vk.alloc, pAllocator, sem);
6241 }
6242
6243 VkResult
6244 radv_GetSemaphoreCounterValue(VkDevice _device,
6245 VkSemaphore _semaphore,
6246 uint64_t* pValue)
6247 {
6248 RADV_FROM_HANDLE(radv_device, device, _device);
6249 RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
6250
6251 struct radv_semaphore_part *part =
6252 semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
6253
6254 switch (part->kind) {
6255 case RADV_SEMAPHORE_TIMELINE: {
6256 pthread_mutex_lock(&part->timeline.mutex);
6257 radv_timeline_gc_locked(device, &part->timeline);
6258 *pValue = part->timeline.highest_signaled;
6259 pthread_mutex_unlock(&part->timeline.mutex);
6260 return VK_SUCCESS;
6261 }
6262 case RADV_SEMAPHORE_NONE:
6263 case RADV_SEMAPHORE_SYNCOBJ:
6264 case RADV_SEMAPHORE_WINSYS:
6265 unreachable("Invalid semaphore type");
6266 }
6267 unreachable("Unhandled semaphore type");
6268 }
6269
6270
/* Waits on the timeline semaphores in pWaitInfo until the requested values
 * are reached or abs_timeout (an absolute deadline in nanoseconds) passes.
 * Honors VK_SEMAPHORE_WAIT_ANY_BIT_KHR.
 */
static VkResult
radv_wait_timelines(struct radv_device *device,
                    const VkSemaphoreWaitInfo* pWaitInfo,
                    uint64_t abs_timeout)
{
	if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
		/* "Wait any" with multiple semaphores: poll every semaphore
		 * with a zero timeout until one is satisfied or the deadline
		 * expires. NOTE(review): this is a busy loop with no sleep
		 * between rounds — presumably acceptable for this path, but
		 * worth confirming under contention.
		 */
		for (;;) {
			for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
				RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
				pthread_mutex_lock(&semaphore->permanent.timeline.mutex);
				/* Zero timeout: just a non-blocking check. */
				VkResult result = radv_timeline_wait_locked(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
				pthread_mutex_unlock(&semaphore->permanent.timeline.mutex);

				if (result == VK_SUCCESS)
					return VK_SUCCESS;
			}
			if (radv_get_current_time() > abs_timeout)
				return VK_TIMEOUT;
		}
	}

	/* "Wait all" (or a single semaphore): block on each semaphore in
	 * turn; every wait shares the same absolute deadline, so the total
	 * wait still respects the caller's timeout. Only the permanent
	 * payload is consulted here.
	 */
	for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
		RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
		pthread_mutex_lock(&semaphore->permanent.timeline.mutex);
		VkResult result = radv_timeline_wait_locked(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], abs_timeout);
		pthread_mutex_unlock(&semaphore->permanent.timeline.mutex);

		if (result != VK_SUCCESS)
			return result;
	}
	return VK_SUCCESS;
}
6303 VkResult
6304 radv_WaitSemaphores(VkDevice _device,
6305 const VkSemaphoreWaitInfo* pWaitInfo,
6306 uint64_t timeout)
6307 {
6308 RADV_FROM_HANDLE(radv_device, device, _device);
6309 uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
6310 return radv_wait_timelines(device, pWaitInfo, abs_timeout);
6311 }
6312
/* vkSignalSemaphore: signals a timeline semaphore from the host, advancing
 * its counter to at least pSignalInfo->value and kicking off any queue
 * submissions that were waiting on that point.
 */
VkResult
radv_SignalSemaphore(VkDevice _device,
                     const VkSemaphoreSignalInfo* pSignalInfo)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);

	/* A temporary payload, when present, takes precedence over the
	 * permanent one. */
	struct radv_semaphore_part *part =
		semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;

	switch(part->kind) {
	case RADV_SEMAPHORE_TIMELINE: {
		pthread_mutex_lock(&part->timeline.mutex);
		radv_timeline_gc_locked(device, &part->timeline);
		/* Host signal: advance both counters monotonically (MAX2),
		 * since no GPU work is associated with this point. */
		part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
		part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);

		/* Collect submissions unblocked by the new value while still
		 * holding the timeline lock, but process them only after
		 * releasing it. */
		struct list_head processing_list;
		list_inithead(&processing_list);
		radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
		pthread_mutex_unlock(&part->timeline.mutex);

		return radv_process_submissions(&processing_list);
	}
	case RADV_SEMAPHORE_NONE:
	case RADV_SEMAPHORE_SYNCOBJ:
	case RADV_SEMAPHORE_WINSYS:
		/* Host signaling is only valid for timeline semaphores. */
		unreachable("Invalid semaphore type");
	}
	return VK_SUCCESS;
}
6344
6345 static void radv_destroy_event(struct radv_device *device,
6346 const VkAllocationCallbacks* pAllocator,
6347 struct radv_event *event)
6348 {
6349 device->ws->buffer_destroy(event->bo);
6350 vk_object_base_finish(&event->base);
6351 vk_free2(&device->vk.alloc, pAllocator, event);
6352 }
6353
6354 VkResult radv_CreateEvent(
6355 VkDevice _device,
6356 const VkEventCreateInfo* pCreateInfo,
6357 const VkAllocationCallbacks* pAllocator,
6358 VkEvent* pEvent)
6359 {
6360 RADV_FROM_HANDLE(radv_device, device, _device);
6361 struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator,
6362 sizeof(*event), 8,
6363 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6364
6365 if (!event)
6366 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6367
6368 vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
6369
6370 event->bo = device->ws->buffer_create(device->ws, 8, 8,
6371 RADEON_DOMAIN_GTT,
6372 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
6373 RADV_BO_PRIORITY_FENCE);
6374 if (!event->bo) {
6375 vk_free2(&device->vk.alloc, pAllocator, event);
6376 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6377 }
6378
6379 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
6380 if (!event->map) {
6381 radv_destroy_event(device, pAllocator, event);
6382 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6383 }
6384
6385 *pEvent = radv_event_to_handle(event);
6386
6387 return VK_SUCCESS;
6388 }
6389
6390 void radv_DestroyEvent(
6391 VkDevice _device,
6392 VkEvent _event,
6393 const VkAllocationCallbacks* pAllocator)
6394 {
6395 RADV_FROM_HANDLE(radv_device, device, _device);
6396 RADV_FROM_HANDLE(radv_event, event, _event);
6397
6398 if (!event)
6399 return;
6400
6401 radv_destroy_event(device, pAllocator, event);
6402 }
6403
6404 VkResult radv_GetEventStatus(
6405 VkDevice _device,
6406 VkEvent _event)
6407 {
6408 RADV_FROM_HANDLE(radv_event, event, _event);
6409
6410 if (*event->map == 1)
6411 return VK_EVENT_SET;
6412 return VK_EVENT_RESET;
6413 }
6414
6415 VkResult radv_SetEvent(
6416 VkDevice _device,
6417 VkEvent _event)
6418 {
6419 RADV_FROM_HANDLE(radv_event, event, _event);
6420 *event->map = 1;
6421
6422 return VK_SUCCESS;
6423 }
6424
6425 VkResult radv_ResetEvent(
6426 VkDevice _device,
6427 VkEvent _event)
6428 {
6429 RADV_FROM_HANDLE(radv_event, event, _event);
6430 *event->map = 0;
6431
6432 return VK_SUCCESS;
6433 }
6434
6435 VkResult radv_CreateBuffer(
6436 VkDevice _device,
6437 const VkBufferCreateInfo* pCreateInfo,
6438 const VkAllocationCallbacks* pAllocator,
6439 VkBuffer* pBuffer)
6440 {
6441 RADV_FROM_HANDLE(radv_device, device, _device);
6442 struct radv_buffer *buffer;
6443
6444 if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
6445 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
6446
6447 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
6448
6449 buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
6450 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6451 if (buffer == NULL)
6452 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6453
6454 vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
6455
6456 buffer->size = pCreateInfo->size;
6457 buffer->usage = pCreateInfo->usage;
6458 buffer->bo = NULL;
6459 buffer->offset = 0;
6460 buffer->flags = pCreateInfo->flags;
6461
6462 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
6463 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
6464
6465 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
6466 buffer->bo = device->ws->buffer_create(device->ws,
6467 align64(buffer->size, 4096),
6468 4096, 0, RADEON_FLAG_VIRTUAL,
6469 RADV_BO_PRIORITY_VIRTUAL);
6470 if (!buffer->bo) {
6471 vk_free2(&device->vk.alloc, pAllocator, buffer);
6472 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6473 }
6474 }
6475
6476 *pBuffer = radv_buffer_to_handle(buffer);
6477
6478 return VK_SUCCESS;
6479 }
6480
6481 void radv_DestroyBuffer(
6482 VkDevice _device,
6483 VkBuffer _buffer,
6484 const VkAllocationCallbacks* pAllocator)
6485 {
6486 RADV_FROM_HANDLE(radv_device, device, _device);
6487 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
6488
6489 if (!buffer)
6490 return;
6491
6492 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
6493 device->ws->buffer_destroy(buffer->bo);
6494
6495 vk_object_base_finish(&buffer->base);
6496 vk_free2(&device->vk.alloc, pAllocator, buffer);
6497 }
6498
6499 VkDeviceAddress radv_GetBufferDeviceAddress(
6500 VkDevice device,
6501 const VkBufferDeviceAddressInfo* pInfo)
6502 {
6503 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
6504 return radv_buffer_get_va(buffer->bo) + buffer->offset;
6505 }
6506
6507
6508 uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device,
6509 const VkBufferDeviceAddressInfo* pInfo)
6510 {
6511 return 0;
6512 }
6513
6514 uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
6515 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
6516 {
6517 return 0;
6518 }
6519
6520 static inline unsigned
6521 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
6522 {
6523 if (stencil)
6524 return plane->surface.u.legacy.stencil_tiling_index[level];
6525 else
6526 return plane->surface.u.legacy.tiling_index[level];
6527 }
6528
6529 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
6530 {
6531 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
6532 }
6533
/* Computes the CB_DCC_CONTROL register value for a color view, or 0 when
 * DCC is not enabled for the accessed mip level.
 */
static uint32_t
radv_init_dcc_control_reg(struct radv_device *device,
			  struct radv_image_view *iview)
{
	unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
	unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
	unsigned max_compressed_block_size;
	unsigned independent_128b_blocks;
	unsigned independent_64b_blocks;

	if (!radv_dcc_enabled(iview->image, iview->base_mip))
		return 0;

	if (!device->physical_device->rad_info.has_dedicated_vram) {
		/* amdvlk: [min-compressed-block-size] should be set to 32 for
		 * dGPU and 64 for APU because all of our APUs to date use
		 * DIMMs which have a request granularity size of 64B while all
		 * other chips have a 32B request size.
		 */
		min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
	}

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		/* GFX10 path: independent 128B blocks, 64B independence off. */
		max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
		independent_64b_blocks = 0;
		independent_128b_blocks = 1;
	} else {
		independent_128b_blocks = 0;

		/* Smaller uncompressed block sizes for small-bpe MSAA
		 * surfaces. */
		if (iview->image->info.samples > 1) {
			if (iview->image->planes[0].surface.bpe == 1)
				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
			else if (iview->image->planes[0].surface.bpe == 2)
				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
		}

		if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
					   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
					   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
			/* If this DCC image is potentially going to be used in texture
			 * fetches, we need some special settings.
			 */
			independent_64b_blocks = 1;
			max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
		} else {
			/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
			 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
			 * big as possible for better compression state.
			 */
			independent_64b_blocks = 0;
			max_compressed_block_size = max_uncompressed_block_size;
		}
	}

	return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
	       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
	       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
	       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
	       S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}
6594
/* Fills in the CB (color buffer) register state for a color image view.
 * Handles the legacy (GFX6-8), GFX9 and GFX10 register layouts, plus
 * CMASK/FMASK/DCC metadata addressing.
 */
void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	uint64_t va;
	const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
	const struct radeon_surf *surf = &plane->surface;

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

	/* Base address of the plane backing this view (256B aligned, hence
	 * the >> 8 below). */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;

	cb->cb_color_base = va >> 8;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (device->physical_device->rad_info.chip_class >= GFX10) {
			cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
				S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
				S_028EE0_CMASK_PIPE_ALIGNED(1) |
				S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
		} else {
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};

			/* Use the surface's DCC alignment flags when DCC
			 * metadata exists. */
			if (surf->dcc_offset)
				meta = surf->u.gfx9.dcc;

			cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
				S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
				S_028C74_RB_ALIGNED(meta.rb_aligned) |
				S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
			cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
		}

		cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
		cb->cb_color_base |= surf->tile_swizzle;
	} else {
		/* Legacy (GFX6-8) addressing: per-level offsets and tile
		 * mode indices. */
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

		cb->cb_color_base += level_info->offset >> 8;
		if (level_info->mode == RADEON_SURF_MODE_2D)
			cb->cb_color_base |= surf->tile_swizzle;

		pitch_tile_max = level_info->nblk_x / 8 - 1;
		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
		tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
		cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;

		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

		if (radv_image_has_fmask(iview->image)) {
			if (device->physical_device->rad_info.chip_class >= GFX7)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
		} else {
			/* This must be set for fast clear to work without FMASK. */
			if (device->physical_device->rad_info.chip_class >= GFX7)
				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
		}
	}

	/* CMASK variables */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += surf->cmask_offset;
	cb->cb_color_cmask = va >> 8;

	/* DCC metadata address. */
	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	va += surf->dcc_offset;

	if (radv_dcc_enabled(iview->image, iview->base_mip) &&
	    device->physical_device->rad_info.chip_class <= GFX8)
		va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;

	unsigned dcc_tile_swizzle = surf->tile_swizzle;
	dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;

	cb->cb_dcc_base = va >> 8;
	cb->cb_dcc_base |= dcc_tile_swizzle;

	/* GFX10 field has the same base shift as the GFX6 field. */
	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX_GFX10(max_slice);

	if (iview->image->info.samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->info.samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (radv_image_has_fmask(iview->image)) {
		va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask |= surf->fmask_tile_swizzle;
	} else {
		/* Without FMASK, point the FMASK base at the color base. */
		cb->cb_color_fmask = cb->cb_color_base;
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, false);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (radv_image_has_fmask(iview->image)) {
		cb->cb_color_info |= S_028C70_COMPRESSION(1);
		if (device->physical_device->rad_info.chip_class == GFX6) {
			unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
		}

		if (radv_image_is_tc_compat_cmask(iview->image)) {
			/* Allow the texture block to read FMASK directly
			 * without decompressing it. This bit must be cleared
			 * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
			 * otherwise the operation doesn't happen.
			 */
			cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);

			/* Set CMASK into a tiling format that allows the
			 * texture block to read it.
			 */
			cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
		}
	}

	if (radv_image_has_cmask(iview->image) &&
	    !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (radv_dcc_enabled(iview->image, iview->base_mip))
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

	/* This must be set for fast clear to work without FMASK. */
	if (!radv_image_has_fmask(iview->image) &&
	    device->physical_device->rad_info.chip_class == GFX6) {
		unsigned bankh = util_logbase2(surf->u.legacy.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);

		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
		/* Per-plane extents for multi-planar (e.g. chroma) planes. */
		unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
		unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);

		if (device->physical_device->rad_info.chip_class >= GFX10) {
			cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);

			cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
				S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
				S_028EE0_RESOURCE_LEVEL(1);
		} else {
			cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
			cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
				S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
		}

		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
			S_028C68_MIP0_HEIGHT(height - 1) |
			S_028C68_MAX_MIP(iview->image->info.levels - 1);
	}
}
6817
6818 static unsigned
6819 radv_calc_decompress_on_z_planes(struct radv_device *device,
6820 struct radv_image_view *iview)
6821 {
6822 unsigned max_zplanes = 0;
6823
6824 assert(radv_image_is_tc_compat_htile(iview->image));
6825
6826 if (device->physical_device->rad_info.chip_class >= GFX9) {
6827 /* Default value for 32-bit depth surfaces. */
6828 max_zplanes = 4;
6829
6830 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
6831 iview->image->info.samples > 1)
6832 max_zplanes = 2;
6833
6834 max_zplanes = max_zplanes + 1;
6835 } else {
6836 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
6837 /* Do not enable Z plane compression for 16-bit depth
6838 * surfaces because isn't supported on GFX8. Only
6839 * 32-bit depth surfaces are supported by the hardware.
6840 * This allows to maintain shader compatibility and to
6841 * reduce the number of depth decompressions.
6842 */
6843 max_zplanes = 1;
6844 } else {
6845 if (iview->image->info.samples <= 1)
6846 max_zplanes = 5;
6847 else if (iview->image->info.samples <= 4)
6848 max_zplanes = 3;
6849 else
6850 max_zplanes = 2;
6851 }
6852 }
6853
6854 return max_zplanes;
6855 }
6856
/* Fills in the DB (depth/stencil buffer) register state for a depth/stencil
 * image view. Handles the GFX9+/GFX10 and legacy (GFX6-8) register layouts,
 * including HTILE metadata addressing.
 */
void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format, stencil_format;
	uint64_t va, s_offs, z_offs;
	bool stencil_only = false;
	const struct radv_image_plane *plane = &iview->image->planes[0];
	const struct radeon_surf *surf = &plane->surface;

	assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

	memset(ds, 0, sizeof(*ds));
	/* Polygon offset scaling depends on the depth format's bit depth. */
	switch (iview->image->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	case VK_FORMAT_S8_UINT:
		stencil_only = true;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->image->vk_format);
	stencil_format = surf->has_stencil ?
		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(max_slice);
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		/* GFX10 extends the slice fields with high bits. */
		ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
			S_028008_SLICE_MAX_HI(max_slice >> 11);
	}

	ds->db_htile_data_base = 0;
	ds->db_htile_surface = 0;

	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		assert(surf->u.gfx9.surf_offset == 0);
		s_offs += surf->u.gfx9.stencil_offset;

		ds->db_z_info = S_028038_FORMAT(format) |
			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
			S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
			S_028038_MAXMIP(iview->image->info.levels - 1) |
			S_028038_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
			S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);

		if (device->physical_device->rad_info.chip_class == GFX9) {
			ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
			ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
		}

		ds->db_depth_view |= S_028008_MIPID(level);
		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
			S_02801C_Y_MAX(iview->image->info.height - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

				if (device->physical_device->rad_info.chip_class >= GFX10) {
					ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
					ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
				} else {
					ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
					ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
				}
			}

			if (!surf->has_stencil)
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				surf->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
				S_028ABC_PIPE_ALIGNED(1);

			if (device->physical_device->rad_info.chip_class == GFX9) {
				ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
			}
		}
	} else {
		/* Legacy (GFX6-8) path: per-level offsets and tile modes. */
		const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

		if (stencil_only)
			level_info = &surf->u.legacy.stencil_level[level];

		z_offs += surf->u.legacy.level[level].offset;
		s_offs += surf->u.legacy.stencil_level[level].offset;

		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

		if (iview->image->info.samples > 1)
			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

		if (device->physical_device->rad_info.chip_class >= GFX7) {
			/* GFX7/8: tiling parameters come from the chip's
			 * tile-mode tables. */
			struct radeon_info *info = &device->physical_device->rad_info;
			unsigned tiling_index = surf->u.legacy.tiling_index[level];
			unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
			unsigned macro_index = surf->u.legacy.macro_tile_index;
			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

			if (stencil_only)
				tile_mode = stencil_tile_mode;

			ds->db_depth_info |=
				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
		} else {
			/* GFX6: tile mode indices straight from the surface. */
			unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
			tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
			if (stencil_only)
				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		}

		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

		if (radv_htile_enabled(iview->image, level)) {
			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

			if (!surf->has_stencil &&
			    !radv_image_is_tc_compat_htile(iview->image))
				/* Use all of the htile_buffer for depth if there's no stencil. */
				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
				surf->htile_offset;
			ds->db_htile_data_base = va >> 8;
			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

			if (radv_image_is_tc_compat_htile(iview->image)) {
				unsigned max_zplanes =
					radv_calc_decompress_on_z_planes(device, iview);

				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
			}
		}
	}

	/* Depth/stencil base addresses are 256B aligned. */
	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}
7042
7043 VkResult radv_CreateFramebuffer(
7044 VkDevice _device,
7045 const VkFramebufferCreateInfo* pCreateInfo,
7046 const VkAllocationCallbacks* pAllocator,
7047 VkFramebuffer* pFramebuffer)
7048 {
7049 RADV_FROM_HANDLE(radv_device, device, _device);
7050 struct radv_framebuffer *framebuffer;
7051 const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
7052 vk_find_struct_const(pCreateInfo->pNext,
7053 FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
7054
7055 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
7056
7057 size_t size = sizeof(*framebuffer);
7058 if (!imageless_create_info)
7059 size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
7060 framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
7061 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7062 if (framebuffer == NULL)
7063 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7064
7065 vk_object_base_init(&device->vk, &framebuffer->base,
7066 VK_OBJECT_TYPE_FRAMEBUFFER);
7067
7068 framebuffer->attachment_count = pCreateInfo->attachmentCount;
7069 framebuffer->width = pCreateInfo->width;
7070 framebuffer->height = pCreateInfo->height;
7071 framebuffer->layers = pCreateInfo->layers;
7072 if (imageless_create_info) {
7073 for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
7074 const VkFramebufferAttachmentImageInfo *attachment =
7075 imageless_create_info->pAttachmentImageInfos + i;
7076 framebuffer->width = MIN2(framebuffer->width, attachment->width);
7077 framebuffer->height = MIN2(framebuffer->height, attachment->height);
7078 framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
7079 }
7080 } else {
7081 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
7082 VkImageView _iview = pCreateInfo->pAttachments[i];
7083 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
7084 framebuffer->attachments[i] = iview;
7085 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
7086 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
7087 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
7088 }
7089 }
7090
7091 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
7092 return VK_SUCCESS;
7093 }
7094
7095 void radv_DestroyFramebuffer(
7096 VkDevice _device,
7097 VkFramebuffer _fb,
7098 const VkAllocationCallbacks* pAllocator)
7099 {
7100 RADV_FROM_HANDLE(radv_device, device, _device);
7101 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
7102
7103 if (!fb)
7104 return;
7105 vk_object_base_finish(&fb->base);
7106 vk_free2(&device->vk.alloc, pAllocator, fb);
7107 }
7108
7109 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
7110 {
7111 switch (address_mode) {
7112 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
7113 return V_008F30_SQ_TEX_WRAP;
7114 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
7115 return V_008F30_SQ_TEX_MIRROR;
7116 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
7117 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
7118 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
7119 return V_008F30_SQ_TEX_CLAMP_BORDER;
7120 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
7121 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
7122 default:
7123 unreachable("illegal tex wrap mode");
7124 break;
7125 }
7126 }
7127
7128 static unsigned
7129 radv_tex_compare(VkCompareOp op)
7130 {
7131 switch (op) {
7132 case VK_COMPARE_OP_NEVER:
7133 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7134 case VK_COMPARE_OP_LESS:
7135 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
7136 case VK_COMPARE_OP_EQUAL:
7137 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
7138 case VK_COMPARE_OP_LESS_OR_EQUAL:
7139 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
7140 case VK_COMPARE_OP_GREATER:
7141 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
7142 case VK_COMPARE_OP_NOT_EQUAL:
7143 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
7144 case VK_COMPARE_OP_GREATER_OR_EQUAL:
7145 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
7146 case VK_COMPARE_OP_ALWAYS:
7147 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
7148 default:
7149 unreachable("illegal compare mode");
7150 break;
7151 }
7152 }
7153
7154 static unsigned
7155 radv_tex_filter(VkFilter filter, unsigned max_ansio)
7156 {
7157 switch (filter) {
7158 case VK_FILTER_NEAREST:
7159 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
7160 V_008F38_SQ_TEX_XY_FILTER_POINT);
7161 case VK_FILTER_LINEAR:
7162 return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
7163 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
7164 case VK_FILTER_CUBIC_IMG:
7165 default:
7166 fprintf(stderr, "illegal texture filter");
7167 return 0;
7168 }
7169 }
7170
7171 static unsigned
7172 radv_tex_mipfilter(VkSamplerMipmapMode mode)
7173 {
7174 switch (mode) {
7175 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
7176 return V_008F38_SQ_TEX_Z_FILTER_POINT;
7177 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
7178 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
7179 default:
7180 return V_008F38_SQ_TEX_Z_FILTER_NONE;
7181 }
7182 }
7183
7184 static unsigned
7185 radv_tex_bordercolor(VkBorderColor bcolor)
7186 {
7187 switch (bcolor) {
7188 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
7189 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
7190 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
7191 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
7192 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
7193 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
7194 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
7195 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
7196 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
7197 case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
7198 case VK_BORDER_COLOR_INT_CUSTOM_EXT:
7199 return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
7200 default:
7201 break;
7202 }
7203 return 0;
7204 }
7205
/* Map a max-anisotropy sample count (1/2/4/8/16x) to the hw ratio
 * enum 0..4 (i.e. floor(log2(filter)) clamped to [0, 4]). */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned level = 0;

	while (level < 4 && filter >= (2u << level))
		level++;

	return level;
}
7219
7220 static unsigned
7221 radv_tex_filter_mode(VkSamplerReductionMode mode)
7222 {
7223 switch (mode) {
7224 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
7225 return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7226 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
7227 return V_008F30_SQ_IMG_FILTER_MODE_MIN;
7228 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
7229 return V_008F30_SQ_IMG_FILTER_MODE_MAX;
7230 default:
7231 break;
7232 }
7233 return 0;
7234 }
7235
7236 static uint32_t
7237 radv_get_max_anisotropy(struct radv_device *device,
7238 const VkSamplerCreateInfo *pCreateInfo)
7239 {
7240 if (device->force_aniso >= 0)
7241 return device->force_aniso;
7242
7243 if (pCreateInfo->anisotropyEnable &&
7244 pCreateInfo->maxAnisotropy > 1.0f)
7245 return (uint32_t)pCreateInfo->maxAnisotropy;
7246
7247 return 0;
7248 }
7249
/* Convert a float to signed fixed point with frac_bits fractional bits
 * (truncating toward zero).  The shift operand is unsigned so that
 * frac_bits >= 31 does not invoke signed-overflow UB. */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	return value * (1u << frac_bits);
}
7254
7255 static uint32_t radv_register_border_color(struct radv_device *device,
7256 VkClearColorValue value)
7257 {
7258 uint32_t slot;
7259
7260 pthread_mutex_lock(&device->border_color_data.mutex);
7261
7262 for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
7263 if (!device->border_color_data.used[slot]) {
7264 /* Copy to the GPU wrt endian-ness. */
7265 util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot],
7266 &value,
7267 sizeof(VkClearColorValue));
7268
7269 device->border_color_data.used[slot] = true;
7270 break;
7271 }
7272 }
7273
7274 pthread_mutex_unlock(&device->border_color_data.mutex);
7275
7276 return slot;
7277 }
7278
/* Return a custom border color slot previously acquired with
 * radv_register_border_color() to the free pool.  Takes the table
 * mutex to serialize against concurrent register/unregister. */
static void radv_unregister_border_color(struct radv_device *device,
					 uint32_t slot)
{
	pthread_mutex_lock(&device->border_color_data.mutex);

	device->border_color_data.used[slot] = false;

	pthread_mutex_unlock(&device->border_color_data.mutex);
}
7288
/* Fill in the 4-dword hw sampler descriptor (state[0..3]) from a
 * VkSamplerCreateInfo, and register a custom border color slot if the
 * VK_EXT_custom_border_color path is taken (released again in
 * radv_DestroySampler). */
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	/* COMPAT_MODE is only set on GFX8/GFX9. */
	bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
			   device->physical_device->rad_info.chip_class == GFX9;
	unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
	unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	/* Truncate coordinates only when both filters are NEAREST. */
	bool trunc_coord = pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
	/* The border color only matters if some address mode actually clamps to it. */
	bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
				 pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
				 pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
	VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
	uint32_t border_color_ptr;

	/* VK_EXT_sampler_filter_minmax: optional min/max reduction mode. */
	const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
		vk_find_struct_const(pCreateInfo->pNext,
				     SAMPLER_REDUCTION_MODE_CREATE_INFO);
	if (sampler_reduction)
		filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);

	if (pCreateInfo->compareEnable)
		depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);

	/* COUNT acts as the "no custom color" sentinel. */
	sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;

	if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
		const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
			vk_find_struct_const(pCreateInfo->pNext,
					     SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);

		assert(custom_border_color);

		sampler->border_color_slot =
			radv_register_border_color(device, custom_border_color->customBorderColor);

		/* Did we fail to find a slot? */
		if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
			fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
			border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
		}
	}

	/* If we don't have a custom color, set the ptr to 0 */
	border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT
		? sampler->border_color_slot
		: 0;

	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(compat_mode) |
			     S_008F30_FILTER_MODE(filter_mode) |
			     S_008F30_TRUNC_COORD(trunc_coord));
	/* LOD values are unsigned 4.8 fixed point, clamped to [0, 15]. */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	/* LOD bias is signed 5.8 fixed point in [-16, 16]. */
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0));
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));

	/* Per-generation quirk bits in dword 2. */
	if (device->physical_device->rad_info.chip_class >= GFX10) {
		sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
	} else {
		sampler->state[2] |=
			S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
			S_008F38_FILTER_PREC_FIX(1) |
			S_008F38_ANISO_OVERRIDE_GFX6(device->physical_device->rad_info.chip_class >= GFX8);
	}
}
7372
7373 VkResult radv_CreateSampler(
7374 VkDevice _device,
7375 const VkSamplerCreateInfo* pCreateInfo,
7376 const VkAllocationCallbacks* pAllocator,
7377 VkSampler* pSampler)
7378 {
7379 RADV_FROM_HANDLE(radv_device, device, _device);
7380 struct radv_sampler *sampler;
7381
7382 const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
7383 vk_find_struct_const(pCreateInfo->pNext,
7384 SAMPLER_YCBCR_CONVERSION_INFO);
7385
7386 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
7387
7388 sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
7389 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7390 if (!sampler)
7391 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7392
7393 vk_object_base_init(&device->vk, &sampler->base,
7394 VK_OBJECT_TYPE_SAMPLER);
7395
7396 radv_init_sampler(device, sampler, pCreateInfo);
7397
7398 sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL;
7399 *pSampler = radv_sampler_to_handle(sampler);
7400
7401 return VK_SUCCESS;
7402 }
7403
7404 void radv_DestroySampler(
7405 VkDevice _device,
7406 VkSampler _sampler,
7407 const VkAllocationCallbacks* pAllocator)
7408 {
7409 RADV_FROM_HANDLE(radv_device, device, _device);
7410 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
7411
7412 if (!sampler)
7413 return;
7414
7415 if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
7416 radv_unregister_border_color(device, sampler->border_color_slot);
7417
7418 vk_object_base_finish(&sampler->base);
7419 vk_free2(&device->vk.alloc, pAllocator, sampler);
7420 }
7421
/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	 * What follows is a condensed summary, to help you navigate the large and
	 * confusing official doc.
	 *
	 * - Loader interface v0 is incompatible with later versions. We don't
	 * support it.
	 *
	 * - In loader interface v1:
	 * - The first ICD entrypoint called by the loader is
	 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	 * entrypoint.
	 * - The ICD must statically expose no other Vulkan symbol unless it is
	 * linked with -Bsymbolic.
	 * - Each dispatchable Vulkan handle created by the ICD must be
	 * a pointer to a struct whose first member is VK_LOADER_DATA. The
	 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	 * vkDestroySurfaceKHR(). The ICD must be capable of working with
	 * such loader-managed surfaces.
	 *
	 * - Loader interface v2 differs from v1 in:
	 * - The first ICD entrypoint called by the loader is
	 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	 * statically expose this entrypoint.
	 *
	 * - Loader interface v3 differs from v2 in:
	 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
	 * because the loader no longer does so.
	 */
	/* This driver implements up to interface version 4; report the
	 * smaller of that and what the loader offered. */
	*pSupportedVersion = MIN2(*pSupportedVersion, 4u);
	return VK_SUCCESS;
}
7465
7466 VkResult radv_GetMemoryFdKHR(VkDevice _device,
7467 const VkMemoryGetFdInfoKHR *pGetFdInfo,
7468 int *pFD)
7469 {
7470 RADV_FROM_HANDLE(radv_device, device, _device);
7471 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
7472
7473 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
7474
7475 /* At the moment, we support only the below handle types. */
7476 assert(pGetFdInfo->handleType ==
7477 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
7478 pGetFdInfo->handleType ==
7479 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
7480
7481 bool ret = radv_get_memory_fd(device, memory, pFD);
7482 if (ret == false)
7483 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
7484 return VK_SUCCESS;
7485 }
7486
7487 static uint32_t radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
7488 enum radeon_bo_domain domains,
7489 enum radeon_bo_flag flags,
7490 enum radeon_bo_flag ignore_flags)
7491 {
7492 /* Don't count GTT/CPU as relevant:
7493 *
7494 * - We're not fully consistent between the two.
7495 * - Sometimes VRAM gets VRAM|GTT.
7496 */
7497 const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM |
7498 RADEON_DOMAIN_GDS |
7499 RADEON_DOMAIN_OA;
7500 uint32_t bits = 0;
7501 for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
7502 if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
7503 continue;
7504
7505 if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
7506 continue;
7507
7508 bits |= 1u << i;
7509 }
7510
7511 return bits;
7512 }
7513
7514 static uint32_t radv_compute_valid_memory_types(struct radv_physical_device *dev,
7515 enum radeon_bo_domain domains,
7516 enum radeon_bo_flag flags)
7517 {
7518 enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
7519 uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
7520
7521 if (!bits) {
7522 ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
7523 bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
7524 }
7525
7526 return bits;
7527 }
7528 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
7529 VkExternalMemoryHandleTypeFlagBits handleType,
7530 int fd,
7531 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
7532 {
7533 RADV_FROM_HANDLE(radv_device, device, _device);
7534
7535 switch (handleType) {
7536 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
7537 enum radeon_bo_domain domains;
7538 enum radeon_bo_flag flags;
7539 if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
7540 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7541
7542 pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
7543 return VK_SUCCESS;
7544 }
7545 default:
7546 /* The valid usage section for this function says:
7547 *
7548 * "handleType must not be one of the handle types defined as
7549 * opaque."
7550 *
7551 * So opaque handle types fall into the default "unsupported" case.
7552 */
7553 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7554 }
7555 }
7556
7557 static VkResult radv_import_opaque_fd(struct radv_device *device,
7558 int fd,
7559 uint32_t *syncobj)
7560 {
7561 uint32_t syncobj_handle = 0;
7562 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
7563 if (ret != 0)
7564 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7565
7566 if (*syncobj)
7567 device->ws->destroy_syncobj(device->ws, *syncobj);
7568
7569 *syncobj = syncobj_handle;
7570 close(fd);
7571
7572 return VK_SUCCESS;
7573 }
7574
7575 static VkResult radv_import_sync_fd(struct radv_device *device,
7576 int fd,
7577 uint32_t *syncobj)
7578 {
7579 /* If we create a syncobj we do it locally so that if we have an error, we don't
7580 * leave a syncobj in an undetermined state in the fence. */
7581 uint32_t syncobj_handle = *syncobj;
7582 if (!syncobj_handle) {
7583 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
7584 if (ret) {
7585 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7586 }
7587 }
7588
7589 if (fd == -1) {
7590 device->ws->signal_syncobj(device->ws, syncobj_handle);
7591 } else {
7592 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
7593 if (ret != 0)
7594 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7595 }
7596
7597 *syncobj = syncobj_handle;
7598 if (fd != -1)
7599 close(fd);
7600
7601 return VK_SUCCESS;
7602 }
7603
7604 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
7605 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
7606 {
7607 RADV_FROM_HANDLE(radv_device, device, _device);
7608 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
7609 VkResult result;
7610 struct radv_semaphore_part *dst = NULL;
7611
7612 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
7613 dst = &sem->temporary;
7614 } else {
7615 dst = &sem->permanent;
7616 }
7617
7618 uint32_t syncobj = dst->kind == RADV_SEMAPHORE_SYNCOBJ ? dst->syncobj : 0;
7619
7620 switch(pImportSemaphoreFdInfo->handleType) {
7621 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
7622 result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
7623 break;
7624 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
7625 result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
7626 break;
7627 default:
7628 unreachable("Unhandled semaphore handle type");
7629 }
7630
7631 if (result == VK_SUCCESS) {
7632 dst->syncobj = syncobj;
7633 dst->kind = RADV_SEMAPHORE_SYNCOBJ;
7634 }
7635
7636 return result;
7637 }
7638
7639 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
7640 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
7641 int *pFd)
7642 {
7643 RADV_FROM_HANDLE(radv_device, device, _device);
7644 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
7645 int ret;
7646 uint32_t syncobj_handle;
7647
7648 if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
7649 assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ);
7650 syncobj_handle = sem->temporary.syncobj;
7651 } else {
7652 assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ);
7653 syncobj_handle = sem->permanent.syncobj;
7654 }
7655
7656 switch(pGetFdInfo->handleType) {
7657 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
7658 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
7659 break;
7660 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
7661 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
7662 if (!ret) {
7663 if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
7664 radv_destroy_semaphore_part(device, &sem->temporary);
7665 } else {
7666 device->ws->reset_syncobj(device->ws, syncobj_handle);
7667 }
7668 }
7669 break;
7670 default:
7671 unreachable("Unhandled semaphore handle type");
7672 }
7673
7674 if (ret)
7675 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7676 return VK_SUCCESS;
7677 }
7678
7679 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
7680 VkPhysicalDevice physicalDevice,
7681 const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
7682 VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
7683 {
7684 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
7685 VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
7686
7687 if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
7688 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
7689 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
7690 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
7691
7692 /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
7693 } else if (pdevice->rad_info.has_syncobj_wait_for_submit &&
7694 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
7695 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
7696 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
7697 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
7698 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
7699 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
7700 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
7701 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
7702 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
7703 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
7704 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
7705 } else {
7706 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
7707 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
7708 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
7709 }
7710 }
7711
7712 VkResult radv_ImportFenceFdKHR(VkDevice _device,
7713 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
7714 {
7715 RADV_FROM_HANDLE(radv_device, device, _device);
7716 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
7717 uint32_t *syncobj_dst = NULL;
7718
7719
7720 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
7721 syncobj_dst = &fence->temp_syncobj;
7722 } else {
7723 syncobj_dst = &fence->syncobj;
7724 }
7725
7726 switch(pImportFenceFdInfo->handleType) {
7727 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
7728 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
7729 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
7730 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
7731 default:
7732 unreachable("Unhandled fence handle type");
7733 }
7734 }
7735
7736 VkResult radv_GetFenceFdKHR(VkDevice _device,
7737 const VkFenceGetFdInfoKHR *pGetFdInfo,
7738 int *pFd)
7739 {
7740 RADV_FROM_HANDLE(radv_device, device, _device);
7741 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
7742 int ret;
7743 uint32_t syncobj_handle;
7744
7745 if (fence->temp_syncobj)
7746 syncobj_handle = fence->temp_syncobj;
7747 else
7748 syncobj_handle = fence->syncobj;
7749
7750 switch(pGetFdInfo->handleType) {
7751 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
7752 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
7753 break;
7754 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
7755 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
7756 if (!ret) {
7757 if (fence->temp_syncobj) {
7758 close (fence->temp_syncobj);
7759 fence->temp_syncobj = 0;
7760 } else {
7761 device->ws->reset_syncobj(device->ws, syncobj_handle);
7762 }
7763 }
7764 break;
7765 default:
7766 unreachable("Unhandled fence handle type");
7767 }
7768
7769 if (ret)
7770 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7771 return VK_SUCCESS;
7772 }
7773
7774 void radv_GetPhysicalDeviceExternalFenceProperties(
7775 VkPhysicalDevice physicalDevice,
7776 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
7777 VkExternalFenceProperties *pExternalFenceProperties)
7778 {
7779 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
7780
7781 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
7782 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
7783 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
7784 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
7785 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
7786 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
7787 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
7788 } else {
7789 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
7790 pExternalFenceProperties->compatibleHandleTypes = 0;
7791 pExternalFenceProperties->externalFenceFeatures = 0;
7792 }
7793 }
7794
/* VK_EXT_debug_report: thin wrapper that delegates callback creation to
 * the shared vk_debug_report helpers stored on the instance. */
VkResult
radv_CreateDebugReportCallbackEXT(VkInstance _instance,
                                 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
                                 const VkAllocationCallbacks* pAllocator,
                                 VkDebugReportCallbackEXT* pCallback)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
	                                       pCreateInfo, pAllocator, &instance->alloc,
	                                       pCallback);
}
7806
/* VK_EXT_debug_report: destroy a callback created by
 * radv_CreateDebugReportCallbackEXT via the shared helper. */
void
radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
                                  VkDebugReportCallbackEXT _callback,
                                  const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
	                                 _callback, pAllocator, &instance->alloc);
}
7816
/* VK_EXT_debug_report: forward an application-injected message to all
 * registered debug-report callbacks on the instance. */
void
radv_DebugReportMessageEXT(VkInstance _instance,
                          VkDebugReportFlagsEXT flags,
                          VkDebugReportObjectTypeEXT objectType,
                          uint64_t object,
                          size_t location,
                          int32_t messageCode,
                          const char* pLayerPrefix,
                          const char* pMessage)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
	                object, location, messageCode, pLayerPrefix, pMessage);
}
7831
7832 void
7833 radv_GetDeviceGroupPeerMemoryFeatures(
7834 VkDevice device,
7835 uint32_t heapIndex,
7836 uint32_t localDeviceIndex,
7837 uint32_t remoteDeviceIndex,
7838 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
7839 {
7840 assert(localDeviceIndex == remoteDeviceIndex);
7841
7842 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
7843 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
7844 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
7845 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
7846 }
7847
/* Time domains advertised for VK_EXT_calibrated_timestamps; must stay in
 * sync with the switch in radv_GetCalibratedTimestampsEXT(). */
static const VkTimeDomainEXT radv_time_domains[] = {
	VK_TIME_DOMAIN_DEVICE_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
};
7853
7854 VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
7855 VkPhysicalDevice physicalDevice,
7856 uint32_t *pTimeDomainCount,
7857 VkTimeDomainEXT *pTimeDomains)
7858 {
7859 int d;
7860 VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
7861
7862 for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
7863 vk_outarray_append(&out, i) {
7864 *i = radv_time_domains[d];
7865 }
7866 }
7867
7868 return vk_outarray_status(&out);
7869 }
7870
7871 static uint64_t
7872 radv_clock_gettime(clockid_t clock_id)
7873 {
7874 struct timespec current;
7875 int ret;
7876
7877 ret = clock_gettime(clock_id, &current);
7878 if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
7879 ret = clock_gettime(CLOCK_MONOTONIC, &current);
7880 if (ret < 0)
7881 return 0;
7882
7883 return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
7884 }
7885
/* VK_EXT_calibrated_timestamps: sample the requested time domains as close
 * together as possible and report the maximum deviation between them.
 * The sampling window is bracketed by two CLOCK_MONOTONIC_RAW reads. */
VkResult radv_GetCalibratedTimestampsEXT(
	VkDevice _device,
	uint32_t timestampCount,
	const VkCalibratedTimestampInfoEXT *pTimestampInfos,
	uint64_t *pTimestamps,
	uint64_t *pMaxDeviation)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
	int d;
	uint64_t begin, end;
	uint64_t max_clock_period = 0;

	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);

	for (d = 0; d < timestampCount; d++) {
		switch (pTimestampInfos[d].timeDomain) {
		case VK_TIME_DOMAIN_DEVICE_EXT:
			pTimestamps[d] = device->ws->query_value(device->ws,
								 RADEON_TIMESTAMP);
			/* GPU tick period in ns, derived from the crystal frequency. */
			uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
			max_clock_period = MAX2(max_clock_period, device_period);
			break;
		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
			max_clock_period = MAX2(max_clock_period, 1);
			break;

		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
			/* Reuse the bracketing sample so RAW carries no extra skew. */
			pTimestamps[d] = begin;
			break;
		default:
			pTimestamps[d] = 0;
			break;
		}
	}

	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);

        /*
         * The maximum deviation is the sum of the interval over which we
         * perform the sampling and the maximum period of any sampled
         * clock. That's because the maximum skew between any two sampled
         * clock edges is when the sampled clock with the largest period is
         * sampled at the end of that period but right at the beginning of the
         * sampling interval and some other clock is sampled right at the
         * beginning of its sampling period and right at the end of the
         * sampling interval. Let's assume the GPU has the longest clock
         * period and that the application is sampling GPU and monotonic:
         *
         *                               s                 e
         *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
         *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
         *
         *                               g
         *		  0         1         2         3
         *	GPU       -----_____-----_____-----_____-----_____
         *
         *                                                m
         *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
         *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
         *
         *	Interval                     <----------------->
         *	Deviation           <-------------------------->
         *
         *	s  = read(raw)       2
         *	g  = read(GPU)       1
         *	m  = read(monotonic) 2
         *	e  = read(raw)       b
         *
         * We round the sample interval up by one tick to cover sampling error
         * in the interval clock
         */

	uint64_t sample_interval = end - begin + 1;

	*pMaxDeviation = sample_interval + max_clock_period;

	return VK_SUCCESS;
}
7966
7967 void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
7968 VkPhysicalDevice physicalDevice,
7969 VkSampleCountFlagBits samples,
7970 VkMultisamplePropertiesEXT* pMultisampleProperties)
7971 {
7972 if (samples & (VK_SAMPLE_COUNT_2_BIT |
7973 VK_SAMPLE_COUNT_4_BIT |
7974 VK_SAMPLE_COUNT_8_BIT)) {
7975 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
7976 } else {
7977 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
7978 }
7979 }
7980
/* VK_EXT_private_data: delegate slot creation to the common vk_device
 * private-data implementation. */
VkResult radv_CreatePrivateDataSlotEXT(
    VkDevice                                    _device,
    const VkPrivateDataSlotCreateInfoEXT*       pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPrivateDataSlotEXT*                       pPrivateDataSlot)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	return vk_private_data_slot_create(&device->vk, pCreateInfo, pAllocator,
					   pPrivateDataSlot);
}
7991
/* VK_EXT_private_data: delegate slot destruction to the common helper. */
void radv_DestroyPrivateDataSlotEXT(
    VkDevice                                    _device,
    VkPrivateDataSlotEXT                        privateDataSlot,
    const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	vk_private_data_slot_destroy(&device->vk, privateDataSlot, pAllocator);
}
8000
/* VK_EXT_private_data: store a 64-bit value on any object via the common
 * vk_object_base private-data storage. */
VkResult radv_SetPrivateDataEXT(
    VkDevice                                    _device,
    VkObjectType                                objectType,
    uint64_t                                    objectHandle,
    VkPrivateDataSlotEXT                        privateDataSlot,
    uint64_t                                    data)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	return vk_object_base_set_private_data(&device->vk, objectType,
					       objectHandle, privateDataSlot,
					       data);
}
8013
/* VK_EXT_private_data: read back a value stored with radv_SetPrivateDataEXT
 * (returns 0 through the helper if none was set). */
void radv_GetPrivateDataEXT(
    VkDevice                                    _device,
    VkObjectType                                objectType,
    uint64_t                                    objectHandle,
    VkPrivateDataSlotEXT                        privateDataSlot,
    uint64_t*                                   pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	vk_object_base_get_private_data(&device->vk, objectType, objectHandle,
					privateDataSlot, pData);
}