radv: only enable shaderInt16 on GFX9+ and LLVM7+
mesa.git: src/amd/vulkan/radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "addrlib/gfx9/chip/gfx9_enum.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"

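/* Hash something that changes whenever the driver binary changes, so the
 * on-disk shader cache is invalidated across driver updates: prefer the ELF
 * build-id of the module containing `ptr`, and fall back to the module's
 * filesystem timestamp. */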
static bool
radv_get_build_id(void *ptr, struct mesa_sha1 *ctx)
{
	uint32_t timestamp;

#ifdef HAVE_DL_ITERATE_PHDR
	const struct build_id_note *note = NULL;
	if ((note = build_id_find_nhdr_for_addr(ptr))) {
		_mesa_sha1_update(ctx, build_id_data(note), build_id_length(note));
	} else
#endif
	if (disk_cache_get_function_timestamp(ptr, &timestamp)) {
		if (!timestamp) {
			fprintf(stderr, "radv: The provided filesystem timestamp for the cache is bogus!\n");
		}

		_mesa_sha1_update(ctx, &timestamp, sizeof(timestamp));
	} else
		return false;
	return true;
}

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	unsigned ptr_size = sizeof(void*);

	memset(uuid, 0, VK_UUID_SIZE);
	_mesa_sha1_init(&ctx);

	if (!radv_get_build_id(radv_device_get_cache_uuid, &ctx) ||
	    !radv_get_build_id(LLVMInitializeAMDGPUTargetInfo, &ctx))
		return -1;

	_mesa_sha1_update(&ctx, &family, sizeof(family));
	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
	_mesa_sha1_final(&ctx, sha1);

	memcpy(uuid, sha1, VK_UUID_SIZE);
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;
	char llvm_string[32] = {};

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	snprintf(llvm_string, sizeof(llvm_string),
		 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
		 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
	snprintf(name, name_len, "%s%s", chip_string, llvm_string);
}
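/* Example result (assuming LLVM 7.0.0, i.e. HAVE_LLVM == 0x0700 and
 * MESA_LLVM_VERSION_PATCH == 0): "AMD RADV VEGA10 (LLVM 7.0.0)". */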

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
					  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
					 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}
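/* The resulting layout is up to three heaps (CPU-invisible VRAM, CPU-visible
 * VRAM, GTT) and up to four types: VRAM, write-combined GTT, CPU-visible VRAM
 * and cached GTT. On APUs without dedicated VRAM, the GTT heap and types are
 * additionally marked DEVICE_LOCAL. */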

static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
	const char *family = getenv("RADV_FORCE_FAMILY");
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			device->rad_info.family = i;

			if (i >= CHIP_VEGA10)
				device->rad_info.chip_class = GFX9;
			else if (i >= CHIP_TONGA)
				device->rad_info.chip_class = VI;
			else if (i >= CHIP_BONAIRE)
				device->rad_info.chip_class = CIK;
			else
				device->rad_info.chip_class = SI;

			return;
		}
	}

	fprintf(stderr, "radv: Unknown family: %s\n", family);
	exit(1);
}
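/* The value must match an LLVM processor name as returned by
 * ac_get_llvm_processor_name(), e.g. RADV_FORCE_FAMILY=gfx900 to pretend the
 * device is a Vega10 (example name assumed from LLVM's target naming). */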

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;
	int master_fd = -1;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not open device '%s'", path);

		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
	}

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not get the kernel driver version for device '%s'", path);

		return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		if (master_fd != -1)
			close(master_fd);
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found compatible device '%s'.", path);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		goto fail;
	}

	if (instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_handle_env_var_force_family(device);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The gpu id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	if (device->rad_info.chip_class < VI ||
	    device->rad_info.chip_class > GFX9)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
					 device->rad_info.family == CHIP_VEGA12 ||
					 device->rad_info.family == CHIP_RAVEN;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	/* Out-of-order primitive rasterization. */
	device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
					device->rad_info.max_se >= 2;
	device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	radv_physical_device_init_mem_types(device);
	radv_fill_device_extension_table(device, &device->supported_extensions);

	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		vk_error(instance, result);
		goto fail;
	}

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	return VK_SUCCESS;

fail:
	close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{NULL, 0}
};
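/* These names are parsed from the RADV_DEBUG environment variable as a
 * comma-separated list, e.g. RADV_DEBUG=startup,info,shaders
 * (see the parse_debug_string() call in radv_CreateInstance). */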

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"binning", RADV_PERFTEST_BINNING},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{NULL, 0}
};
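/* Parsed from RADV_PERFTEST the same way, e.g. RADV_PERFTEST=sisched,binning. */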

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
			/* Force enable LLVM sisched for Talos because it looks
			 * safe and it gives a few more FPS.
			 */
			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
		}
	} else if (!strcmp(name, "DOOM_VFR")) {
		/* Work around a Doom VFR game bug. */
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
	}
}

static int radv_get_instance_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		radv_EnumerateInstanceVersion(&client_version);
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Created an instance");

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(instance, result);
	}

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

VkResult radv_EnumeratePhysicalDeviceGroups(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceGroupCount,
	VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDeviceGroupProperties) {
		*pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
			pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
			pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
			pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
		}
	}
	return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
									  : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = pdevice->rad_info.chip_class >= GFX9 ||
					  pdevice->rad_info.family == CHIP_STONEY,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
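		/* Only advertised on GFX9+ with LLVM 7+; this is the change
		 * described in the commit subject. */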
		.shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2KHR *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
			VkPhysicalDeviceMultiviewFeaturesKHR *features = (VkPhysicalDeviceMultiviewFeaturesKHR*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
			VkPhysicalDeviceShaderDrawParameterFeatures *features =
				(VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
			features->shaderDrawParameters = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
				(VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			features->protectedMemory = false;
			break;
		}
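		/* Like shaderInt16, 16-bit storage needs LLVM 7+, but it is
		 * allowed on VI (GFX8) and newer rather than being GFX9-only. */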
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
				(VkPhysicalDevice16BitStorageFeatures*)ext;
			bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI;
			features->storageBuffer16BitAccess = enabled;
			features->uniformAndStorageBuffer16BitAccess = enabled;
			features->storagePushConstant16 = enabled;
			features->storageInputOutput16 = enabled;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
				(VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			features->samplerYcbcrConversion = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
			features->shaderInputAttachmentArrayDynamicIndexing = true;
			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
			features->shaderUniformBufferArrayNonUniformIndexing = false;
			features->shaderSampledImageArrayNonUniformIndexing = false;
			features->shaderStorageBufferArrayNonUniformIndexing = false;
			features->shaderStorageImageArrayNonUniformIndexing = false;
			features->shaderInputAttachmentArrayNonUniformIndexing = false;
			features->shaderUniformTexelBufferArrayNonUniformIndexing = false;
			features->shaderStorageTexelBufferArrayNonUniformIndexing = false;
			features->descriptorBindingUniformBufferUpdateAfterBind = true;
			features->descriptorBindingSampledImageUpdateAfterBind = true;
			features->descriptorBindingStorageImageUpdateAfterBind = true;
			features->descriptorBindingStorageBufferUpdateAfterBind = true;
			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
			features->descriptorBindingUpdateUnusedWhilePending = true;
			features->descriptorBindingPartiallyBound = true;
			features->descriptorBindingVariableDescriptorCount = true;
			features->runtimeDescriptorArray = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
			break;
		}
		default:
			break;
		}
	}
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int: the sum of all limits scaled by descriptor size has to
	 * be at most 2 GiB. A combined image & sampler object counts as one of
	 * each. This limit is for the pipeline layout, not for the set layout,
	 * but there is no set limit, so we just set a pipeline limit. I don't
	 * think any app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);
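	/* Worked example: the divisor is 32 + 32 + 32 + 64 + 64 = 224 bytes,
	 * so assuming MAX_DYNAMIC_BUFFERS == 32 this comes out to roughly
	 * (2^31 - 512) / 224, i.e. about 9.6 million descriptors. */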

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 8,
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties2KHR *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR: {
			VkPhysicalDeviceMultiviewPropertiesKHR *properties = (VkPhysicalDeviceMultiviewPropertiesKHR*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
				(VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
				(VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
				(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
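		/* A subgroup maps to a hardware wavefront, which is 64
		 * invocations wide on all GCN parts; shuffles need the
		 * lane-permute instructions introduced on VI. */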
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
				(VkPhysicalDeviceSubgroupProperties*)ext;
			properties->subgroupSize = 64;
			properties->supportedStages = VK_SHADER_STAGE_ALL;
			properties->supportedOperations =
				VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
				VK_SUBGROUP_FEATURE_BASIC_BIT |
				VK_SUBGROUP_FEATURE_BALLOT_BIT |
				VK_SUBGROUP_FEATURE_QUAD_BIT |
				VK_SUBGROUP_FEATURE_VOTE_BIT;
			if (pdevice->rad_info.chip_class >= VI) {
				properties->supportedOperations |=
					VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
					VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
			}
			properties->quadOperationsInAllStages = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
				(VkPhysicalDeviceMaintenance3Properties*)ext;
			/* Make sure everything is addressable by a signed 32-bit int, and
			 * our largest descriptors are 96 bytes. */
			properties->maxPerSetDescriptors = (1ull << 31) / 96;
			/* Our buffer size fields allow only this much */
			properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
			VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
			/* GFX6-8 only support single channel min/max filter. */
			properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
			properties->filterMinmaxSingleComponentFormats = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.num_good_compute_units /
					(pdevice->rad_info.max_se *
					 pdevice->rad_info.max_sh_per_se);
			properties->simdPerComputeUnit = 4;
			properties->wavefrontsPerSimd =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ||
				pdevice->rad_info.family == CHIP_POLARIS10 ||
				pdevice->rad_info.family == CHIP_POLARIS11 ||
				pdevice->rad_info.family == CHIP_POLARIS12 ||
				pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
			properties->wavefrontSize = 64;
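			/* Peak occupancy per CU is simdPerComputeUnit *
			 * wavefrontsPerSimd, i.e. 4 * 10 = 40 waves on most
			 * chips and 4 * 8 = 32 on the VI variants above. */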

			/* SGPR. */
			properties->sgprsPerSimd =
				radv_get_num_physical_sgprs(pdevice);
			properties->minSgprAllocation =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;
			properties->maxSgprAllocation =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;

			/* VGPR. */
			properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
			properties->minVgprAllocation = 4;
			properties->maxVgprAllocation = 256;
			properties->vgprAllocationGranularity = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
			VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
				(VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
			properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
			properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
			properties->shaderSampledImageArrayNonUniformIndexingNative = false;
			properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
			properties->shaderStorageImageArrayNonUniformIndexingNative = false;
			properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
			properties->robustBufferAccessUpdateAfterBind = false;
			properties->quadDivergentImplicitLod = false;

			size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
			          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
			           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
			           32 /* sampler, largest when combined with image */ +
			           64 /* sampled image */ +
			           64 /* storage image */);
			properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
			properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			properties->protectedNoFault = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = VK_FALSE;
			properties->conservativePointAndLineRasterization = VK_FALSE;
			properties->degenerateTrianglesRasterized = VK_FALSE;
			properties->degenerateLinesRasterized = VK_FALSE;
			properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
			properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
			break;
		}
		default:
			break;
		}
	}
}

static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device* pdevice,
	uint32_t* pCount,
	VkQueueFamilyProperties** pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT |
				      VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
					      VK_QUEUE_TRANSFER_BIT |
					      VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties* pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);
}

VkResult radv_GetMemoryHostPointerPropertiesEXT(
	VkDevice _device,
	VkExternalMemoryHandleTypeFlagBitsKHR handleType,
	const void *pHostPointer,
	VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	switch (handleType)
	{
	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
		const struct radv_physical_device *physical_device = device->physical_device;
		uint32_t memoryTypeBits = 0;
		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
			if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
				memoryTypeBits = (1 << i);
				break;
			}
		}
		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
		return VK_SUCCESS;
	}
	default:
		return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
	}
}

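/* Maps VK_EXT_global_priority values onto amdgpu context priorities. Note
 * that the kernel may refuse higher-than-default priorities for
 * unprivileged processes. */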
static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch(pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		VkDeviceQueueCreateFlags flags,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);
	queue->flags = flags;

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_bo_list_init(struct radv_bo_list *bo_list)
{
	pthread_mutex_init(&bo_list->mutex, NULL);
	bo_list->list.count = bo_list->capacity = 0;
	bo_list->list.bos = NULL;
}

static void
radv_bo_list_finish(struct radv_bo_list *bo_list)
{
	free(bo_list->list.bos);
	pthread_mutex_destroy(&bo_list->mutex);
}

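/* The global BO list is only maintained when use_global_bo_list is set
 * (i.e. VK_EXT_descriptor_indexing is enabled, see radv_CreateDevice);
 * it grows by amortized doubling under the list mutex. */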
static VkResult radv_bo_list_add(struct radv_device *device,
				 struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (unlikely(!device->use_global_bo_list))
		return VK_SUCCESS;

	pthread_mutex_lock(&bo_list->mutex);
	if (bo_list->list.count == bo_list->capacity) {
		unsigned capacity = MAX2(4, bo_list->capacity * 2);
		void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));

		if (!data) {
			pthread_mutex_unlock(&bo_list->mutex);
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		}

		bo_list->list.bos = (struct radeon_winsys_bo**)data;
		bo_list->capacity = capacity;
	}

	bo_list->list.bos[bo_list->list.count++] = bo;
	pthread_mutex_unlock(&bo_list->mutex);
	return VK_SUCCESS;
}

static void radv_bo_list_remove(struct radv_device *device,
				struct radeon_winsys_bo *bo)
{
	struct radv_bo_list *bo_list = &device->bo_list;

	if (unlikely(!device->use_global_bo_list))
		return;

	pthread_mutex_lock(&bo_list->mutex);
	for(unsigned i = 0; i < bo_list->list.count; ++i) {
		if (bo_list->list.bos[i] == bo) {
			bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
			--bo_list->list.count;
			break;
		}
	}
	pthread_mutex_unlock(&bo_list->mutex);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
						       device->physical_device->rad_info.family);
}

static int radv_get_device_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

static int
radv_get_int_debug_option(const char *name, int default_value)
{
	const char *str;
	int result;

	str = getenv(name);
	if (!str) {
		result = default_value;
	} else {
		char *endptr;

		result = strtol(str, &endptr, 0);
		if (str == endptr) {
			/* No digits found. */
			result = default_value;
		}
	}

	return result;
}
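/* Example (hypothetical variable name): with RADV_FOO=32 in the environment,
 * radv_get_int_debug_option("RADV_FOO", 8) returns 32; an unset or
 * non-numeric value returns the default, 8. */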
1503
1504 VkResult radv_CreateDevice(
1505 VkPhysicalDevice physicalDevice,
1506 const VkDeviceCreateInfo* pCreateInfo,
1507 const VkAllocationCallbacks* pAllocator,
1508 VkDevice* pDevice)
1509 {
1510 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1511 VkResult result;
1512 struct radv_device *device;
1513
1514 bool keep_shader_info = false;
1515
1516 /* Check enabled features */
1517 if (pCreateInfo->pEnabledFeatures) {
1518 VkPhysicalDeviceFeatures supported_features;
1519 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1520 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1521 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1522 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1523 for (uint32_t i = 0; i < num_features; i++) {
1524 if (enabled_feature[i] && !supported_feature[i])
1525 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1526 }
1527 }
1528
1529 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1530 sizeof(*device), 8,
1531 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1532 if (!device)
1533 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1534
1535 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1536 device->instance = physical_device->instance;
1537 device->physical_device = physical_device;
1538
1539 device->ws = physical_device->ws;
1540 if (pAllocator)
1541 device->alloc = *pAllocator;
1542 else
1543 device->alloc = physical_device->instance->alloc;
1544
1545 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1546 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1547 int index = radv_get_device_extension_index(ext_name);
1548 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1549 vk_free(&device->alloc, device);
1550 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1551 }
1552
1553 device->enabled_extensions.extensions[index] = true;
1554 }
1555
1556 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1557
1558 /* With update-after-bind we can't attach BOs to the command buffer
1559 * from the descriptor set anymore, so we have to use a global BO list.
1560 */
1561 device->use_global_bo_list =
1562 device->enabled_extensions.EXT_descriptor_indexing;
1563
1564 mtx_init(&device->shader_slab_mutex, mtx_plain);
1565 list_inithead(&device->shader_slabs);
1566
1567 radv_bo_list_init(&device->bo_list);
1568
1569 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1570 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1571 uint32_t qfi = queue_create->queueFamilyIndex;
1572 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1573 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1574
1575 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1576
1577 device->queues[qfi] = vk_alloc(&device->alloc,
1578 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1579 if (!device->queues[qfi]) {
1580 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1581 goto fail;
1582 }
1583
1584 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1585
1586 device->queue_count[qfi] = queue_create->queueCount;
1587
1588 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1589 result = radv_queue_init(device, &device->queues[qfi][q],
1590 qfi, q, queue_create->flags,
1591 global_priority);
1592 if (result != VK_SUCCESS)
1593 goto fail;
1594 }
1595 }
1596
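/* Primitive binning (PBB): GFX9+ only; opt-in via the binning perftest
 * flag, except on Raven where it is enabled by default. */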
1597 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1598 ((device->instance->perftest_flags & RADV_PERFTEST_BINNING) ||
1599 device->physical_device->rad_info.family == CHIP_RAVEN);
1600
1601 /* Disabled and not implemented for now. */
1602 device->dfsm_allowed = device->pbb_allowed &&
1603 device->physical_device->rad_info.family == CHIP_RAVEN;
1604
1605 #ifdef ANDROID
1606 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1607 #endif
1608
1609 /* The maximum number of scratch waves. Scratch space isn't divided
1610 * evenly between CUs. The number is only a function of the number of CUs.
1611 * We can decrease the constant to decrease the scratch buffer size.
1612 *
1613 * sctx->scratch_waves must be >= the maximum possible size of
1614 * 1 threadgroup, so that the hw doesn't hang from being unable
1615 * to start any.
1616 *
1617 * The recommended value is 4 per CU at most. Higher numbers don't
1618 * bring much benefit, but they still occupy chip resources (think
1619 * async compute). I've seen ~2% performance difference between 4 and 32.
1620 */
1621 uint32_t max_threads_per_block = 2048;
1622 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1623 max_threads_per_block / 64);
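/* e.g. a 64-CU GPU gets MAX2(32 * 64, 2048 / 64) = 2048 scratch waves; the
 * 2048 / 64 = 32 floor leaves room for one maximally-sized (2048-thread)
 * workgroup of 64-lane waves. */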
1624
1625 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1626
1627 if (device->physical_device->rad_info.chip_class >= CIK) {
1628 /* If the KMD allows it (there is a KMD hw register for it),
1629 * allow launching waves out-of-order.
1630 */
1631 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1632 }
1633
1634 radv_device_init_gs_info(device);
1635
1636 device->tess_offchip_block_dw_size =
1637 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1638 device->has_distributed_tess =
1639 device->physical_device->rad_info.chip_class >= VI &&
1640 device->physical_device->rad_info.max_se >= 2;
1641
1642 if (getenv("RADV_TRACE_FILE")) {
1643 const char *filename = getenv("RADV_TRACE_FILE");
1644
1645 keep_shader_info = true;
1646
1647 if (!radv_init_trace(device))
1648 goto fail;
1649
1650 fprintf(stderr, "*****************************************************************************\n");
1651 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
1652 fprintf(stderr, "*****************************************************************************\n");
1653
1654 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1655 radv_dump_enabled_options(device, stderr);
1656 }
1657
1658 device->keep_shader_info = keep_shader_info;
1659
1660 result = radv_device_init_meta(device);
1661 if (result != VK_SUCCESS)
1662 goto fail;
1663
1664 radv_device_init_msaa(device);
1665
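/* Create a minimal CS per queue family, used for fence-only and
 * semaphore-only submissions (see radv_signal_fence() and
 * radv_QueueSubmit()): a CONTEXT_CONTROL packet on GFX, a single NOP
 * on compute. */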
1666 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1667 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1668 switch (family) {
1669 case RADV_QUEUE_GENERAL:
1670 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1671 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1672 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1673 break;
1674 case RADV_QUEUE_COMPUTE:
1675 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1676 radeon_emit(device->empty_cs[family], 0);
1677 break;
1678 }
1679 device->ws->cs_finalize(device->empty_cs[family]);
1680 }
1681
1682 if (device->physical_device->rad_info.chip_class >= CIK)
1683 cik_create_gfx_config(device);
1684
1685 VkPipelineCacheCreateInfo ci;
1686 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1687 ci.pNext = NULL;
1688 ci.flags = 0;
1689 ci.pInitialData = NULL;
1690 ci.initialDataSize = 0;
1691 VkPipelineCache pc;
1692 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1693 &ci, NULL, &pc);
1694 if (result != VK_SUCCESS)
1695 goto fail_meta;
1696
1697 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1698
1699 device->force_aniso =
1700 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
1701 if (device->force_aniso >= 0) {
1702 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
1703 1 << util_logbase2(device->force_aniso));
1704 }
1705
1706 *pDevice = radv_device_to_handle(device);
1707 return VK_SUCCESS;
1708
1709 fail_meta:
1710 radv_device_finish_meta(device);
1711 fail:
1712 radv_bo_list_finish(&device->bo_list);
1713
1714 if (device->trace_bo)
1715 device->ws->buffer_destroy(device->trace_bo);
1716
1717 if (device->gfx_init)
1718 device->ws->buffer_destroy(device->gfx_init);
1719
1720 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1721 for (unsigned q = 0; q < device->queue_count[i]; q++)
1722 radv_queue_finish(&device->queues[i][q]);
1723 if (device->queue_count[i])
1724 vk_free(&device->alloc, device->queues[i]);
1725 }
1726
1727 vk_free(&device->alloc, device);
1728 return result;
1729 }
1730
1731 void radv_DestroyDevice(
1732 VkDevice _device,
1733 const VkAllocationCallbacks* pAllocator)
1734 {
1735 RADV_FROM_HANDLE(radv_device, device, _device);
1736
1737 if (!device)
1738 return;
1739
1740 if (device->trace_bo)
1741 device->ws->buffer_destroy(device->trace_bo);
1742
1743 if (device->gfx_init)
1744 device->ws->buffer_destroy(device->gfx_init);
1745
1746 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1747 for (unsigned q = 0; q < device->queue_count[i]; q++)
1748 radv_queue_finish(&device->queues[i][q]);
1749 if (device->queue_count[i])
1750 vk_free(&device->alloc, device->queues[i]);
1751 if (device->empty_cs[i])
1752 device->ws->cs_destroy(device->empty_cs[i]);
1753 }
1754 radv_device_finish_meta(device);
1755
1756 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1757 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1758
1759 radv_destroy_shader_slabs(device);
1760
1761 radv_bo_list_finish(&device->bo_list);
1762 vk_free(&device->alloc, device);
1763 }
1764
1765 VkResult radv_EnumerateInstanceLayerProperties(
1766 uint32_t* pPropertyCount,
1767 VkLayerProperties* pProperties)
1768 {
1769 if (pProperties == NULL) {
1770 *pPropertyCount = 0;
1771 return VK_SUCCESS;
1772 }
1773
1774 /* None supported at this time */
1775 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1776 }
1777
1778 VkResult radv_EnumerateDeviceLayerProperties(
1779 VkPhysicalDevice physicalDevice,
1780 uint32_t* pPropertyCount,
1781 VkLayerProperties* pProperties)
1782 {
1783 if (pProperties == NULL) {
1784 *pPropertyCount = 0;
1785 return VK_SUCCESS;
1786 }
1787
1788 /* None supported at this time */
1789 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1790 }
1791
1792 void radv_GetDeviceQueue2(
1793 VkDevice _device,
1794 const VkDeviceQueueInfo2* pQueueInfo,
1795 VkQueue* pQueue)
1796 {
1797 RADV_FROM_HANDLE(radv_device, device, _device);
1798 struct radv_queue *queue;
1799
1800 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
1801 if (pQueueInfo->flags != queue->flags) {
1802 /* From the Vulkan 1.1.70 spec:
1803 *
1804 * "The queue returned by vkGetDeviceQueue2 must have the same
1805 * flags value from this structure as that used at device
1806 * creation time in a VkDeviceQueueCreateInfo instance. If no
1807 * matching flags were specified at device creation time then
1808 * pQueue will return VK_NULL_HANDLE."
1809 */
1810 *pQueue = VK_NULL_HANDLE;
1811 return;
1812 }
1813
1814 *pQueue = radv_queue_to_handle(queue);
1815 }
1816
1817 void radv_GetDeviceQueue(
1818 VkDevice _device,
1819 uint32_t queueFamilyIndex,
1820 uint32_t queueIndex,
1821 VkQueue* pQueue)
1822 {
1823 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1824 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1825 .queueFamilyIndex = queueFamilyIndex,
1826 .queueIndex = queueIndex
1827 };
1828
1829 radv_GetDeviceQueue2(_device, &info, pQueue);
1830 }
1831
1832 static void
1833 fill_geom_tess_rings(struct radv_queue *queue,
1834 uint32_t *map,
1835 bool add_sample_positions,
1836 uint32_t esgs_ring_size,
1837 struct radeon_winsys_bo *esgs_ring_bo,
1838 uint32_t gsvs_ring_size,
1839 struct radeon_winsys_bo *gsvs_ring_bo,
1840 uint32_t tess_factor_ring_size,
1841 uint32_t tess_offchip_ring_offset,
1842 uint32_t tess_offchip_ring_size,
1843 struct radeon_winsys_bo *tess_rings_bo)
1844 {
1845 uint64_t esgs_va = 0, gsvs_va = 0;
1846 uint64_t tess_va = 0, tess_offchip_va = 0;
1847 uint32_t *desc = &map[4];
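/* map[0..1] hold the scratch buffer descriptor (plus 2 dwords of padding);
 * the ring descriptors written below start at map[4]. */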
1848
1849 if (esgs_ring_bo)
1850 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1851 if (gsvs_ring_bo)
1852 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1853 if (tess_rings_bo) {
1854 tess_va = radv_buffer_get_va(tess_rings_bo);
1855 tess_offchip_va = tess_va + tess_offchip_ring_offset;
1856 }
1857
1858 /* stride 0, num records = size, add tid, swizzle, elsize 4,
1859 index stride 64 */
1860 desc[0] = esgs_va;
1861 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1862 S_008F04_STRIDE(0) |
1863 S_008F04_SWIZZLE_ENABLE(true);
1864 desc[2] = esgs_ring_size;
1865 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1866 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1867 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1868 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1869 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1870 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1871 S_008F0C_ELEMENT_SIZE(1) |
1872 S_008F0C_INDEX_STRIDE(3) |
1873 S_008F0C_ADD_TID_ENABLE(true);
1874
1875 desc += 4;
1876 /* GS entry for ES->GS ring */
1877 /* stride 0, num records = size, elsize 0,
1878 index stride 0 */
1879 desc[0] = esgs_va;
1880 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1881 S_008F04_STRIDE(0) |
1882 S_008F04_SWIZZLE_ENABLE(false);
1883 desc[2] = esgs_ring_size;
1884 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1885 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1886 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1887 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1888 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1889 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1890 S_008F0C_ELEMENT_SIZE(0) |
1891 S_008F0C_INDEX_STRIDE(0) |
1892 S_008F0C_ADD_TID_ENABLE(false);
1893
1894 desc += 4;
1895 /* VS entry for GS->VS ring */
1896 /* stride 0, num records = size, elsize 0,
1897 index stride 0 */
1898 desc[0] = gsvs_va;
1899 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1900 S_008F04_STRIDE(0) |
1901 S_008F04_SWIZZLE_ENABLE(false);
1902 desc[2] = gsvs_ring_size;
1903 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1904 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1905 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1906 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1907 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1908 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1909 S_008F0C_ELEMENT_SIZE(0) |
1910 S_008F0C_INDEX_STRIDE(0) |
1911 S_008F0C_ADD_TID_ENABLE(false);
1912 desc += 4;
1913
1914 /* stride gsvs_itemsize, num records 64
1915 elsize 4, index stride 16 */
1916 /* shader will patch stride and desc[2] */
1917 desc[0] = gsvs_va;
1918 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1919 S_008F04_STRIDE(0) |
1920 S_008F04_SWIZZLE_ENABLE(true);
1921 desc[2] = 0;
1922 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1923 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1924 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1925 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1926 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1927 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1928 S_008F0C_ELEMENT_SIZE(1) |
1929 S_008F0C_INDEX_STRIDE(1) |
1930 S_008F0C_ADD_TID_ENABLE(true);
1931 desc += 4;
1932
1933 desc[0] = tess_va;
1934 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
1935 S_008F04_STRIDE(0) |
1936 S_008F04_SWIZZLE_ENABLE(false);
1937 desc[2] = tess_factor_ring_size;
1938 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1939 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1940 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1941 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1942 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1943 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1944 S_008F0C_ELEMENT_SIZE(0) |
1945 S_008F0C_INDEX_STRIDE(0) |
1946 S_008F0C_ADD_TID_ENABLE(false);
1947 desc += 4;
1948
1949 desc[0] = tess_offchip_va;
1950 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1951 S_008F04_STRIDE(0) |
1952 S_008F04_SWIZZLE_ENABLE(false);
1953 desc[2] = tess_offchip_ring_size;
1954 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1955 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1956 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1957 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1958 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1959 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1960 S_008F0C_ELEMENT_SIZE(0) |
1961 S_008F0C_INDEX_STRIDE(0) |
1962 S_008F0C_ADD_TID_ENABLE(false);
1963 desc += 4;
1964
1965 /* add sample positions after all rings */
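/* Each sample position is two 32-bit floats: 1x=8, 2x=16, 4x=32, 8x=64
 * and 16x=128 bytes, 248 bytes in total. */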
1966 memcpy(desc, queue->device->sample_locations_1x, 8);
1967 desc += 2;
1968 memcpy(desc, queue->device->sample_locations_2x, 16);
1969 desc += 4;
1970 memcpy(desc, queue->device->sample_locations_4x, 32);
1971 desc += 8;
1972 memcpy(desc, queue->device->sample_locations_8x, 64);
1973 desc += 16;
1974 memcpy(desc, queue->device->sample_locations_16x, 128);
1975 }
1976
1977 static unsigned
1978 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1979 {
1980 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1981 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1982 device->physical_device->rad_info.family != CHIP_STONEY;
1983 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1984 unsigned max_offchip_buffers;
1985 unsigned offchip_granularity;
1986 unsigned hs_offchip_param;
1987
1988 /*
1989 * Per RadeonSI:
1990 * This must be one less than the maximum number due to a hw limitation.
1991 * Various hardware bugs in SI, CIK, and GFX9 need this.
1992 *
1993 * Per AMDVLK:
1994 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
1995 * Gfx7 should limit max_offchip_buffers to 508
1996 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
1997 *
1998 * Follow AMDVLK here.
1999 */
2000 if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
2001 device->physical_device->rad_info.chip_class == CIK ||
2002 device->physical_device->rad_info.chip_class == SI)
2003 --max_offchip_buffers_per_se;
2004
2005 max_offchip_buffers = max_offchip_buffers_per_se *
2006 device->physical_device->rad_info.max_se;
2007
2008 switch (device->tess_offchip_block_dw_size) {
2009 default:
2010 assert(0);
2011 /* fall through */
2012 case 8192:
2013 offchip_granularity = V_03093C_X_8K_DWORDS;
2014 break;
2015 case 4096:
2016 offchip_granularity = V_03093C_X_4K_DWORDS;
2017 break;
2018 }
2019
2020 switch (device->physical_device->rad_info.chip_class) {
2021 case SI:
2022 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
2023 break;
2024 case CIK:
2025 case VI:
2026 case GFX9:
2027 default:
2028 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
2029 break;
2030 }
2031
2032 *max_offchip_buffers_p = max_offchip_buffers;
2033 if (device->physical_device->rad_info.chip_class >= CIK) {
2034 if (device->physical_device->rad_info.chip_class >= VI)
2035 --max_offchip_buffers;
2036 hs_offchip_param =
2037 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
2038 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
2039 } else {
2040 hs_offchip_param =
2041 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
2042 }
2043 return hs_offchip_param;
2044 }
2045
2046 static void
2047 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2048 struct radeon_winsys_bo *esgs_ring_bo,
2049 uint32_t esgs_ring_size,
2050 struct radeon_winsys_bo *gsvs_ring_bo,
2051 uint32_t gsvs_ring_size)
2052 {
2053 if (!esgs_ring_bo && !gsvs_ring_bo)
2054 return;
2055
2056 if (esgs_ring_bo)
2057 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
2058
2059 if (gsvs_ring_bo)
2060 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
2061
2062 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2063 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2064 radeon_emit(cs, esgs_ring_size >> 8);
2065 radeon_emit(cs, gsvs_ring_size >> 8);
2066 } else {
2067 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2068 radeon_emit(cs, esgs_ring_size >> 8);
2069 radeon_emit(cs, gsvs_ring_size >> 8);
2070 }
2071 }
2072
2073 static void
2074 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2075 unsigned hs_offchip_param, unsigned tf_ring_size,
2076 struct radeon_winsys_bo *tess_rings_bo)
2077 {
2078 uint64_t tf_va;
2079
2080 if (!tess_rings_bo)
2081 return;
2082
2083 tf_va = radv_buffer_get_va(tess_rings_bo);
2084
2085 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
2086
2087 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2088 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
2089 S_030938_SIZE(tf_ring_size / 4));
2090 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
2091 tf_va >> 8);
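/* tf_va >> 8 programs address bits [39:8]; on GFX9 the ring can sit
 * above that range, so the upper address bits go into the separate
 * _HI register below. */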
2092 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2093 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
2094 S_030944_BASE_HI(tf_va >> 40));
2095 }
2096 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
2097 hs_offchip_param);
2098 } else {
2099 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
2100 S_008988_SIZE(tf_ring_size / 4));
2101 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
2102 tf_va >> 8);
2103 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
2104 hs_offchip_param);
2105 }
2106 }
2107
2108 static void
2109 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2110 struct radeon_winsys_bo *compute_scratch_bo)
2111 {
2112 uint64_t scratch_va;
2113
2114 if (!compute_scratch_bo)
2115 return;
2116
2117 scratch_va = radv_buffer_get_va(compute_scratch_bo);
2118
2119 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2120
2121 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2122 radeon_emit(cs, scratch_va);
2123 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2124 S_008F04_SWIZZLE_ENABLE(1));
2125 }
2126
2127 static void
2128 radv_emit_global_shader_pointers(struct radv_queue *queue,
2129 struct radeon_cmdbuf *cs,
2130 struct radeon_winsys_bo *descriptor_bo)
2131 {
2132 uint64_t va;
2133
2134 if (!descriptor_bo)
2135 return;
2136
2137 va = radv_buffer_get_va(descriptor_bo);
2138
2139 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2140
2141 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2142 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2143 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2144 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2145 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2146
2147 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2148 radv_emit_shader_pointer(queue->device, cs, regs[i],
2149 va, true);
2150 }
2151 } else {
2152 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2153 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2154 R_00B230_SPI_SHADER_USER_DATA_GS_0,
2155 R_00B330_SPI_SHADER_USER_DATA_ES_0,
2156 R_00B430_SPI_SHADER_USER_DATA_HS_0,
2157 R_00B530_SPI_SHADER_USER_DATA_LS_0};
2158
2159 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2160 radv_emit_shader_pointer(queue->device, cs, regs[i],
2161 va, true);
2162 }
2163 }
2164 }
2165
2166 static void
2167 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2168 {
2169 struct radv_device *device = queue->device;
2170
2171 if (device->gfx_init) {
2172 uint64_t va = radv_buffer_get_va(device->gfx_init);
2173
2174 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
2175 radeon_emit(cs, va);
2176 radeon_emit(cs, va >> 32);
2177 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
2178
2179 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
2180 } else {
2181 struct radv_physical_device *physical_device = device->physical_device;
2182 si_emit_graphics(physical_device, cs);
2183 }
2184 }
2185
2186 static void
2187 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2188 {
2189 struct radv_physical_device *physical_device = queue->device->physical_device;
2190 si_emit_compute(physical_device, cs);
2191 }
2192
2193 static VkResult
2194 radv_get_preamble_cs(struct radv_queue *queue,
2195 uint32_t scratch_size,
2196 uint32_t compute_scratch_size,
2197 uint32_t esgs_ring_size,
2198 uint32_t gsvs_ring_size,
2199 bool needs_tess_rings,
2200 bool needs_sample_positions,
2201 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
2202 struct radeon_cmdbuf **initial_preamble_cs,
2203 struct radeon_cmdbuf **continue_preamble_cs)
2204 {
2205 struct radeon_winsys_bo *scratch_bo = NULL;
2206 struct radeon_winsys_bo *descriptor_bo = NULL;
2207 struct radeon_winsys_bo *compute_scratch_bo = NULL;
2208 struct radeon_winsys_bo *esgs_ring_bo = NULL;
2209 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
2210 struct radeon_winsys_bo *tess_rings_bo = NULL;
2211 struct radeon_cmdbuf *dest_cs[3] = {0};
2212 bool add_tess_rings = false, add_sample_positions = false;
2213 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
2214 unsigned max_offchip_buffers;
2215 unsigned hs_offchip_param = 0;
2216 unsigned tess_offchip_ring_offset;
2217 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
2218 if (!queue->has_tess_rings) {
2219 if (needs_tess_rings)
2220 add_tess_rings = true;
2221 }
2222 if (!queue->has_sample_positions) {
2223 if (needs_sample_positions)
2224 add_sample_positions = true;
2225 }
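/* Size the tessellation rings. Both live in a single BO: the tess factor
 * ring first, then the offchip ring at a 64K-aligned offset
 * (fill_geom_tess_rings() applies the same offset to the BO's VA). */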
2226 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
2227 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
2228 &max_offchip_buffers);
2229 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
2230 tess_offchip_ring_size = max_offchip_buffers *
2231 queue->device->tess_offchip_block_dw_size * 4;
2232
2233 if (scratch_size <= queue->scratch_size &&
2234 compute_scratch_size <= queue->compute_scratch_size &&
2235 esgs_ring_size <= queue->esgs_ring_size &&
2236 gsvs_ring_size <= queue->gsvs_ring_size &&
2237 !add_tess_rings && !add_sample_positions &&
2238 queue->initial_preamble_cs) {
2239 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2240 *initial_preamble_cs = queue->initial_preamble_cs;
2241 *continue_preamble_cs = queue->continue_preamble_cs;
2242 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2243 *continue_preamble_cs = NULL;
2244 return VK_SUCCESS;
2245 }
2246
2247 if (scratch_size > queue->scratch_size) {
2248 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2249 scratch_size,
2250 4096,
2251 RADEON_DOMAIN_VRAM,
2252 ring_bo_flags);
2253 if (!scratch_bo)
2254 goto fail;
2255 } else
2256 scratch_bo = queue->scratch_bo;
2257
2258 if (compute_scratch_size > queue->compute_scratch_size) {
2259 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2260 compute_scratch_size,
2261 4096,
2262 RADEON_DOMAIN_VRAM,
2263 ring_bo_flags);
2264 if (!compute_scratch_bo)
2265 goto fail;
2266
2267 } else
2268 compute_scratch_bo = queue->compute_scratch_bo;
2269
2270 if (esgs_ring_size > queue->esgs_ring_size) {
2271 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2272 esgs_ring_size,
2273 4096,
2274 RADEON_DOMAIN_VRAM,
2275 ring_bo_flags);
2276 if (!esgs_ring_bo)
2277 goto fail;
2278 } else {
2279 esgs_ring_bo = queue->esgs_ring_bo;
2280 esgs_ring_size = queue->esgs_ring_size;
2281 }
2282
2283 if (gsvs_ring_size > queue->gsvs_ring_size) {
2284 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2285 gsvs_ring_size,
2286 4096,
2287 RADEON_DOMAIN_VRAM,
2288 ring_bo_flags);
2289 if (!gsvs_ring_bo)
2290 goto fail;
2291 } else {
2292 gsvs_ring_bo = queue->gsvs_ring_bo;
2293 gsvs_ring_size = queue->gsvs_ring_size;
2294 }
2295
2296 if (add_tess_rings) {
2297 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
2298 tess_offchip_ring_offset + tess_offchip_ring_size,
2299 256,
2300 RADEON_DOMAIN_VRAM,
2301 ring_bo_flags);
2302 if (!tess_rings_bo)
2303 goto fail;
2304 } else {
2305 tess_rings_bo = queue->tess_rings_bo;
2306 }
2307
2308 if (scratch_bo != queue->scratch_bo ||
2309 esgs_ring_bo != queue->esgs_ring_bo ||
2310 gsvs_ring_bo != queue->gsvs_ring_bo ||
2311 tess_rings_bo != queue->tess_rings_bo ||
2312 add_sample_positions) {
2313 uint32_t size = 0;
2314 if (gsvs_ring_bo || esgs_ring_bo ||
2315 tess_rings_bo || add_sample_positions) {
2316 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
2317 if (add_sample_positions)
2318 size += 256; /* (16+8+4+2+1) samples * 4 * 2 = 248 bytes. */
2319 }
2320 else if (scratch_bo)
2321 size = 8; /* 2 dword */
2322
2323 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
2324 size,
2325 4096,
2326 RADEON_DOMAIN_VRAM,
2327 RADEON_FLAG_CPU_ACCESS |
2328 RADEON_FLAG_NO_INTERPROCESS_SHARING |
2329 RADEON_FLAG_READ_ONLY);
2330 if (!descriptor_bo)
2331 goto fail;
2332 } else
2333 descriptor_bo = queue->descriptor_bo;
2334
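/* Build the three preamble variants: dest_cs[0] adds shader partial
 * flushes plus cache invalidation (the full-flush initial preamble),
 * dest_cs[1] cache invalidation only (the regular initial preamble),
 * and dest_cs[2] no flush at all (the continue preamble). */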
2335 for(int i = 0; i < 3; ++i) {
2336 struct radeon_cmdbuf *cs = NULL;
2337 cs = queue->device->ws->cs_create(queue->device->ws,
2338 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2339 if (!cs)
2340 goto fail;
2341
2342 dest_cs[i] = cs;
2343
2344 if (scratch_bo)
2345 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
2346
2347 /* Emit initial configuration. */
2348 switch (queue->queue_family_index) {
2349 case RADV_QUEUE_GENERAL:
2350 radv_init_graphics_state(cs, queue);
2351 break;
2352 case RADV_QUEUE_COMPUTE:
2353 radv_init_compute_state(cs, queue);
2354 break;
2355 case RADV_QUEUE_TRANSFER:
2356 break;
2357 }
2358
2359 if (descriptor_bo != queue->descriptor_bo) {
2360 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2361
2362 if (scratch_bo) {
2363 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2364 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2365 S_008F04_SWIZZLE_ENABLE(1);
2366 map[0] = scratch_va;
2367 map[1] = rsrc1;
2368 }
2369
2370 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo ||
2371 add_sample_positions)
2372 fill_geom_tess_rings(queue, map, add_sample_positions,
2373 esgs_ring_size, esgs_ring_bo,
2374 gsvs_ring_size, gsvs_ring_bo,
2375 tess_factor_ring_size,
2376 tess_offchip_ring_offset,
2377 tess_offchip_ring_size,
2378 tess_rings_bo);
2379
2380 queue->device->ws->buffer_unmap(descriptor_bo);
2381 }
2382
2383 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
2384 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2385 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2386 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2387 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2388 }
2389
2390 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
2391 gsvs_ring_bo, gsvs_ring_size);
2392 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
2393 tess_factor_ring_size, tess_rings_bo);
2394 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
2395 radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
2396
2397 if (i == 0) {
2398 si_cs_emit_cache_flush(cs,
2399 queue->device->physical_device->rad_info.chip_class,
2400 NULL, 0,
2401 queue->queue_family_index == RING_COMPUTE &&
2402 queue->device->physical_device->rad_info.chip_class >= CIK,
2403 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2404 RADV_CMD_FLAG_INV_ICACHE |
2405 RADV_CMD_FLAG_INV_SMEM_L1 |
2406 RADV_CMD_FLAG_INV_VMEM_L1 |
2407 RADV_CMD_FLAG_INV_GLOBAL_L2 |
2408 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2409 } else if (i == 1) {
2410 si_cs_emit_cache_flush(cs,
2411 queue->device->physical_device->rad_info.chip_class,
2412 NULL, 0,
2413 queue->queue_family_index == RING_COMPUTE &&
2414 queue->device->physical_device->rad_info.chip_class >= CIK,
2415 RADV_CMD_FLAG_INV_ICACHE |
2416 RADV_CMD_FLAG_INV_SMEM_L1 |
2417 RADV_CMD_FLAG_INV_VMEM_L1 |
2418 RADV_CMD_FLAG_INV_GLOBAL_L2 |
2419 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2420 }
2421
2422 if (!queue->device->ws->cs_finalize(cs))
2423 goto fail;
2424 }
2425
2426 if (queue->initial_full_flush_preamble_cs)
2427 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2428
2429 if (queue->initial_preamble_cs)
2430 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2431
2432 if (queue->continue_preamble_cs)
2433 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2434
2435 queue->initial_full_flush_preamble_cs = dest_cs[0];
2436 queue->initial_preamble_cs = dest_cs[1];
2437 queue->continue_preamble_cs = dest_cs[2];
2438
2439 if (scratch_bo != queue->scratch_bo) {
2440 if (queue->scratch_bo)
2441 queue->device->ws->buffer_destroy(queue->scratch_bo);
2442 queue->scratch_bo = scratch_bo;
2443 queue->scratch_size = scratch_size;
2444 }
2445
2446 if (compute_scratch_bo != queue->compute_scratch_bo) {
2447 if (queue->compute_scratch_bo)
2448 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2449 queue->compute_scratch_bo = compute_scratch_bo;
2450 queue->compute_scratch_size = compute_scratch_size;
2451 }
2452
2453 if (esgs_ring_bo != queue->esgs_ring_bo) {
2454 if (queue->esgs_ring_bo)
2455 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2456 queue->esgs_ring_bo = esgs_ring_bo;
2457 queue->esgs_ring_size = esgs_ring_size;
2458 }
2459
2460 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2461 if (queue->gsvs_ring_bo)
2462 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2463 queue->gsvs_ring_bo = gsvs_ring_bo;
2464 queue->gsvs_ring_size = gsvs_ring_size;
2465 }
2466
2467 if (tess_rings_bo != queue->tess_rings_bo) {
2468 queue->tess_rings_bo = tess_rings_bo;
2469 queue->has_tess_rings = true;
2470 }
2471
2472 if (descriptor_bo != queue->descriptor_bo) {
2473 if (queue->descriptor_bo)
2474 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2475
2476 queue->descriptor_bo = descriptor_bo;
2477 }
2478
2479 if (add_sample_positions)
2480 queue->has_sample_positions = true;
2481
2482 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2483 *initial_preamble_cs = queue->initial_preamble_cs;
2484 *continue_preamble_cs = queue->continue_preamble_cs;
2485 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2486 *continue_preamble_cs = NULL;
2487 return VK_SUCCESS;
2488 fail:
2489 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2490 if (dest_cs[i])
2491 queue->device->ws->cs_destroy(dest_cs[i]);
2492 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2493 queue->device->ws->buffer_destroy(descriptor_bo);
2494 if (scratch_bo && scratch_bo != queue->scratch_bo)
2495 queue->device->ws->buffer_destroy(scratch_bo);
2496 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2497 queue->device->ws->buffer_destroy(compute_scratch_bo);
2498 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2499 queue->device->ws->buffer_destroy(esgs_ring_bo);
2500 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2501 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2502 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2503 queue->device->ws->buffer_destroy(tess_rings_bo);
2504 return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2505 }
2506
2507 static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
2508 struct radv_winsys_sem_counts *counts,
2509 int num_sems,
2510 const VkSemaphore *sems,
2511 VkFence _fence,
2512 bool reset_temp)
2513 {
2514 int syncobj_idx = 0, sem_idx = 0;
2515
2516 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2517 return VK_SUCCESS;
2518
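/* First pass: count syncobj-backed vs. legacy semaphores so both arrays
 * can be sized before the fill loop below. */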
2519 for (uint32_t i = 0; i < num_sems; i++) {
2520 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2521
2522 if (sem->temp_syncobj || sem->syncobj)
2523 counts->syncobj_count++;
2524 else
2525 counts->sem_count++;
2526 }
2527
2528 if (_fence != VK_NULL_HANDLE) {
2529 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2530 if (fence->temp_syncobj || fence->syncobj)
2531 counts->syncobj_count++;
2532 }
2533
2534 if (counts->syncobj_count) {
2535 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2536 if (!counts->syncobj)
2537 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2538 }
2539
2540 if (counts->sem_count) {
2541 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2542 if (!counts->sem) {
2543 free(counts->syncobj);
2544 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2545 }
2546 }
2547
2548 for (uint32_t i = 0; i < num_sems; i++) {
2549 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2550
2551 if (sem->temp_syncobj) {
2552 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2553 }
2554 else if (sem->syncobj)
2555 counts->syncobj[syncobj_idx++] = sem->syncobj;
2556 else {
2557 assert(sem->sem);
2558 counts->sem[sem_idx++] = sem->sem;
2559 }
2560 }
2561
2562 if (_fence != VK_NULL_HANDLE) {
2563 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2564 if (fence->temp_syncobj)
2565 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2566 else if (fence->syncobj)
2567 counts->syncobj[syncobj_idx++] = fence->syncobj;
2568 }
2569
2570 return VK_SUCCESS;
2571 }
2572
2573 static void
2574 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2575 {
2576 free(sem_info->wait.syncobj);
2577 free(sem_info->wait.sem);
2578 free(sem_info->signal.syncobj);
2579 free(sem_info->signal.sem);
2580 }
2581
2582
2583 static void radv_free_temp_syncobjs(struct radv_device *device,
2584 int num_sems,
2585 const VkSemaphore *sems)
2586 {
2587 for (uint32_t i = 0; i < num_sems; i++) {
2588 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2589
2590 if (sem->temp_syncobj) {
2591 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2592 sem->temp_syncobj = 0;
2593 }
2594 }
2595 }
2596
2597 static VkResult
2598 radv_alloc_sem_info(struct radv_instance *instance,
2599 struct radv_winsys_sem_info *sem_info,
2600 int num_wait_sems,
2601 const VkSemaphore *wait_sems,
2602 int num_signal_sems,
2603 const VkSemaphore *signal_sems,
2604 VkFence fence)
2605 {
2606 VkResult ret;
2607 memset(sem_info, 0, sizeof(*sem_info));
2608
2609 ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2610 if (ret)
2611 return ret;
2612 ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
2613 if (ret)
2614 radv_free_sem_info(sem_info);
2615
2616 /* caller can override these */
2617 sem_info->cs_emit_wait = true;
2618 sem_info->cs_emit_signal = true;
2619 return ret;
2620 }
2621
2622 /* Signals the fence as soon as all work currently submitted to the queue is done. */
2623 static VkResult radv_signal_fence(struct radv_queue *queue,
2624 struct radv_fence *fence)
2625 {
2626 int ret;
2627 VkResult result;
2628 struct radv_winsys_sem_info sem_info;
2629
2630 result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
2631 radv_fence_to_handle(fence));
2632 if (result != VK_SUCCESS)
2633 return result;
2634
2635 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2636 &queue->device->empty_cs[queue->queue_family_index],
2637 1, NULL, NULL, &sem_info, NULL,
2638 false, fence->fence);
2639 radv_free_sem_info(&sem_info);
2640
2641 if (ret)
2642 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
2643
2644 return VK_SUCCESS;
2645 }
2646
2647 VkResult radv_QueueSubmit(
2648 VkQueue _queue,
2649 uint32_t submitCount,
2650 const VkSubmitInfo* pSubmits,
2651 VkFence _fence)
2652 {
2653 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2654 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2655 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2656 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2657 int ret;
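/* With a trace BO active, submit one CS at a time so
 * radv_check_gpu_hangs() can pin a hang to a single command buffer. */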
2658 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
2659 uint32_t scratch_size = 0;
2660 uint32_t compute_scratch_size = 0;
2661 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2662 struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2663 VkResult result;
2664 bool fence_emitted = false;
2665 bool tess_rings_needed = false;
2666 bool sample_positions_needed = false;
2667
2668 /* Do this first so failing to allocate scratch buffers can't result in
2669 * partially executed submissions. */
2670 for (uint32_t i = 0; i < submitCount; i++) {
2671 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2672 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2673 pSubmits[i].pCommandBuffers[j]);
2674
2675 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2676 compute_scratch_size = MAX2(compute_scratch_size,
2677 cmd_buffer->compute_scratch_size_needed);
2678 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2679 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2680 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2681 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2682 }
2683 }
2684
2685 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2686 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2687 sample_positions_needed, &initial_flush_preamble_cs,
2688 &initial_preamble_cs, &continue_preamble_cs);
2689 if (result != VK_SUCCESS)
2690 return result;
2691
2692 for (uint32_t i = 0; i < submitCount; i++) {
2693 struct radeon_cmdbuf **cs_array;
2694 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2695 bool can_patch = true;
2696 uint32_t advance;
2697 struct radv_winsys_sem_info sem_info;
2698
2699 result = radv_alloc_sem_info(queue->device->instance,
2700 &sem_info,
2701 pSubmits[i].waitSemaphoreCount,
2702 pSubmits[i].pWaitSemaphores,
2703 pSubmits[i].signalSemaphoreCount,
2704 pSubmits[i].pSignalSemaphores,
2705 _fence);
2706 if (result != VK_SUCCESS)
2707 return result;
2708
2709 if (!pSubmits[i].commandBufferCount) {
2710 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2711 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2712 &queue->device->empty_cs[queue->queue_family_index],
2713 1, NULL, NULL,
2714 &sem_info, NULL,
2715 false, base_fence);
2716 if (ret) {
2717 radv_loge("failed to submit CS %d\n", i);
2718 abort();
2719 }
2720 fence_emitted = true;
2721 }
2722 radv_free_sem_info(&sem_info);
2723 continue;
2724 }
2725
2726 cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
2727 (pSubmits[i].commandBufferCount));
2728
2729 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2730 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2731 pSubmits[i].pCommandBuffers[j]);
2732 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2733
2734 cs_array[j] = cmd_buffer->cs;
2735 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2736 can_patch = false;
2737
2738 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2739 }
2740
2741 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2742 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2743 const struct radv_winsys_bo_list *bo_list = NULL;
2744
2745 advance = MIN2(max_cs_submission,
2746 pSubmits[i].commandBufferCount - j);
2747
2748 if (queue->device->trace_bo)
2749 *queue->device->trace_id_ptr = 0;
2750
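/* Wait semaphores apply only to the first chunk of a submission,
 * signal semaphores only to the last. */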
2751 sem_info.cs_emit_wait = j == 0;
2752 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2753
2754 if (unlikely(queue->device->use_global_bo_list)) {
2755 pthread_mutex_lock(&queue->device->bo_list.mutex);
2756 bo_list = &queue->device->bo_list.list;
2757 }
2758
2759 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2760 advance, initial_preamble, continue_preamble_cs,
2761 &sem_info, bo_list,
2762 can_patch, base_fence);
2763
2764 if (unlikely(queue->device->use_global_bo_list))
2765 pthread_mutex_unlock(&queue->device->bo_list.mutex);
2766
2767 if (ret) {
2768 radv_loge("failed to submit CS %d\n", i);
2769 abort();
2770 }
2771 fence_emitted = true;
2772 if (queue->device->trace_bo) {
2773 radv_check_gpu_hangs(queue, cs_array[j]);
2774 }
2775 }
2776
2777 radv_free_temp_syncobjs(queue->device,
2778 pSubmits[i].waitSemaphoreCount,
2779 pSubmits[i].pWaitSemaphores);
2780 radv_free_sem_info(&sem_info);
2781 free(cs_array);
2782 }
2783
2784 if (fence) {
2785 if (!fence_emitted) {
2786 result = radv_signal_fence(queue, fence);
2787 if (result != VK_SUCCESS)
2788 return result;
2789 }
2790 fence->submitted = true;
2791 }
2792
2793 return VK_SUCCESS;
2794 }
2795
2796 VkResult radv_QueueWaitIdle(
2797 VkQueue _queue)
2798 {
2799 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2800
2801 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2802 radv_queue_family_to_ring(queue->queue_family_index),
2803 queue->queue_idx);
2804 return VK_SUCCESS;
2805 }
2806
2807 VkResult radv_DeviceWaitIdle(
2808 VkDevice _device)
2809 {
2810 RADV_FROM_HANDLE(radv_device, device, _device);
2811
2812 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2813 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2814 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2815 }
2816 }
2817 return VK_SUCCESS;
2818 }
2819
2820 VkResult radv_EnumerateInstanceExtensionProperties(
2821 const char* pLayerName,
2822 uint32_t* pPropertyCount,
2823 VkExtensionProperties* pProperties)
2824 {
2825 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2826
2827 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
2828 if (radv_supported_instance_extensions.extensions[i]) {
2829 vk_outarray_append(&out, prop) {
2830 *prop = radv_instance_extensions[i];
2831 }
2832 }
2833 }
2834
2835 return vk_outarray_status(&out);
2836 }
2837
2838 VkResult radv_EnumerateDeviceExtensionProperties(
2839 VkPhysicalDevice physicalDevice,
2840 const char* pLayerName,
2841 uint32_t* pPropertyCount,
2842 VkExtensionProperties* pProperties)
2843 {
2844 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2845 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
2846
2847 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
2848 if (device->supported_extensions.extensions[i]) {
2849 vk_outarray_append(&out, prop) {
2850 *prop = radv_device_extensions[i];
2851 }
2852 }
2853 }
2854
2855 return vk_outarray_status(&out);
2856 }
2857
2858 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2859 VkInstance _instance,
2860 const char* pName)
2861 {
2862 RADV_FROM_HANDLE(radv_instance, instance, _instance);
2863
2864 return radv_lookup_entrypoint_checked(pName,
2865 instance ? instance->apiVersion : 0,
2866 instance ? &instance->enabled_extensions : NULL,
2867 NULL);
2868 }
2869
2870 /* The loader wants us to expose a second GetInstanceProcAddr function
2871 * to work around certain LD_PRELOAD issues seen in apps.
2872 */
2873 PUBLIC
2874 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2875 VkInstance instance,
2876 const char* pName);
2877
2878 PUBLIC
2879 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2880 VkInstance instance,
2881 const char* pName)
2882 {
2883 return radv_GetInstanceProcAddr(instance, pName);
2884 }
2885
2886 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2887 VkDevice _device,
2888 const char* pName)
2889 {
2890 RADV_FROM_HANDLE(radv_device, device, _device);
2891
2892 return radv_lookup_entrypoint_checked(pName,
2893 device->instance->apiVersion,
2894 &device->instance->enabled_extensions,
2895 &device->enabled_extensions);
2896 }
2897
2898 bool radv_get_memory_fd(struct radv_device *device,
2899 struct radv_device_memory *memory,
2900 int *pFD)
2901 {
2902 struct radeon_bo_metadata metadata;
2903
2904 if (memory->image) {
2905 radv_init_metadata(device, memory->image, &metadata);
2906 device->ws->buffer_set_metadata(memory->bo, &metadata);
2907 }
2908
2909 return device->ws->buffer_get_fd(device->ws, memory->bo,
2910 pFD);
2911 }
2912
2913 static VkResult radv_alloc_memory(struct radv_device *device,
2914 const VkMemoryAllocateInfo* pAllocateInfo,
2915 const VkAllocationCallbacks* pAllocator,
2916 VkDeviceMemory* pMem)
2917 {
2918 struct radv_device_memory *mem;
2919 VkResult result;
2920 enum radeon_bo_domain domain;
2921 uint32_t flags = 0;
2922 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2923
2924 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2925
2926 if (pAllocateInfo->allocationSize == 0) {
2927 /* Apparently, this is allowed */
2928 *pMem = VK_NULL_HANDLE;
2929 return VK_SUCCESS;
2930 }
2931
2932 const VkImportMemoryFdInfoKHR *import_info =
2933 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2934 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2935 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2936 const VkExportMemoryAllocateInfoKHR *export_info =
2937 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO_KHR);
2938 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
2939 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
2940
2941 const struct wsi_memory_allocate_info *wsi_info =
2942 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2943
2944 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2945 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2946 if (mem == NULL)
2947 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2948
2949 if (wsi_info && wsi_info->implicit_sync)
2950 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2951
2952 if (dedicate_info) {
2953 mem->image = radv_image_from_handle(dedicate_info->image);
2954 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2955 } else {
2956 mem->image = NULL;
2957 mem->buffer = NULL;
2958 }
2959
2960 mem->user_ptr = NULL;
2961
2962 if (import_info) {
2963 assert(import_info->handleType ==
2964 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2965 import_info->handleType ==
2966 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2967 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2968 NULL, NULL);
2969 if (!mem->bo) {
2970 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2971 goto fail;
2972 } else {
2973 close(import_info->fd);
2974 }
2975 } else if (host_ptr_info) {
2976 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
2977 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
2978 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
2979 pAllocateInfo->allocationSize);
2980 if (!mem->bo) {
2981 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2982 goto fail;
2983 } else {
2984 mem->user_ptr = host_ptr_info->pHostPointer;
2985 }
2986 } else {
2987 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2988 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2989 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2990 domain = RADEON_DOMAIN_GTT;
2991 else
2992 domain = RADEON_DOMAIN_VRAM;
2993
2994 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2995 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2996 else
2997 flags |= RADEON_FLAG_CPU_ACCESS;
2998
2999 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
3000 flags |= RADEON_FLAG_GTT_WC;
3001
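/* Allocations that can never be shared across processes don't need
 * kernel-side inter-process synchronization tracking. */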
3002 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
3003 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
3004
3005 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
3006 domain, flags);
3007
3008 if (!mem->bo) {
3009 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
3010 goto fail;
3011 }
3012 mem->type_index = mem_type_index;
3013 }
3014
3015 result = radv_bo_list_add(device, mem->bo);
3016 if (result != VK_SUCCESS)
3017 goto fail_bo;
3018
3019 *pMem = radv_device_memory_to_handle(mem);
3020
3021 return VK_SUCCESS;
3022
3023 fail_bo:
3024 device->ws->buffer_destroy(mem->bo);
3025 fail:
3026 vk_free2(&device->alloc, pAllocator, mem);
3027
3028 return result;
3029 }
3030
3031 VkResult radv_AllocateMemory(
3032 VkDevice _device,
3033 const VkMemoryAllocateInfo* pAllocateInfo,
3034 const VkAllocationCallbacks* pAllocator,
3035 VkDeviceMemory* pMem)
3036 {
3037 RADV_FROM_HANDLE(radv_device, device, _device);
3038 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
3039 }
3040
3041 void radv_FreeMemory(
3042 VkDevice _device,
3043 VkDeviceMemory _mem,
3044 const VkAllocationCallbacks* pAllocator)
3045 {
3046 RADV_FROM_HANDLE(radv_device, device, _device);
3047 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
3048
3049 if (mem == NULL)
3050 return;
3051
3052 radv_bo_list_remove(device, mem->bo);
3053 device->ws->buffer_destroy(mem->bo);
3054 mem->bo = NULL;
3055
3056 vk_free2(&device->alloc, pAllocator, mem);
3057 }
3058
3059 VkResult radv_MapMemory(
3060 VkDevice _device,
3061 VkDeviceMemory _memory,
3062 VkDeviceSize offset,
3063 VkDeviceSize size,
3064 VkMemoryMapFlags flags,
3065 void** ppData)
3066 {
3067 RADV_FROM_HANDLE(radv_device, device, _device);
3068 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3069
3070 if (mem == NULL) {
3071 *ppData = NULL;
3072 return VK_SUCCESS;
3073 }
3074
3075 if (mem->user_ptr)
3076 *ppData = mem->user_ptr;
3077 else
3078 *ppData = device->ws->buffer_map(mem->bo);
3079
3080 if (*ppData) {
3081 *ppData += offset;
3082 return VK_SUCCESS;
3083 }
3084
3085 return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
3086 }
3087
3088 void radv_UnmapMemory(
3089 VkDevice _device,
3090 VkDeviceMemory _memory)
3091 {
3092 RADV_FROM_HANDLE(radv_device, device, _device);
3093 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3094
3095 if (mem == NULL)
3096 return;
3097
3098 if (mem->user_ptr == NULL)
3099 device->ws->buffer_unmap(mem->bo);
3100 }
3101
3102 VkResult radv_FlushMappedMemoryRanges(
3103 VkDevice _device,
3104 uint32_t memoryRangeCount,
3105 const VkMappedMemoryRange* pMemoryRanges)
3106 {
3107 return VK_SUCCESS;
3108 }
3109
3110 VkResult radv_InvalidateMappedMemoryRanges(
3111 VkDevice _device,
3112 uint32_t memoryRangeCount,
3113 const VkMappedMemoryRange* pMemoryRanges)
3114 {
3115 return VK_SUCCESS;
3116 }
3117
3118 void radv_GetBufferMemoryRequirements(
3119 VkDevice _device,
3120 VkBuffer _buffer,
3121 VkMemoryRequirements* pMemoryRequirements)
3122 {
3123 RADV_FROM_HANDLE(radv_device, device, _device);
3124 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3125
3126 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3127
3128 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3129 pMemoryRequirements->alignment = 4096;
3130 else
3131 pMemoryRequirements->alignment = 16;
3132
3133 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
3134 }
3135
3136 void radv_GetBufferMemoryRequirements2(
3137 VkDevice device,
3138 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
3139 VkMemoryRequirements2KHR* pMemoryRequirements)
3140 {
3141 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
3142 &pMemoryRequirements->memoryRequirements);
3143 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
3144 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3145 switch (ext->sType) {
3146 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
3147 VkMemoryDedicatedRequirementsKHR *req =
3148 (VkMemoryDedicatedRequirementsKHR *) ext;
3149 req->requiresDedicatedAllocation = buffer->shareable;
3150 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3151 break;
3152 }
3153 default:
3154 break;
3155 }
3156 }
3157 }
3158
3159 void radv_GetImageMemoryRequirements(
3160 VkDevice _device,
3161 VkImage _image,
3162 VkMemoryRequirements* pMemoryRequirements)
3163 {
3164 RADV_FROM_HANDLE(radv_device, device, _device);
3165 RADV_FROM_HANDLE(radv_image, image, _image);
3166
3167 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3168
3169 pMemoryRequirements->size = image->size;
3170 pMemoryRequirements->alignment = image->alignment;
3171 }
3172
3173 void radv_GetImageMemoryRequirements2(
3174 VkDevice device,
3175 const VkImageMemoryRequirementsInfo2KHR* pInfo,
3176 VkMemoryRequirements2KHR* pMemoryRequirements)
3177 {
3178 radv_GetImageMemoryRequirements(device, pInfo->image,
3179 &pMemoryRequirements->memoryRequirements);
3180
3181 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3182
3183 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3184 switch (ext->sType) {
3185 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
3186 VkMemoryDedicatedRequirementsKHR *req =
3187 (VkMemoryDedicatedRequirementsKHR *) ext;
3188 req->requiresDedicatedAllocation = image->shareable;
3189 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3190 break;
3191 }
3192 default:
3193 break;
3194 }
3195 }
3196 }
3197
3198 void radv_GetImageSparseMemoryRequirements(
3199 VkDevice device,
3200 VkImage image,
3201 uint32_t* pSparseMemoryRequirementCount,
3202 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
3203 {
3204 stub();
3205 }
3206
3207 void radv_GetImageSparseMemoryRequirements2(
3208 VkDevice device,
3209 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
3210 uint32_t* pSparseMemoryRequirementCount,
3211 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
3212 {
3213 stub();
3214 }
3215
3216 void radv_GetDeviceMemoryCommitment(
3217 VkDevice device,
3218 VkDeviceMemory memory,
3219 VkDeviceSize* pCommittedMemoryInBytes)
3220 {
3221 *pCommittedMemoryInBytes = 0;
3222 }
3223
3224 VkResult radv_BindBufferMemory2(VkDevice device,
3225 uint32_t bindInfoCount,
3226 const VkBindBufferMemoryInfoKHR *pBindInfos)
3227 {
3228 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3229 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3230 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3231
3232 if (mem) {
3233 buffer->bo = mem->bo;
3234 buffer->offset = pBindInfos[i].memoryOffset;
3235 } else {
3236 buffer->bo = NULL;
3237 }
3238 }
3239 return VK_SUCCESS;
3240 }
3241
3242 VkResult radv_BindBufferMemory(
3243 VkDevice device,
3244 VkBuffer buffer,
3245 VkDeviceMemory memory,
3246 VkDeviceSize memoryOffset)
3247 {
3248 const VkBindBufferMemoryInfoKHR info = {
3249 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
3250 .buffer = buffer,
3251 .memory = memory,
3252 .memoryOffset = memoryOffset
3253 };
3254
3255 return radv_BindBufferMemory2(device, 1, &info);
3256 }
3257
3258 VkResult radv_BindImageMemory2(VkDevice device,
3259 uint32_t bindInfoCount,
3260 const VkBindImageMemoryInfoKHR *pBindInfos)
3261 {
3262 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3263 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3264 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3265
3266 if (mem) {
3267 image->bo = mem->bo;
3268 image->offset = pBindInfos[i].memoryOffset;
3269 } else {
3270 image->bo = NULL;
3271 image->offset = 0;
3272 }
3273 }
3274 return VK_SUCCESS;
3275 }
3276
3277
3278 VkResult radv_BindImageMemory(
3279 VkDevice device,
3280 VkImage image,
3281 VkDeviceMemory memory,
3282 VkDeviceSize memoryOffset)
3283 {
3284 const VkBindImageMemoryInfoKHR info = {
3285 		.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
3286 .image = image,
3287 .memory = memory,
3288 .memoryOffset = memoryOffset
3289 };
3290
3291 return radv_BindImageMemory2(device, 1, &info);
3292 }
3293
3294
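/* Sparse binding: map each requested range of the buffer's virtual
 * address space onto the given backing memory, or unmap the range when
 * no memory is provided. */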
3295 static void
3296 radv_sparse_buffer_bind_memory(struct radv_device *device,
3297 const VkSparseBufferMemoryBindInfo *bind)
3298 {
3299 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3300
3301 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3302 struct radv_device_memory *mem = NULL;
3303
3304 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3305 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3306
3307 device->ws->buffer_virtual_bind(buffer->bo,
3308 bind->pBinds[i].resourceOffset,
3309 bind->pBinds[i].size,
3310 mem ? mem->bo : NULL,
3311 bind->pBinds[i].memoryOffset);
3312 }
3313 }
3314
3315 static void
3316 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3317 const VkSparseImageOpaqueMemoryBindInfo *bind)
3318 {
3319 RADV_FROM_HANDLE(radv_image, image, bind->image);
3320
3321 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3322 struct radv_device_memory *mem = NULL;
3323
3324 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3325 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3326
3327 device->ws->buffer_virtual_bind(image->bo,
3328 bind->pBinds[i].resourceOffset,
3329 bind->pBinds[i].size,
3330 mem ? mem->bo : NULL,
3331 bind->pBinds[i].memoryOffset);
3332 }
3333 }
3334
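/* vkQueueBindSparse: apply all buffer and opaque image binds first, then
 * submit an empty command stream when semaphores are involved so that the
 * wait/signal semaphores and the fence are still honored. */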
3335 VkResult radv_QueueBindSparse(
3336 VkQueue _queue,
3337 uint32_t bindInfoCount,
3338 const VkBindSparseInfo* pBindInfo,
3339 VkFence _fence)
3340 {
3341 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3342 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3343 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3344 bool fence_emitted = false;
3345 VkResult result;
3346 int ret;
3347
3348 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3349 struct radv_winsys_sem_info sem_info;
3350 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3351 radv_sparse_buffer_bind_memory(queue->device,
3352 pBindInfo[i].pBufferBinds + j);
3353 }
3354
3355 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3356 radv_sparse_image_opaque_bind_memory(queue->device,
3357 pBindInfo[i].pImageOpaqueBinds + j);
3358 }
3359
3361 		result = radv_alloc_sem_info(queue->device->instance,
3362 &sem_info,
3363 pBindInfo[i].waitSemaphoreCount,
3364 pBindInfo[i].pWaitSemaphores,
3365 pBindInfo[i].signalSemaphoreCount,
3366 pBindInfo[i].pSignalSemaphores,
3367 _fence);
3368 if (result != VK_SUCCESS)
3369 return result;
3370
3371 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3372 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3373 &queue->device->empty_cs[queue->queue_family_index],
3374 1, NULL, NULL,
3375 &sem_info, NULL,
3376 false, base_fence);
3377 if (ret) {
3378 radv_loge("failed to submit CS %d\n", i);
3379 abort();
3380 }
3381
3382 fence_emitted = true;
3383 if (fence)
3384 fence->submitted = true;
3385 }
3386
3387 radv_free_sem_info(&sem_info);
3388
3389 }
3390
3391 if (fence) {
3392 if (!fence_emitted) {
3393 result = radv_signal_fence(queue, fence);
3394 if (result != VK_SUCCESS)
3395 return result;
3396 }
3397 fence->submitted = true;
3398 }
3399
3400 return VK_SUCCESS;
3401 }
3402
3403 VkResult radv_CreateFence(
3404 VkDevice _device,
3405 const VkFenceCreateInfo* pCreateInfo,
3406 const VkAllocationCallbacks* pAllocator,
3407 VkFence* pFence)
3408 {
3409 RADV_FROM_HANDLE(radv_device, device, _device);
3410 const VkExportFenceCreateInfoKHR *export =
3411 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
3412 VkExternalFenceHandleTypeFlagsKHR handleTypes =
3413 export ? export->handleTypes : 0;
3414
3415 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3416 sizeof(*fence), 8,
3417 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3418
3419 if (!fence)
3420 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3421
3422 fence->fence_wsi = NULL;
3423 fence->submitted = false;
3424 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
3425 fence->temp_syncobj = 0;
3426 if (device->always_use_syncobj || handleTypes) {
3427 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3428 if (ret) {
3429 vk_free2(&device->alloc, pAllocator, fence);
3430 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3431 }
3432 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3433 device->ws->signal_syncobj(device->ws, fence->syncobj);
3434 }
3435 fence->fence = NULL;
3436 } else {
3437 fence->fence = device->ws->create_fence();
3438 if (!fence->fence) {
3439 vk_free2(&device->alloc, pAllocator, fence);
3440 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3441 }
3442 fence->syncobj = 0;
3443 }
3444
3445 *pFence = radv_fence_to_handle(fence);
3446
3447 return VK_SUCCESS;
3448 }
3449
3450 void radv_DestroyFence(
3451 VkDevice _device,
3452 VkFence _fence,
3453 const VkAllocationCallbacks* pAllocator)
3454 {
3455 RADV_FROM_HANDLE(radv_device, device, _device);
3456 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3457
3458 if (!fence)
3459 return;
3460
3461 if (fence->temp_syncobj)
3462 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3463 if (fence->syncobj)
3464 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3465 if (fence->fence)
3466 device->ws->destroy_fence(fence->fence);
3467 if (fence->fence_wsi)
3468 fence->fence_wsi->destroy(fence->fence_wsi);
3469 vk_free2(&device->alloc, pAllocator, fence);
3470 }
3471
3472
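/* Timeout helpers: the fence waits below work with absolute
 * CLOCK_MONOTONIC times, clamped so the deadline never overflows
 * UINT64_MAX, which lets one deadline be shared across several waits. */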
3473 static uint64_t radv_get_current_time(void)
3474 {
3475 struct timespec tv;
3476 clock_gettime(CLOCK_MONOTONIC, &tv);
3477 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3478 }
3479
3480 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3481 {
3482 uint64_t current_time = radv_get_current_time();
3483
3484 timeout = MIN2(UINT64_MAX - current_time, timeout);
3485
3486 return current_time + timeout;
3487 }
3488
3489
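/* The batched winsys wait below only works for fences that have a plain
 * winsys fence (no syncobj or temporary syncobj payload) and that have
 * actually been submitted or signalled. */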
3490 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3491 {
3492 for (uint32_t i = 0; i < fenceCount; ++i) {
3493 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3494 if (fence->fence == NULL || fence->syncobj ||
3495 fence->temp_syncobj ||
3496 (!fence->signalled && !fence->submitted))
3497 return false;
3498 }
3499 return true;
3500 }
3501
3502 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3503 {
3504 for (uint32_t i = 0; i < fenceCount; ++i) {
3505 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3506 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3507 return false;
3508 }
3509 return true;
3510 }
3511
3512 VkResult radv_WaitForFences(
3513 VkDevice _device,
3514 uint32_t fenceCount,
3515 const VkFence* pFences,
3516 VkBool32 waitAll,
3517 uint64_t timeout)
3518 {
3519 RADV_FROM_HANDLE(radv_device, device, _device);
3520 timeout = radv_get_absolute_timeout(timeout);
3521
3522 if (device->always_use_syncobj &&
3523 radv_all_fences_syncobj(fenceCount, pFences))
3524 {
3525 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3526 if (!handles)
3527 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3528
3529 for (uint32_t i = 0; i < fenceCount; ++i) {
3530 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3531 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3532 }
3533
3534 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3535
3536 free(handles);
3537 return success ? VK_SUCCESS : VK_TIMEOUT;
3538 }
3539
3540 if (!waitAll && fenceCount > 1) {
3541 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3542 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3543 uint32_t wait_count = 0;
3544 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3545 if (!fences)
3546 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3547
3548 for (uint32_t i = 0; i < fenceCount; ++i) {
3549 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3550
3551 if (fence->signalled) {
3552 free(fences);
3553 return VK_SUCCESS;
3554 }
3555
3556 fences[wait_count++] = fence->fence;
3557 }
3558
3559 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3560 waitAll, timeout - radv_get_current_time());
3561
3562 free(fences);
3563 return success ? VK_SUCCESS : VK_TIMEOUT;
3564 }
3565
3566 while(radv_get_current_time() <= timeout) {
3567 for (uint32_t i = 0; i < fenceCount; ++i) {
3568 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3569 return VK_SUCCESS;
3570 }
3571 }
3572 return VK_TIMEOUT;
3573 }
3574
3575 for (uint32_t i = 0; i < fenceCount; ++i) {
3576 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3577 bool expired = false;
3578
3579 if (fence->temp_syncobj) {
3580 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3581 return VK_TIMEOUT;
3582 continue;
3583 }
3584
3585 if (fence->syncobj) {
3586 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3587 return VK_TIMEOUT;
3588 continue;
3589 }
3590
3591 if (fence->signalled)
3592 continue;
3593
3594 if (fence->fence) {
3595 if (!fence->submitted) {
3596 while(radv_get_current_time() <= timeout &&
3597 !fence->submitted)
3598 /* Do nothing */;
3599
3600 if (!fence->submitted)
3601 return VK_TIMEOUT;
3602
3603 /* Recheck as it may have been set by
3604 * submitting operations. */
3605
3606 if (fence->signalled)
3607 continue;
3608 }
3609
3610 expired = device->ws->fence_wait(device->ws,
3611 fence->fence,
3612 true, timeout);
3613 if (!expired)
3614 return VK_TIMEOUT;
3615 }
3616
3617 if (fence->fence_wsi) {
3618 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
3619 if (result != VK_SUCCESS)
3620 return result;
3621 }
3622
3623 fence->signalled = true;
3624 }
3625
3626 return VK_SUCCESS;
3627 }
3628
3629 VkResult radv_ResetFences(VkDevice _device,
3630 uint32_t fenceCount,
3631 const VkFence *pFences)
3632 {
3633 RADV_FROM_HANDLE(radv_device, device, _device);
3634
3635 for (unsigned i = 0; i < fenceCount; ++i) {
3636 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3637 fence->submitted = fence->signalled = false;
3638
3639 /* Per spec, we first restore the permanent payload, and then reset, so
3640 * having a temp syncobj should not skip resetting the permanent syncobj. */
3641 if (fence->temp_syncobj) {
3642 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3643 fence->temp_syncobj = 0;
3644 }
3645
3646 if (fence->syncobj) {
3647 device->ws->reset_syncobj(device->ws, fence->syncobj);
3648 }
3649 }
3650
3651 return VK_SUCCESS;
3652 }
3653
3654 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3655 {
3656 RADV_FROM_HANDLE(radv_device, device, _device);
3657 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3658
3659 if (fence->temp_syncobj) {
3660 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3661 return success ? VK_SUCCESS : VK_NOT_READY;
3662 }
3663
3664 if (fence->syncobj) {
3665 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3666 return success ? VK_SUCCESS : VK_NOT_READY;
3667 }
3668
3669 if (fence->signalled)
3670 return VK_SUCCESS;
3671 if (!fence->submitted)
3672 return VK_NOT_READY;
3673 if (fence->fence) {
3674 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3675 return VK_NOT_READY;
3676 }
3677 if (fence->fence_wsi) {
3678 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
3679
3680 if (result != VK_SUCCESS) {
3681 if (result == VK_TIMEOUT)
3682 return VK_NOT_READY;
3683 return result;
3684 }
3685 }
3686 return VK_SUCCESS;
3687 }
3688
3689
3690 // Queue semaphore functions
3691
3692 VkResult radv_CreateSemaphore(
3693 VkDevice _device,
3694 const VkSemaphoreCreateInfo* pCreateInfo,
3695 const VkAllocationCallbacks* pAllocator,
3696 VkSemaphore* pSemaphore)
3697 {
3698 RADV_FROM_HANDLE(radv_device, device, _device);
3699 const VkExportSemaphoreCreateInfoKHR *export =
3700 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
3701 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
3702 export ? export->handleTypes : 0;
3703
3704 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3705 sizeof(*sem), 8,
3706 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3707 if (!sem)
3708 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3709
3710 sem->temp_syncobj = 0;
3711 	/* Create a syncobj if we are going to export this semaphore or if the device always uses syncobjs. */
3712 	if (device->always_use_syncobj || handleTypes) {
3713 		assert(device->physical_device->rad_info.has_syncobj);
3714 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3715 if (ret) {
3716 vk_free2(&device->alloc, pAllocator, sem);
3717 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3718 }
3719 sem->sem = NULL;
3720 } else {
3721 sem->sem = device->ws->create_sem(device->ws);
3722 if (!sem->sem) {
3723 vk_free2(&device->alloc, pAllocator, sem);
3724 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3725 }
3726 sem->syncobj = 0;
3727 }
3728
3729 *pSemaphore = radv_semaphore_to_handle(sem);
3730 return VK_SUCCESS;
3731 }
3732
3733 void radv_DestroySemaphore(
3734 VkDevice _device,
3735 VkSemaphore _semaphore,
3736 const VkAllocationCallbacks* pAllocator)
3737 {
3738 RADV_FROM_HANDLE(radv_device, device, _device);
3739 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3740 if (!_semaphore)
3741 return;
3742
3743 if (sem->syncobj)
3744 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3745 else
3746 device->ws->destroy_sem(sem->sem);
3747 vk_free2(&device->alloc, pAllocator, sem);
3748 }
3749
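/* Events are backed by a single uncached, CPU-mapped 64-bit word in GTT
 * that both the host and the GPU can read and write: 1 means set,
 * 0 means reset. */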
3750 VkResult radv_CreateEvent(
3751 VkDevice _device,
3752 const VkEventCreateInfo* pCreateInfo,
3753 const VkAllocationCallbacks* pAllocator,
3754 VkEvent* pEvent)
3755 {
3756 RADV_FROM_HANDLE(radv_device, device, _device);
3757 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3758 sizeof(*event), 8,
3759 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3760
3761 if (!event)
3762 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3763
3764 event->bo = device->ws->buffer_create(device->ws, 8, 8,
3765 RADEON_DOMAIN_GTT,
3766 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
3767 if (!event->bo) {
3768 vk_free2(&device->alloc, pAllocator, event);
3769 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3770 }
3771
3772 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3773
3774 *pEvent = radv_event_to_handle(event);
3775
3776 return VK_SUCCESS;
3777 }
3778
3779 void radv_DestroyEvent(
3780 VkDevice _device,
3781 VkEvent _event,
3782 const VkAllocationCallbacks* pAllocator)
3783 {
3784 RADV_FROM_HANDLE(radv_device, device, _device);
3785 RADV_FROM_HANDLE(radv_event, event, _event);
3786
3787 if (!event)
3788 return;
3789 device->ws->buffer_destroy(event->bo);
3790 vk_free2(&device->alloc, pAllocator, event);
3791 }
3792
3793 VkResult radv_GetEventStatus(
3794 VkDevice _device,
3795 VkEvent _event)
3796 {
3797 RADV_FROM_HANDLE(radv_event, event, _event);
3798
3799 if (*event->map == 1)
3800 return VK_EVENT_SET;
3801 return VK_EVENT_RESET;
3802 }
3803
3804 VkResult radv_SetEvent(
3805 VkDevice _device,
3806 VkEvent _event)
3807 {
3808 RADV_FROM_HANDLE(radv_event, event, _event);
3809 *event->map = 1;
3810
3811 return VK_SUCCESS;
3812 }
3813
3814 VkResult radv_ResetEvent(
3815 VkDevice _device,
3816 VkEvent _event)
3817 {
3818 RADV_FROM_HANDLE(radv_event, event, _event);
3819 *event->map = 0;
3820
3821 return VK_SUCCESS;
3822 }
3823
3824 VkResult radv_CreateBuffer(
3825 VkDevice _device,
3826 const VkBufferCreateInfo* pCreateInfo,
3827 const VkAllocationCallbacks* pAllocator,
3828 VkBuffer* pBuffer)
3829 {
3830 RADV_FROM_HANDLE(radv_device, device, _device);
3831 struct radv_buffer *buffer;
3832
3833 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
3834
3835 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
3836 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3837 if (buffer == NULL)
3838 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3839
3840 buffer->size = pCreateInfo->size;
3841 buffer->usage = pCreateInfo->usage;
3842 buffer->bo = NULL;
3843 buffer->offset = 0;
3844 buffer->flags = pCreateInfo->flags;
3845
3846 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
3847 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
3848
3849 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
3850 buffer->bo = device->ws->buffer_create(device->ws,
3851 align64(buffer->size, 4096),
3852 4096, 0, RADEON_FLAG_VIRTUAL);
3853 if (!buffer->bo) {
3854 vk_free2(&device->alloc, pAllocator, buffer);
3855 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3856 }
3857 }
3858
3859 *pBuffer = radv_buffer_to_handle(buffer);
3860
3861 return VK_SUCCESS;
3862 }
3863
3864 void radv_DestroyBuffer(
3865 VkDevice _device,
3866 VkBuffer _buffer,
3867 const VkAllocationCallbacks* pAllocator)
3868 {
3869 RADV_FROM_HANDLE(radv_device, device, _device);
3870 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3871
3872 if (!buffer)
3873 return;
3874
3875 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3876 device->ws->buffer_destroy(buffer->bo);
3877
3878 vk_free2(&device->alloc, pAllocator, buffer);
3879 }
3880
3881 static inline unsigned
3882 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
3883 {
3884 if (stencil)
3885 return image->surface.u.legacy.stencil_tiling_index[level];
3886 else
3887 return image->surface.u.legacy.tiling_index[level];
3888 }
3889
3890 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
3891 {
3892 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
3893 }
3894
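/* Compute the CB_DCC_CONTROL register value for a color view; returns 0
 * on chips without DCC support (pre-VI). */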
3895 static uint32_t
3896 radv_init_dcc_control_reg(struct radv_device *device,
3897 struct radv_image_view *iview)
3898 {
3899 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
3900 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
3901 unsigned max_compressed_block_size;
3902 unsigned independent_64b_blocks;
3903
3904 if (device->physical_device->rad_info.chip_class < VI)
3905 return 0;
3906
3907 if (iview->image->info.samples > 1) {
3908 if (iview->image->surface.bpe == 1)
3909 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3910 else if (iview->image->surface.bpe == 2)
3911 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
3912 }
3913
3914 if (!device->physical_device->rad_info.has_dedicated_vram) {
3915 /* amdvlk: [min-compressed-block-size] should be set to 32 for
3916 * dGPU and 64 for APU because all of our APUs to date use
3917 * DIMMs which have a request granularity size of 64B while all
3918 * other chips have a 32B request size.
3919 */
3920 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
3921 }
3922
3923 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
3924 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
3925 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
3926 /* If this DCC image is potentially going to be used in texture
3927 * fetches, we need some special settings.
3928 */
3929 independent_64b_blocks = 1;
3930 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
3931 } else {
3932 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
3933 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
3934 * big as possible for better compression state.
3935 */
3936 independent_64b_blocks = 0;
3937 max_compressed_block_size = max_uncompressed_block_size;
3938 }
3939
3940 return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3941 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
3942 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
3943 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
3944 }
3945
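/* Fill the CB_COLOR_* register state used when this image view is bound
 * as a color attachment. */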
3946 static void
3947 radv_initialise_color_surface(struct radv_device *device,
3948 struct radv_color_buffer_info *cb,
3949 struct radv_image_view *iview)
3950 {
3951 const struct vk_format_description *desc;
3952 unsigned ntype, format, swap, endian;
3953 unsigned blend_clamp = 0, blend_bypass = 0;
3954 uint64_t va;
3955 const struct radeon_surf *surf = &iview->image->surface;
3956
3957 desc = vk_format_description(iview->vk_format);
3958
3959 memset(cb, 0, sizeof(*cb));
3960
3961 /* Intensity is implemented as Red, so treat it that way. */
3962 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
3963
3964 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3965
3966 cb->cb_color_base = va >> 8;
3967
3968 if (device->physical_device->rad_info.chip_class >= GFX9) {
3969 struct gfx9_surf_meta_flags meta;
3970 if (iview->image->dcc_offset)
3971 meta = iview->image->surface.u.gfx9.dcc;
3972 else
3973 meta = iview->image->surface.u.gfx9.cmask;
3974
3975 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3976 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
3977 S_028C74_RB_ALIGNED(meta.rb_aligned) |
3978 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
3979
3980 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
3981 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3982 } else {
3983 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
3984 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
3985
3986 cb->cb_color_base += level_info->offset >> 8;
3987 if (level_info->mode == RADEON_SURF_MODE_2D)
3988 cb->cb_color_base |= iview->image->surface.tile_swizzle;
3989
3990 pitch_tile_max = level_info->nblk_x / 8 - 1;
3991 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
3992 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
3993
3994 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
3995 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
3996 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
3997
3998 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
3999
4000 if (radv_image_has_fmask(iview->image)) {
4001 if (device->physical_device->rad_info.chip_class >= CIK)
4002 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
4003 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
4004 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
4005 } else {
4006 /* This must be set for fast clear to work without FMASK. */
4007 if (device->physical_device->rad_info.chip_class >= CIK)
4008 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
4009 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
4010 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
4011 }
4012 }
4013
4014 /* CMASK variables */
4015 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4016 va += iview->image->cmask.offset;
4017 cb->cb_color_cmask = va >> 8;
4018
4019 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4020 va += iview->image->dcc_offset;
4021 cb->cb_dcc_base = va >> 8;
4022 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
4023
4024 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4025 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
4026 S_028C6C_SLICE_MAX(max_slice);
4027
4028 if (iview->image->info.samples > 1) {
4029 unsigned log_samples = util_logbase2(iview->image->info.samples);
4030
4031 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
4032 S_028C74_NUM_FRAGMENTS(log_samples);
4033 }
4034
4035 if (radv_image_has_fmask(iview->image)) {
4036 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
4037 cb->cb_color_fmask = va >> 8;
4038 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
4039 } else {
4040 cb->cb_color_fmask = cb->cb_color_base;
4041 }
4042
4043 ntype = radv_translate_color_numformat(iview->vk_format,
4044 desc,
4045 vk_format_get_first_non_void_channel(iview->vk_format));
4046 format = radv_translate_colorformat(iview->vk_format);
4047 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
4048 radv_finishme("Illegal color\n");
4049 swap = radv_translate_colorswap(iview->vk_format, FALSE);
4050 endian = radv_colorformat_endian_swap(format);
4051
4052 /* blend clamp should be set for all NORM/SRGB types */
4053 if (ntype == V_028C70_NUMBER_UNORM ||
4054 ntype == V_028C70_NUMBER_SNORM ||
4055 ntype == V_028C70_NUMBER_SRGB)
4056 blend_clamp = 1;
4057
4058 /* set blend bypass according to docs if SINT/UINT or
4059 8/24 COLOR variants */
4060 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
4061 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
4062 format == V_028C70_COLOR_X24_8_32_FLOAT) {
4063 blend_clamp = 0;
4064 blend_bypass = 1;
4065 }
4066 #if 0
4067 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
4068 (format == V_028C70_COLOR_8 ||
4069 format == V_028C70_COLOR_8_8 ||
4070 format == V_028C70_COLOR_8_8_8_8))
4071 ->color_is_int8 = true;
4072 #endif
4073 cb->cb_color_info = S_028C70_FORMAT(format) |
4074 S_028C70_COMP_SWAP(swap) |
4075 S_028C70_BLEND_CLAMP(blend_clamp) |
4076 S_028C70_BLEND_BYPASS(blend_bypass) |
4077 S_028C70_SIMPLE_FLOAT(1) |
4078 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
4079 ntype != V_028C70_NUMBER_SNORM &&
4080 ntype != V_028C70_NUMBER_SRGB &&
4081 format != V_028C70_COLOR_8_24 &&
4082 format != V_028C70_COLOR_24_8) |
4083 S_028C70_NUMBER_TYPE(ntype) |
4084 S_028C70_ENDIAN(endian);
4085 if (radv_image_has_fmask(iview->image)) {
4086 cb->cb_color_info |= S_028C70_COMPRESSION(1);
4087 if (device->physical_device->rad_info.chip_class == SI) {
4088 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
4089 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
4090 }
4091 }
4092
4093 if (radv_image_has_cmask(iview->image) &&
4094 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
4095 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
4096
4097 if (radv_dcc_enabled(iview->image, iview->base_mip))
4098 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
4099
4100 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
4101
4102 /* This must be set for fast clear to work without FMASK. */
4103 if (!radv_image_has_fmask(iview->image) &&
4104 device->physical_device->rad_info.chip_class == SI) {
4105 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
4106 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
4107 }
4108
4109 if (device->physical_device->rad_info.chip_class >= GFX9) {
4110 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
4111 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
4112
4113 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
4114 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
4115 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
4116 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
4117 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
4118 S_028C68_MAX_MIP(iview->image->info.levels - 1);
4119 }
4120 }
4121
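/* Maximum number of Z planes the DB can decompress on the fly
 * (DECOMPRESS_ON_N_ZPLANES) for a TC-compatible HTILE surface. */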
4122 static unsigned
4123 radv_calc_decompress_on_z_planes(struct radv_device *device,
4124 struct radv_image_view *iview)
4125 {
4126 unsigned max_zplanes = 0;
4127
4128 assert(radv_image_is_tc_compat_htile(iview->image));
4129
4130 if (device->physical_device->rad_info.chip_class >= GFX9) {
4131 /* Default value for 32-bit depth surfaces. */
4132 max_zplanes = 4;
4133
4134 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
4135 iview->image->info.samples > 1)
4136 max_zplanes = 2;
4137
4138 max_zplanes = max_zplanes + 1;
4139 } else {
4140 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
4141 /* Do not enable Z plane compression for 16-bit depth
4142 			 * surfaces because it isn't supported on GFX8. Only
4143 			 * 32-bit depth surfaces are supported by the hardware.
4144 			 * This allows us to maintain shader compatibility and to
4145 * reduce the number of depth decompressions.
4146 */
4147 max_zplanes = 1;
4148 } else {
4149 if (iview->image->info.samples <= 1)
4150 max_zplanes = 5;
4151 else if (iview->image->info.samples <= 4)
4152 max_zplanes = 3;
4153 else
4154 max_zplanes = 2;
4155 }
4156 }
4157
4158 return max_zplanes;
4159 }
4160
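/* Fill the DB_* register state used when this image view is bound as a
 * depth/stencil attachment. */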
4161 static void
4162 radv_initialise_ds_surface(struct radv_device *device,
4163 struct radv_ds_buffer_info *ds,
4164 struct radv_image_view *iview)
4165 {
4166 unsigned level = iview->base_mip;
4167 unsigned format, stencil_format;
4168 uint64_t va, s_offs, z_offs;
4169 bool stencil_only = false;
4170 memset(ds, 0, sizeof(*ds));
4171 switch (iview->image->vk_format) {
4172 case VK_FORMAT_D24_UNORM_S8_UINT:
4173 case VK_FORMAT_X8_D24_UNORM_PACK32:
4174 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
4175 ds->offset_scale = 2.0f;
4176 break;
4177 case VK_FORMAT_D16_UNORM:
4178 case VK_FORMAT_D16_UNORM_S8_UINT:
4179 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
4180 ds->offset_scale = 4.0f;
4181 break;
4182 case VK_FORMAT_D32_SFLOAT:
4183 case VK_FORMAT_D32_SFLOAT_S8_UINT:
4184 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
4185 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
4186 ds->offset_scale = 1.0f;
4187 break;
4188 case VK_FORMAT_S8_UINT:
4189 stencil_only = true;
4190 break;
4191 default:
4192 break;
4193 }
4194
4195 format = radv_translate_dbformat(iview->image->vk_format);
4196 stencil_format = iview->image->surface.has_stencil ?
4197 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
4198
4199 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4200 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
4201 S_028008_SLICE_MAX(max_slice);
4202
4203 ds->db_htile_data_base = 0;
4204 ds->db_htile_surface = 0;
4205
4206 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4207 s_offs = z_offs = va;
4208
4209 if (device->physical_device->rad_info.chip_class >= GFX9) {
4210 assert(iview->image->surface.u.gfx9.surf_offset == 0);
4211 s_offs += iview->image->surface.u.gfx9.stencil_offset;
4212
4213 ds->db_z_info = S_028038_FORMAT(format) |
4214 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
4215 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
4216 S_028038_MAXMIP(iview->image->info.levels - 1) |
4217 S_028038_ZRANGE_PRECISION(1);
4218 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
4219 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
4220
4221 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
4222 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
4223 ds->db_depth_view |= S_028008_MIPID(level);
4224
4225 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
4226 S_02801C_Y_MAX(iview->image->info.height - 1);
4227
4228 if (radv_htile_enabled(iview->image, level)) {
4229 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
4230
4231 if (radv_image_is_tc_compat_htile(iview->image)) {
4232 unsigned max_zplanes =
4233 radv_calc_decompress_on_z_planes(device, iview);
4234
4235 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
4236 S_028038_ITERATE_FLUSH(1);
4237 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
4238 }
4239
4240 if (!iview->image->surface.has_stencil)
4241 /* Use all of the htile_buffer for depth if there's no stencil. */
4242 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
4243 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4244 iview->image->htile_offset;
4245 ds->db_htile_data_base = va >> 8;
4246 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
4247 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
4248 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
4249 }
4250 } else {
4251 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
4252
4253 if (stencil_only)
4254 level_info = &iview->image->surface.u.legacy.stencil_level[level];
4255
4256 z_offs += iview->image->surface.u.legacy.level[level].offset;
4257 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
4258
4259 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
4260 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
4261 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
4262
4263 if (iview->image->info.samples > 1)
4264 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
4265
4266 if (device->physical_device->rad_info.chip_class >= CIK) {
4267 struct radeon_info *info = &device->physical_device->rad_info;
4268 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
4269 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
4270 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
4271 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
4272 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
4273 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
4274
4275 if (stencil_only)
4276 tile_mode = stencil_tile_mode;
4277
4278 ds->db_depth_info |=
4279 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
4280 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
4281 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
4282 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
4283 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
4284 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
4285 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
4286 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
4287 } else {
4288 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
4289 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4290 tile_mode_index = si_tile_mode_index(iview->image, level, true);
4291 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
4292 if (stencil_only)
4293 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4294 }
4295
4296 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
4297 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
4298 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
4299
4300 if (radv_htile_enabled(iview->image, level)) {
4301 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
4302
4303 if (!iview->image->surface.has_stencil &&
4304 !radv_image_is_tc_compat_htile(iview->image))
4305 /* Use all of the htile_buffer for depth if there's no stencil. */
4306 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
4307
4308 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4309 iview->image->htile_offset;
4310 ds->db_htile_data_base = va >> 8;
4311 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
4312
4313 if (radv_image_is_tc_compat_htile(iview->image)) {
4314 unsigned max_zplanes =
4315 radv_calc_decompress_on_z_planes(device, iview);
4316
4317 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4318 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4319 }
4320 }
4321 }
4322
4323 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4324 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4325 }
4326
4327 VkResult radv_CreateFramebuffer(
4328 VkDevice _device,
4329 const VkFramebufferCreateInfo* pCreateInfo,
4330 const VkAllocationCallbacks* pAllocator,
4331 VkFramebuffer* pFramebuffer)
4332 {
4333 RADV_FROM_HANDLE(radv_device, device, _device);
4334 struct radv_framebuffer *framebuffer;
4335
4336 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4337
4338 size_t size = sizeof(*framebuffer) +
4339 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
4340 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4341 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4342 if (framebuffer == NULL)
4343 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4344
4345 framebuffer->attachment_count = pCreateInfo->attachmentCount;
4346 framebuffer->width = pCreateInfo->width;
4347 framebuffer->height = pCreateInfo->height;
4348 framebuffer->layers = pCreateInfo->layers;
4349 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4350 VkImageView _iview = pCreateInfo->pAttachments[i];
4351 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4352 framebuffer->attachments[i].attachment = iview;
4353 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
4354 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
4355 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4356 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
4357 }
4358 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4359 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4360 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4361 }
4362
4363 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4364 return VK_SUCCESS;
4365 }
4366
4367 void radv_DestroyFramebuffer(
4368 VkDevice _device,
4369 VkFramebuffer _fb,
4370 const VkAllocationCallbacks* pAllocator)
4371 {
4372 RADV_FROM_HANDLE(radv_device, device, _device);
4373 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4374
4375 if (!fb)
4376 return;
4377 vk_free2(&device->alloc, pAllocator, fb);
4378 }
4379
4380 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4381 {
4382 switch (address_mode) {
4383 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4384 return V_008F30_SQ_TEX_WRAP;
4385 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4386 return V_008F30_SQ_TEX_MIRROR;
4387 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4388 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4389 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4390 return V_008F30_SQ_TEX_CLAMP_BORDER;
4391 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4392 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4393 default:
4394 unreachable("illegal tex wrap mode");
4395 break;
4396 }
4397 }
4398
4399 static unsigned
4400 radv_tex_compare(VkCompareOp op)
4401 {
4402 switch (op) {
4403 case VK_COMPARE_OP_NEVER:
4404 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4405 case VK_COMPARE_OP_LESS:
4406 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4407 case VK_COMPARE_OP_EQUAL:
4408 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4409 case VK_COMPARE_OP_LESS_OR_EQUAL:
4410 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4411 case VK_COMPARE_OP_GREATER:
4412 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4413 case VK_COMPARE_OP_NOT_EQUAL:
4414 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4415 case VK_COMPARE_OP_GREATER_OR_EQUAL:
4416 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4417 case VK_COMPARE_OP_ALWAYS:
4418 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4419 default:
4420 unreachable("illegal compare mode");
4421 break;
4422 }
4423 }
4424
4425 static unsigned
4426 radv_tex_filter(VkFilter filter, unsigned max_aniso)
4427 {
4428 	switch (filter) {
4429 	case VK_FILTER_NEAREST:
4430 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4431 				V_008F38_SQ_TEX_XY_FILTER_POINT);
4432 	case VK_FILTER_LINEAR:
4433 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4434 				V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4435 	case VK_FILTER_CUBIC_IMG:
4436 	default:
4437 		fprintf(stderr, "illegal texture filter\n");
4438 return 0;
4439 }
4440 }
4441
4442 static unsigned
4443 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4444 {
4445 switch (mode) {
4446 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4447 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4448 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4449 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4450 default:
4451 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4452 }
4453 }
4454
4455 static unsigned
4456 radv_tex_bordercolor(VkBorderColor bcolor)
4457 {
4458 switch (bcolor) {
4459 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4460 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4461 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4462 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4463 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4464 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4465 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
4466 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
4467 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
4468 default:
4469 break;
4470 }
4471 return 0;
4472 }
4473
4474 static unsigned
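/* Map a maximum anisotropy of 1/2/4/8/16 samples to the log2-encoded
 * value (0..4) expected by the MAX_ANISO_RATIO field. */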
4475 radv_tex_aniso_filter(unsigned filter)
4476 {
4477 if (filter < 2)
4478 return 0;
4479 if (filter < 4)
4480 return 1;
4481 if (filter < 8)
4482 return 2;
4483 if (filter < 16)
4484 return 3;
4485 return 4;
4486 }
4487
4488 static unsigned
4489 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
4490 {
4491 switch (mode) {
4492 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
4493 return SQ_IMG_FILTER_MODE_BLEND;
4494 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
4495 return SQ_IMG_FILTER_MODE_MIN;
4496 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
4497 return SQ_IMG_FILTER_MODE_MAX;
4498 default:
4499 break;
4500 }
4501 return 0;
4502 }
4503
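/* force_aniso is a driver-wide override set elsewhere in the driver
 * (negative means "no override"); otherwise honor the application's
 * sampler create info. */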
4504 static uint32_t
4505 radv_get_max_anisotropy(struct radv_device *device,
4506 const VkSamplerCreateInfo *pCreateInfo)
4507 {
4508 if (device->force_aniso >= 0)
4509 return device->force_aniso;
4510
4511 if (pCreateInfo->anisotropyEnable &&
4512 pCreateInfo->maxAnisotropy > 1.0f)
4513 return (uint32_t)pCreateInfo->maxAnisotropy;
4514
4515 return 0;
4516 }
4517
4518 static void
4519 radv_init_sampler(struct radv_device *device,
4520 struct radv_sampler *sampler,
4521 const VkSamplerCreateInfo *pCreateInfo)
4522 {
4523 uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
4524 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
4525 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
4526 unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND;
4527
4528 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
4529 vk_find_struct_const(pCreateInfo->pNext,
4530 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
4531 if (sampler_reduction)
4532 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
4533
4534 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
4535 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
4536 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
4537 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4538 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
4539 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
4540 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4541 S_008F30_ANISO_BIAS(max_aniso_ratio) |
4542 S_008F30_DISABLE_CUBE_WRAP(0) |
4543 S_008F30_COMPAT_MODE(is_vi) |
4544 S_008F30_FILTER_MODE(filter_mode));
4545 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
4546 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
4547 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4548 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
4549 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
4550 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
4551 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
4552 S_008F38_MIP_POINT_PRECLAMP(0) |
4553 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
4554 S_008F38_FILTER_PREC_FIX(1) |
4555 S_008F38_ANISO_OVERRIDE(is_vi));
4556 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4557 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4558 }
4559
4560 VkResult radv_CreateSampler(
4561 VkDevice _device,
4562 const VkSamplerCreateInfo* pCreateInfo,
4563 const VkAllocationCallbacks* pAllocator,
4564 VkSampler* pSampler)
4565 {
4566 RADV_FROM_HANDLE(radv_device, device, _device);
4567 struct radv_sampler *sampler;
4568
4569 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4570
4571 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4572 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4573 if (!sampler)
4574 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4575
4576 radv_init_sampler(device, sampler, pCreateInfo);
4577 *pSampler = radv_sampler_to_handle(sampler);
4578
4579 return VK_SUCCESS;
4580 }
4581
4582 void radv_DestroySampler(
4583 VkDevice _device,
4584 VkSampler _sampler,
4585 const VkAllocationCallbacks* pAllocator)
4586 {
4587 RADV_FROM_HANDLE(radv_device, device, _device);
4588 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4589
4590 if (!sampler)
4591 return;
4592 vk_free2(&device->alloc, pAllocator, sampler);
4593 }
4594
4595 /* vk_icd.h does not declare this function, so we declare it here to
4596 * suppress Wmissing-prototypes.
4597 */
4598 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4599 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4600
4601 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4602 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4603 {
4604 /* For the full details on loader interface versioning, see
4605 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4606 * What follows is a condensed summary, to help you navigate the large and
4607 * confusing official doc.
4608 *
4609 * - Loader interface v0 is incompatible with later versions. We don't
4610 * support it.
4611 *
4612 * - In loader interface v1:
4613 * - The first ICD entrypoint called by the loader is
4614 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4615 * entrypoint.
4616 * - The ICD must statically expose no other Vulkan symbol unless it is
4617 * linked with -Bsymbolic.
4618 * - Each dispatchable Vulkan handle created by the ICD must be
4619 * a pointer to a struct whose first member is VK_LOADER_DATA. The
4620 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4621 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4622 * vkDestroySurfaceKHR(). The ICD must be capable of working with
4623 * such loader-managed surfaces.
4624 *
4625 * - Loader interface v2 differs from v1 in:
4626 * - The first ICD entrypoint called by the loader is
4627 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4628 * statically expose this entrypoint.
4629 *
4630 * - Loader interface v3 differs from v2 in:
4631 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4632 	 *    vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
4633 * because the loader no longer does so.
4634 */
4635 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4636 return VK_SUCCESS;
4637 }
4638
4639 VkResult radv_GetMemoryFdKHR(VkDevice _device,
4640 const VkMemoryGetFdInfoKHR *pGetFdInfo,
4641 int *pFD)
4642 {
4643 RADV_FROM_HANDLE(radv_device, device, _device);
4644 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
4645
4646 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4647
4648 /* At the moment, we support only the below handle types. */
4649 assert(pGetFdInfo->handleType ==
4650 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4651 pGetFdInfo->handleType ==
4652 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4653
4654 bool ret = radv_get_memory_fd(device, memory, pFD);
4655 if (ret == false)
4656 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4657 return VK_SUCCESS;
4658 }
4659
4660 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
4661 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
4662 int fd,
4663 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
4664 {
4665 RADV_FROM_HANDLE(radv_device, device, _device);
4666
4667 switch (handleType) {
4668 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4669 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
4670 return VK_SUCCESS;
4671
4672 default:
4673 /* The valid usage section for this function says:
4674 *
4675 * "handleType must not be one of the handle types defined as
4676 * opaque."
4677 *
4678 * So opaque handle types fall into the default "unsupported" case.
4679 */
4680 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4681 }
4682 }
4683
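/* Import a syncobj from an opaque fd. Ownership of the fd transfers to
 * the driver on success, and any previous payload is destroyed. */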
4684 static VkResult radv_import_opaque_fd(struct radv_device *device,
4685 int fd,
4686 uint32_t *syncobj)
4687 {
4688 uint32_t syncobj_handle = 0;
4689 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
4690 if (ret != 0)
4691 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4692
4693 if (*syncobj)
4694 device->ws->destroy_syncobj(device->ws, *syncobj);
4695
4696 *syncobj = syncobj_handle;
4697 close(fd);
4698
4699 return VK_SUCCESS;
4700 }
4701
4702 static VkResult radv_import_sync_fd(struct radv_device *device,
4703 int fd,
4704 uint32_t *syncobj)
4705 {
4706 	/* If we create a syncobj we do it locally so that if we have an error, we
4707 	 * don't leave a syncobj in an undetermined state in the fence or semaphore. */
4708 uint32_t syncobj_handle = *syncobj;
4709 if (!syncobj_handle) {
4710 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
4711 if (ret) {
4712 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4713 }
4714 }
4715
4716 if (fd == -1) {
4717 device->ws->signal_syncobj(device->ws, syncobj_handle);
4718 } else {
4719 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
4720 		if (ret != 0) {
			/* Don't leak a syncobj we created above on the error path. */
			if (!*syncobj)
				device->ws->destroy_syncobj(device->ws, syncobj_handle);
4721 			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
		}
4722 }
4723
4724 *syncobj = syncobj_handle;
4725 if (fd != -1)
4726 close(fd);
4727
4728 return VK_SUCCESS;
4729 }
4730
4731 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
4732 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
4733 {
4734 RADV_FROM_HANDLE(radv_device, device, _device);
4735 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
4736 uint32_t *syncobj_dst = NULL;
4737
4738 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
4739 syncobj_dst = &sem->temp_syncobj;
4740 } else {
4741 syncobj_dst = &sem->syncobj;
4742 }
4743
4744 switch(pImportSemaphoreFdInfo->handleType) {
4745 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4746 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4747 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4748 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
4749 default:
4750 unreachable("Unhandled semaphore handle type");
4751 }
4752 }
4753
4754 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
4755 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
4756 int *pFd)
4757 {
4758 RADV_FROM_HANDLE(radv_device, device, _device);
4759 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
4760 int ret;
4761 uint32_t syncobj_handle;
4762
4763 if (sem->temp_syncobj)
4764 syncobj_handle = sem->temp_syncobj;
4765 else
4766 syncobj_handle = sem->syncobj;
4767
4768 switch(pGetFdInfo->handleType) {
4769 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4770 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
4771 break;
4772 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4773 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
4774 if (!ret) {
4775 if (sem->temp_syncobj) {
4776 				device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
4777 sem->temp_syncobj = 0;
4778 } else {
4779 device->ws->reset_syncobj(device->ws, syncobj_handle);
4780 }
4781 }
4782 break;
4783 default:
4784 unreachable("Unhandled semaphore handle type");
4785 }
4786
4787 if (ret)
4788 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
4789 return VK_SUCCESS;
4790 }
4791
4792 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
4793 VkPhysicalDevice physicalDevice,
4794 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
4795 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
4796 {
4797 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
4798
4799 /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
4800 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
4801 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
4802 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
4803 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4804 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
4805 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4806 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4807 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
4808 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4809 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
4810 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
4811 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
4812 } else {
4813 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
4814 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
4815 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
4816 }
4817 }
4818
4819 VkResult radv_ImportFenceFdKHR(VkDevice _device,
4820 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
4821 {
4822 RADV_FROM_HANDLE(radv_device, device, _device);
4823 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
4824 uint32_t *syncobj_dst = NULL;
4825
4826
4827 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
4828 syncobj_dst = &fence->temp_syncobj;
4829 } else {
4830 syncobj_dst = &fence->syncobj;
4831 }
4832
4833 switch(pImportFenceFdInfo->handleType) {
4834 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
4835 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4836 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
4837 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
4838 default:
4839 unreachable("Unhandled fence handle type");
4840 }
4841 }
4842
VkResult radv_GetFenceFdKHR(VkDevice _device,
			    const VkFenceGetFdInfoKHR *pGetFdInfo,
			    int *pFd)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
	int ret;
	uint32_t syncobj_handle;

	if (fence->temp_syncobj)
		syncobj_handle = fence->temp_syncobj;
	else
		syncobj_handle = fence->syncobj;

	switch(pGetFdInfo->handleType) {
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
		break;
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
		if (!ret) {
			if (fence->temp_syncobj) {
				/* A syncobj handle is not a file descriptor,
				 * so destroy it through the winsys rather
				 * than close().
				 */
				device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
				fence->temp_syncobj = 0;
			} else {
				device->ws->reset_syncobj(device->ws, syncobj_handle);
			}
		}
		break;
	default:
		unreachable("Unhandled fence handle type");
	}

	if (ret)
		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
	return VK_SUCCESS;
}

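/* Fence counterpart of the external semaphore query above: both FD handle
 * types are only reported when the kernel has the wait-for-submit syncobj
 * support that sync-FD interop relies on.
 */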
void radv_GetPhysicalDeviceExternalFenceProperties(
	VkPhysicalDevice                            physicalDevice,
	const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
	VkExternalFencePropertiesKHR*               pExternalFenceProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
	    (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
	     pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
		pExternalFenceProperties->exportFromImportedHandleTypes =
			VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR |
			VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
		pExternalFenceProperties->compatibleHandleTypes =
			VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR |
			VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
		pExternalFenceProperties->externalFenceFeatures =
			VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR;
	} else {
		pExternalFenceProperties->exportFromImportedHandleTypes = 0;
		pExternalFenceProperties->compatibleHandleTypes = 0;
		pExternalFenceProperties->externalFenceFeatures = 0;
	}
}

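/* VK_EXT_debug_report entry points. These are thin trampolines into the
 * shared vk_debug_report helpers, which keep the callback list on the
 * instance.
 */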
VkResult
radv_CreateDebugReportCallbackEXT(VkInstance _instance,
				  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
				  const VkAllocationCallbacks* pAllocator,
				  VkDebugReportCallbackEXT* pCallback)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
					       pCreateInfo, pAllocator, &instance->alloc,
					       pCallback);
}

void
radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
				   VkDebugReportCallbackEXT _callback,
				   const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
					 _callback, pAllocator, &instance->alloc);
}

void
radv_DebugReportMessageEXT(VkInstance _instance,
			   VkDebugReportFlagsEXT flags,
			   VkDebugReportObjectTypeEXT objectType,
			   uint64_t object,
			   size_t location,
			   int32_t messageCode,
			   const char* pLayerPrefix,
			   const char* pMessage)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
			object, location, messageCode, pLayerPrefix, pMessage);
}

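/* radv only exposes single-GPU device groups, so the only valid peer is the
 * device itself and all copy/generic src/dst features can be reported
 * unconditionally.
 */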
void
radv_GetDeviceGroupPeerMemoryFeatures(
	VkDevice device,
	uint32_t heapIndex,
	uint32_t localDeviceIndex,
	uint32_t remoteDeviceIndex,
	VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
{
	assert(localDeviceIndex == remoteDeviceIndex);

	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
			       VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
			       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
			       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}