radv: Use local buffers for the global bo list.
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
#include "gfx9d.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	unsigned ptr_size = sizeof(void*);

	memset(uuid, 0, VK_UUID_SIZE);
	_mesa_sha1_init(&ctx);

	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
		return -1;

	_mesa_sha1_update(&ctx, &family, sizeof(family));
	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
	_mesa_sha1_final(&ctx, sha1);

	memcpy(uuid, sha1, VK_UUID_SIZE);
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
	return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
	return device->rad_info.vram_size - radv_get_visible_vram_size(device);
}

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
					 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}
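
/*
 * Editorial note: the heap/type table built above is what applications see
 * through vkGetPhysicalDeviceMemoryProperties(). A minimal, hypothetical
 * app-side sketch of the standard selection loop over these types (not part
 * of the driver; names are illustrative):
 */
#if 0
static int32_t
example_find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
			 uint32_t allowed_type_bits, /* from VkMemoryRequirements */
			 VkMemoryPropertyFlags wanted)
{
	for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
		/* The type must be usable for the resource and carry the
		 * requested properties (e.g. HOST_VISIBLE | HOST_COHERENT,
		 * which maps to RADV_MEM_TYPE_GTT_WRITE_COMBINE above). */
		if ((allowed_type_bits & (1u << i)) &&
		    (props->memoryTypes[i].propertyFlags & wanted) == wanted)
			return (int32_t)i;
	}
	return -1; /* no compatible type */
}
#endif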

static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
	const char *family = getenv("RADV_FORCE_FAMILY");
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			device->rad_info.family = i;

			if (i >= CHIP_VEGA10)
				device->rad_info.chip_class = GFX9;
			else if (i >= CHIP_TONGA)
				device->rad_info.chip_class = VI;
			else if (i >= CHIP_BONAIRE)
				device->rad_info.chip_class = CIK;
			else
				device->rad_info.chip_class = SI;

			return;
		}
	}

	fprintf(stderr, "radv: Unknown family: %s\n", family);
	exit(1);
}
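
/*
 * Editorial note: RADV_FORCE_FAMILY takes an LLVM processor name as
 * returned by ac_get_llvm_processor_name(), e.g. (assuming the usual
 * LLVM AMDGPU names) RADV_FORCE_FAMILY=polaris10. Only the reported
 * family/chip_class are overridden; the winsys still talks to the real
 * hardware.
 */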

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;
	int master_fd = -1;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not open device '%s'", path);

		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
	}

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not get the kernel driver version for device '%s'", path);

		return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found compatible device '%s'.", path);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		goto fail;
	}

	if (instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_handle_env_var_force_family(device);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The gpu id is already embedded in the cache uuid so we just pass
	 * the device name when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	if (device->rad_info.chip_class < VI ||
	    device->rad_info.chip_class > GFX9)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
					 device->rad_info.family == CHIP_VEGA12 ||
					 device->rad_info.family == CHIP_RAVEN ||
					 device->rad_info.family == CHIP_RAVEN2;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	/* Out-of-order primitive rasterization. */
	device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
					device->rad_info.max_se >= 2;
	device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	/* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
	device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 ||
				       (device->rad_info.chip_class >= VI &&
					device->rad_info.me_fw_feature >= 41);

	radv_physical_device_init_mem_types(device);
	radv_fill_device_extension_table(device, &device->supported_extensions);

	device->bus_info = *drm_device->businfo.pci;

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	/* The WSI is structured as a layer on top of the driver, so this has
	 * to be the last part of initialization (at least until we get other
	 * semi-layers).
	 */
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		vk_error(instance, result);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}
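
/*
 * Editorial note: both tables above are consumed by parse_debug_string() at
 * instance creation, so options combine as comma-separated environment
 * variables, e.g. RADV_DEBUG=info,nocache or RADV_PERFTEST=localbos,bolist.
 * The latter two options appear to relate to the global BO list handling
 * that this change's subject refers to.
 */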

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
			/* Force enable LLVM sisched for Talos because it
			 * looks safe and it gives a few more FPS.
			 */
			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
		}
	} else if (!strcmp(name, "DOOM_VFR")) {
		/* Work around a Doom VFR game bug */
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
	}
}

static int radv_get_instance_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_API_VERSION_1_0;
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Created an instance");

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(instance, result);
	}

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}
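
/*
 * Editorial note: a minimal, hypothetical app-side sketch of the standard
 * two-call idiom this entry point implements (count query, then fill; a
 * too-small count yields VK_INCOMPLETE as returned above):
 */
#if 0
static VkResult
example_enumerate(VkInstance instance)
{
	uint32_t count = 0;
	VkResult result = vkEnumeratePhysicalDevices(instance, &count, NULL);
	if (result != VK_SUCCESS || count == 0)
		return result;

	VkPhysicalDevice devices[8];
	count = MIN2(count, 8u);
	return vkEnumeratePhysicalDevices(instance, &count, devices);
}
#endif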

VkResult radv_EnumeratePhysicalDeviceGroups(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceGroupCount,
	VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDeviceGroupProperties) {
		*pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
			pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
			pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
			pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
		}
	}
	return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
									  : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = radv_device_supports_etc(pdevice),
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2                  *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: {
			VkPhysicalDeviceVariablePointerFeatures *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
			VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES: {
			VkPhysicalDeviceShaderDrawParameterFeatures *features =
				(VkPhysicalDeviceShaderDrawParameterFeatures*)ext;
			features->shaderDrawParameters = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
				(VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			features->protectedMemory = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
				(VkPhysicalDevice16BitStorageFeatures*)ext;
			bool enabled = pdevice->rad_info.chip_class >= VI;
			features->storageBuffer16BitAccess = enabled;
			features->uniformAndStorageBuffer16BitAccess = enabled;
			features->storagePushConstant16 = enabled;
			features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
				(VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			features->samplerYcbcrConversion = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
			features->shaderInputAttachmentArrayDynamicIndexing = true;
			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
			features->shaderUniformBufferArrayNonUniformIndexing = true;
			features->shaderSampledImageArrayNonUniformIndexing = true;
			features->shaderStorageBufferArrayNonUniformIndexing = true;
			features->shaderStorageImageArrayNonUniformIndexing = true;
			features->shaderInputAttachmentArrayNonUniformIndexing = true;
			features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
			features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
			features->descriptorBindingUniformBufferUpdateAfterBind = true;
			features->descriptorBindingSampledImageUpdateAfterBind = true;
			features->descriptorBindingStorageImageUpdateAfterBind = true;
			features->descriptorBindingStorageBufferUpdateAfterBind = true;
			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
			features->descriptorBindingUpdateUnusedWhilePending = true;
			features->descriptorBindingPartiallyBound = true;
			features->descriptorBindingVariableDescriptorCount = true;
			features->runtimeDescriptorArray = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
			features->transformFeedback = true;
			features->geometryStreams = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
			VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
				(VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
			features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
			features->memoryPriority = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferAddressFeaturesEXT *features =
				(VkPhysicalDeviceBufferAddressFeaturesEXT *)ext;
			features->bufferDeviceAddress = true;
			features->bufferDeviceAddressCaptureReplay = false;
			features->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
			features->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
			VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
				(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
			features->hostQueryReset = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
			VkPhysicalDevice8BitStorageFeaturesKHR *features =
				(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
			bool enabled = pdevice->rad_info.chip_class >= VI;
			features->storageBuffer8BitAccess = enabled;
			features->uniformAndStorageBuffer8BitAccess = enabled;
			features->storagePushConstant8 = enabled;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
			VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
				(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
			bool enabled = pdevice->rad_info.chip_class >= VI;
			features->shaderFloat16 = enabled && HAVE_LLVM >= 0x0800;
			features->shaderInt8 = enabled;
			break;
		}
		default:
			break;
		}
	}
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}
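
/*
 * Editorial note: extension feature structs are discovered by walking the
 * caller's pNext chain above. A hypothetical app-side query for one of the
 * structs handled in this switch (names are standard Vulkan):
 */
#if 0
static bool
example_has_8bit_storage(VkPhysicalDevice pdev)
{
	VkPhysicalDevice8BitStorageFeaturesKHR storage8 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR,
	};
	VkPhysicalDeviceFeatures2 features = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
		.pNext = &storage8,
	};
	vkGetPhysicalDeviceFeatures2(pdev, &features);
	return storage8.storageBuffer8BitAccess;
}
#endif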

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
	 * be at most 2 GiB. A combined image & sampler object counts as one of
	 * both. This limit is for the pipeline layout, not for the set layout,
	 * but there is no set limit, so we just set a pipeline limit. I don't
	 * think any app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
		(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
		 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
		 32 /* sampler, largest when combined with image */ +
		 64 /* sampled image */ +
		 64 /* storage image */);
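	/* Editorial note: the per-descriptor weights above sum to
	 * 32 + 32 + 32 + 64 + 64 = 224 bytes, so this works out to roughly
	 * 2^31 / 224, i.e. a little under 9.6 million descriptors per stage
	 * (the dynamic-buffer term in the numerator is negligible). */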

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
		.maxVertexInputBindings = MAX_VBS,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = { 2048, 2048, 2048 },
		.subPixelPrecisionBits = 8,
		.subTexelPrecisionBits = 8,
		.mipmapPrecisionBits = 8,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 8,
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 2,
		.pointSizeRange = { 0.0, 8192.0 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2                *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
			VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
			VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
			VkPhysicalDevicePointClippingProperties *properties =
				(VkPhysicalDevicePointClippingProperties*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
				(VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
				(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
			VkPhysicalDeviceSubgroupProperties *properties =
				(VkPhysicalDeviceSubgroupProperties*)ext;
			properties->subgroupSize = 64;
			properties->supportedStages = VK_SHADER_STAGE_ALL;
			properties->supportedOperations =
				VK_SUBGROUP_FEATURE_BASIC_BIT |
				VK_SUBGROUP_FEATURE_BALLOT_BIT |
				VK_SUBGROUP_FEATURE_QUAD_BIT |
				VK_SUBGROUP_FEATURE_VOTE_BIT;
			if (pdevice->rad_info.chip_class >= VI) {
				properties->supportedOperations |=
					VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
					VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
					VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
			}
			properties->quadOperationsInAllStages = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
			VkPhysicalDeviceMaintenance3Properties *properties =
				(VkPhysicalDeviceMaintenance3Properties*)ext;
			/* Make sure everything is addressable by a signed 32-bit int, and
			 * our largest descriptors are 96 bytes. */
			properties->maxPerSetDescriptors = (1ull << 31) / 96;
			/* Our buffer size fields allow only this much */
			properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
			VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
				(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
			/* GFX6-8 only support single channel min/max filter. */
			properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
			properties->filterMinmaxSingleComponentFormats = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

			/* Shader engines. */
			properties->shaderEngineCount =
				pdevice->rad_info.max_se;
			properties->shaderArraysPerEngineCount =
				pdevice->rad_info.max_sh_per_se;
			properties->computeUnitsPerShaderArray =
				pdevice->rad_info.num_good_cu_per_sh;
			properties->simdPerComputeUnit = 4;
			properties->wavefrontsPerSimd =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ||
				pdevice->rad_info.family == CHIP_POLARIS10 ||
				pdevice->rad_info.family == CHIP_POLARIS11 ||
				pdevice->rad_info.family == CHIP_POLARIS12 ||
				pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
			properties->wavefrontSize = 64;

			/* SGPR. */
			properties->sgprsPerSimd =
				ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
			properties->minSgprAllocation =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;
			properties->maxSgprAllocation =
				pdevice->rad_info.family == CHIP_TONGA ||
				pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
			properties->sgprAllocationGranularity =
				pdevice->rad_info.chip_class >= VI ? 16 : 8;

			/* VGPR. */
			properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
			properties->minVgprAllocation = 4;
			properties->maxVgprAllocation = 256;
			properties->vgprAllocationGranularity = 4;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
			properties->maxVertexAttribDivisor = UINT32_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
			VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
				(VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
			properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
			properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
			properties->shaderSampledImageArrayNonUniformIndexingNative = false;
			properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
			properties->shaderStorageImageArrayNonUniformIndexingNative = false;
			properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
			properties->robustBufferAccessUpdateAfterBind = false;
			properties->quadDivergentImplicitLod = false;

			size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
				(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
				 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
				 32 /* sampler, largest when combined with image */ +
				 64 /* sampled image */ +
				 64 /* storage image */);
			properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
			properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
			properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
			properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
			VkPhysicalDeviceProtectedMemoryProperties *properties =
				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
			properties->protectedNoFault = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
			properties->primitiveOverestimationSize = 0;
			properties->maxExtraPrimitiveOverestimationSize = 0;
			properties->extraPrimitiveOverestimationSizeGranularity = 0;
			properties->primitiveUnderestimation = VK_FALSE;
			properties->conservativePointAndLineRasterization = VK_FALSE;
			properties->degenerateTrianglesRasterized = VK_FALSE;
			properties->degenerateLinesRasterized = VK_FALSE;
			properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
			properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
			properties->pciDomain = pdevice->bus_info.domain;
			properties->pciBus = pdevice->bus_info.bus;
			properties->pciDevice = pdevice->bus_info.dev;
			properties->pciFunction = pdevice->bus_info.func;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
			VkPhysicalDeviceDriverPropertiesKHR *driver_props =
				(VkPhysicalDeviceDriverPropertiesKHR *) ext;

			driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
			memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
			strcpy(driver_props->driverName, "radv");

			memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
			snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
				 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
				 " (LLVM " MESA_LLVM_VERSION_STRING ")");

			driver_props->conformanceVersion = (VkConformanceVersionKHR) {
				.major = 1,
				.minor = 1,
				.subminor = 2,
				.patch = 0,
			};
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
			properties->maxTransformFeedbackStreamDataSize = 512;
			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
			properties->maxTransformFeedbackBufferDataStride = 512;
			properties->transformFeedbackQueries = true;
			properties->transformFeedbackStreamsLinesTriangles = false;
			properties->transformFeedbackRasterizationStreamSelect = false;
			properties->transformFeedbackDraw = true;
			break;
		}
		default:
			break;
		}
	}
}
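
/*
 * Editorial note: a hypothetical app-side query for the subgroup limits
 * reported in the switch above (subgroupSize is 64, matching the hardware
 * wavefront width; names are standard Vulkan):
 */
#if 0
static uint32_t
example_subgroup_size(VkPhysicalDevice pdev)
{
	VkPhysicalDeviceSubgroupProperties subgroup = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
	};
	VkPhysicalDeviceProperties2 props = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
		.pNext = &subgroup,
	};
	vkGetPhysicalDeviceProperties2(pdev, &props);
	return subgroup.subgroupSize; /* 64 on radv */
}
#endif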

static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT |
				      VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
					      VK_QUEUE_TRANSFER_BIT |
					      VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2                   *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
				  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
	VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	uint64_t gtt_size = device->rad_info.gart_size;
	uint64_t heap_budget, heap_usage;

	/* For all memory heaps, the computation of the budget is as follows:
	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
	 *
	 * The Vulkan spec 1.1.97 says that the budget should include any
	 * currently allocated device memory.
	 *
	 * Note that the application heap usages are not really accurate (e.g.
	 * in the presence of shared buffers).
	 */
	if (vram_size) {
		heap_usage = device->ws->query_value(device->ws,
						     RADEON_ALLOCATED_VRAM);

		heap_budget = vram_size -
			device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
			heap_usage;

		memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM] = heap_budget;
		memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM] = heap_usage;
	}

	if (visible_vram_size) {
		heap_usage = device->ws->query_value(device->ws,
						     RADEON_ALLOCATED_VRAM_VIS);

		heap_budget = visible_vram_size -
			device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
			heap_usage;

		memoryBudget->heapBudget[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_budget;
		memoryBudget->heapUsage[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = heap_usage;
	}

	if (gtt_size) {
		heap_usage = device->ws->query_value(device->ws,
						     RADEON_ALLOCATED_GTT);

		heap_budget = gtt_size -
			device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
			heap_usage;

		memoryBudget->heapBudget[RADV_MEM_HEAP_GTT] = heap_budget;
		memoryBudget->heapUsage[RADV_MEM_HEAP_GTT] = heap_usage;
	}

	/* The heapBudget and heapUsage values must be zero for array elements
	 * greater than or equal to
	 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
	 */
	for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
		memoryBudget->heapBudget[i] = 0;
		memoryBudget->heapUsage[i] = 0;
	}
}

void radv_GetPhysicalDeviceMemoryProperties2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2          *pMemoryProperties)
{
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
					       &pMemoryProperties->memoryProperties);

	VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
		vk_find_struct(pMemoryProperties->pNext,
			       PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
	if (memory_budget)
		radv_get_memory_budget_properties(physicalDevice, memory_budget);
}
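
/*
 * Editorial note: a hypothetical app-side query for the EXT_memory_budget
 * data filled in above; heapBudget/heapUsage are indexed like memoryHeaps:
 */
#if 0
static void
example_query_budget(VkPhysicalDevice pdev)
{
	VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
	};
	VkPhysicalDeviceMemoryProperties2 props = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
		.pNext = &budget,
	};
	vkGetPhysicalDeviceMemoryProperties2(pdev, &props);

	for (uint32_t i = 0; i < props.memoryProperties.memoryHeapCount; i++)
		fprintf(stderr, "heap %u: usage %llu budget %llu\n", i,
			(unsigned long long)budget.heapUsage[i],
			(unsigned long long)budget.heapBudget[i]);
}
#endif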
1474
1475 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1476 VkDevice _device,
1477 VkExternalMemoryHandleTypeFlagBits handleType,
1478 const void *pHostPointer,
1479 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
1480 {
1481 RADV_FROM_HANDLE(radv_device, device, _device);
1482
1483 switch (handleType)
1484 {
1485 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1486 const struct radv_physical_device *physical_device = device->physical_device;
1487 uint32_t memoryTypeBits = 0;
1488 for (uint32_t i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1489 if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1490 memoryTypeBits = (1 << i);
1491 break;
1492 }
1493 }
1494 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1495 return VK_SUCCESS;
1496 }
1497 default:
1498 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1499 }
1500 }
1501
1502 static enum radeon_ctx_priority
1503 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1504 {
1505 /* Default to MEDIUM when a specific global priority isn't requested */
1506 if (!pObj)
1507 return RADEON_CTX_PRIORITY_MEDIUM;
1508
1509 switch(pObj->globalPriority) {
1510 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1511 return RADEON_CTX_PRIORITY_REALTIME;
1512 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1513 return RADEON_CTX_PRIORITY_HIGH;
1514 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1515 return RADEON_CTX_PRIORITY_MEDIUM;
1516 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1517 return RADEON_CTX_PRIORITY_LOW;
1518 default:
1519 unreachable("Illegal global priority value");
1520 return RADEON_CTX_PRIORITY_INVALID;
1521 }
1522 }
1523
1524 static int
1525 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1526 uint32_t queue_family_index, int idx,
1527 VkDeviceQueueCreateFlags flags,
1528 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1529 {
1530 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1531 queue->device = device;
1532 queue->queue_family_index = queue_family_index;
1533 queue->queue_idx = idx;
1534 queue->priority = radv_get_queue_global_priority(global_priority);
1535 queue->flags = flags;
1536
1537 queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1538 if (!queue->hw_ctx)
1539 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1540
1541 return VK_SUCCESS;
1542 }
1543
1544 static void
1545 radv_queue_finish(struct radv_queue *queue)
1546 {
1547 if (queue->hw_ctx)
1548 queue->device->ws->ctx_destroy(queue->hw_ctx);
1549
1550 if (queue->initial_full_flush_preamble_cs)
1551 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1552 if (queue->initial_preamble_cs)
1553 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1554 if (queue->continue_preamble_cs)
1555 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1556 if (queue->descriptor_bo)
1557 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1558 if (queue->scratch_bo)
1559 queue->device->ws->buffer_destroy(queue->scratch_bo);
1560 if (queue->esgs_ring_bo)
1561 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1562 if (queue->gsvs_ring_bo)
1563 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1564 if (queue->tess_rings_bo)
1565 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1566 if (queue->compute_scratch_bo)
1567 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1568 }
1569
1570 static void
1571 radv_bo_list_init(struct radv_bo_list *bo_list)
1572 {
1573 pthread_mutex_init(&bo_list->mutex, NULL);
1574 bo_list->list.count = bo_list->capacity = 0;
1575 bo_list->list.bos = NULL;
1576 }
1577
1578 static void
1579 radv_bo_list_finish(struct radv_bo_list *bo_list)
1580 {
1581 free(bo_list->list.bos);
1582 pthread_mutex_destroy(&bo_list->mutex);
1583 }
1584
1585 static VkResult radv_bo_list_add(struct radv_device *device,
1586 struct radeon_winsys_bo *bo)
1587 {
1588 struct radv_bo_list *bo_list = &device->bo_list;
1589
1590 if (bo->is_local)
1591 return VK_SUCCESS;
1592
1593 if (unlikely(!device->use_global_bo_list))
1594 return VK_SUCCESS;
1595
1596 pthread_mutex_lock(&bo_list->mutex);
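/* Grow the array geometrically (doubling, starting at 4) so that
 * repeated additions stay amortized O(1). */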
1597 if (bo_list->list.count == bo_list->capacity) {
1598 unsigned capacity = MAX2(4, bo_list->capacity * 2);
1599 void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
1600
1601 if (!data) {
1602 pthread_mutex_unlock(&bo_list->mutex);
1603 return VK_ERROR_OUT_OF_HOST_MEMORY;
1604 }
1605
1606 bo_list->list.bos = (struct radeon_winsys_bo**)data;
1607 bo_list->capacity = capacity;
1608 }
1609
1610 bo_list->list.bos[bo_list->list.count++] = bo;
1611 pthread_mutex_unlock(&bo_list->mutex);
1612 return VK_SUCCESS;
1613 }
1614
1615 static void radv_bo_list_remove(struct radv_device *device,
1616 struct radeon_winsys_bo *bo)
1617 {
1618 struct radv_bo_list *bo_list = &device->bo_list;
1619
1620 if (bo->is_local)
1621 return;
1622
1623 if (unlikely(!device->use_global_bo_list))
1624 return;
1625
1626 pthread_mutex_lock(&bo_list->mutex);
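/* Submission order doesn't matter, so remove by moving the last
 * element into the hole instead of shifting the whole tail down. */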
1627 for(unsigned i = 0; i < bo_list->list.count; ++i) {
1628 if (bo_list->list.bos[i] == bo) {
1629 bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
1630 --bo_list->list.count;
1631 break;
1632 }
1633 }
1634 pthread_mutex_unlock(&bo_list->mutex);
1635 }
1636
1637 static void
1638 radv_device_init_gs_info(struct radv_device *device)
1639 {
1640 device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
1641 device->physical_device->rad_info.family);
1642 }
1643
1644 static int radv_get_device_extension_index(const char *name)
1645 {
1646 for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1647 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1648 return i;
1649 }
1650 return -1;
1651 }
1652
1653 static int
1654 radv_get_int_debug_option(const char *name, int default_value)
1655 {
1656 const char *str;
1657 int result;
1658
1659 str = getenv(name);
1660 if (!str) {
1661 result = default_value;
1662 } else {
1663 char *endptr;
1664
1665 result = strtol(str, &endptr, 0);
1666 if (str == endptr) {
1667 /* No digits found. */
1668 result = default_value;
1669 }
1670 }
1671
1672 return result;
1673 }
1674
1675 VkResult radv_CreateDevice(
1676 VkPhysicalDevice physicalDevice,
1677 const VkDeviceCreateInfo* pCreateInfo,
1678 const VkAllocationCallbacks* pAllocator,
1679 VkDevice* pDevice)
1680 {
1681 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1682 VkResult result;
1683 struct radv_device *device;
1684
1685 bool keep_shader_info = false;
1686
1687 /* Check enabled features */
1688 if (pCreateInfo->pEnabledFeatures) {
1689 VkPhysicalDeviceFeatures supported_features;
1690 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1691 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1692 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1693 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1694 for (uint32_t i = 0; i < num_features; i++) {
1695 if (enabled_feature[i] && !supported_feature[i])
1696 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1697 }
1698 }
1699
1700 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1701 sizeof(*device), 8,
1702 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1703 if (!device)
1704 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1705
1706 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1707 device->instance = physical_device->instance;
1708 device->physical_device = physical_device;
1709
1710 device->ws = physical_device->ws;
1711 if (pAllocator)
1712 device->alloc = *pAllocator;
1713 else
1714 device->alloc = physical_device->instance->alloc;
1715
1716 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1717 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1718 int index = radv_get_device_extension_index(ext_name);
1719 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1720 vk_free(&device->alloc, device);
1721 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1722 }
1723
1724 device->enabled_extensions.extensions[index] = true;
1725 }
1726
1727 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1728
1729 /* With update-after-bind we can't attach BOs to the command buffer
1730 * from the descriptor set anymore, so we have to use a global BO list.
1731 */
1732 device->use_global_bo_list =
1733 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
1734 device->enabled_extensions.EXT_descriptor_indexing ||
1735 device->enabled_extensions.EXT_buffer_device_address;
1736
1737 mtx_init(&device->shader_slab_mutex, mtx_plain);
1738 list_inithead(&device->shader_slabs);
1739
1740 radv_bo_list_init(&device->bo_list);
1741
1742 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1743 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1744 uint32_t qfi = queue_create->queueFamilyIndex;
1745 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1746 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1747
1748 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1749
1750 device->queues[qfi] = vk_alloc(&device->alloc,
1751 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1752 if (!device->queues[qfi]) {
1753 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1754 goto fail;
1755 }
1756
1757 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1758
1759 device->queue_count[qfi] = queue_create->queueCount;
1760
1761 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1762 result = radv_queue_init(device, &device->queues[qfi][q],
1763 qfi, q, queue_create->flags,
1764 global_priority);
1765 if (result != VK_SUCCESS)
1766 goto fail;
1767 }
1768 }
1769
1770 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1771 !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
1772
1773 /* Disabled and not implemented for now. */
1774 device->dfsm_allowed = device->pbb_allowed &&
1775 (device->physical_device->rad_info.family == CHIP_RAVEN ||
1776 device->physical_device->rad_info.family == CHIP_RAVEN2);
1777
1778 #ifdef ANDROID
1779 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1780 #endif
1781
1782 /* The maximum number of scratch waves. Scratch space isn't divided
1783 * evenly between CUs. The number is only a function of the number of CUs.
1784 * We can decrease the constant to decrease the scratch buffer size.
1785 *
1786 * device->scratch_waves must be >= the maximum possible size of
1787 * 1 threadgroup, so that the hw doesn't hang from being unable
1788 * to start any.
1789 *
1790 * The recommended value is 4 per CU at most. Higher numbers don't
1791 * bring much benefit, but they still occupy chip resources (think
1792 * async compute). I've seen ~2% performance difference between 4 and 32.
1793 */
1794 uint32_t max_threads_per_block = 2048;
1795 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1796 max_threads_per_block / 64);
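/* Example (hypothetical CU count): with 16 compute units this yields
 * MAX2(32 * 16, 2048 / 64) = 512 scratch waves. */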
1797
1798 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
1799
1800 if (device->physical_device->rad_info.chip_class >= CIK) {
1801 /* If the KMD allows it (there is a KMD hw register for it),
1802 * allow launching waves out-of-order.
1803 */
1804 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
1805 }
1806
1807 radv_device_init_gs_info(device);
1808
1809 device->tess_offchip_block_dw_size =
1810 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1811 device->has_distributed_tess =
1812 device->physical_device->rad_info.chip_class >= VI &&
1813 device->physical_device->rad_info.max_se >= 2;
1814
1815 if (getenv("RADV_TRACE_FILE")) {
1816 const char *filename = getenv("RADV_TRACE_FILE");
1817
1818 keep_shader_info = true;
1819
1820 if (!radv_init_trace(device))
1821 goto fail;
1822
1823 fprintf(stderr, "*****************************************************************************\n");
1824 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
1825 fprintf(stderr, "*****************************************************************************\n");
1826
1827 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
1828 radv_dump_enabled_options(device, stderr);
1829 }
1830
1831 device->keep_shader_info = keep_shader_info;
1832
1833 result = radv_device_init_meta(device);
1834 if (result != VK_SUCCESS)
1835 goto fail;
1836
1837 radv_device_init_msaa(device);
1838
1839 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1840 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
if (!device->empty_cs[family]) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
1841 switch (family) {
1842 case RADV_QUEUE_GENERAL:
1843 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1844 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1845 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1846 break;
1847 case RADV_QUEUE_COMPUTE:
1848 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1849 radeon_emit(device->empty_cs[family], 0);
1850 break;
1851 }
1852 device->ws->cs_finalize(device->empty_cs[family]);
1853 }
1854
1855 if (device->physical_device->rad_info.chip_class >= CIK)
1856 cik_create_gfx_config(device);
1857
1858 VkPipelineCacheCreateInfo ci;
1859 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1860 ci.pNext = NULL;
1861 ci.flags = 0;
1862 ci.pInitialData = NULL;
1863 ci.initialDataSize = 0;
1864 VkPipelineCache pc;
1865 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1866 &ci, NULL, &pc);
1867 if (result != VK_SUCCESS)
1868 goto fail_meta;
1869
1870 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1871
1872 device->force_aniso =
1873 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
1874 if (device->force_aniso >= 0) {
1875 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
1876 1 << util_logbase2(device->force_aniso));
1877 }
1878
1879 *pDevice = radv_device_to_handle(device);
1880 return VK_SUCCESS;
1881
1882 fail_meta:
1883 radv_device_finish_meta(device);
1884 fail:
1885 radv_bo_list_finish(&device->bo_list);
1886
1887 if (device->trace_bo)
1888 device->ws->buffer_destroy(device->trace_bo);
1889
1890 if (device->gfx_init)
1891 device->ws->buffer_destroy(device->gfx_init);
1892
1893 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1894 for (unsigned q = 0; q < device->queue_count[i]; q++)
1895 radv_queue_finish(&device->queues[i][q]);
1896 if (device->queue_count[i])
1897 vk_free(&device->alloc, device->queues[i]);
1898 }
1899
1900 vk_free(&device->alloc, device);
1901 return result;
1902 }
1903
1904 void radv_DestroyDevice(
1905 VkDevice _device,
1906 const VkAllocationCallbacks* pAllocator)
1907 {
1908 RADV_FROM_HANDLE(radv_device, device, _device);
1909
1910 if (!device)
1911 return;
1912
1913 if (device->trace_bo)
1914 device->ws->buffer_destroy(device->trace_bo);
1915
1916 if (device->gfx_init)
1917 device->ws->buffer_destroy(device->gfx_init);
1918
1919 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1920 for (unsigned q = 0; q < device->queue_count[i]; q++)
1921 radv_queue_finish(&device->queues[i][q]);
1922 if (device->queue_count[i])
1923 vk_free(&device->alloc, device->queues[i]);
1924 if (device->empty_cs[i])
1925 device->ws->cs_destroy(device->empty_cs[i]);
1926 }
1927 radv_device_finish_meta(device);
1928
1929 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1930 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1931
1932 radv_destroy_shader_slabs(device);
1933
1934 radv_bo_list_finish(&device->bo_list);
1935 vk_free(&device->alloc, device);
1936 }
1937
1938 VkResult radv_EnumerateInstanceLayerProperties(
1939 uint32_t* pPropertyCount,
1940 VkLayerProperties* pProperties)
1941 {
1942 if (pProperties == NULL) {
1943 *pPropertyCount = 0;
1944 return VK_SUCCESS;
1945 }
1946
1947 /* None supported at this time */
1948 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1949 }
1950
1951 VkResult radv_EnumerateDeviceLayerProperties(
1952 VkPhysicalDevice physicalDevice,
1953 uint32_t* pPropertyCount,
1954 VkLayerProperties* pProperties)
1955 {
1956 if (pProperties == NULL) {
1957 *pPropertyCount = 0;
1958 return VK_SUCCESS;
1959 }
1960
1961 /* None supported at this time */
1962 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1963 }
1964
1965 void radv_GetDeviceQueue2(
1966 VkDevice _device,
1967 const VkDeviceQueueInfo2* pQueueInfo,
1968 VkQueue* pQueue)
1969 {
1970 RADV_FROM_HANDLE(radv_device, device, _device);
1971 struct radv_queue *queue;
1972
1973 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
1974 if (pQueueInfo->flags != queue->flags) {
1975 /* From the Vulkan 1.1.70 spec:
1976 *
1977 * "The queue returned by vkGetDeviceQueue2 must have the same
1978 * flags value from this structure as that used at device
1979 * creation time in a VkDeviceQueueCreateInfo instance. If no
1980 * matching flags were specified at device creation time then
1981 * pQueue will return VK_NULL_HANDLE."
1982 */
1983 *pQueue = VK_NULL_HANDLE;
1984 return;
1985 }
1986
1987 *pQueue = radv_queue_to_handle(queue);
1988 }
1989
1990 void radv_GetDeviceQueue(
1991 VkDevice _device,
1992 uint32_t queueFamilyIndex,
1993 uint32_t queueIndex,
1994 VkQueue* pQueue)
1995 {
1996 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
1997 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
1998 .queueFamilyIndex = queueFamilyIndex,
1999 .queueIndex = queueIndex
2000 };
2001
2002 radv_GetDeviceQueue2(_device, &info, pQueue);
2003 }
2004
2005 static void
2006 fill_geom_tess_rings(struct radv_queue *queue,
2007 uint32_t *map,
2008 bool add_sample_positions,
2009 uint32_t esgs_ring_size,
2010 struct radeon_winsys_bo *esgs_ring_bo,
2011 uint32_t gsvs_ring_size,
2012 struct radeon_winsys_bo *gsvs_ring_bo,
2013 uint32_t tess_factor_ring_size,
2014 uint32_t tess_offchip_ring_offset,
2015 uint32_t tess_offchip_ring_size,
2016 struct radeon_winsys_bo *tess_rings_bo)
2017 {
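/* The first 4 dwords of the descriptor BO hold the scratch rsrc
 * (2 dwords + 2 dwords of padding); each ring section below then
 * consumes two 4-dword buffer descriptors, hence the desc += 8
 * steps. */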
2018 uint32_t *desc = &map[4];
2019
2020 if (esgs_ring_bo) {
2021 uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
2022
2023 /* stride 0, num records = size, add tid, swizzle, elsize 4,
2024 index stride 64 */
2025 desc[0] = esgs_va;
2026 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
2027 S_008F04_STRIDE(0) |
2028 S_008F04_SWIZZLE_ENABLE(true);
2029 desc[2] = esgs_ring_size;
2030 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2031 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2032 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2033 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2034 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2035 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2036 S_008F0C_ELEMENT_SIZE(1) |
2037 S_008F0C_INDEX_STRIDE(3) |
2038 S_008F0C_ADD_TID_ENABLE(true);
2039
2040 /* GS entry for ES->GS ring */
2041 /* stride 0, num records = size, elsize 0,
2042 index stride 0 */
2043 desc[4] = esgs_va;
2044 desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
2045 S_008F04_STRIDE(0) |
2046 S_008F04_SWIZZLE_ENABLE(false);
2047 desc[6] = esgs_ring_size;
2048 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2049 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2050 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2051 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2052 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2053 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2054 S_008F0C_ELEMENT_SIZE(0) |
2055 S_008F0C_INDEX_STRIDE(0) |
2056 S_008F0C_ADD_TID_ENABLE(false);
2057 }
2058
2059 desc += 8;
2060
2061 if (gsvs_ring_bo) {
2062 uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
2063
2064 /* VS entry for GS->VS ring */
2065 /* stride 0, num records = size, elsize 0,
2066 index stride 0 */
2067 desc[0] = gsvs_va;
2068 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
2069 S_008F04_STRIDE(0) |
2070 S_008F04_SWIZZLE_ENABLE(false);
2071 desc[2] = gsvs_ring_size;
2072 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2073 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2074 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2075 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2076 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2077 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2078 S_008F0C_ELEMENT_SIZE(0) |
2079 S_008F0C_INDEX_STRIDE(0) |
2080 S_008F0C_ADD_TID_ENABLE(false);
2081
2082 /* stride gsvs_itemsize, num records 64
2083 elsize 4, index stride 16 */
2084 /* shader will patch stride and desc[2] */
2085 desc[4] = gsvs_va;
2086 desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
2087 S_008F04_STRIDE(0) |
2088 S_008F04_SWIZZLE_ENABLE(true);
2089 desc[6] = 0;
2090 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2091 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2092 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2093 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2094 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2095 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2096 S_008F0C_ELEMENT_SIZE(1) |
2097 S_008F0C_INDEX_STRIDE(1) |
2098 S_008F0C_ADD_TID_ENABLE(true);
2099 }
2100
2101 desc += 8;
2102
2103 if (tess_rings_bo) {
2104 uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
2105 uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
2106
2107 desc[0] = tess_va;
2108 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32) |
2109 S_008F04_STRIDE(0) |
2110 S_008F04_SWIZZLE_ENABLE(false);
2111 desc[2] = tess_factor_ring_size;
2112 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2113 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2114 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2115 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2116 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2117 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2118 S_008F0C_ELEMENT_SIZE(0) |
2119 S_008F0C_INDEX_STRIDE(0) |
2120 S_008F0C_ADD_TID_ENABLE(false);
2121
2122 desc[4] = tess_offchip_va;
2123 desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
2124 S_008F04_STRIDE(0) |
2125 S_008F04_SWIZZLE_ENABLE(false);
2126 desc[6] = tess_offchip_ring_size;
2127 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2128 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2129 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2130 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2131 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2132 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2133 S_008F0C_ELEMENT_SIZE(0) |
2134 S_008F0C_INDEX_STRIDE(0) |
2135 S_008F0C_ADD_TID_ENABLE(false);
2136 }
2137
2138 desc += 8;
2139
2140 if (add_sample_positions) {
2141 /* add sample positions after all rings */
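/* Each sample position is two floats (8 bytes), hence the
 * 8/16/32/64-byte copies for the 1x/2x/4x/8x sets. */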
2142 memcpy(desc, queue->device->sample_locations_1x, 8);
2143 desc += 2;
2144 memcpy(desc, queue->device->sample_locations_2x, 16);
2145 desc += 4;
2146 memcpy(desc, queue->device->sample_locations_4x, 32);
2147 desc += 8;
2148 memcpy(desc, queue->device->sample_locations_8x, 64);
2149 }
2150 }
2151
2152 static unsigned
2153 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
2154 {
2155 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
2156 device->physical_device->rad_info.family != CHIP_CARRIZO &&
2157 device->physical_device->rad_info.family != CHIP_STONEY;
2158 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
2159 unsigned max_offchip_buffers;
2160 unsigned offchip_granularity;
2161 unsigned hs_offchip_param;
2162
2163 /*
2164 * Per RadeonSI:
2165 * This must be one less than the maximum number due to a hw limitation.
2166 * Various hardware bugs in SI, CIK, and GFX9 need this.
2167 *
2168 * Per AMDVLK:
2169 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
2170 * Gfx7 should limit max_offchip_buffers to 508
2171 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
2172 *
2173 * Follow AMDVLK here.
2174 */
2175 if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
2176 device->physical_device->rad_info.chip_class == CIK ||
2177 device->physical_device->rad_info.chip_class == SI)
2178 --max_offchip_buffers_per_se;
2179
2180 max_offchip_buffers = max_offchip_buffers_per_se *
2181 device->physical_device->rad_info.max_se;
2182
2183 /* Hawaii has a bug with offchip buffers > 256 that can be worked
2184 * around by setting 4K granularity.
2185 */
2186 if (device->tess_offchip_block_dw_size == 4096) {
2187 assert(device->physical_device->rad_info.family == CHIP_HAWAII);
2188 offchip_granularity = V_03093C_X_4K_DWORDS;
2189 } else {
2190 assert(device->tess_offchip_block_dw_size == 8192);
2191 offchip_granularity = V_03093C_X_8K_DWORDS;
2192 }
2193
2194 switch (device->physical_device->rad_info.chip_class) {
2195 case SI:
2196 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
2197 break;
2198 case CIK:
2199 case VI:
2200 case GFX9:
2201 default:
2202 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
2203 break;
2204 }
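/* Example, per the AMDVLK note above: Vega10 with 4 SEs and 127
 * buffers per SE gets 4 * 127 = 508, which the clamp leaves
 * unchanged. */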
2205
2206 *max_offchip_buffers_p = max_offchip_buffers;
2207 if (device->physical_device->rad_info.chip_class >= CIK) {
2208 if (device->physical_device->rad_info.chip_class >= VI)
2209 --max_offchip_buffers;
2210 hs_offchip_param =
2211 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
2212 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
2213 } else {
2214 hs_offchip_param =
2215 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
2216 }
2217 return hs_offchip_param;
2218 }
2219
2220 static void
2221 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2222 struct radeon_winsys_bo *esgs_ring_bo,
2223 uint32_t esgs_ring_size,
2224 struct radeon_winsys_bo *gsvs_ring_bo,
2225 uint32_t gsvs_ring_size)
2226 {
2227 if (!esgs_ring_bo && !gsvs_ring_bo)
2228 return;
2229
2230 if (esgs_ring_bo)
2231 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
2232
2233 if (gsvs_ring_bo)
2234 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
2235
2236 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2237 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2238 radeon_emit(cs, esgs_ring_size >> 8);
2239 radeon_emit(cs, gsvs_ring_size >> 8);
2240 } else {
2241 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2242 radeon_emit(cs, esgs_ring_size >> 8);
2243 radeon_emit(cs, gsvs_ring_size >> 8);
2244 }
2245 }
2246
2247 static void
2248 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2249 unsigned hs_offchip_param, unsigned tf_ring_size,
2250 struct radeon_winsys_bo *tess_rings_bo)
2251 {
2252 uint64_t tf_va;
2253
2254 if (!tess_rings_bo)
2255 return;
2256
2257 tf_va = radv_buffer_get_va(tess_rings_bo);
2258
2259 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
2260
2261 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
2262 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
2263 S_030938_SIZE(tf_ring_size / 4));
2264 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
2265 tf_va >> 8);
2266 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
2267 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
2268 S_030944_BASE_HI(tf_va >> 40));
2269 }
2270 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
2271 hs_offchip_param);
2272 } else {
2273 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
2274 S_008988_SIZE(tf_ring_size / 4));
2275 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
2276 tf_va >> 8);
2277 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
2278 hs_offchip_param);
2279 }
2280 }
2281
2282 static void
2283 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2284 struct radeon_winsys_bo *compute_scratch_bo)
2285 {
2286 uint64_t scratch_va;
2287
2288 if (!compute_scratch_bo)
2289 return;
2290
2291 scratch_va = radv_buffer_get_va(compute_scratch_bo);
2292
2293 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2294
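/* The compute shader is expected to consume the scratch buffer
 * through the first two COMPUTE_USER_DATA SGPRs: the VA low bits
 * plus a swizzled rsrc word carrying the address high bits. */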
2295 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2296 radeon_emit(cs, scratch_va);
2297 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2298 S_008F04_SWIZZLE_ENABLE(1));
2299 }
2300
2301 static void
2302 radv_emit_global_shader_pointers(struct radv_queue *queue,
2303 struct radeon_cmdbuf *cs,
2304 struct radeon_winsys_bo *descriptor_bo)
2305 {
2306 uint64_t va;
2307
2308 if (!descriptor_bo)
2309 return;
2310
2311 va = radv_buffer_get_va(descriptor_bo);
2312
2313 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2314
2315 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
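/* On GFX9 the LS/HS and ES/GS hardware stages are merged, so only
 * four stages remain and the HS/GS user-data registers live at
 * different offsets than on older chips. */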
2316 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2317 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2318 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2319 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2320
2321 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2322 radv_emit_shader_pointer(queue->device, cs, regs[i],
2323 va, true);
2324 }
2325 } else {
2326 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2327 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2328 R_00B230_SPI_SHADER_USER_DATA_GS_0,
2329 R_00B330_SPI_SHADER_USER_DATA_ES_0,
2330 R_00B430_SPI_SHADER_USER_DATA_HS_0,
2331 R_00B530_SPI_SHADER_USER_DATA_LS_0};
2332
2333 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2334 radv_emit_shader_pointer(queue->device, cs, regs[i],
2335 va, true);
2336 }
2337 }
2338 }
2339
2340 static void
2341 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2342 {
2343 struct radv_device *device = queue->device;
2344
2345 if (device->gfx_init) {
2346 uint64_t va = radv_buffer_get_va(device->gfx_init);
2347
2348 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
2349 radeon_emit(cs, va);
2350 radeon_emit(cs, va >> 32);
2351 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
2352
2353 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
2354 } else {
2355 struct radv_physical_device *physical_device = device->physical_device;
2356 si_emit_graphics(physical_device, cs);
2357 }
2358 }
2359
2360 static void
2361 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2362 {
2363 struct radv_physical_device *physical_device = queue->device->physical_device;
2364 si_emit_compute(physical_device, cs);
2365 }
2366
2367 static VkResult
2368 radv_get_preamble_cs(struct radv_queue *queue,
2369 uint32_t scratch_size,
2370 uint32_t compute_scratch_size,
2371 uint32_t esgs_ring_size,
2372 uint32_t gsvs_ring_size,
2373 bool needs_tess_rings,
2374 bool needs_sample_positions,
2375 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
2376 struct radeon_cmdbuf **initial_preamble_cs,
2377 struct radeon_cmdbuf **continue_preamble_cs)
2378 {
2379 struct radeon_winsys_bo *scratch_bo = NULL;
2380 struct radeon_winsys_bo *descriptor_bo = NULL;
2381 struct radeon_winsys_bo *compute_scratch_bo = NULL;
2382 struct radeon_winsys_bo *esgs_ring_bo = NULL;
2383 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
2384 struct radeon_winsys_bo *tess_rings_bo = NULL;
2385 struct radeon_cmdbuf *dest_cs[3] = {0};
2386 bool add_tess_rings = false, add_sample_positions = false;
2387 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
2388 unsigned max_offchip_buffers;
2389 unsigned hs_offchip_param = 0;
2390 unsigned tess_offchip_ring_offset;
2391 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
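/* Rings and scratch are GPU-internal: never CPU-mapped and never
 * shared across processes, hence the flags above. */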
2392 if (!queue->has_tess_rings) {
2393 if (needs_tess_rings)
2394 add_tess_rings = true;
2395 }
2396 if (!queue->has_sample_positions) {
2397 if (needs_sample_positions)
2398 add_sample_positions = true;
2399 }
2400 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
2401 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
2402 &max_offchip_buffers);
2403 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
2404 tess_offchip_ring_size = max_offchip_buffers *
2405 queue->device->tess_offchip_block_dw_size * 4;
2406
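/* If everything this submission needs is already at least as large
 * as what the queue allocated before, reuse the cached preambles
 * as-is. */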
2407 if (scratch_size <= queue->scratch_size &&
2408 compute_scratch_size <= queue->compute_scratch_size &&
2409 esgs_ring_size <= queue->esgs_ring_size &&
2410 gsvs_ring_size <= queue->gsvs_ring_size &&
2411 !add_tess_rings && !add_sample_positions &&
2412 queue->initial_preamble_cs) {
2413 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2414 *initial_preamble_cs = queue->initial_preamble_cs;
2415 *continue_preamble_cs = queue->continue_preamble_cs;
2416 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2417 *continue_preamble_cs = NULL;
2418 return VK_SUCCESS;
2419 }
2420
2421 if (scratch_size > queue->scratch_size) {
2422 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2423 scratch_size,
2424 4096,
2425 RADEON_DOMAIN_VRAM,
2426 ring_bo_flags,
2427 RADV_BO_PRIORITY_SCRATCH);
2428 if (!scratch_bo)
2429 goto fail;
2430 } else
2431 scratch_bo = queue->scratch_bo;
2432
2433 if (compute_scratch_size > queue->compute_scratch_size) {
2434 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2435 compute_scratch_size,
2436 4096,
2437 RADEON_DOMAIN_VRAM,
2438 ring_bo_flags,
2439 RADV_BO_PRIORITY_SCRATCH);
2440 if (!compute_scratch_bo)
2441 goto fail;
2442
2443 } else
2444 compute_scratch_bo = queue->compute_scratch_bo;
2445
2446 if (esgs_ring_size > queue->esgs_ring_size) {
2447 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2448 esgs_ring_size,
2449 4096,
2450 RADEON_DOMAIN_VRAM,
2451 ring_bo_flags,
2452 RADV_BO_PRIORITY_SCRATCH);
2453 if (!esgs_ring_bo)
2454 goto fail;
2455 } else {
2456 esgs_ring_bo = queue->esgs_ring_bo;
2457 esgs_ring_size = queue->esgs_ring_size;
2458 }
2459
2460 if (gsvs_ring_size > queue->gsvs_ring_size) {
2461 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2462 gsvs_ring_size,
2463 4096,
2464 RADEON_DOMAIN_VRAM,
2465 ring_bo_flags,
2466 RADV_BO_PRIORITY_SCRATCH);
2467 if (!gsvs_ring_bo)
2468 goto fail;
2469 } else {
2470 gsvs_ring_bo = queue->gsvs_ring_bo;
2471 gsvs_ring_size = queue->gsvs_ring_size;
2472 }
2473
2474 if (add_tess_rings) {
2475 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
2476 tess_offchip_ring_offset + tess_offchip_ring_size,
2477 256,
2478 RADEON_DOMAIN_VRAM,
2479 ring_bo_flags,
2480 RADV_BO_PRIORITY_SCRATCH);
2481 if (!tess_rings_bo)
2482 goto fail;
2483 } else {
2484 tess_rings_bo = queue->tess_rings_bo;
2485 }
2486
2487 if (scratch_bo != queue->scratch_bo ||
2488 esgs_ring_bo != queue->esgs_ring_bo ||
2489 gsvs_ring_bo != queue->gsvs_ring_bo ||
2490 tess_rings_bo != queue->tess_rings_bo ||
2491 add_sample_positions) {
2492 uint32_t size = 0;
2493 if (gsvs_ring_bo || esgs_ring_bo ||
2494 tess_rings_bo || add_sample_positions) {
2495 size = 112; /* 2 dwords + 2 padding dwords + 6 descriptors * 4 dwords */
2496 if (add_sample_positions)
2497 size += 128; /* 64+32+16+8 = 120 bytes of sample positions, padded to 128 */
2498 }
2499 else if (scratch_bo)
2500 size = 8; /* 2 dword */
2501
2502 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
2503 size,
2504 4096,
2505 RADEON_DOMAIN_VRAM,
2506 RADEON_FLAG_CPU_ACCESS |
2507 RADEON_FLAG_NO_INTERPROCESS_SHARING |
2508 RADEON_FLAG_READ_ONLY,
2509 RADV_BO_PRIORITY_DESCRIPTOR);
2510 if (!descriptor_bo)
2511 goto fail;
2512 } else
2513 descriptor_bo = queue->descriptor_bo;
2514
2515 if (descriptor_bo != queue->descriptor_bo) {
2516 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2517
2518 if (scratch_bo) {
2519 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2520 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2521 S_008F04_SWIZZLE_ENABLE(1);
2522 map[0] = scratch_va;
2523 map[1] = rsrc1;
2524 }
2525
2526 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
2527 fill_geom_tess_rings(queue, map, add_sample_positions,
2528 esgs_ring_size, esgs_ring_bo,
2529 gsvs_ring_size, gsvs_ring_bo,
2530 tess_factor_ring_size,
2531 tess_offchip_ring_offset,
2532 tess_offchip_ring_size,
2533 tess_rings_bo);
2534
2535 queue->device->ws->buffer_unmap(descriptor_bo);
2536 }
2537
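/* Build the three preamble variants: dest_cs[0] additionally
 * performs a full cache flush, dest_cs[1] only invalidates caches,
 * and dest_cs[2] is the flush-free variant used when chaining
 * continued submissions. */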
2538 for(int i = 0; i < 3; ++i) {
2539 struct radeon_cmdbuf *cs = NULL;
2540 cs = queue->device->ws->cs_create(queue->device->ws,
2541 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2542 if (!cs)
2543 goto fail;
2544
2545 dest_cs[i] = cs;
2546
2547 if (scratch_bo)
2548 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
2549
2550 /* Emit initial configuration. */
2551 switch (queue->queue_family_index) {
2552 case RADV_QUEUE_GENERAL:
2553 radv_init_graphics_state(cs, queue);
2554 break;
2555 case RADV_QUEUE_COMPUTE:
2556 radv_init_compute_state(cs, queue);
2557 break;
2558 case RADV_QUEUE_TRANSFER:
2559 break;
2560 }
2561
2562 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
2563 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2564 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2565 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2566 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2567 }
2568
2569 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
2570 gsvs_ring_bo, gsvs_ring_size);
2571 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
2572 tess_factor_ring_size, tess_rings_bo);
2573 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
2574 radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
2575
2576 if (i == 0) {
2577 si_cs_emit_cache_flush(cs,
2578 queue->device->physical_device->rad_info.chip_class,
2579 NULL, 0,
2580 queue->queue_family_index == RING_COMPUTE &&
2581 queue->device->physical_device->rad_info.chip_class >= CIK,
2582 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2583 RADV_CMD_FLAG_INV_ICACHE |
2584 RADV_CMD_FLAG_INV_SMEM_L1 |
2585 RADV_CMD_FLAG_INV_VMEM_L1 |
2586 RADV_CMD_FLAG_INV_GLOBAL_L2 |
2587 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2588 } else if (i == 1) {
2589 si_cs_emit_cache_flush(cs,
2590 queue->device->physical_device->rad_info.chip_class,
2591 NULL, 0,
2592 queue->queue_family_index == RING_COMPUTE &&
2593 queue->device->physical_device->rad_info.chip_class >= CIK,
2594 RADV_CMD_FLAG_INV_ICACHE |
2595 RADV_CMD_FLAG_INV_SMEM_L1 |
2596 RADV_CMD_FLAG_INV_VMEM_L1 |
2597 RADV_CMD_FLAG_INV_GLOBAL_L2 |
2598 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2599 }
2600
2601 if (!queue->device->ws->cs_finalize(cs))
2602 goto fail;
2603 }
2604
2605 if (queue->initial_full_flush_preamble_cs)
2606 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2607
2608 if (queue->initial_preamble_cs)
2609 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2610
2611 if (queue->continue_preamble_cs)
2612 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2613
2614 queue->initial_full_flush_preamble_cs = dest_cs[0];
2615 queue->initial_preamble_cs = dest_cs[1];
2616 queue->continue_preamble_cs = dest_cs[2];
2617
2618 if (scratch_bo != queue->scratch_bo) {
2619 if (queue->scratch_bo)
2620 queue->device->ws->buffer_destroy(queue->scratch_bo);
2621 queue->scratch_bo = scratch_bo;
2622 queue->scratch_size = scratch_size;
2623 }
2624
2625 if (compute_scratch_bo != queue->compute_scratch_bo) {
2626 if (queue->compute_scratch_bo)
2627 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2628 queue->compute_scratch_bo = compute_scratch_bo;
2629 queue->compute_scratch_size = compute_scratch_size;
2630 }
2631
2632 if (esgs_ring_bo != queue->esgs_ring_bo) {
2633 if (queue->esgs_ring_bo)
2634 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2635 queue->esgs_ring_bo = esgs_ring_bo;
2636 queue->esgs_ring_size = esgs_ring_size;
2637 }
2638
2639 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2640 if (queue->gsvs_ring_bo)
2641 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2642 queue->gsvs_ring_bo = gsvs_ring_bo;
2643 queue->gsvs_ring_size = gsvs_ring_size;
2644 }
2645
2646 if (tess_rings_bo != queue->tess_rings_bo) {
2647 queue->tess_rings_bo = tess_rings_bo;
2648 queue->has_tess_rings = true;
2649 }
2650
2651 if (descriptor_bo != queue->descriptor_bo) {
2652 if (queue->descriptor_bo)
2653 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2654
2655 queue->descriptor_bo = descriptor_bo;
2656 }
2657
2658 if (add_sample_positions)
2659 queue->has_sample_positions = true;
2660
2661 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2662 *initial_preamble_cs = queue->initial_preamble_cs;
2663 *continue_preamble_cs = queue->continue_preamble_cs;
2664 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2665 *continue_preamble_cs = NULL;
2666 return VK_SUCCESS;
2667 fail:
2668 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2669 if (dest_cs[i])
2670 queue->device->ws->cs_destroy(dest_cs[i]);
2671 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2672 queue->device->ws->buffer_destroy(descriptor_bo);
2673 if (scratch_bo && scratch_bo != queue->scratch_bo)
2674 queue->device->ws->buffer_destroy(scratch_bo);
2675 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2676 queue->device->ws->buffer_destroy(compute_scratch_bo);
2677 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2678 queue->device->ws->buffer_destroy(esgs_ring_bo);
2679 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2680 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2681 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2682 queue->device->ws->buffer_destroy(tess_rings_bo);
2683 return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2684 }
2685
2686 static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
2687 struct radv_winsys_sem_counts *counts,
2688 int num_sems,
2689 const VkSemaphore *sems,
2690 VkFence _fence,
2691 bool reset_temp)
2692 {
2693 int syncobj_idx = 0, sem_idx = 0;
2694
2695 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2696 return VK_SUCCESS;
2697
2698 for (uint32_t i = 0; i < num_sems; i++) {
2699 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2700
2701 if (sem->temp_syncobj || sem->syncobj)
2702 counts->syncobj_count++;
2703 else
2704 counts->sem_count++;
2705 }
2706
2707 if (_fence != VK_NULL_HANDLE) {
2708 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2709 if (fence->temp_syncobj || fence->syncobj)
2710 counts->syncobj_count++;
2711 }
2712
2713 if (counts->syncobj_count) {
2714 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2715 if (!counts->syncobj)
2716 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2717 }
2718
2719 if (counts->sem_count) {
2720 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2721 if (!counts->sem) {
2722 free(counts->syncobj);
2723 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2724 }
2725 }
2726
2727 for (uint32_t i = 0; i < num_sems; i++) {
2728 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2729
2730 if (sem->temp_syncobj) {
2731 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2732 }
2733 else if (sem->syncobj)
2734 counts->syncobj[syncobj_idx++] = sem->syncobj;
2735 else {
2736 assert(sem->sem);
2737 counts->sem[sem_idx++] = sem->sem;
2738 }
2739 }
2740
2741 if (_fence != VK_NULL_HANDLE) {
2742 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2743 if (fence->temp_syncobj)
2744 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2745 else if (fence->syncobj)
2746 counts->syncobj[syncobj_idx++] = fence->syncobj;
2747 }
2748
2749 return VK_SUCCESS;
2750 }
2751
2752 static void
2753 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2754 {
2755 free(sem_info->wait.syncobj);
2756 free(sem_info->wait.sem);
2757 free(sem_info->signal.syncobj);
2758 free(sem_info->signal.sem);
2759 }
2760
2761
2762 static void radv_free_temp_syncobjs(struct radv_device *device,
2763 int num_sems,
2764 const VkSemaphore *sems)
2765 {
2766 for (uint32_t i = 0; i < num_sems; i++) {
2767 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2768
2769 if (sem->temp_syncobj) {
2770 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
2771 sem->temp_syncobj = 0;
2772 }
2773 }
2774 }
2775
2776 static VkResult
2777 radv_alloc_sem_info(struct radv_instance *instance,
2778 struct radv_winsys_sem_info *sem_info,
2779 int num_wait_sems,
2780 const VkSemaphore *wait_sems,
2781 int num_signal_sems,
2782 const VkSemaphore *signal_sems,
2783 VkFence fence)
2784 {
2785 VkResult ret;
2786 memset(sem_info, 0, sizeof(*sem_info));
2787
2788 ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
2789 if (ret)
2790 return ret;
2791 ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
2792 if (ret)
2793 radv_free_sem_info(sem_info);
2794
2795 /* caller can override these */
2796 sem_info->cs_emit_wait = true;
2797 sem_info->cs_emit_signal = true;
2798 return ret;
2799 }
2800
2801 /* Signals the fence as soon as all work currently queued has completed. */
2802 static VkResult radv_signal_fence(struct radv_queue *queue,
2803 struct radv_fence *fence)
2804 {
2805 int ret;
2806 VkResult result;
2807 struct radv_winsys_sem_info sem_info;
2808
2809 result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
2810 radv_fence_to_handle(fence));
2811 if (result != VK_SUCCESS)
2812 return result;
2813
2814 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2815 &queue->device->empty_cs[queue->queue_family_index],
2816 1, NULL, NULL, &sem_info, NULL,
2817 false, fence->fence);
2818 radv_free_sem_info(&sem_info);
2819
2820 if (ret)
2821 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
2822
2823 return VK_SUCCESS;
2824 }
2825
2826 VkResult radv_QueueSubmit(
2827 VkQueue _queue,
2828 uint32_t submitCount,
2829 const VkSubmitInfo* pSubmits,
2830 VkFence _fence)
2831 {
2832 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2833 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2834 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2835 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
2836 int ret;
2837 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
2838 uint32_t scratch_size = 0;
2839 uint32_t compute_scratch_size = 0;
2840 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
2841 struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
2842 VkResult result;
2843 bool fence_emitted = false;
2844 bool tess_rings_needed = false;
2845 bool sample_positions_needed = false;
2846
2847 /* Do this first so failing to allocate scratch buffers can't result in
2848 * partially executed submissions. */
2849 for (uint32_t i = 0; i < submitCount; i++) {
2850 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2851 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2852 pSubmits[i].pCommandBuffers[j]);
2853
2854 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
2855 compute_scratch_size = MAX2(compute_scratch_size,
2856 cmd_buffer->compute_scratch_size_needed);
2857 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
2858 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
2859 tess_rings_needed |= cmd_buffer->tess_rings_needed;
2860 sample_positions_needed |= cmd_buffer->sample_positions_needed;
2861 }
2862 }
2863
2864 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
2865 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
2866 sample_positions_needed, &initial_flush_preamble_cs,
2867 &initial_preamble_cs, &continue_preamble_cs);
2868 if (result != VK_SUCCESS)
2869 return result;
2870
2871 for (uint32_t i = 0; i < submitCount; i++) {
2872 struct radeon_cmdbuf **cs_array;
2873 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
2874 bool can_patch = true;
2875 uint32_t advance;
2876 struct radv_winsys_sem_info sem_info;
2877
2878 result = radv_alloc_sem_info(queue->device->instance,
2879 &sem_info,
2880 pSubmits[i].waitSemaphoreCount,
2881 pSubmits[i].pWaitSemaphores,
2882 pSubmits[i].signalSemaphoreCount,
2883 pSubmits[i].pSignalSemaphores,
2884 _fence);
2885 if (result != VK_SUCCESS)
2886 return result;
2887
2888 if (!pSubmits[i].commandBufferCount) {
2889 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
2890 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2891 &queue->device->empty_cs[queue->queue_family_index],
2892 1, NULL, NULL,
2893 &sem_info, NULL,
2894 false, base_fence);
2895 if (ret) {
2896 radv_loge("failed to submit CS %d\n", i);
2897 abort();
2898 }
2899 fence_emitted = true;
2900 }
2901 radv_free_sem_info(&sem_info);
2902 continue;
2903 }
2904
2905 cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
2906 (pSubmits[i].commandBufferCount));
if (!cs_array) {
radv_free_sem_info(&sem_info);
return vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
2907
2908 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2909 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2910 pSubmits[i].pCommandBuffers[j]);
2911 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2912
2913 cs_array[j] = cmd_buffer->cs;
2914 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2915 can_patch = false;
2916
2917 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
2918 }
2919
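/* Submit at most max_cs_submission IBs at a time; with a trace BO
 * this is 1, so a hang can be pinned to a single CS. */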
2920 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2921 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2922 const struct radv_winsys_bo_list *bo_list = NULL;
2923
2924 advance = MIN2(max_cs_submission,
2925 pSubmits[i].commandBufferCount - j);
2926
2927 if (queue->device->trace_bo)
2928 *queue->device->trace_id_ptr = 0;
2929
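/* Only the first chunk waits on the semaphores and only the last
 * one signals them, so a split submission behaves like a single
 * one. */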
2930 sem_info.cs_emit_wait = j == 0;
2931 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2932
2933 if (unlikely(queue->device->use_global_bo_list)) {
2934 pthread_mutex_lock(&queue->device->bo_list.mutex);
2935 bo_list = &queue->device->bo_list.list;
2936 }
2937
2938 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2939 advance, initial_preamble, continue_preamble_cs,
2940 &sem_info, bo_list,
2941 can_patch, base_fence);
2942
2943 if (unlikely(queue->device->use_global_bo_list))
2944 pthread_mutex_unlock(&queue->device->bo_list.mutex);
2945
2946 if (ret) {
2947 radv_loge("failed to submit CS %d\n", i);
2948 abort();
2949 }
2950 fence_emitted = true;
2951 if (queue->device->trace_bo) {
2952 radv_check_gpu_hangs(queue, cs_array[j]);
2953 }
2954 }
2955
2956 radv_free_temp_syncobjs(queue->device,
2957 pSubmits[i].waitSemaphoreCount,
2958 pSubmits[i].pWaitSemaphores);
2959 radv_free_sem_info(&sem_info);
2960 free(cs_array);
2961 }
2962
2963 if (fence) {
2964 if (!fence_emitted) {
2965 result = radv_signal_fence(queue, fence);
2966 if (result != VK_SUCCESS)
2967 return result;
2968 }
2969 fence->submitted = true;
2970 }
2971
2972 return VK_SUCCESS;
2973 }
2974
2975 VkResult radv_QueueWaitIdle(
2976 VkQueue _queue)
2977 {
2978 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2979
2980 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2981 radv_queue_family_to_ring(queue->queue_family_index),
2982 queue->queue_idx);
2983 return VK_SUCCESS;
2984 }
2985
2986 VkResult radv_DeviceWaitIdle(
2987 VkDevice _device)
2988 {
2989 RADV_FROM_HANDLE(radv_device, device, _device);
2990
2991 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2992 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2993 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2994 }
2995 }
2996 return VK_SUCCESS;
2997 }
2998
2999 VkResult radv_EnumerateInstanceExtensionProperties(
3000 const char* pLayerName,
3001 uint32_t* pPropertyCount,
3002 VkExtensionProperties* pProperties)
3003 {
3004 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
3005
3006 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
3007 if (radv_supported_instance_extensions.extensions[i]) {
3008 vk_outarray_append(&out, prop) {
3009 *prop = radv_instance_extensions[i];
3010 }
3011 }
3012 }
3013
3014 return vk_outarray_status(&out);
3015 }
3016
3017 VkResult radv_EnumerateDeviceExtensionProperties(
3018 VkPhysicalDevice physicalDevice,
3019 const char* pLayerName,
3020 uint32_t* pPropertyCount,
3021 VkExtensionProperties* pProperties)
3022 {
3023 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
3024 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
3025
3026 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
3027 if (device->supported_extensions.extensions[i]) {
3028 vk_outarray_append(&out, prop) {
3029 *prop = radv_device_extensions[i];
3030 }
3031 }
3032 }
3033
3034 return vk_outarray_status(&out);
3035 }
3036
3037 PFN_vkVoidFunction radv_GetInstanceProcAddr(
3038 VkInstance _instance,
3039 const char* pName)
3040 {
3041 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3042
3043 return radv_lookup_entrypoint_checked(pName,
3044 instance ? instance->apiVersion : 0,
3045 instance ? &instance->enabled_extensions : NULL,
3046 NULL);
3047 }
3048
3049 /* The loader wants us to expose a second GetInstanceProcAddr function
3050 * to work around certain LD_PRELOAD issues seen in apps.
3051 */
3052 PUBLIC
3053 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
3054 VkInstance instance,
3055 const char* pName);
3056
3057 PUBLIC
3058 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
3059 VkInstance instance,
3060 const char* pName)
3061 {
3062 return radv_GetInstanceProcAddr(instance, pName);
3063 }
3064
3065 PFN_vkVoidFunction radv_GetDeviceProcAddr(
3066 VkDevice _device,
3067 const char* pName)
3068 {
3069 RADV_FROM_HANDLE(radv_device, device, _device);
3070
3071 return radv_lookup_entrypoint_checked(pName,
3072 device->instance->apiVersion,
3073 &device->instance->enabled_extensions,
3074 &device->enabled_extensions);
3075 }
3076
3077 bool radv_get_memory_fd(struct radv_device *device,
3078 struct radv_device_memory *memory,
3079 int *pFD)
3080 {
3081 struct radeon_bo_metadata metadata;
3082
3083 if (memory->image) {
3084 radv_init_metadata(device, memory->image, &metadata);
3085 device->ws->buffer_set_metadata(memory->bo, &metadata);
3086 }
3087
3088 return device->ws->buffer_get_fd(device->ws, memory->bo,
3089 pFD);
3090 }
3091
3092 static VkResult radv_alloc_memory(struct radv_device *device,
3093 const VkMemoryAllocateInfo* pAllocateInfo,
3094 const VkAllocationCallbacks* pAllocator,
3095 VkDeviceMemory* pMem)
3096 {
3097 struct radv_device_memory *mem;
3098 VkResult result;
3099 enum radeon_bo_domain domain;
3100 uint32_t flags = 0;
3101 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
3102
3103 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3104
3105 if (pAllocateInfo->allocationSize == 0) {
3106 /* Apparently, this is allowed */
3107 *pMem = VK_NULL_HANDLE;
3108 return VK_SUCCESS;
3109 }
3110
3111 const VkImportMemoryFdInfoKHR *import_info =
3112 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
3113 const VkMemoryDedicatedAllocateInfo *dedicate_info =
3114 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
3115 const VkExportMemoryAllocateInfo *export_info =
3116 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
3117 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
3118 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
3119
3120 const struct wsi_memory_allocate_info *wsi_info =
3121 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
3122
3123 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
3124 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3125 if (mem == NULL)
3126 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3127
3128 if (wsi_info && wsi_info->implicit_sync)
3129 flags |= RADEON_FLAG_IMPLICIT_SYNC;
3130
3131 if (dedicate_info) {
3132 mem->image = radv_image_from_handle(dedicate_info->image);
3133 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
3134 } else {
3135 mem->image = NULL;
3136 mem->buffer = NULL;
3137 }
3138
3139 float priority_float = 0.5;
3140 const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
3141 vk_find_struct_const(pAllocateInfo->pNext,
3142 MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
3143 if (priority_ext)
3144 priority_float = priority_ext->priority;
3145
3146 unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
3147 (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
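/* E.g. the default priority of 0.5 lands in the middle of the
 * application priority range (hypothetical; the exact value depends
 * on RADV_BO_PRIORITY_APPLICATION_MAX). */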
3148
3149 mem->user_ptr = NULL;
3150
3151 if (import_info) {
3152 assert(import_info->handleType ==
3153 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3154 import_info->handleType ==
3155 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3156 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
3157 priority, NULL, NULL);
3158 if (!mem->bo) {
3159 result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3160 goto fail;
3161 } else {
3162 close(import_info->fd);
3163 }
3164 } else if (host_ptr_info) {
3165 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3166 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
3167 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
3168 pAllocateInfo->allocationSize,
3169 priority);
3170 if (!mem->bo) {
3171 result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3172 goto fail;
3173 } else {
3174 mem->user_ptr = host_ptr_info->pHostPointer;
3175 }
3176 } else {
3177 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
3178 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
3179 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
3180 domain = RADEON_DOMAIN_GTT;
3181 else
3182 domain = RADEON_DOMAIN_VRAM;
3183
3184 if (mem_type_index == RADV_MEM_TYPE_VRAM)
3185 flags |= RADEON_FLAG_NO_CPU_ACCESS;
3186 else
3187 flags |= RADEON_FLAG_CPU_ACCESS;
3188
3189 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
3190 flags |= RADEON_FLAG_GTT_WC;
3191
3192 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
3193 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
3194 if (device->use_global_bo_list) {
3195 flags |= RADEON_FLAG_PREFER_LOCAL_BO;
3196 }
3197 }
3198
3199 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
3200 domain, flags, priority);
3201
3202 if (!mem->bo) {
3203 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
3204 goto fail;
3205 }
3206 mem->type_index = mem_type_index;
3207 }
3208
3209 result = radv_bo_list_add(device, mem->bo);
3210 if (result != VK_SUCCESS)
3211 goto fail_bo;
3212
3213 *pMem = radv_device_memory_to_handle(mem);
3214
3215 return VK_SUCCESS;
3216
3217 fail_bo:
3218 device->ws->buffer_destroy(mem->bo);
3219 fail:
3220 vk_free2(&device->alloc, pAllocator, mem);
3221
3222 return result;
3223 }
3224
3225 VkResult radv_AllocateMemory(
3226 VkDevice _device,
3227 const VkMemoryAllocateInfo* pAllocateInfo,
3228 const VkAllocationCallbacks* pAllocator,
3229 VkDeviceMemory* pMem)
3230 {
3231 RADV_FROM_HANDLE(radv_device, device, _device);
3232 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
3233 }
3234
3235 void radv_FreeMemory(
3236 VkDevice _device,
3237 VkDeviceMemory _mem,
3238 const VkAllocationCallbacks* pAllocator)
3239 {
3240 RADV_FROM_HANDLE(radv_device, device, _device);
3241 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
3242
3243 if (mem == NULL)
3244 return;
3245
3246 radv_bo_list_remove(device, mem->bo);
3247 device->ws->buffer_destroy(mem->bo);
3248 mem->bo = NULL;
3249
3250 vk_free2(&device->alloc, pAllocator, mem);
3251 }
3252
3253 VkResult radv_MapMemory(
3254 VkDevice _device,
3255 VkDeviceMemory _memory,
3256 VkDeviceSize offset,
3257 VkDeviceSize size,
3258 VkMemoryMapFlags flags,
3259 void** ppData)
3260 {
3261 RADV_FROM_HANDLE(radv_device, device, _device);
3262 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3263
3264 if (mem == NULL) {
3265 *ppData = NULL;
3266 return VK_SUCCESS;
3267 }
3268
3269 if (mem->user_ptr)
3270 *ppData = mem->user_ptr;
3271 else
3272 *ppData = device->ws->buffer_map(mem->bo);
3273
3274 if (*ppData) {
3275 *ppData += offset;
3276 return VK_SUCCESS;
3277 }
3278
3279 return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
3280 }
3281
3282 void radv_UnmapMemory(
3283 VkDevice _device,
3284 VkDeviceMemory _memory)
3285 {
3286 RADV_FROM_HANDLE(radv_device, device, _device);
3287 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3288
3289 if (mem == NULL)
3290 return;
3291
3292 if (mem->user_ptr == NULL)
3293 device->ws->buffer_unmap(mem->bo);
3294 }
3295
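/* All host-visible memory types radv exposes are HOST_COHERENT, so
 * flushing and invalidating mapped ranges are intentional no-ops, not
 * missing functionality. */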
3296 VkResult radv_FlushMappedMemoryRanges(
3297 VkDevice _device,
3298 uint32_t memoryRangeCount,
3299 const VkMappedMemoryRange* pMemoryRanges)
3300 {
3301 return VK_SUCCESS;
3302 }
3303
3304 VkResult radv_InvalidateMappedMemoryRanges(
3305 VkDevice _device,
3306 uint32_t memoryRangeCount,
3307 const VkMappedMemoryRange* pMemoryRanges)
3308 {
3309 return VK_SUCCESS;
3310 }
3311
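/* Plain buffers only need 16-byte alignment; sparse buffers are bound
 * at page granularity, hence the 4096-byte alignment (and the size
 * round-up below). memoryTypeBits simply allows every exposed type. */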
3312 void radv_GetBufferMemoryRequirements(
3313 VkDevice _device,
3314 VkBuffer _buffer,
3315 VkMemoryRequirements* pMemoryRequirements)
3316 {
3317 RADV_FROM_HANDLE(radv_device, device, _device);
3318 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3319
3320 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3321
3322 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3323 pMemoryRequirements->alignment = 4096;
3324 else
3325 pMemoryRequirements->alignment = 16;
3326
3327 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
3328 }
3329
3330 void radv_GetBufferMemoryRequirements2(
3331 VkDevice device,
3332 const VkBufferMemoryRequirementsInfo2 *pInfo,
3333 VkMemoryRequirements2 *pMemoryRequirements)
3334 {
3335 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
3336 &pMemoryRequirements->memoryRequirements);
3337 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
3338 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3339 switch (ext->sType) {
3340 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3341 VkMemoryDedicatedRequirements *req =
3342 (VkMemoryDedicatedRequirements *) ext;
3343 req->requiresDedicatedAllocation = buffer->shareable;
3344 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3345 break;
3346 }
3347 default:
3348 break;
3349 }
3350 }
3351 }
3352
3353 void radv_GetImageMemoryRequirements(
3354 VkDevice _device,
3355 VkImage _image,
3356 VkMemoryRequirements* pMemoryRequirements)
3357 {
3358 RADV_FROM_HANDLE(radv_device, device, _device);
3359 RADV_FROM_HANDLE(radv_image, image, _image);
3360
3361 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3362
3363 pMemoryRequirements->size = image->size;
3364 pMemoryRequirements->alignment = image->alignment;
3365 }
3366
3367 void radv_GetImageMemoryRequirements2(
3368 VkDevice device,
3369 const VkImageMemoryRequirementsInfo2 *pInfo,
3370 VkMemoryRequirements2 *pMemoryRequirements)
3371 {
3372 radv_GetImageMemoryRequirements(device, pInfo->image,
3373 &pMemoryRequirements->memoryRequirements);
3374
3375 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3376
3377 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3378 switch (ext->sType) {
3379 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3380 VkMemoryDedicatedRequirements *req =
3381 (VkMemoryDedicatedRequirements *) ext;
3382 req->requiresDedicatedAllocation = image->shareable;
3383 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3384 break;
3385 }
3386 default:
3387 break;
3388 }
3389 }
3390 }
3391
3392 void radv_GetImageSparseMemoryRequirements(
3393 VkDevice device,
3394 VkImage image,
3395 uint32_t* pSparseMemoryRequirementCount,
3396 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
3397 {
3398 stub();
3399 }
3400
3401 void radv_GetImageSparseMemoryRequirements2(
3402 VkDevice device,
3403 const VkImageSparseMemoryRequirementsInfo2 *pInfo,
3404 uint32_t* pSparseMemoryRequirementCount,
3405 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
3406 {
3407 stub();
3408 }
3409
3410 void radv_GetDeviceMemoryCommitment(
3411 VkDevice device,
3412 VkDeviceMemory memory,
3413 VkDeviceSize* pCommittedMemoryInBytes)
3414 {
3415 *pCommittedMemoryInBytes = 0;
3416 }
3417
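/* Binding memory is purely CPU-side bookkeeping in radv: the BO and
 * offset are recorded on the buffer or image and only consumed when
 * descriptors and command buffers reference them later. */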
3418 VkResult radv_BindBufferMemory2(VkDevice device,
3419 uint32_t bindInfoCount,
3420 const VkBindBufferMemoryInfo *pBindInfos)
3421 {
3422 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3423 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3424 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3425
3426 if (mem) {
3427 buffer->bo = mem->bo;
3428 buffer->offset = pBindInfos[i].memoryOffset;
3429 } else {
3430 buffer->bo = NULL;
3431 }
3432 }
3433 return VK_SUCCESS;
3434 }
3435
3436 VkResult radv_BindBufferMemory(
3437 VkDevice device,
3438 VkBuffer buffer,
3439 VkDeviceMemory memory,
3440 VkDeviceSize memoryOffset)
3441 {
3442 const VkBindBufferMemoryInfo info = {
3443 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
3444 .buffer = buffer,
3445 .memory = memory,
3446 .memoryOffset = memoryOffset
3447 };
3448
3449 return radv_BindBufferMemory2(device, 1, &info);
3450 }
3451
3452 VkResult radv_BindImageMemory2(VkDevice device,
3453 uint32_t bindInfoCount,
3454 const VkBindImageMemoryInfo *pBindInfos)
3455 {
3456 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3457 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3458 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3459
3460 if (mem) {
3461 image->bo = mem->bo;
3462 image->offset = pBindInfos[i].memoryOffset;
3463 } else {
3464 image->bo = NULL;
3465 image->offset = 0;
3466 }
3467 }
3468 return VK_SUCCESS;
3469 }
3470
3471
3472 VkResult radv_BindImageMemory(
3473 VkDevice device,
3474 VkImage image,
3475 VkDeviceMemory memory,
3476 VkDeviceSize memoryOffset)
3477 {
3478 const VkBindImageMemoryInfo info = {
3479 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
3480 .image = image,
3481 .memory = memory,
3482 .memoryOffset = memoryOffset
3483 };
3484
3485 return radv_BindImageMemory2(device, 1, &info);
3486 }
3487
3488
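/* Sparse binding: a buffer created with SPARSE_BINDING gets a purely
 * virtual BO (see radv_CreateBuffer below). The two helpers that follow
 * remap page ranges of that virtual address space onto physical BOs, or
 * unmap them again when a bind carries no memory. */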
3489 static void
3490 radv_sparse_buffer_bind_memory(struct radv_device *device,
3491 const VkSparseBufferMemoryBindInfo *bind)
3492 {
3493 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3494
3495 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3496 struct radv_device_memory *mem = NULL;
3497
3498 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3499 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3500
3501 device->ws->buffer_virtual_bind(buffer->bo,
3502 bind->pBinds[i].resourceOffset,
3503 bind->pBinds[i].size,
3504 mem ? mem->bo : NULL,
3505 bind->pBinds[i].memoryOffset);
3506 }
3507 }
3508
3509 static void
3510 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3511 const VkSparseImageOpaqueMemoryBindInfo *bind)
3512 {
3513 RADV_FROM_HANDLE(radv_image, image, bind->image);
3514
3515 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3516 struct radv_device_memory *mem = NULL;
3517
3518 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3519 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3520
3521 device->ws->buffer_virtual_bind(image->bo,
3522 bind->pBinds[i].resourceOffset,
3523 bind->pBinds[i].size,
3524 mem ? mem->bo : NULL,
3525 bind->pBinds[i].memoryOffset);
3526 }
3527 }
3528
3529 VkResult radv_QueueBindSparse(
3530 VkQueue _queue,
3531 uint32_t bindInfoCount,
3532 const VkBindSparseInfo* pBindInfo,
3533 VkFence _fence)
3534 {
3535 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3536 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3537 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3538 bool fence_emitted = false;
3539 VkResult result;
3540 int ret;
3541
3542 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3543 struct radv_winsys_sem_info sem_info;
3544 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3545 radv_sparse_buffer_bind_memory(queue->device,
3546 pBindInfo[i].pBufferBinds + j);
3547 }
3548
3549 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3550 radv_sparse_image_opaque_bind_memory(queue->device,
3551 pBindInfo[i].pImageOpaqueBinds + j);
3552 }
3553
3555 result = radv_alloc_sem_info(queue->device->instance,
3556 &sem_info,
3557 pBindInfo[i].waitSemaphoreCount,
3558 pBindInfo[i].pWaitSemaphores,
3559 pBindInfo[i].signalSemaphoreCount,
3560 pBindInfo[i].pSignalSemaphores,
3561 _fence);
3562 if (result != VK_SUCCESS)
3563 return result;
3564
3565 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3566 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3567 &queue->device->empty_cs[queue->queue_family_index],
3568 1, NULL, NULL,
3569 &sem_info, NULL,
3570 false, base_fence);
3571 if (ret) {
3572 radv_loge("failed to submit CS %d\n", i);
3573 abort();
3574 }
3575
3576 fence_emitted = true;
3577 if (fence)
3578 fence->submitted = true;
3579 }
3580
3581 radv_free_sem_info(&sem_info);
3582
3583 }
3584
3585 if (fence) {
3586 if (!fence_emitted) {
3587 result = radv_signal_fence(queue, fence);
3588 if (result != VK_SUCCESS)
3589 return result;
3590 }
3591 fence->submitted = true;
3592 }
3593
3594 return VK_SUCCESS;
3595 }
3596
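/* A fence is backed either by a DRM syncobj (when syncobjs are always
 * used or the fence is exportable) or by a plain winsys fence. The
 * temp_syncobj field holds a payload imported with the TEMPORARY flag;
 * it takes precedence over the permanent payload until the fence is
 * reset. */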
3597 VkResult radv_CreateFence(
3598 VkDevice _device,
3599 const VkFenceCreateInfo* pCreateInfo,
3600 const VkAllocationCallbacks* pAllocator,
3601 VkFence* pFence)
3602 {
3603 RADV_FROM_HANDLE(radv_device, device, _device);
3604 const VkExportFenceCreateInfo *export =
3605 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
3606 VkExternalFenceHandleTypeFlags handleTypes =
3607 export ? export->handleTypes : 0;
3608
3609 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3610 sizeof(*fence), 8,
3611 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3612
3613 if (!fence)
3614 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3615
3616 fence->fence_wsi = NULL;
3617 fence->submitted = false;
3618 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
3619 fence->temp_syncobj = 0;
3620 if (device->always_use_syncobj || handleTypes) {
3621 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3622 if (ret) {
3623 vk_free2(&device->alloc, pAllocator, fence);
3624 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3625 }
3626 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3627 device->ws->signal_syncobj(device->ws, fence->syncobj);
3628 }
3629 fence->fence = NULL;
3630 } else {
3631 fence->fence = device->ws->create_fence();
3632 if (!fence->fence) {
3633 vk_free2(&device->alloc, pAllocator, fence);
3634 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3635 }
3636 fence->syncobj = 0;
3637 }
3638
3639 *pFence = radv_fence_to_handle(fence);
3640
3641 return VK_SUCCESS;
3642 }
3643
3644 void radv_DestroyFence(
3645 VkDevice _device,
3646 VkFence _fence,
3647 const VkAllocationCallbacks* pAllocator)
3648 {
3649 RADV_FROM_HANDLE(radv_device, device, _device);
3650 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3651
3652 if (!fence)
3653 return;
3654
3655 if (fence->temp_syncobj)
3656 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3657 if (fence->syncobj)
3658 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3659 if (fence->fence)
3660 device->ws->destroy_fence(fence->fence);
3661 if (fence->fence_wsi)
3662 fence->fence_wsi->destroy(fence->fence_wsi);
3663 vk_free2(&device->alloc, pAllocator, fence);
3664 }
3665
3666
3667 uint64_t radv_get_current_time(void)
3668 {
3669 struct timespec tv;
3670 clock_gettime(CLOCK_MONOTONIC, &tv);
3671 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3672 }
3673
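/* Convert a relative timeout into an absolute deadline, clamping so
 * that current_time + timeout cannot wrap: a caller passing UINT64_MAX
 * ("wait forever") gets a deadline of UINT64_MAX instead of overflowing
 * to a value in the past. */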
3674 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3675 {
3676 uint64_t current_time = radv_get_current_time();
3677
3678 timeout = MIN2(UINT64_MAX - current_time, timeout);
3679
3680 return current_time + timeout;
3681 }
3682
3683
3684 static bool radv_all_fences_plain_and_submitted(uint32_t fenceCount, const VkFence *pFences)
3685 {
3686 for (uint32_t i = 0; i < fenceCount; ++i) {
3687 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3688 if (fence->fence == NULL || fence->syncobj ||
3689 fence->temp_syncobj ||
3690 (!fence->signalled && !fence->submitted))
3691 return false;
3692 }
3693 return true;
3694 }
3695
3696 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3697 {
3698 for (uint32_t i = 0; i < fenceCount; ++i) {
3699 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3700 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3701 return false;
3702 }
3703 return true;
3704 }
3705
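/* Waiting strategy, in order of preference:
 * 1. If every fence is a syncobj, hand the whole array to the kernel in
 *    a single wait_syncobj() call.
 * 2. For "wait any" over plain, submitted fences on a new enough amdgpu
 *    DRM (minor >= 10), batch them through fences_wait(); otherwise
 *    poll radv_GetFenceStatus() until the deadline.
 * 3. Otherwise wait on each fence in sequence against the absolute
 *    deadline computed above. */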
3706 VkResult radv_WaitForFences(
3707 VkDevice _device,
3708 uint32_t fenceCount,
3709 const VkFence* pFences,
3710 VkBool32 waitAll,
3711 uint64_t timeout)
3712 {
3713 RADV_FROM_HANDLE(radv_device, device, _device);
3714 timeout = radv_get_absolute_timeout(timeout);
3715
3716 if (device->always_use_syncobj &&
3717 radv_all_fences_syncobj(fenceCount, pFences))
3718 {
3719 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3720 if (!handles)
3721 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3722
3723 for (uint32_t i = 0; i < fenceCount; ++i) {
3724 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3725 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3726 }
3727
3728 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3729
3730 free(handles);
3731 return success ? VK_SUCCESS : VK_TIMEOUT;
3732 }
3733
3734 if (!waitAll && fenceCount > 1) {
3735 /* Not doing this by default for waitAll, due to needing to allocate twice. */
3736 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(fenceCount, pFences)) {
3737 uint32_t wait_count = 0;
3738 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
3739 if (!fences)
3740 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3741
3742 for (uint32_t i = 0; i < fenceCount; ++i) {
3743 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3744
3745 if (fence->signalled) {
3746 free(fences);
3747 return VK_SUCCESS;
3748 }
3749
3750 fences[wait_count++] = fence->fence;
3751 }
3752
3753 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
3754 waitAll, timeout - radv_get_current_time());
3755
3756 free(fences);
3757 return success ? VK_SUCCESS : VK_TIMEOUT;
3758 }
3759
3760 while(radv_get_current_time() <= timeout) {
3761 for (uint32_t i = 0; i < fenceCount; ++i) {
3762 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
3763 return VK_SUCCESS;
3764 }
3765 }
3766 return VK_TIMEOUT;
3767 }
3768
3769 for (uint32_t i = 0; i < fenceCount; ++i) {
3770 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3771 bool expired = false;
3772
3773 if (fence->temp_syncobj) {
3774 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
3775 return VK_TIMEOUT;
3776 continue;
3777 }
3778
3779 if (fence->syncobj) {
3780 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
3781 return VK_TIMEOUT;
3782 continue;
3783 }
3784
3785 if (fence->signalled)
3786 continue;
3787
3788 if (fence->fence) {
3789 if (!fence->submitted) {
3790 while(radv_get_current_time() <= timeout &&
3791 !fence->submitted)
3792 /* Do nothing */;
3793
3794 if (!fence->submitted)
3795 return VK_TIMEOUT;
3796
3797 /* Recheck as it may have been set by
3798 * submitting operations. */
3799
3800 if (fence->signalled)
3801 continue;
3802 }
3803
3804 expired = device->ws->fence_wait(device->ws,
3805 fence->fence,
3806 true, timeout);
3807 if (!expired)
3808 return VK_TIMEOUT;
3809 }
3810
3811 if (fence->fence_wsi) {
3812 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
3813 if (result != VK_SUCCESS)
3814 return result;
3815 }
3816
3817 fence->signalled = true;
3818 }
3819
3820 return VK_SUCCESS;
3821 }
3822
3823 VkResult radv_ResetFences(VkDevice _device,
3824 uint32_t fenceCount,
3825 const VkFence *pFences)
3826 {
3827 RADV_FROM_HANDLE(radv_device, device, _device);
3828
3829 for (unsigned i = 0; i < fenceCount; ++i) {
3830 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3831 fence->submitted = fence->signalled = false;
3832
3833 /* Per spec, we first restore the permanent payload, and then reset, so
3834 * having a temp syncobj should not skip resetting the permanent syncobj. */
3835 if (fence->temp_syncobj) {
3836 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3837 fence->temp_syncobj = 0;
3838 }
3839
3840 if (fence->syncobj) {
3841 device->ws->reset_syncobj(device->ws, fence->syncobj);
3842 }
3843 }
3844
3845 return VK_SUCCESS;
3846 }
3847
3848 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
3849 {
3850 RADV_FROM_HANDLE(radv_device, device, _device);
3851 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3852
3853 if (fence->temp_syncobj) {
3854 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
3855 return success ? VK_SUCCESS : VK_NOT_READY;
3856 }
3857
3858 if (fence->syncobj) {
3859 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
3860 return success ? VK_SUCCESS : VK_NOT_READY;
3861 }
3862
3863 if (fence->signalled)
3864 return VK_SUCCESS;
3865 if (!fence->submitted)
3866 return VK_NOT_READY;
3867 if (fence->fence) {
3868 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
3869 return VK_NOT_READY;
3870 }
3871 if (fence->fence_wsi) {
3872 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
3873
3874 if (result != VK_SUCCESS) {
3875 if (result == VK_TIMEOUT)
3876 return VK_NOT_READY;
3877 return result;
3878 }
3879 }
3880 return VK_SUCCESS;
3881 }
3882
3883
3884 // Queue semaphore functions
3885
3886 VkResult radv_CreateSemaphore(
3887 VkDevice _device,
3888 const VkSemaphoreCreateInfo* pCreateInfo,
3889 const VkAllocationCallbacks* pAllocator,
3890 VkSemaphore* pSemaphore)
3891 {
3892 RADV_FROM_HANDLE(radv_device, device, _device);
3893 const VkExportSemaphoreCreateInfo *export =
3894 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
3895 VkExternalSemaphoreHandleTypeFlags handleTypes =
3896 export ? export->handleTypes : 0;
3897
3898 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
3899 sizeof(*sem), 8,
3900 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3901 if (!sem)
3902 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3903
3904 sem->temp_syncobj = 0;
3905 /* Create a syncobj if syncobjs are always used or we may export this semaphore. */
3906 if (device->always_use_syncobj || handleTypes) {
3907 assert (device->physical_device->rad_info.has_syncobj);
3908 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
3909 if (ret) {
3910 vk_free2(&device->alloc, pAllocator, sem);
3911 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3912 }
3913 sem->sem = NULL;
3914 } else {
3915 sem->sem = device->ws->create_sem(device->ws);
3916 if (!sem->sem) {
3917 vk_free2(&device->alloc, pAllocator, sem);
3918 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3919 }
3920 sem->syncobj = 0;
3921 }
3922
3923 *pSemaphore = radv_semaphore_to_handle(sem);
3924 return VK_SUCCESS;
3925 }
3926
3927 void radv_DestroySemaphore(
3928 VkDevice _device,
3929 VkSemaphore _semaphore,
3930 const VkAllocationCallbacks* pAllocator)
3931 {
3932 RADV_FROM_HANDLE(radv_device, device, _device);
3933 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
3934 if (!_semaphore)
3935 return;
3936
3937 if (sem->syncobj)
3938 device->ws->destroy_syncobj(device->ws, sem->syncobj);
3939 else
3940 device->ws->destroy_sem(sem->sem);
3941 vk_free2(&device->alloc, pAllocator, sem);
3942 }
3943
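/* Events are a single 8-byte, uncached GTT buffer that both the CPU
 * (the functions below) and the GPU (via CP writes and polls) access
 * directly: 1 means set, 0 means reset. */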
3944 VkResult radv_CreateEvent(
3945 VkDevice _device,
3946 const VkEventCreateInfo* pCreateInfo,
3947 const VkAllocationCallbacks* pAllocator,
3948 VkEvent* pEvent)
3949 {
3950 RADV_FROM_HANDLE(radv_device, device, _device);
3951 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
3952 sizeof(*event), 8,
3953 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3954
3955 if (!event)
3956 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3957
3958 event->bo = device->ws->buffer_create(device->ws, 8, 8,
3959 RADEON_DOMAIN_GTT,
3960 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
3961 RADV_BO_PRIORITY_FENCE);
3962 if (!event->bo) {
3963 vk_free2(&device->alloc, pAllocator, event);
3964 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3965 }
3966
3967 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
3968
3969 *pEvent = radv_event_to_handle(event);
3970
3971 return VK_SUCCESS;
3972 }
3973
3974 void radv_DestroyEvent(
3975 VkDevice _device,
3976 VkEvent _event,
3977 const VkAllocationCallbacks* pAllocator)
3978 {
3979 RADV_FROM_HANDLE(radv_device, device, _device);
3980 RADV_FROM_HANDLE(radv_event, event, _event);
3981
3982 if (!event)
3983 return;
3984 device->ws->buffer_destroy(event->bo);
3985 vk_free2(&device->alloc, pAllocator, event);
3986 }
3987
3988 VkResult radv_GetEventStatus(
3989 VkDevice _device,
3990 VkEvent _event)
3991 {
3992 RADV_FROM_HANDLE(radv_event, event, _event);
3993
3994 if (*event->map == 1)
3995 return VK_EVENT_SET;
3996 return VK_EVENT_RESET;
3997 }
3998
3999 VkResult radv_SetEvent(
4000 VkDevice _device,
4001 VkEvent _event)
4002 {
4003 RADV_FROM_HANDLE(radv_event, event, _event);
4004 *event->map = 1;
4005
4006 return VK_SUCCESS;
4007 }
4008
4009 VkResult radv_ResetEvent(
4010 VkDevice _device,
4011 VkEvent _event)
4012 {
4013 RADV_FROM_HANDLE(radv_event, event, _event);
4014 *event->map = 0;
4015
4016 return VK_SUCCESS;
4017 }
4018
4019 VkResult radv_CreateBuffer(
4020 VkDevice _device,
4021 const VkBufferCreateInfo* pCreateInfo,
4022 const VkAllocationCallbacks* pAllocator,
4023 VkBuffer* pBuffer)
4024 {
4025 RADV_FROM_HANDLE(radv_device, device, _device);
4026 struct radv_buffer *buffer;
4027
4028 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
4029
4030 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
4031 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4032 if (buffer == NULL)
4033 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4034
4035 buffer->size = pCreateInfo->size;
4036 buffer->usage = pCreateInfo->usage;
4037 buffer->bo = NULL;
4038 buffer->offset = 0;
4039 buffer->flags = pCreateInfo->flags;
4040
4041 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
4042 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
4043
4044 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
4045 buffer->bo = device->ws->buffer_create(device->ws,
4046 align64(buffer->size, 4096),
4047 4096, 0, RADEON_FLAG_VIRTUAL,
4048 RADV_BO_PRIORITY_VIRTUAL);
4049 if (!buffer->bo) {
4050 vk_free2(&device->alloc, pAllocator, buffer);
4051 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4052 }
4053 }
4054
4055 *pBuffer = radv_buffer_to_handle(buffer);
4056
4057 return VK_SUCCESS;
4058 }
4059
4060 void radv_DestroyBuffer(
4061 VkDevice _device,
4062 VkBuffer _buffer,
4063 const VkAllocationCallbacks* pAllocator)
4064 {
4065 RADV_FROM_HANDLE(radv_device, device, _device);
4066 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
4067
4068 if (!buffer)
4069 return;
4070
4071 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
4072 device->ws->buffer_destroy(buffer->bo);
4073
4074 vk_free2(&device->alloc, pAllocator, buffer);
4075 }
4076
4077 VkDeviceAddress radv_GetBufferDeviceAddressEXT(
4078 VkDevice device,
4079 const VkBufferDeviceAddressInfoEXT* pInfo)
4080 {
4081 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
4082 return radv_buffer_get_va(buffer->bo) + buffer->offset;
4083 }
4084
4085
4086 static inline unsigned
4087 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
4088 {
4089 if (stencil)
4090 return image->surface.u.legacy.stencil_tiling_index[level];
4091 else
4092 return image->surface.u.legacy.tiling_index[level];
4093 }
4094
4095 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
4096 {
4097 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
4098 }
4099
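/* Compute the CB_DCC_CONTROL value for a color view. The block-size
 * limits trade compression ratio against decompression cost: DCC that
 * may be read through the texture unit needs independent 64B blocks so
 * it can be decoded there, while render-only DCC can use the largest
 * block sizes available. */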
4100 static uint32_t
4101 radv_init_dcc_control_reg(struct radv_device *device,
4102 struct radv_image_view *iview)
4103 {
4104 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
4105 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
4106 unsigned max_compressed_block_size;
4107 unsigned independent_64b_blocks;
4108
4109 if (!radv_image_has_dcc(iview->image))
4110 return 0;
4111
4112 if (iview->image->info.samples > 1) {
4113 if (iview->image->surface.bpe == 1)
4114 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4115 else if (iview->image->surface.bpe == 2)
4116 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
4117 }
4118
4119 if (!device->physical_device->rad_info.has_dedicated_vram) {
4120 /* amdvlk: [min-compressed-block-size] should be set to 32 for
4121 * dGPU and 64 for APU because all of our APUs to date use
4122 * DIMMs which have a request granularity size of 64B while all
4123 * other chips have a 32B request size.
4124 */
4125 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
4126 }
4127
4128 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
4129 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
4130 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
4131 /* If this DCC image is potentially going to be used in texture
4132 * fetches, we need some special settings.
4133 */
4134 independent_64b_blocks = 1;
4135 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4136 } else {
4137 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
4138 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
4139 * big as possible for better compression state.
4140 */
4141 independent_64b_blocks = 0;
4142 max_compressed_block_size = max_uncompressed_block_size;
4143 }
4144
4145 return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
4146 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
4147 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
4148 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
4149 }
4150
4151 static void
4152 radv_initialise_color_surface(struct radv_device *device,
4153 struct radv_color_buffer_info *cb,
4154 struct radv_image_view *iview)
4155 {
4156 const struct vk_format_description *desc;
4157 unsigned ntype, format, swap, endian;
4158 unsigned blend_clamp = 0, blend_bypass = 0;
4159 uint64_t va;
4160 const struct radeon_surf *surf = &iview->image->surface;
4161
4162 desc = vk_format_description(iview->vk_format);
4163
4164 memset(cb, 0, sizeof(*cb));
4165
4166 /* Intensity is implemented as Red, so treat it that way. */
4167 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
4168
4169 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4170
4171 cb->cb_color_base = va >> 8;
4172
4173 if (device->physical_device->rad_info.chip_class >= GFX9) {
4174 struct gfx9_surf_meta_flags meta;
4175 if (iview->image->dcc_offset)
4176 meta = iview->image->surface.u.gfx9.dcc;
4177 else
4178 meta = iview->image->surface.u.gfx9.cmask;
4179
4180 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
4181 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
4182 S_028C74_RB_ALIGNED(meta.rb_aligned) |
4183 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
4184
4185 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
4186 cb->cb_color_base |= iview->image->surface.tile_swizzle;
4187 } else {
4188 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
4189 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
4190
4191 cb->cb_color_base += level_info->offset >> 8;
4192 if (level_info->mode == RADEON_SURF_MODE_2D)
4193 cb->cb_color_base |= iview->image->surface.tile_swizzle;
4194
4195 pitch_tile_max = level_info->nblk_x / 8 - 1;
4196 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
4197 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
4198
4199 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
4200 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
4201 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
4202
4203 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
4204
4205 if (radv_image_has_fmask(iview->image)) {
4206 if (device->physical_device->rad_info.chip_class >= CIK)
4207 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
4208 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
4209 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
4210 } else {
4211 /* This must be set for fast clear to work without FMASK. */
4212 if (device->physical_device->rad_info.chip_class >= CIK)
4213 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
4214 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
4215 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
4216 }
4217 }
4218
4219 /* CMASK variables */
4220 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4221 va += iview->image->cmask.offset;
4222 cb->cb_color_cmask = va >> 8;
4223
4224 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4225 va += iview->image->dcc_offset;
4226 cb->cb_dcc_base = va >> 8;
4227 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
4228
4229 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4230 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
4231 S_028C6C_SLICE_MAX(max_slice);
4232
4233 if (iview->image->info.samples > 1) {
4234 unsigned log_samples = util_logbase2(iview->image->info.samples);
4235
4236 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
4237 S_028C74_NUM_FRAGMENTS(log_samples);
4238 }
4239
4240 if (radv_image_has_fmask(iview->image)) {
4241 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
4242 cb->cb_color_fmask = va >> 8;
4243 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
4244 } else {
4245 cb->cb_color_fmask = cb->cb_color_base;
4246 }
4247
4248 ntype = radv_translate_color_numformat(iview->vk_format,
4249 desc,
4250 vk_format_get_first_non_void_channel(iview->vk_format));
4251 format = radv_translate_colorformat(iview->vk_format);
4252 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
4253 radv_finishme("Illegal color\n");
4254 swap = radv_translate_colorswap(iview->vk_format, FALSE);
4255 endian = radv_colorformat_endian_swap(format);
4256
4257 /* blend clamp should be set for all NORM/SRGB types */
4258 if (ntype == V_028C70_NUMBER_UNORM ||
4259 ntype == V_028C70_NUMBER_SNORM ||
4260 ntype == V_028C70_NUMBER_SRGB)
4261 blend_clamp = 1;
4262
4263 /* set blend bypass according to docs if SINT/UINT or
4264 8/24 COLOR variants */
4265 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
4266 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
4267 format == V_028C70_COLOR_X24_8_32_FLOAT) {
4268 blend_clamp = 0;
4269 blend_bypass = 1;
4270 }
4271 #if 0
4272 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
4273 (format == V_028C70_COLOR_8 ||
4274 format == V_028C70_COLOR_8_8 ||
4275 format == V_028C70_COLOR_8_8_8_8))
4276 ->color_is_int8 = true;
4277 #endif
4278 cb->cb_color_info = S_028C70_FORMAT(format) |
4279 S_028C70_COMP_SWAP(swap) |
4280 S_028C70_BLEND_CLAMP(blend_clamp) |
4281 S_028C70_BLEND_BYPASS(blend_bypass) |
4282 S_028C70_SIMPLE_FLOAT(1) |
4283 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
4284 ntype != V_028C70_NUMBER_SNORM &&
4285 ntype != V_028C70_NUMBER_SRGB &&
4286 format != V_028C70_COLOR_8_24 &&
4287 format != V_028C70_COLOR_24_8) |
4288 S_028C70_NUMBER_TYPE(ntype) |
4289 S_028C70_ENDIAN(endian);
4290 if (radv_image_has_fmask(iview->image)) {
4291 cb->cb_color_info |= S_028C70_COMPRESSION(1);
4292 if (device->physical_device->rad_info.chip_class == SI) {
4293 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
4294 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
4295 }
4296 }
4297
4298 if (radv_image_has_cmask(iview->image) &&
4299 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
4300 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
4301
4302 if (radv_dcc_enabled(iview->image, iview->base_mip))
4303 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
4304
4305 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
4306
4307 /* This must be set for fast clear to work without FMASK. */
4308 if (!radv_image_has_fmask(iview->image) &&
4309 device->physical_device->rad_info.chip_class == SI) {
4310 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
4311 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
4312 }
4313
4314 if (device->physical_device->rad_info.chip_class >= GFX9) {
4315 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
4316 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
4317
4318 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
4319 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
4320 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
4321 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
4322 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
4323 S_028C68_MAX_MIP(iview->image->info.levels - 1);
4324 }
4325 }
4326
4327 static unsigned
4328 radv_calc_decompress_on_z_planes(struct radv_device *device,
4329 struct radv_image_view *iview)
4330 {
4331 unsigned max_zplanes = 0;
4332
4333 assert(radv_image_is_tc_compat_htile(iview->image));
4334
4335 if (device->physical_device->rad_info.chip_class >= GFX9) {
4336 /* Default value for 32-bit depth surfaces. */
4337 max_zplanes = 4;
4338
4339 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
4340 iview->image->info.samples > 1)
4341 max_zplanes = 2;
4342
4343 max_zplanes = max_zplanes + 1;
4344 } else {
4345 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
4346 /* Do not enable Z plane compression for 16-bit depth
4347 * surfaces because it isn't supported on GFX8. Only
4348 * 32-bit depth surfaces are supported by the hardware.
4349 * This allows us to maintain shader compatibility and to
4350 * reduce the number of depth decompressions.
4351 */
4352 max_zplanes = 1;
4353 } else {
4354 if (iview->image->info.samples <= 1)
4355 max_zplanes = 5;
4356 else if (iview->image->info.samples <= 4)
4357 max_zplanes = 3;
4358 else
4359 max_zplanes = 2;
4360 }
4361 }
4362
4363 return max_zplanes;
4364 }
4365
4366 static void
4367 radv_initialise_ds_surface(struct radv_device *device,
4368 struct radv_ds_buffer_info *ds,
4369 struct radv_image_view *iview)
4370 {
4371 unsigned level = iview->base_mip;
4372 unsigned format, stencil_format;
4373 uint64_t va, s_offs, z_offs;
4374 bool stencil_only = false;
4375 memset(ds, 0, sizeof(*ds));
4376 switch (iview->image->vk_format) {
4377 case VK_FORMAT_D24_UNORM_S8_UINT:
4378 case VK_FORMAT_X8_D24_UNORM_PACK32:
4379 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
4380 ds->offset_scale = 2.0f;
4381 break;
4382 case VK_FORMAT_D16_UNORM:
4383 case VK_FORMAT_D16_UNORM_S8_UINT:
4384 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
4385 ds->offset_scale = 4.0f;
4386 break;
4387 case VK_FORMAT_D32_SFLOAT:
4388 case VK_FORMAT_D32_SFLOAT_S8_UINT:
4389 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
4390 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
4391 ds->offset_scale = 1.0f;
4392 break;
4393 case VK_FORMAT_S8_UINT:
4394 stencil_only = true;
4395 break;
4396 default:
4397 break;
4398 }
4399
4400 format = radv_translate_dbformat(iview->image->vk_format);
4401 stencil_format = iview->image->surface.has_stencil ?
4402 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
4403
4404 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4405 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
4406 S_028008_SLICE_MAX(max_slice);
4407
4408 ds->db_htile_data_base = 0;
4409 ds->db_htile_surface = 0;
4410
4411 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4412 s_offs = z_offs = va;
4413
4414 if (device->physical_device->rad_info.chip_class >= GFX9) {
4415 assert(iview->image->surface.u.gfx9.surf_offset == 0);
4416 s_offs += iview->image->surface.u.gfx9.stencil_offset;
4417
4418 ds->db_z_info = S_028038_FORMAT(format) |
4419 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
4420 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
4421 S_028038_MAXMIP(iview->image->info.levels - 1) |
4422 S_028038_ZRANGE_PRECISION(1);
4423 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
4424 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
4425
4426 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
4427 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
4428 ds->db_depth_view |= S_028008_MIPID(level);
4429
4430 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
4431 S_02801C_Y_MAX(iview->image->info.height - 1);
4432
4433 if (radv_htile_enabled(iview->image, level)) {
4434 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
4435
4436 if (radv_image_is_tc_compat_htile(iview->image)) {
4437 unsigned max_zplanes =
4438 radv_calc_decompress_on_z_planes(device, iview);
4439
4440 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes) |
4441 S_028038_ITERATE_FLUSH(1);
4442 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
4443 }
4444
4445 if (!iview->image->surface.has_stencil)
4446 /* Use all of the htile_buffer for depth if there's no stencil. */
4447 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
4448 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4449 iview->image->htile_offset;
4450 ds->db_htile_data_base = va >> 8;
4451 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
4452 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
4453 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
4454 }
4455 } else {
4456 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
4457
4458 if (stencil_only)
4459 level_info = &iview->image->surface.u.legacy.stencil_level[level];
4460
4461 z_offs += iview->image->surface.u.legacy.level[level].offset;
4462 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
4463
4464 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
4465 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
4466 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
4467
4468 if (iview->image->info.samples > 1)
4469 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
4470
4471 if (device->physical_device->rad_info.chip_class >= CIK) {
4472 struct radeon_info *info = &device->physical_device->rad_info;
4473 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
4474 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
4475 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
4476 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
4477 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
4478 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
4479
4480 if (stencil_only)
4481 tile_mode = stencil_tile_mode;
4482
4483 ds->db_depth_info |=
4484 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
4485 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
4486 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
4487 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
4488 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
4489 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
4490 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
4491 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
4492 } else {
4493 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
4494 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4495 tile_mode_index = si_tile_mode_index(iview->image, level, true);
4496 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
4497 if (stencil_only)
4498 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4499 }
4500
4501 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
4502 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
4503 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
4504
4505 if (radv_htile_enabled(iview->image, level)) {
4506 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
4507
4508 if (!iview->image->surface.has_stencil &&
4509 !radv_image_is_tc_compat_htile(iview->image))
4510 /* Use all of the htile_buffer for depth if there's no stencil. */
4511 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
4512
4513 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4514 iview->image->htile_offset;
4515 ds->db_htile_data_base = va >> 8;
4516 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
4517
4518 if (radv_image_is_tc_compat_htile(iview->image)) {
4519 unsigned max_zplanes =
4520 radv_calc_decompress_on_z_planes(device, iview);
4521
4522 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4523 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4524 }
4525 }
4526 }
4527
4528 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4529 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4530 }
4531
4532 VkResult radv_CreateFramebuffer(
4533 VkDevice _device,
4534 const VkFramebufferCreateInfo* pCreateInfo,
4535 const VkAllocationCallbacks* pAllocator,
4536 VkFramebuffer* pFramebuffer)
4537 {
4538 RADV_FROM_HANDLE(radv_device, device, _device);
4539 struct radv_framebuffer *framebuffer;
4540
4541 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4542
4543 size_t size = sizeof(*framebuffer) +
4544 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
4545 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4546 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4547 if (framebuffer == NULL)
4548 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4549
4550 framebuffer->attachment_count = pCreateInfo->attachmentCount;
4551 framebuffer->width = pCreateInfo->width;
4552 framebuffer->height = pCreateInfo->height;
4553 framebuffer->layers = pCreateInfo->layers;
4554 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4555 VkImageView _iview = pCreateInfo->pAttachments[i];
4556 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4557 framebuffer->attachments[i].attachment = iview;
4558 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
4559 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
4560 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4561 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
4562 }
4563 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4564 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4565 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4566 }
4567
4568 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4569 return VK_SUCCESS;
4570 }
4571
4572 void radv_DestroyFramebuffer(
4573 VkDevice _device,
4574 VkFramebuffer _fb,
4575 const VkAllocationCallbacks* pAllocator)
4576 {
4577 RADV_FROM_HANDLE(radv_device, device, _device);
4578 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4579
4580 if (!fb)
4581 return;
4582 vk_free2(&device->alloc, pAllocator, fb);
4583 }
4584
4585 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4586 {
4587 switch (address_mode) {
4588 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4589 return V_008F30_SQ_TEX_WRAP;
4590 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4591 return V_008F30_SQ_TEX_MIRROR;
4592 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4593 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4594 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4595 return V_008F30_SQ_TEX_CLAMP_BORDER;
4596 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4597 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4598 default:
4599 unreachable("illegal tex wrap mode");
4600 break;
4601 }
4602 }
4603
4604 static unsigned
4605 radv_tex_compare(VkCompareOp op)
4606 {
4607 switch (op) {
4608 case VK_COMPARE_OP_NEVER:
4609 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4610 case VK_COMPARE_OP_LESS:
4611 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4612 case VK_COMPARE_OP_EQUAL:
4613 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4614 case VK_COMPARE_OP_LESS_OR_EQUAL:
4615 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4616 case VK_COMPARE_OP_GREATER:
4617 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4618 case VK_COMPARE_OP_NOT_EQUAL:
4619 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4620 case VK_COMPARE_OP_GREATER_OR_EQUAL:
4621 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4622 case VK_COMPARE_OP_ALWAYS:
4623 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4624 default:
4625 unreachable("illegal compare mode");
4626 break;
4627 }
4628 }
4629
4630 static unsigned
4631 radv_tex_filter(VkFilter filter, unsigned max_aniso)
4632 {
4633 switch (filter) {
4634 case VK_FILTER_NEAREST:
4635 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4636 V_008F38_SQ_TEX_XY_FILTER_POINT);
4637 case VK_FILTER_LINEAR:
4638 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4639 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4640 case VK_FILTER_CUBIC_IMG:
4641 default:
4642 fprintf(stderr, "illegal texture filter\n");
4643 return 0;
4644 }
4645 }
4645 }
4646
4647 static unsigned
4648 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4649 {
4650 switch (mode) {
4651 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4652 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4653 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4654 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4655 default:
4656 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4657 }
4658 }
4659
4660 static unsigned
4661 radv_tex_bordercolor(VkBorderColor bcolor)
4662 {
4663 switch (bcolor) {
4664 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4665 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4666 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4667 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4668 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4669 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4670 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
4671 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
4672 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
4673 default:
4674 break;
4675 }
4676 return 0;
4677 }
4678
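/* Map a maximum anisotropy sample count to the 3-bit hardware code,
 * effectively floor(log2(count)) clamped to 4:
 * 1 -> 0 (off), 2..3 -> 1, 4..7 -> 2, 8..15 -> 3, 16+ -> 4. */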
4679 static unsigned
4680 radv_tex_aniso_filter(unsigned filter)
4681 {
4682 if (filter < 2)
4683 return 0;
4684 if (filter < 4)
4685 return 1;
4686 if (filter < 8)
4687 return 2;
4688 if (filter < 16)
4689 return 3;
4690 return 4;
4691 }
4692
4693 static unsigned
4694 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
4695 {
4696 switch (mode) {
4697 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
4698 return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
4699 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
4700 return V_008F30_SQ_IMG_FILTER_MODE_MIN;
4701 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
4702 return V_008F30_SQ_IMG_FILTER_MODE_MAX;
4703 default:
4704 break;
4705 }
4706 return 0;
4707 }
4708
4709 static uint32_t
4710 radv_get_max_anisotropy(struct radv_device *device,
4711 const VkSamplerCreateInfo *pCreateInfo)
4712 {
4713 if (device->force_aniso >= 0)
4714 return device->force_aniso;
4715
4716 if (pCreateInfo->anisotropyEnable &&
4717 pCreateInfo->maxAnisotropy > 1.0f)
4718 return (uint32_t)pCreateInfo->maxAnisotropy;
4719
4720 return 0;
4721 }
4722
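/* Pack the VkSamplerCreateInfo into the four SQ_IMG_SAMP dwords. LOD
 * values are effectively unsigned 4.8 fixed point, hence the
 * S_FIXED(CLAMP(x, 0, 15), 8) pattern (clamp to the representable
 * range, then scale by 256); the LOD bias is signed with the same 8
 * fractional bits, clamped to [-16, 16]. */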
4723 static void
4724 radv_init_sampler(struct radv_device *device,
4725 struct radv_sampler *sampler,
4726 const VkSamplerCreateInfo *pCreateInfo)
4727 {
4728 uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
4729 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
4730 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
4731 unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
4732
4733 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
4734 vk_find_struct_const(pCreateInfo->pNext,
4735 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
4736 if (sampler_reduction)
4737 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
4738
4739 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
4740 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
4741 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
4742 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
4743 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
4744 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
4745 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
4746 S_008F30_ANISO_BIAS(max_aniso_ratio) |
4747 S_008F30_DISABLE_CUBE_WRAP(0) |
4748 S_008F30_COMPAT_MODE(is_vi) |
4749 S_008F30_FILTER_MODE(filter_mode));
4750 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
4751 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
4752 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
4753 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
4754 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
4755 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
4756 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
4757 S_008F38_MIP_POINT_PRECLAMP(0) |
4758 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
4759 S_008F38_FILTER_PREC_FIX(1) |
4760 S_008F38_ANISO_OVERRIDE(is_vi));
4761 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
4762 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
4763 }
4764
4765 VkResult radv_CreateSampler(
4766 VkDevice _device,
4767 const VkSamplerCreateInfo* pCreateInfo,
4768 const VkAllocationCallbacks* pAllocator,
4769 VkSampler* pSampler)
4770 {
4771 RADV_FROM_HANDLE(radv_device, device, _device);
4772 struct radv_sampler *sampler;
4773
4774 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
4775
4776 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
4777 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4778 if (!sampler)
4779 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4780
4781 radv_init_sampler(device, sampler, pCreateInfo);
4782 *pSampler = radv_sampler_to_handle(sampler);
4783
4784 return VK_SUCCESS;
4785 }
4786
4787 void radv_DestroySampler(
4788 VkDevice _device,
4789 VkSampler _sampler,
4790 const VkAllocationCallbacks* pAllocator)
4791 {
4792 RADV_FROM_HANDLE(radv_device, device, _device);
4793 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
4794
4795 if (!sampler)
4796 return;
4797 vk_free2(&device->alloc, pAllocator, sampler);
4798 }
4799
4800 /* vk_icd.h does not declare this function, so we declare it here to
4801 * suppress -Wmissing-prototypes.
4802 */
4803 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4804 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
4805
4806 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4807 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
4808 {
4809 /* For the full details on loader interface versioning, see
4810 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4811 * What follows is a condensed summary, to help you navigate the large and
4812 * confusing official doc.
4813 *
4814 * - Loader interface v0 is incompatible with later versions. We don't
4815 * support it.
4816 *
4817 * - In loader interface v1:
4818 * - The first ICD entrypoint called by the loader is
4819 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4820 * entrypoint.
4821 * - The ICD must statically expose no other Vulkan symbol unless it is
4822 * linked with -Bsymbolic.
4823 * - Each dispatchable Vulkan handle created by the ICD must be
4824 * a pointer to a struct whose first member is VK_LOADER_DATA. The
4825 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4826 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4827 * vkDestroySurfaceKHR(). The ICD must be capable of working with
4828 * such loader-managed surfaces.
4829 *
4830 * - Loader interface v2 differs from v1 in:
4831 * - The first ICD entrypoint called by the loader is
4832 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4833 * statically expose this entrypoint.
4834 *
4835 * - Loader interface v3 differs from v2 in:
4836 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
4837 * vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
4838 * because the loader no longer does so.
4839 */
4840 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
4841 return VK_SUCCESS;
4842 }
4843
VkResult radv_GetMemoryFdKHR(VkDevice _device,
                             const VkMemoryGetFdInfoKHR *pGetFdInfo,
                             int *pFD)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

        assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

        /* At the moment, we support only the below handle types. */
        assert(pGetFdInfo->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
               pGetFdInfo->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

        bool ret = radv_get_memory_fd(device, memory, pFD);
        if (!ret)
                return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
        return VK_SUCCESS;
}

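/* Illustrative app-side sketch (hypothetical helper, not driver code):
 * exporting device memory as an opaque fd via the entrypoint above. Apps
 * fetch the extension entrypoint with vkGetDeviceProcAddr; the returned fd
 * is owned by the caller. */
static VkResult
example_export_memory_fd(VkDevice dev, VkDeviceMemory mem, int *fd)
{
        const VkMemoryGetFdInfoKHR info = {
                .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
                .memory = mem,
                .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
        };
        PFN_vkGetMemoryFdKHR get_fd = (PFN_vkGetMemoryFdKHR)
                vkGetDeviceProcAddr(dev, "vkGetMemoryFdKHR");
        return get_fd(dev, &info, fd);
}
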
VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
                                       VkExternalMemoryHandleTypeFlagBits handleType,
                                       int fd,
                                       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        switch (handleType) {
        case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
                pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
                return VK_SUCCESS;

        default:
                /* The valid usage section for this function says:
                 *
                 *    "handleType must not be one of the handle types defined as
                 *    opaque."
                 *
                 * So opaque handle types fall into the default "unsupported" case.
                 */
                return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
        }
}

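/* Illustrative app-side sketch (hypothetical helper, not driver code):
 * before importing a dma-buf, an application queries which memory types can
 * back it and must then allocate from a type in the returned mask. */
static VkResult
example_query_dmabuf_props(VkDevice dev, int dmabuf_fd, uint32_t *memory_type_bits)
{
        VkMemoryFdPropertiesKHR props = {
                .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
        };
        PFN_vkGetMemoryFdPropertiesKHR get_props = (PFN_vkGetMemoryFdPropertiesKHR)
                vkGetDeviceProcAddr(dev, "vkGetMemoryFdPropertiesKHR");
        VkResult result = get_props(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
                                    dmabuf_fd, &props);
        if (result == VK_SUCCESS)
                *memory_type_bits = props.memoryTypeBits;
        return result;
}
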
static VkResult radv_import_opaque_fd(struct radv_device *device,
                                      int fd,
                                      uint32_t *syncobj)
{
        uint32_t syncobj_handle = 0;
        int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
        if (ret != 0)
                return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);

        /* The import replaces any previous payload in the destination slot. */
        if (*syncobj)
                device->ws->destroy_syncobj(device->ws, *syncobj);

        *syncobj = syncobj_handle;
        /* Ownership of the fd transfers to the driver on success. */
        close(fd);

        return VK_SUCCESS;
}

static VkResult radv_import_sync_fd(struct radv_device *device,
                                    int fd,
                                    uint32_t *syncobj)
{
        /* If we need a new syncobj, create it into a local handle first, so
         * that a failure below cannot leave the fence/semaphore pointing at
         * a syncobj in an undetermined state. */
        uint32_t syncobj_handle = *syncobj;
        if (!syncobj_handle) {
                int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
                if (ret) {
                        return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
                }
        }

        if (fd == -1) {
                /* Per the sync-fd import semantics, fd == -1 means "already
                 * signaled". */
                device->ws->signal_syncobj(device->ws, syncobj_handle);
        } else {
                int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
                if (ret != 0) {
                        /* Don't leak a syncobj we just created. */
                        if (!*syncobj)
                                device->ws->destroy_syncobj(device->ws, syncobj_handle);
                        return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
                }
        }

        *syncobj = syncobj_handle;
        if (fd != -1)
                close(fd);

        return VK_SUCCESS;
}

VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
                                   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
        uint32_t *syncobj_dst = NULL;

        if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
                syncobj_dst = &sem->temp_syncobj;
        } else {
                syncobj_dst = &sem->syncobj;
        }

        switch(pImportSemaphoreFdInfo->handleType) {
        case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
                return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
        case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
                return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
        default:
                unreachable("Unhandled semaphore handle type");
        }
}

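/* Illustrative app-side sketch (hypothetical helper, not driver code):
 * importing a sync fd into a semaphore. Per the spec, sync-fd imports must
 * be temporary, and ownership of the fd transfers on success. */
static VkResult
example_import_semaphore_sync_fd(VkDevice dev, VkSemaphore sem, int sync_fd)
{
        const VkImportSemaphoreFdInfoKHR info = {
                .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
                .semaphore = sem,
                .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
                .fd = sync_fd, /* -1 would mean "already signaled" */
        };
        PFN_vkImportSemaphoreFdKHR import_fd = (PFN_vkImportSemaphoreFdKHR)
                vkGetDeviceProcAddr(dev, "vkImportSemaphoreFdKHR");
        return import_fd(dev, &info);
}
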
VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
                                const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
                                int *pFd)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
        int ret;
        uint32_t syncobj_handle;

        if (sem->temp_syncobj)
                syncobj_handle = sem->temp_syncobj;
        else
                syncobj_handle = sem->syncobj;

        switch(pGetFdInfo->handleType) {
        case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
                ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
                break;
        case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
                ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
                if (!ret) {
                        /* Exporting a sync fd consumes the payload: drop a
                         * temporary payload, or reset the permanent one. Note
                         * that temp_syncobj is a syncobj handle, not an fd,
                         * so it must be destroyed rather than close()d. */
                        if (sem->temp_syncobj) {
                                device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
                                sem->temp_syncobj = 0;
                        } else {
                                device->ws->reset_syncobj(device->ws, syncobj_handle);
                        }
                }
                break;
        default:
                unreachable("Unhandled semaphore handle type");
        }

        if (ret)
                return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
        return VK_SUCCESS;
}

void radv_GetPhysicalDeviceExternalSemaphoreProperties(
        VkPhysicalDevice physicalDevice,
        const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
        VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

        /* Gate on has_syncobj_wait_for_submit: the syncobj signal ioctl we
         * need was introduced in the kernel at virtually the same time. */
        if (pdevice->rad_info.has_syncobj_wait_for_submit &&
            (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
             pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes =
                        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
                        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
                pExternalSemaphoreProperties->compatibleHandleTypes =
                        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
                        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
                pExternalSemaphoreProperties->externalSemaphoreFeatures =
                        VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
                        VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
        } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes =
                        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
                pExternalSemaphoreProperties->compatibleHandleTypes =
                        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
                pExternalSemaphoreProperties->externalSemaphoreFeatures =
                        VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
                        VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
        } else {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
                pExternalSemaphoreProperties->compatibleHandleTypes = 0;
                pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
        }
}

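/* Illustrative app-side sketch (hypothetical helper, not driver code):
 * checking whether a handle type is usable before relying on export or
 * import. vkGetPhysicalDeviceExternalSemaphoreProperties is core in 1.1. */
static bool
example_semaphore_handle_supported(VkPhysicalDevice pdev,
                                   VkExternalSemaphoreHandleTypeFlagBits type)
{
        const VkPhysicalDeviceExternalSemaphoreInfo info = {
                .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
                .handleType = type,
        };
        VkExternalSemaphoreProperties props = {
                .sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES,
        };
        vkGetPhysicalDeviceExternalSemaphoreProperties(pdev, &info, &props);
        return (props.externalSemaphoreFeatures &
                (VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
                 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT)) != 0;
}
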
VkResult radv_ImportFenceFdKHR(VkDevice _device,
                               const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
        uint32_t *syncobj_dst = NULL;

        if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
                syncobj_dst = &fence->temp_syncobj;
        } else {
                syncobj_dst = &fence->syncobj;
        }

        switch(pImportFenceFdInfo->handleType) {
        case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
                return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
        case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
                return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
        default:
                unreachable("Unhandled fence handle type");
        }
}

VkResult radv_GetFenceFdKHR(VkDevice _device,
                            const VkFenceGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
        int ret;
        uint32_t syncobj_handle;

        if (fence->temp_syncobj)
                syncobj_handle = fence->temp_syncobj;
        else
                syncobj_handle = fence->syncobj;

        switch(pGetFdInfo->handleType) {
        case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
                ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
                break;
        case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
                ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
                if (!ret) {
                        /* As with semaphores: drop a temporary payload (it is
                         * a syncobj handle, so destroy it rather than
                         * close()ing it), or reset the permanent one. */
                        if (fence->temp_syncobj) {
                                device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
                                fence->temp_syncobj = 0;
                        } else {
                                device->ws->reset_syncobj(device->ws, syncobj_handle);
                        }
                }
                break;
        default:
                unreachable("Unhandled fence handle type");
        }

        if (ret)
                return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
        return VK_SUCCESS;
}

void radv_GetPhysicalDeviceExternalFenceProperties(
        VkPhysicalDevice physicalDevice,
        const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
        VkExternalFenceProperties *pExternalFenceProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

        if (pdevice->rad_info.has_syncobj_wait_for_submit &&
            (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
             pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
                pExternalFenceProperties->exportFromImportedHandleTypes =
                        VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
                        VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
                pExternalFenceProperties->compatibleHandleTypes =
                        VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
                        VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
                pExternalFenceProperties->externalFenceFeatures =
                        VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
                        VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
        } else {
                pExternalFenceProperties->exportFromImportedHandleTypes = 0;
                pExternalFenceProperties->compatibleHandleTypes = 0;
                pExternalFenceProperties->externalFenceFeatures = 0;
        }
}

VkResult
radv_CreateDebugReportCallbackEXT(VkInstance _instance,
                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
                                  const VkAllocationCallbacks* pAllocator,
                                  VkDebugReportCallbackEXT* pCallback)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        return vk_create_debug_report_callback(&instance->debug_report_callbacks,
                                               pCreateInfo, pAllocator, &instance->alloc,
                                               pCallback);
}

void
radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
                                   VkDebugReportCallbackEXT _callback,
                                   const VkAllocationCallbacks* pAllocator)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
                                         _callback, pAllocator, &instance->alloc);
}

void
radv_DebugReportMessageEXT(VkInstance _instance,
                           VkDebugReportFlagsEXT flags,
                           VkDebugReportObjectTypeEXT objectType,
                           uint64_t object,
                           size_t location,
                           int32_t messageCode,
                           const char* pLayerPrefix,
                           const char* pMessage)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
                        object, location, messageCode, pLayerPrefix, pMessage);
}

void
radv_GetDeviceGroupPeerMemoryFeatures(
        VkDevice device,
        uint32_t heapIndex,
        uint32_t localDeviceIndex,
        uint32_t remoteDeviceIndex,
        VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
{
        /* radv only exposes single-device groups, so the only valid "peer"
         * is the device itself. */
        assert(localDeviceIndex == remoteDeviceIndex);

        *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
                               VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
                               VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
                               VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
        VK_TIME_DOMAIN_DEVICE_EXT,
        VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
        VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
};

VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
        VkPhysicalDevice physicalDevice,
        uint32_t *pTimeDomainCount,
        VkTimeDomainEXT *pTimeDomains)
{
        int d;
        VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);

        for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
                vk_outarray_append(&out, i) {
                        *i = radv_time_domains[d];
                }
        }

        return vk_outarray_status(&out);
}

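/* Illustrative app-side sketch (hypothetical helper, not driver code): the
 * standard Vulkan two-call enumeration pattern against the query above; the
 * first call returns the count, the second fills the array. */
static VkResult
example_enumerate_time_domains(VkInstance inst, VkPhysicalDevice pdev)
{
        PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT get_domains =
                (PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
                vkGetInstanceProcAddr(inst, "vkGetPhysicalDeviceCalibrateableTimeDomainsEXT");
        uint32_t count = 0;
        VkResult result = get_domains(pdev, &count, NULL);
        if (result != VK_SUCCESS)
                return result;

        VkTimeDomainEXT domains[8]; /* radv reports 3; 8 is plenty for a sketch */
        count = MIN2(count, 8u);
        /* Returns VK_INCOMPLETE if the array was too small. */
        return get_domains(pdev, &count, domains);
}
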
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
        struct timespec current;
        int ret;

        ret = clock_gettime(clock_id, &current);
        /* Fall back to CLOCK_MONOTONIC if the kernel does not support
         * CLOCK_MONOTONIC_RAW. */
        if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
                ret = clock_gettime(CLOCK_MONOTONIC, &current);
        if (ret < 0)
                return 0;

        return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult radv_GetCalibratedTimestampsEXT(
        VkDevice _device,
        uint32_t timestampCount,
        const VkCalibratedTimestampInfoEXT *pTimestampInfos,
        uint64_t *pTimestamps,
        uint64_t *pMaxDeviation)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
        int d;
        uint64_t begin, end;
        uint64_t max_clock_period = 0;

        begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);

        for (d = 0; d < timestampCount; d++) {
                switch (pTimestampInfos[d].timeDomain) {
                case VK_TIME_DOMAIN_DEVICE_EXT:
                        pTimestamps[d] = device->ws->query_value(device->ws,
                                                                 RADEON_TIMESTAMP);
                        /* clock_crystal_freq is in kHz, so this is the GPU
                         * clock period in nanoseconds. */
                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
                        max_clock_period = MAX2(max_clock_period, device_period);
                        break;
                case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
                        pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
                        max_clock_period = MAX2(max_clock_period, 1);
                        break;

                case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
                        pTimestamps[d] = begin;
                        break;
                default:
                        pTimestamps[d] = 0;
                        break;
                }
        }

        end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);

        /*
         * The maximum deviation is the sum of the interval over which we
         * perform the sampling and the maximum period of any sampled
         * clock. That's because the maximum skew between any two sampled
         * clock edges is when the sampled clock with the largest period is
         * sampled at the end of that period but right at the beginning of the
         * sampling interval and some other clock is sampled right at the
         * beginning of its sampling period and right at the end of the
         * sampling interval. Let's assume the GPU has the longest clock
         * period and that the application is sampling GPU and monotonic:
         *
         *                             s                 e
         *             w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
         *   Raw       -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
         *
         *                             g
         *               0         1         2         3
         *   GPU         -----_____-----_____-----_____-----_____
         *
         *                               m
         *                 x y z 0 1 2 3 4 5 6 7 8 9 a b c
         *   Monotonic     -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
         *
         *   Interval                  <----------------->
         *   Deviation        <-------------------------->
         *
         *   s = read(raw)       2
         *   g = read(GPU)       1
         *   m = read(monotonic) 2
         *   e = read(raw)       b
         *
         * We round the sample interval up by one tick to cover sampling error
         * in the interval clock.
         */

        uint64_t sample_interval = end - begin + 1;

        *pMaxDeviation = sample_interval + max_clock_period;

        return VK_SUCCESS;
}
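
/* Worked example of the bound computed above (illustrative numbers): with a
 * 100 MHz reference, clock_crystal_freq = 100000 kHz, so device_period =
 * DIV_ROUND_UP(1000000, 100000) = 10 ns. If the two CLOCK_MONOTONIC_RAW
 * reads bracketing the sampling differ by 500 ns, then sample_interval =
 * 500 + 1 = 501 ns and *pMaxDeviation = 501 + 10 = 511 ns.
 *
 * Illustrative app-side sketch (hypothetical helper, not driver code):
 * sampling the GPU and CLOCK_MONOTONIC domains in one calibrated call. */
static VkResult
example_calibrate_gpu_cpu(VkDevice dev, uint64_t *gpu_ts, uint64_t *cpu_ts,
                          uint64_t *max_deviation)
{
        const VkCalibratedTimestampInfoEXT infos[2] = {
                { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
                  .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT },
                { .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
                  .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT },
        };
        uint64_t ts[2];
        PFN_vkGetCalibratedTimestampsEXT get_ts = (PFN_vkGetCalibratedTimestampsEXT)
                vkGetDeviceProcAddr(dev, "vkGetCalibratedTimestampsEXT");
        VkResult result = get_ts(dev, 2, infos, ts, max_deviation);
        if (result == VK_SUCCESS) {
                *gpu_ts = ts[0]; /* GPU ticks (RADEON_TIMESTAMP) */
                *cpu_ts = ts[1]; /* CLOCK_MONOTONIC nanoseconds */
        }
        return result;
}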