ac: add has_distributed_tess to ac_gpu_info
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "compiler/glsl_types.h"
#include "util/xmlpool.h"

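/* Derive the pipeline/disk cache UUID from the build ids of the driver and
 * of LLVM, plus the GPU family and the pointer size, so that cached shaders
 * are invalidated whenever the driver or the compiler changes.
 */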
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	unsigned ptr_size = sizeof(void*);

	memset(uuid, 0, VK_UUID_SIZE);
	_mesa_sha1_init(&ctx);

	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
		return -1;

	_mesa_sha1_update(&ctx, &family, sizeof(family));
	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
	_mesa_sha1_final(&ctx, sha1);

	memcpy(uuid, sha1, VK_UUID_SIZE);
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
	case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
	case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
	case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
	case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
	return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
	return device->rad_info.vram_size - radv_get_visible_vram_size(device);
}

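/* Advertise up to three heaps (CPU-invisible VRAM, CPU-visible VRAM, GTT)
 * and the memory types that map onto them. Type order matters: per the
 * Vulkan spec, of two types with identical property flags the one with
 * greater performance must get the lower index.
 */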
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0 && device->rad_info.has_dedicated_vram) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0 && !device->rad_info.has_dedicated_vram) {
		/* Put GTT after visible VRAM for GPUs without dedicated VRAM
		 * as they have identical property flags, and according to the
		 * spec, for types with identical flags, the one with greater
		 * performance must be given a lower index. */
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
					 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
					 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
					 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
					 (device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

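/* RADV_FORCE_FAMILY overrides the reported GPU family (matched against the
 * LLVM processor names) and the derived chip_class; this is primarily useful
 * for testing shader compilation for other GPU generations, not for actually
 * running on them.
 */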
static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
	const char *family = getenv("RADV_FORCE_FAMILY");
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			device->rad_info.family = i;

			if (i >= CHIP_NAVI10)
				device->rad_info.chip_class = GFX10;
			else if (i >= CHIP_VEGA10)
				device->rad_info.chip_class = GFX9;
			else if (i >= CHIP_TONGA)
				device->rad_info.chip_class = GFX8;
			else if (i >= CHIP_BONAIRE)
				device->rad_info.chip_class = GFX7;
			else
				device->rad_info.chip_class = GFX6;

			return;
		}
	}

	fprintf(stderr, "radv: Unknown family: %s\n", family);
	exit(1);
}

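/* One-time setup of a physical device: open the render node, check that it
 * is driven by amdgpu, create the winsys, query the GPU info, and derive the
 * UUIDs, the shader disk cache and the per-generation feature/workaround
 * flags below.
 */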
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;
	int master_fd = -1;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0) {
		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not open device '%s'", path);

		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
	}

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Could not get the kernel driver version for device '%s'", path);

		return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found compatible device '%s'.", path);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		goto fail;
	}

	if (instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_handle_env_var_force_family(device);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The gpu id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	if (device->rad_info.chip_class < GFX8 ||
	    device->rad_info.chip_class > GFX9)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
					 device->rad_info.family == CHIP_VEGA12 ||
					 device->rad_info.family == CHIP_RAVEN ||
					 device->rad_info.family == CHIP_RAVEN2 ||
					 device->rad_info.family == CHIP_RENOIR;
	}

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= GFX8;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	device->has_tc_compat_zrange_bug = device->rad_info.chip_class < GFX10;

	/* Out-of-order primitive rasterization. */
	device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 &&
					device->rad_info.max_se >= 2;
	device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	/* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
	device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 ||
				       (device->rad_info.chip_class >= GFX8 &&
					device->rad_info.me_fw_feature >= 41);

	device->has_dcc_constant_encode = device->rad_info.family == CHIP_RAVEN2 ||
					  device->rad_info.family == CHIP_RENOIR ||
					  device->rad_info.chip_class >= GFX10;

	device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
				    device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;

	/* Determine the number of threads per wave for all stages. */
	device->cs_wave_size = 64;
	device->ps_wave_size = 64;
	device->ge_wave_size = 64;

	if (device->rad_info.chip_class >= GFX10) {
		if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
			device->cs_wave_size = 32;

		/* For pixel shaders, wave64 is recommended. */
		if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
			device->ps_wave_size = 32;

		if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
			device->ge_wave_size = 32;
	}

	radv_physical_device_init_mem_types(device);
	radv_fill_device_extension_table(device, &device->supported_extensions);

	device->bus_info = *drm_device->businfo.pci;

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	/* The WSI is structured as a layer on top of the driver, so this has
	 * to be the last part of initialization (at least until we get other
	 * semi-layers).
	 */
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		vk_error(instance, result);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
	{"nongg", RADV_DEBUG_NO_NGG},
	{"noshaderballot", RADV_DEBUG_NO_SHADER_BALLOT},
	{"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
	{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
	{"cswave32", RADV_PERFTEST_CS_WAVE_32},
	{"pswave32", RADV_PERFTEST_PS_WAVE_32},
	{"gewave32", RADV_PERFTEST_GE_WAVE_32},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

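/* Per-application workarounds and tunings, keyed on the application name
 * the app reports through VkApplicationInfo.
 */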
static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SISCHED)) {
			/* Force enable LLVM sisched for Talos because it looks
			 * safe and it gives a few more FPS.
			 */
			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
		}
	} else if (!strcmp(name, "DOOM_VFR")) {
		/* Work around a Doom VFR game bug */
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
	} else if (!strcmp(name, "MonsterHunterWorld.exe")) {
		/* Workaround for a WaW hazard when LLVM moves/merges
		 * load/store memory operations.
		 * See https://reviews.llvm.org/D61313
		 */
		if (HAVE_LLVM < 0x900)
			instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT;
	} else if (!strcmp(name, "Wolfenstein: Youngblood")) {
		if (!(instance->debug_flags & RADV_DEBUG_NO_SHADER_BALLOT)) {
			/* Force enable VK_AMD_shader_ballot because it looks
			 * safe and it gives a nice boost (+20% on Vega 56 at
			 * this time).
			 */
			instance->perftest_flags |= RADV_PERFTEST_SHADER_BALLOT;
		}
	}
}

static int radv_get_instance_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

static const char radv_dri_options_xml[] =
DRI_CONF_BEGIN
	DRI_CONF_SECTION_QUALITY
		DRI_CONF_ADAPTIVE_SYNC("true")
	DRI_CONF_SECTION_END
DRI_CONF_END;

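/* Parse driconf (drirc) options for the "radv" driver; only the
 * adaptive-sync quality option declared above is currently exposed.
 */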
static void radv_init_dri_options(struct radv_instance *instance)
{
	driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml);
	driParseConfigFiles(&instance->dri_options,
			    &instance->available_dri_options,
			    0, "radv", NULL);
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_API_VERSION_1_0;
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Created an instance");

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(instance, result);
	}

	_mesa_locale_init();
	glsl_type_singleton_init_or_ref();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	radv_init_dri_options(instance);
	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	glsl_type_singleton_decref();
	_mesa_locale_fini();

	driDestroyOptionCache(&instance->dri_options);
	driDestroyOptionInfo(&instance->available_dri_options);

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

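/* Scan up to 8 DRM devices and initialize a physical device for every AMD
 * PCI render node; nodes that report VK_ERROR_INCOMPATIBLE_DRIVER are
 * skipped instead of failing enumeration.
 */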
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

VkResult radv_EnumeratePhysicalDeviceGroups(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceGroupCount,
	VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDeviceGroupProperties) {
		*pPhysicalDeviceGroupCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceGroupCount = MIN2(*pPhysicalDeviceGroupCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceGroupCount; ++i) {
			pPhysicalDeviceGroupProperties[i].physicalDeviceCount = 1;
			pPhysicalDeviceGroupProperties[i].physicalDevices[0] = radv_physical_device_to_handle(instance->physicalDevices + i);
			pPhysicalDeviceGroupProperties[i].subsetAllocation = false;
		}
	}
	return *pPhysicalDeviceGroupCount < instance->physicalDeviceCount ? VK_INCOMPLETE
									   : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = radv_device_supports_etc(pdevice),
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = pdevice->rad_info.chip_class >= GFX8,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

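/* Vulkan 1.1 entry point: walk the pNext chain and fill in every extension
 * feature struct we recognize, then fill the core features; structs we do
 * not know are left untouched, as the spec requires them to be ignored.
 */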
void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2 *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
			VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
			VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
			VkPhysicalDeviceShaderDrawParametersFeatures *features =
				(VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
			features->shaderDrawParameters = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
				(VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			features->protectedMemory = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
				(VkPhysicalDevice16BitStorageFeatures*)ext;
			bool enabled = pdevice->rad_info.chip_class >= GFX8;
			features->storageBuffer16BitAccess = enabled;
			features->uniformAndStorageBuffer16BitAccess = enabled;
			features->storagePushConstant16 = enabled;
			features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
				(VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			features->samplerYcbcrConversion = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
			VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
				(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)ext;
			features->shaderInputAttachmentArrayDynamicIndexing = true;
			features->shaderUniformTexelBufferArrayDynamicIndexing = true;
			features->shaderStorageTexelBufferArrayDynamicIndexing = true;
			features->shaderUniformBufferArrayNonUniformIndexing = true;
			features->shaderSampledImageArrayNonUniformIndexing = true;
			features->shaderStorageBufferArrayNonUniformIndexing = true;
			features->shaderStorageImageArrayNonUniformIndexing = true;
			features->shaderInputAttachmentArrayNonUniformIndexing = true;
			features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
			features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
			features->descriptorBindingUniformBufferUpdateAfterBind = true;
			features->descriptorBindingSampledImageUpdateAfterBind = true;
			features->descriptorBindingStorageImageUpdateAfterBind = true;
			features->descriptorBindingStorageBufferUpdateAfterBind = true;
			features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
			features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
			features->descriptorBindingUpdateUnusedWhilePending = true;
			features->descriptorBindingPartiallyBound = true;
			features->descriptorBindingVariableDescriptorCount = true;
			features->runtimeDescriptorArray = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = VK_TRUE;
			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
			features->transformFeedback = true;
			features->geometryStreams = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
			VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
				(VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
			features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
			features->memoryPriority = VK_TRUE;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
				(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
			features->bufferDeviceAddress = true;
			features->bufferDeviceAddressCaptureReplay = false;
			features->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
			features->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
			VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
				(VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
			features->hostQueryReset = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
			VkPhysicalDevice8BitStorageFeaturesKHR *features =
				(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
			bool enabled = pdevice->rad_info.chip_class >= GFX8;
			features->storageBuffer8BitAccess = enabled;
			features->uniformAndStorageBuffer8BitAccess = enabled;
			features->storagePushConstant8 = enabled;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
			VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
				(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
			features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8;
			features->shaderInt8 = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
			VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features =
				(VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *)ext;
			features->shaderBufferInt64Atomics = HAVE_LLVM >= 0x0900;
			features->shaderSharedInt64Atomics = HAVE_LLVM >= 0x0900;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
				(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

			features->inlineUniformBlock = true;
			features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
				(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
			features->computeDerivativeGroupQuads = false;
			features->computeDerivativeGroupLinear = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
				(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
			features->ycbcrImageArrays = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
			VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features =
				(VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext;
			features->uniformBufferStandardLayout = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
			VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
				(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
			features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR: {
			VkPhysicalDeviceImagelessFramebufferFeaturesKHR *features =
				(VkPhysicalDeviceImagelessFramebufferFeaturesKHR *)ext;
			features->imagelessFramebuffer = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
			VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
				(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
			features->pipelineExecutableInfo = true;
			break;
		}
		default:
			break;
		}
	}
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int, so the sum of all limits scaled by descriptor size must be
	 * at most 2 GiB. A combined image & sampler object counts as one of both.
	 * This limit is for the pipeline layout, not for the set layout, but
	 * there is no set limit, so we just set a pipeline limit. No app is
	 * likely to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
		(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
		 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
		 32 /* sampler, largest when combined with image */ +
		 64 /* sampled image */ +
		 64 /* storage image */);
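	/* With the sizes above the divisor is 32 + 32 + 32 + 64 + 64 = 224
	 * bytes, so ignoring the small dynamic-buffer reservation this comes
	 * out to roughly 2 GiB / 224 B, i.e. about 9.6 million descriptors
	 * per stage.
	 */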

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
		.maxVertexInputBindings = MAX_VBS,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 8,
		.subTexelPrecisionBits = 8,
		.mipmapPrecisionBits = 8,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 8,
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 2,
		.pointSizeRange = { 0.0, 8192.0 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

1199 void radv_GetPhysicalDeviceProperties2(
1200 VkPhysicalDevice physicalDevice,
1201 VkPhysicalDeviceProperties2 *pProperties)
1202 {
1203 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1204 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
1205
1206 vk_foreach_struct(ext, pProperties->pNext) {
1207 switch (ext->sType) {
1208 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
1209 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
1210 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
1211 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1212 break;
1213 }
1214 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
1215 VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
1216 memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1217 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1218 properties->deviceLUIDValid = false;
1219 break;
1220 }
1221 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
1222 VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
1223 properties->maxMultiviewViewCount = MAX_VIEWS;
1224 properties->maxMultiviewInstanceIndex = INT_MAX;
1225 break;
1226 }
1227 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
1228 VkPhysicalDevicePointClippingProperties *properties =
1229 (VkPhysicalDevicePointClippingProperties*)ext;
1230 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1231 break;
1232 }
1233 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
1234 VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
1235 (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
1236 properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1237 break;
1238 }
1239 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
1240 VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
1241 (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
1242 properties->minImportedHostPointerAlignment = 4096;
1243 break;
1244 }
1245 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
1246 VkPhysicalDeviceSubgroupProperties *properties =
1247 (VkPhysicalDeviceSubgroupProperties*)ext;
1248 properties->subgroupSize = 64;
1249 properties->supportedStages = VK_SHADER_STAGE_ALL;
1250 properties->supportedOperations =
1251 VK_SUBGROUP_FEATURE_BASIC_BIT |
1252 VK_SUBGROUP_FEATURE_BALLOT_BIT |
1253 VK_SUBGROUP_FEATURE_QUAD_BIT |
1254 VK_SUBGROUP_FEATURE_VOTE_BIT;
1255 if (pdevice->rad_info.chip_class >= GFX8) {
1256 properties->supportedOperations |=
1257 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
1258 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1259 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1260 }
1261 properties->quadOperationsInAllStages = true;
1262 break;
1263 }
1264 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
1265 VkPhysicalDeviceMaintenance3Properties *properties =
1266 (VkPhysicalDeviceMaintenance3Properties*)ext;
1267 /* Make sure everything is addressable by a signed 32-bit int, and
1268 * our largest descriptors are 96 bytes. */
1269 properties->maxPerSetDescriptors = (1ull << 31) / 96;
1270 /* Our buffer size fields allow only this much */
1271 properties->maxMemoryAllocationSize = 0xFFFFFFFFull;
1272 break;
1273 }
1274 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
1275 VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
1276 (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
1277 /* GFX6-8 only support single channel min/max filter. */
1278 properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1279 properties->filterMinmaxSingleComponentFormats = true;
1280 break;
1281 }
1282 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
1283 VkPhysicalDeviceShaderCorePropertiesAMD *properties =
1284 (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
1285
1286 /* Shader engines. */
1287 properties->shaderEngineCount =
1288 pdevice->rad_info.max_se;
1289 properties->shaderArraysPerEngineCount =
1290 pdevice->rad_info.max_sh_per_se;
1291 properties->computeUnitsPerShaderArray =
1292 pdevice->rad_info.num_good_cu_per_sh;
1293 properties->simdPerComputeUnit = 4;
1294 properties->wavefrontsPerSimd =
1295 pdevice->rad_info.family == CHIP_TONGA ||
1296 pdevice->rad_info.family == CHIP_ICELAND ||
1297 pdevice->rad_info.family == CHIP_POLARIS10 ||
1298 pdevice->rad_info.family == CHIP_POLARIS11 ||
1299 pdevice->rad_info.family == CHIP_POLARIS12 ||
1300 pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
1301 properties->wavefrontSize = 64;
1302
1303 /* SGPR. */
1304 properties->sgprsPerSimd =
1305 ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
1306 properties->minSgprAllocation =
1307 pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
1308 properties->maxSgprAllocation =
1309 pdevice->rad_info.family == CHIP_TONGA ||
1310 pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
1311 properties->sgprAllocationGranularity =
1312 pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
1313
1314 /* VGPR. */
1315 properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
1316 properties->minVgprAllocation = 4;
1317 properties->maxVgprAllocation = 256;
1318 properties->vgprAllocationGranularity = 4;
1319 break;
1320 }
1321 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
1322 VkPhysicalDeviceShaderCoreProperties2AMD *properties =
1323 (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
1324
1325 properties->shaderCoreFeatures = 0;
1326 properties->activeComputeUnitCount =
1327 pdevice->rad_info.num_good_compute_units;
1328 break;
1329 }
1330 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
1331 VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
1332 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
1333 properties->maxVertexAttribDivisor = UINT32_MAX;
1334 break;
1335 }
1336 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
1337 VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
1338 (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
1339 properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1340 properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
1341 properties->shaderSampledImageArrayNonUniformIndexingNative = false;
1342 properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
1343 properties->shaderStorageImageArrayNonUniformIndexingNative = false;
1344 properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1345 properties->robustBufferAccessUpdateAfterBind = false;
1346 properties->quadDivergentImplicitLod = false;
1347
1348 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1349 MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1350 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1351 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1352 32 /* sampler, largest when combined with image */ +
1353 64 /* sampled image */ +
1354 64 /* storage image */);
1355 properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1356 properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1357 properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1358 properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1359 properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1360 properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1361 properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1362 properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1363 properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1364 properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1365 properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1366 properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1367 properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1368 properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1369 properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1370 break;
1371 }
1372 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
1373 VkPhysicalDeviceProtectedMemoryProperties *properties =
1374 (VkPhysicalDeviceProtectedMemoryProperties *)ext;
1375 properties->protectedNoFault = false;
1376 break;
1377 }
1378 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
1379 VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
1380 (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
1381 properties->primitiveOverestimationSize = 0;
1382 properties->maxExtraPrimitiveOverestimationSize = 0;
1383 properties->extraPrimitiveOverestimationSizeGranularity = 0;
1384 properties->primitiveUnderestimation = VK_FALSE;
1385 properties->conservativePointAndLineRasterization = VK_FALSE;
1386 properties->degenerateTrianglesRasterized = VK_FALSE;
1387 properties->degenerateLinesRasterized = VK_FALSE;
1388 properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
1389 properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
1390 break;
1391 }
1392 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
1393 VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
1394 (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
1395 properties->pciDomain = pdevice->bus_info.domain;
1396 properties->pciBus = pdevice->bus_info.bus;
1397 properties->pciDevice = pdevice->bus_info.dev;
1398 properties->pciFunction = pdevice->bus_info.func;
1399 break;
1400 }
1401 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
1402 VkPhysicalDeviceDriverPropertiesKHR *driver_props =
1403 (VkPhysicalDeviceDriverPropertiesKHR *) ext;
1404
1405 driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
1406 snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv");
1407 snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
1408 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
1409 " (LLVM " MESA_LLVM_VERSION_STRING ")");
1410
1411 driver_props->conformanceVersion = (VkConformanceVersionKHR) {
1412 .major = 1,
1413 .minor = 1,
1414 .subminor = 2,
1415 .patch = 0,
1416 };
1417 break;
1418 }
1419 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
1420 VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
1421 (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
1422 properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
1423 properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
1424 properties->maxTransformFeedbackBufferSize = UINT32_MAX;
1425 properties->maxTransformFeedbackStreamDataSize = 512;
1426 properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
1427 properties->maxTransformFeedbackBufferDataStride = 512;
1428 properties->transformFeedbackQueries = true;
1429 properties->transformFeedbackStreamsLinesTriangles = true;
1430 properties->transformFeedbackRasterizationStreamSelect = false;
1431 properties->transformFeedbackDraw = true;
1432 break;
1433 }
1434 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
1435 VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
1436 (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
1437
1438 props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1439 props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1440 props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1441 props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1442 props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1443 break;
1444 }
1445 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
1446 VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
1447 (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
1448 properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
1449 VK_SAMPLE_COUNT_4_BIT |
1450 VK_SAMPLE_COUNT_8_BIT;
1451 		properties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
1452 properties->sampleLocationCoordinateRange[0] = 0.0f;
1453 properties->sampleLocationCoordinateRange[1] = 0.9375f;
1454 properties->sampleLocationSubPixelBits = 4;
1455 properties->variableSampleLocations = VK_FALSE;
1456 break;
1457 }
1458 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
1459 VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
1460 (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
1461
1462 /* We support all of the depth resolve modes */
1463 properties->supportedDepthResolveModes =
1464 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1465 VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
1466 VK_RESOLVE_MODE_MIN_BIT_KHR |
1467 VK_RESOLVE_MODE_MAX_BIT_KHR;
1468
1469 /* Average doesn't make sense for stencil so we don't support that */
1470 properties->supportedStencilResolveModes =
1471 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1472 VK_RESOLVE_MODE_MIN_BIT_KHR |
1473 VK_RESOLVE_MODE_MAX_BIT_KHR;
1474
1475 properties->independentResolveNone = VK_TRUE;
1476 properties->independentResolve = VK_TRUE;
1477 break;
1478 }
1479 default:
1480 break;
1481 }
1482 }
1483 }
1484
1485 static void radv_get_physical_device_queue_family_properties(
1486 struct radv_physical_device* pdevice,
1487 uint32_t* pCount,
1488 VkQueueFamilyProperties** pQueueFamilyProperties)
1489 {
1490 int num_queue_families = 1;
1491 int idx;
1492 if (pdevice->rad_info.num_compute_rings > 0 &&
1493 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
1494 num_queue_families++;
1495
1496 if (pQueueFamilyProperties == NULL) {
1497 *pCount = num_queue_families;
1498 return;
1499 }
1500
1501 if (!*pCount)
1502 return;
1503
1504 idx = 0;
1505 if (*pCount >= 1) {
1506 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1507 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1508 VK_QUEUE_COMPUTE_BIT |
1509 VK_QUEUE_TRANSFER_BIT |
1510 VK_QUEUE_SPARSE_BINDING_BIT,
1511 .queueCount = 1,
1512 .timestampValidBits = 64,
1513 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1514 };
1515 idx++;
1516 }
1517
1518 if (pdevice->rad_info.num_compute_rings > 0 &&
1519 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
1520 if (*pCount > idx) {
1521 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
1522 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1523 VK_QUEUE_TRANSFER_BIT |
1524 VK_QUEUE_SPARSE_BINDING_BIT,
1525 .queueCount = pdevice->rad_info.num_compute_rings,
1526 .timestampValidBits = 64,
1527 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
1528 };
1529 idx++;
1530 }
1531 }
1532 *pCount = idx;
1533 }
1534
1535 void radv_GetPhysicalDeviceQueueFamilyProperties(
1536 VkPhysicalDevice physicalDevice,
1537 uint32_t* pCount,
1538 VkQueueFamilyProperties* pQueueFamilyProperties)
1539 {
1540 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1541 if (!pQueueFamilyProperties) {
1542 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1543 return;
1544 }
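	/* Three entries cover the theoretical maximum of queue families
	 * (cf. the assert below); this helper currently reports at most
	 * two of them (graphics and compute). */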
1545 VkQueueFamilyProperties *properties[] = {
1546 pQueueFamilyProperties + 0,
1547 pQueueFamilyProperties + 1,
1548 pQueueFamilyProperties + 2,
1549 };
1550 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1551 assert(*pCount <= 3);
1552 }
1553
1554 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1555 VkPhysicalDevice physicalDevice,
1556 uint32_t* pCount,
1557 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1558 {
1559 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1560 if (!pQueueFamilyProperties) {
1561 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
1562 return;
1563 }
1564 VkQueueFamilyProperties *properties[] = {
1565 &pQueueFamilyProperties[0].queueFamilyProperties,
1566 &pQueueFamilyProperties[1].queueFamilyProperties,
1567 &pQueueFamilyProperties[2].queueFamilyProperties,
1568 };
1569 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
1570 assert(*pCount <= 3);
1571 }
1572
1573 void radv_GetPhysicalDeviceMemoryProperties(
1574 VkPhysicalDevice physicalDevice,
1575 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1576 {
1577 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1578
1579 *pMemoryProperties = physical_device->memory_properties;
1580 }
1581
1582 static void
1583 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
1584 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
1585 {
1586 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
1587 VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
1588 uint64_t visible_vram_size = radv_get_visible_vram_size(device);
1589 uint64_t vram_size = radv_get_vram_size(device);
1590 uint64_t gtt_size = device->rad_info.gart_size;
1591 uint64_t heap_budget, heap_usage;
1592
1593 	/* For all memory heaps, the computation of the budget is as follows:
1594 	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
1595 	 *
1596 	 * The Vulkan spec 1.1.97 says that the budget should include any
1597 	 * currently allocated device memory.
1598 	 *
1599 	 * Note that the application heap usages are not really accurate (e.g.
1600 	 * in the presence of shared buffers).
1601 	 */
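	/* Worked example with hypothetical numbers: an 8 GiB VRAM heap with
	 * 1 GiB allocated system-wide, 256 MiB of it by this application:
	 * budget = 8 GiB - 1 GiB + 256 MiB ~= 7.25 GiB. */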
1602 for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
1603 uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;
1604
1605 switch (device->mem_type_indices[i]) {
1606 case RADV_MEM_TYPE_VRAM:
1607 heap_usage = device->ws->query_value(device->ws,
1608 RADEON_ALLOCATED_VRAM);
1609
1610 heap_budget = vram_size -
1611 device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
1612 heap_usage;
1613
1614 memoryBudget->heapBudget[heap_index] = heap_budget;
1615 memoryBudget->heapUsage[heap_index] = heap_usage;
1616 break;
1617 case RADV_MEM_TYPE_VRAM_CPU_ACCESS:
1618 heap_usage = device->ws->query_value(device->ws,
1619 RADEON_ALLOCATED_VRAM_VIS);
1620
1621 heap_budget = visible_vram_size -
1622 device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
1623 heap_usage;
1624
1625 memoryBudget->heapBudget[heap_index] = heap_budget;
1626 memoryBudget->heapUsage[heap_index] = heap_usage;
1627 break;
1628 case RADV_MEM_TYPE_GTT_WRITE_COMBINE:
1629 heap_usage = device->ws->query_value(device->ws,
1630 RADEON_ALLOCATED_GTT);
1631
1632 heap_budget = gtt_size -
1633 device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
1634 heap_usage;
1635
1636 memoryBudget->heapBudget[heap_index] = heap_budget;
1637 memoryBudget->heapUsage[heap_index] = heap_usage;
1638 break;
1639 default:
1640 break;
1641 }
1642 }
1643
1644 /* The heapBudget and heapUsage values must be zero for array elements
1645 * greater than or equal to
1646 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
1647 */
1648 for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
1649 memoryBudget->heapBudget[i] = 0;
1650 memoryBudget->heapUsage[i] = 0;
1651 }
1652 }
1653
1654 void radv_GetPhysicalDeviceMemoryProperties2(
1655 VkPhysicalDevice physicalDevice,
1656 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1657 {
1658 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1659 &pMemoryProperties->memoryProperties);
1660
1661 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
1662 vk_find_struct(pMemoryProperties->pNext,
1663 PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
1664 if (memory_budget)
1665 radv_get_memory_budget_properties(physicalDevice, memory_budget);
1666 }
1667
1668 VkResult radv_GetMemoryHostPointerPropertiesEXT(
1669 VkDevice _device,
1670 VkExternalMemoryHandleTypeFlagBits handleType,
1671 const void *pHostPointer,
1672 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
1673 {
1674 RADV_FROM_HANDLE(radv_device, device, _device);
1675
1676 switch (handleType)
1677 {
1678 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
1679 const struct radv_physical_device *physical_device = device->physical_device;
1680 uint32_t memoryTypeBits = 0;
1681 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
1682 if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
1683 memoryTypeBits = (1 << i);
1684 break;
1685 }
1686 }
1687 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
1688 return VK_SUCCESS;
1689 }
1690 default:
1691 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1692 }
1693 }
1694
1695 static enum radeon_ctx_priority
1696 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
1697 {
1698 /* Default to MEDIUM when a specific global priority isn't requested */
1699 if (!pObj)
1700 return RADEON_CTX_PRIORITY_MEDIUM;
1701
1702 switch(pObj->globalPriority) {
1703 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
1704 return RADEON_CTX_PRIORITY_REALTIME;
1705 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
1706 return RADEON_CTX_PRIORITY_HIGH;
1707 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
1708 return RADEON_CTX_PRIORITY_MEDIUM;
1709 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
1710 return RADEON_CTX_PRIORITY_LOW;
1711 default:
1712 unreachable("Illegal global priority value");
1713 return RADEON_CTX_PRIORITY_INVALID;
1714 }
1715 }
1716
1717 static int
1718 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
1719 uint32_t queue_family_index, int idx,
1720 VkDeviceQueueCreateFlags flags,
1721 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
1722 {
1723 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1724 queue->device = device;
1725 queue->queue_family_index = queue_family_index;
1726 queue->queue_idx = idx;
1727 queue->priority = radv_get_queue_global_priority(global_priority);
1728 queue->flags = flags;
1729
1730 queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
1731 if (!queue->hw_ctx)
1732 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1733
1734 return VK_SUCCESS;
1735 }
1736
1737 static void
1738 radv_queue_finish(struct radv_queue *queue)
1739 {
1740 if (queue->hw_ctx)
1741 queue->device->ws->ctx_destroy(queue->hw_ctx);
1742
1743 if (queue->initial_full_flush_preamble_cs)
1744 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1745 if (queue->initial_preamble_cs)
1746 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1747 if (queue->continue_preamble_cs)
1748 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1749 if (queue->descriptor_bo)
1750 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1751 if (queue->scratch_bo)
1752 queue->device->ws->buffer_destroy(queue->scratch_bo);
1753 if (queue->esgs_ring_bo)
1754 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1755 if (queue->gsvs_ring_bo)
1756 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1757 if (queue->tess_rings_bo)
1758 queue->device->ws->buffer_destroy(queue->tess_rings_bo);
1759 if (queue->compute_scratch_bo)
1760 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1761 }
1762
1763 static void
1764 radv_bo_list_init(struct radv_bo_list *bo_list)
1765 {
1766 pthread_mutex_init(&bo_list->mutex, NULL);
1767 bo_list->list.count = bo_list->capacity = 0;
1768 bo_list->list.bos = NULL;
1769 }
1770
1771 static void
1772 radv_bo_list_finish(struct radv_bo_list *bo_list)
1773 {
1774 free(bo_list->list.bos);
1775 pthread_mutex_destroy(&bo_list->mutex);
1776 }
1777
1778 static VkResult radv_bo_list_add(struct radv_device *device,
1779 struct radeon_winsys_bo *bo)
1780 {
1781 struct radv_bo_list *bo_list = &device->bo_list;
1782
1783 if (bo->is_local)
1784 return VK_SUCCESS;
1785
1786 if (unlikely(!device->use_global_bo_list))
1787 return VK_SUCCESS;
1788
1789 pthread_mutex_lock(&bo_list->mutex);
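	/* Double the capacity (minimum 4) so that repeated additions stay
	 * amortized O(1). */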
1790 if (bo_list->list.count == bo_list->capacity) {
1791 unsigned capacity = MAX2(4, bo_list->capacity * 2);
1792 void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
1793
1794 if (!data) {
1795 pthread_mutex_unlock(&bo_list->mutex);
1796 return VK_ERROR_OUT_OF_HOST_MEMORY;
1797 }
1798
1799 bo_list->list.bos = (struct radeon_winsys_bo**)data;
1800 bo_list->capacity = capacity;
1801 }
1802
1803 bo_list->list.bos[bo_list->list.count++] = bo;
1804 pthread_mutex_unlock(&bo_list->mutex);
1805 return VK_SUCCESS;
1806 }
1807
1808 static void radv_bo_list_remove(struct radv_device *device,
1809 struct radeon_winsys_bo *bo)
1810 {
1811 struct radv_bo_list *bo_list = &device->bo_list;
1812
1813 if (bo->is_local)
1814 return;
1815
1816 if (unlikely(!device->use_global_bo_list))
1817 return;
1818
1819 pthread_mutex_lock(&bo_list->mutex);
1820 for(unsigned i = 0; i < bo_list->list.count; ++i) {
1821 if (bo_list->list.bos[i] == bo) {
1822 bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
1823 --bo_list->list.count;
1824 break;
1825 }
1826 }
1827 pthread_mutex_unlock(&bo_list->mutex);
1828 }
1829
1830 static void
1831 radv_device_init_gs_info(struct radv_device *device)
1832 {
1833 device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
1834 device->physical_device->rad_info.family);
1835 }
1836
1837 static int radv_get_device_extension_index(const char *name)
1838 {
1839 for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
1840 if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
1841 return i;
1842 }
1843 return -1;
1844 }
1845
1846 static int
1847 radv_get_int_debug_option(const char *name, int default_value)
1848 {
1849 const char *str;
1850 int result;
1851
1852 str = getenv(name);
1853 if (!str) {
1854 result = default_value;
1855 } else {
1856 char *endptr;
1857
1858 result = strtol(str, &endptr, 0);
1859 if (str == endptr) {
1860 			/* No digits found. */
1861 result = default_value;
1862 }
1863 }
1864
1865 return result;
1866 }
1867
1868 VkResult radv_CreateDevice(
1869 VkPhysicalDevice physicalDevice,
1870 const VkDeviceCreateInfo* pCreateInfo,
1871 const VkAllocationCallbacks* pAllocator,
1872 VkDevice* pDevice)
1873 {
1874 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
1875 VkResult result;
1876 struct radv_device *device;
1877
1878 bool keep_shader_info = false;
1879
1880 /* Check enabled features */
1881 if (pCreateInfo->pEnabledFeatures) {
1882 VkPhysicalDeviceFeatures supported_features;
1883 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1884 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1885 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1886 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1887 for (uint32_t i = 0; i < num_features; i++) {
1888 if (enabled_feature[i] && !supported_feature[i])
1889 return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
1890 }
1891 }
1892
1893 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1894 sizeof(*device), 8,
1895 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1896 if (!device)
1897 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1898
1899 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1900 device->instance = physical_device->instance;
1901 device->physical_device = physical_device;
1902
1903 device->ws = physical_device->ws;
1904 if (pAllocator)
1905 device->alloc = *pAllocator;
1906 else
1907 device->alloc = physical_device->instance->alloc;
1908
1909 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1910 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1911 int index = radv_get_device_extension_index(ext_name);
1912 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
1913 vk_free(&device->alloc, device);
1914 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
1915 }
1916
1917 device->enabled_extensions.extensions[index] = true;
1918 }
1919
1920 keep_shader_info = device->enabled_extensions.AMD_shader_info;
1921
1922 	/* With update-after-bind we can't attach BOs to the command buffer
1923 	 * from the descriptor set anymore, so we have to use a global BO list.
1924 	 */
1925 device->use_global_bo_list =
1926 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
1927 device->enabled_extensions.EXT_descriptor_indexing ||
1928 device->enabled_extensions.EXT_buffer_device_address;
1929
1930 device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
1931 pCreateInfo->pEnabledFeatures->robustBufferAccess;
1932
1933 mtx_init(&device->shader_slab_mutex, mtx_plain);
1934 list_inithead(&device->shader_slabs);
1935
1936 radv_bo_list_init(&device->bo_list);
1937
1938 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1939 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1940 uint32_t qfi = queue_create->queueFamilyIndex;
1941 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1942 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1943
1944 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1945
1946 device->queues[qfi] = vk_alloc(&device->alloc,
1947 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1948 if (!device->queues[qfi]) {
1949 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1950 goto fail;
1951 }
1952
1953 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1954
1955 device->queue_count[qfi] = queue_create->queueCount;
1956
1957 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1958 result = radv_queue_init(device, &device->queues[qfi][q],
1959 qfi, q, queue_create->flags,
1960 global_priority);
1961 if (result != VK_SUCCESS)
1962 goto fail;
1963 }
1964 }
1965
1966 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
1967 !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
1968
1969 device->dfsm_allowed = device->pbb_allowed &&
1970 (device->physical_device->rad_info.family == CHIP_RAVEN ||
1971 device->physical_device->rad_info.family == CHIP_RAVEN2 ||
1972 device->physical_device->rad_info.family == CHIP_RENOIR);
1973
1974 #ifdef ANDROID
1975 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
1976 #endif
1977
1978 /* The maximum number of scratch waves. Scratch space isn't divided
1979 * evenly between CUs. The number is only a function of the number of CUs.
1980 * We can decrease the constant to decrease the scratch buffer size.
1981 *
1982 * sctx->scratch_waves must be >= the maximum possible size of
1983 * 1 threadgroup, so that the hw doesn't hang from being unable
1984 * to start any.
1985 *
1986 * The recommended value is 4 per CU at most. Higher numbers don't
1987 * bring much benefit, but they still occupy chip resources (think
1988 * async compute). I've seen ~2% performance difference between 4 and 32.
1989 */
1990 uint32_t max_threads_per_block = 2048;
1991 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1992 max_threads_per_block / 64);
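	/* E.g. a hypothetical 36-CU GPU: MAX2(32 * 36, 2048 / 64) = 1152 waves. */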
1993
1994 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
1995 S_00B800_CS_W32_EN(device->physical_device->cs_wave_size == 32);
1996
1997 if (device->physical_device->rad_info.chip_class >= GFX7) {
1998 /* If the KMD allows it (there is a KMD hw register for it),
1999 * allow launching waves out-of-order.
2000 */
2001 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
2002 }
2003
2004 radv_device_init_gs_info(device);
2005
2006 device->tess_offchip_block_dw_size =
2007 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
2008
2009 if (getenv("RADV_TRACE_FILE")) {
2010 const char *filename = getenv("RADV_TRACE_FILE");
2011
2012 keep_shader_info = true;
2013
2014 if (!radv_init_trace(device))
2015 goto fail;
2016
2017 fprintf(stderr, "*****************************************************************************\n");
2018 fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
2019 fprintf(stderr, "*****************************************************************************\n");
2020
2021 fprintf(stderr, "Trace file will be dumped to %s\n", filename);
2022 radv_dump_enabled_options(device, stderr);
2023 }
2024
2025 device->keep_shader_info = keep_shader_info;
2026
2027 result = radv_device_init_meta(device);
2028 if (result != VK_SUCCESS)
2029 goto fail;
2030
2031 radv_device_init_msaa(device);
2032
2033 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
2034 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
2035 switch (family) {
2036 case RADV_QUEUE_GENERAL:
2037 /* Since amdgpu version 3.6.0, CONTEXT_CONTROL is emitted by the kernel */
2038 if (device->physical_device->rad_info.drm_minor < 6) {
2039 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
2040 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
2041 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
2042 }
2043 break;
2044 case RADV_QUEUE_COMPUTE:
2045 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
2046 radeon_emit(device->empty_cs[family], 0);
2047 break;
2048 }
2049 device->ws->cs_finalize(device->empty_cs[family]);
2050 }
2051
2052 if (device->physical_device->rad_info.chip_class >= GFX7)
2053 cik_create_gfx_config(device);
2054
2055 VkPipelineCacheCreateInfo ci;
2056 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
2057 ci.pNext = NULL;
2058 ci.flags = 0;
2059 ci.pInitialData = NULL;
2060 ci.initialDataSize = 0;
2061 VkPipelineCache pc;
2062 result = radv_CreatePipelineCache(radv_device_to_handle(device),
2063 &ci, NULL, &pc);
2064 if (result != VK_SUCCESS)
2065 goto fail_meta;
2066
2067 device->mem_cache = radv_pipeline_cache_from_handle(pc);
2068
2069 device->force_aniso =
2070 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
2071 if (device->force_aniso >= 0) {
2072 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
2073 1 << util_logbase2(device->force_aniso));
2074 }
2075
2076 *pDevice = radv_device_to_handle(device);
2077 return VK_SUCCESS;
2078
2079 fail_meta:
2080 radv_device_finish_meta(device);
2081 fail:
2082 radv_bo_list_finish(&device->bo_list);
2083
2084 if (device->trace_bo)
2085 device->ws->buffer_destroy(device->trace_bo);
2086
2087 if (device->gfx_init)
2088 device->ws->buffer_destroy(device->gfx_init);
2089
2090 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2091 for (unsigned q = 0; q < device->queue_count[i]; q++)
2092 radv_queue_finish(&device->queues[i][q]);
2093 if (device->queue_count[i])
2094 vk_free(&device->alloc, device->queues[i]);
2095 }
2096
2097 vk_free(&device->alloc, device);
2098 return result;
2099 }
2100
2101 void radv_DestroyDevice(
2102 VkDevice _device,
2103 const VkAllocationCallbacks* pAllocator)
2104 {
2105 RADV_FROM_HANDLE(radv_device, device, _device);
2106
2107 if (!device)
2108 return;
2109
2110 if (device->trace_bo)
2111 device->ws->buffer_destroy(device->trace_bo);
2112
2113 if (device->gfx_init)
2114 device->ws->buffer_destroy(device->gfx_init);
2115
2116 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2117 for (unsigned q = 0; q < device->queue_count[i]; q++)
2118 radv_queue_finish(&device->queues[i][q]);
2119 if (device->queue_count[i])
2120 vk_free(&device->alloc, device->queues[i]);
2121 if (device->empty_cs[i])
2122 device->ws->cs_destroy(device->empty_cs[i]);
2123 }
2124 radv_device_finish_meta(device);
2125
2126 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
2127 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
2128
2129 radv_destroy_shader_slabs(device);
2130
2131 radv_bo_list_finish(&device->bo_list);
2132 vk_free(&device->alloc, device);
2133 }
2134
2135 VkResult radv_EnumerateInstanceLayerProperties(
2136 uint32_t* pPropertyCount,
2137 VkLayerProperties* pProperties)
2138 {
2139 if (pProperties == NULL) {
2140 *pPropertyCount = 0;
2141 return VK_SUCCESS;
2142 }
2143
2144 /* None supported at this time */
2145 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2146 }
2147
2148 VkResult radv_EnumerateDeviceLayerProperties(
2149 VkPhysicalDevice physicalDevice,
2150 uint32_t* pPropertyCount,
2151 VkLayerProperties* pProperties)
2152 {
2153 if (pProperties == NULL) {
2154 *pPropertyCount = 0;
2155 return VK_SUCCESS;
2156 }
2157
2158 /* None supported at this time */
2159 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2160 }
2161
2162 void radv_GetDeviceQueue2(
2163 VkDevice _device,
2164 const VkDeviceQueueInfo2* pQueueInfo,
2165 VkQueue* pQueue)
2166 {
2167 RADV_FROM_HANDLE(radv_device, device, _device);
2168 struct radv_queue *queue;
2169
2170 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
2171 if (pQueueInfo->flags != queue->flags) {
2172 /* From the Vulkan 1.1.70 spec:
2173 *
2174 * "The queue returned by vkGetDeviceQueue2 must have the same
2175 * flags value from this structure as that used at device
2176 * creation time in a VkDeviceQueueCreateInfo instance. If no
2177 * matching flags were specified at device creation time then
2178 * pQueue will return VK_NULL_HANDLE."
2179 */
2180 *pQueue = VK_NULL_HANDLE;
2181 return;
2182 }
2183
2184 *pQueue = radv_queue_to_handle(queue);
2185 }
2186
2187 void radv_GetDeviceQueue(
2188 VkDevice _device,
2189 uint32_t queueFamilyIndex,
2190 uint32_t queueIndex,
2191 VkQueue* pQueue)
2192 {
2193 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
2194 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
2195 .queueFamilyIndex = queueFamilyIndex,
2196 .queueIndex = queueIndex
2197 };
2198
2199 radv_GetDeviceQueue2(_device, &info, pQueue);
2200 }
2201
2202 static void
2203 fill_geom_tess_rings(struct radv_queue *queue,
2204 uint32_t *map,
2205 bool add_sample_positions,
2206 uint32_t esgs_ring_size,
2207 struct radeon_winsys_bo *esgs_ring_bo,
2208 uint32_t gsvs_ring_size,
2209 struct radeon_winsys_bo *gsvs_ring_bo,
2210 uint32_t tess_factor_ring_size,
2211 uint32_t tess_offchip_ring_offset,
2212 uint32_t tess_offchip_ring_size,
2213 struct radeon_winsys_bo *tess_rings_bo)
2214 {
2215 uint32_t *desc = &map[4];
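	/* map[0..1] hold the scratch buffer descriptor and map[2..3] are
	 * padding (see the descriptor BO sizing in radv_get_preamble_cs);
	 * the ring descriptors start at dword 4. */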
2216
2217 if (esgs_ring_bo) {
2218 uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
2219
2220 /* stride 0, num records - size, add tid, swizzle, elsize4,
2221 index stride 64 */
2222 desc[0] = esgs_va;
2223 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
2224 S_008F04_SWIZZLE_ENABLE(true);
2225 desc[2] = esgs_ring_size;
2226 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2227 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2228 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2229 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2230 S_008F0C_INDEX_STRIDE(3) |
2231 S_008F0C_ADD_TID_ENABLE(1);
2232
2233 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2234 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2235 S_008F0C_OOB_SELECT(2) |
2236 S_008F0C_RESOURCE_LEVEL(1);
2237 } else {
2238 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2239 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2240 S_008F0C_ELEMENT_SIZE(1);
2241 }
2242
2243 /* GS entry for ES->GS ring */
2244 /* stride 0, num records - size, elsize0,
2245 index stride 0 */
2246 desc[4] = esgs_va;
2247 desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
2248 desc[6] = esgs_ring_size;
2249 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2250 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2251 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2252 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2253
2254 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2255 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2256 S_008F0C_OOB_SELECT(2) |
2257 S_008F0C_RESOURCE_LEVEL(1);
2258 } else {
2259 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2260 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2261 }
2262 }
2263
2264 desc += 8;
2265
2266 if (gsvs_ring_bo) {
2267 uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
2268
2269 /* VS entry for GS->VS ring */
2270 /* stride 0, num records - size, elsize0,
2271 index stride 0 */
2272 desc[0] = gsvs_va;
2273 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
2274 desc[2] = gsvs_ring_size;
2275 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2276 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2277 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2278 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2279
2280 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2281 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2282 S_008F0C_OOB_SELECT(2) |
2283 S_008F0C_RESOURCE_LEVEL(1);
2284 } else {
2285 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2286 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2287 }
2288
2289 /* stride gsvs_itemsize, num records 64
2290 elsize 4, index stride 16 */
2291 /* shader will patch stride and desc[2] */
2292 desc[4] = gsvs_va;
2293 desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
2294 S_008F04_SWIZZLE_ENABLE(1);
2295 desc[6] = 0;
2296 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2297 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2298 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2299 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
2300 S_008F0C_INDEX_STRIDE(1) |
2301 S_008F0C_ADD_TID_ENABLE(true);
2302
2303 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2304 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2305 S_008F0C_OOB_SELECT(2) |
2306 S_008F0C_RESOURCE_LEVEL(1);
2307 } else {
2308 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2309 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
2310 S_008F0C_ELEMENT_SIZE(1);
2311 }
2312
2313 }
2314
2315 desc += 8;
2316
2317 if (tess_rings_bo) {
2318 uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
2319 uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
2320
2321 desc[0] = tess_va;
2322 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
2323 desc[2] = tess_factor_ring_size;
2324 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2325 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2326 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2327 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2328
2329 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2330 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2331 S_008F0C_OOB_SELECT(3) |
2332 S_008F0C_RESOURCE_LEVEL(1);
2333 } else {
2334 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2335 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2336 }
2337
2338 desc[4] = tess_offchip_va;
2339 desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
2340 desc[6] = tess_offchip_ring_size;
2341 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
2342 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
2343 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
2344 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
2345
2346 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2347 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
2348 S_008F0C_OOB_SELECT(3) |
2349 S_008F0C_RESOURCE_LEVEL(1);
2350 } else {
2351 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
2352 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
2353 }
2354 }
2355
2356 desc += 8;
2357
2358 if (add_sample_positions) {
2359 /* add sample positions after all rings */
2360 memcpy(desc, queue->device->sample_locations_1x, 8);
2361 desc += 2;
2362 memcpy(desc, queue->device->sample_locations_2x, 16);
2363 desc += 4;
2364 memcpy(desc, queue->device->sample_locations_4x, 32);
2365 desc += 8;
2366 memcpy(desc, queue->device->sample_locations_8x, 64);
2367 }
2368 }
2369
2370 static unsigned
2371 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
2372 {
2373 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
2374 device->physical_device->rad_info.family != CHIP_CARRIZO &&
2375 device->physical_device->rad_info.family != CHIP_STONEY;
2376 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
2377 unsigned max_offchip_buffers;
2378 unsigned offchip_granularity;
2379 unsigned hs_offchip_param;
2380
2381 /*
2382 * Per RadeonSI:
2383 * This must be one less than the maximum number due to a hw limitation.
2384 	 * This must be one less than the maximum number due to a hw limitation.
2385 *
2386 * Per AMDVLK:
2387 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
2388 * Gfx7 should limit max_offchip_buffers to 508
2389 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
2390 *
2391 * Follow AMDVLK here.
2392 */
2393 if (device->physical_device->rad_info.chip_class >= GFX10) {
2394 max_offchip_buffers_per_se = 256;
2395 } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
2396 device->physical_device->rad_info.chip_class == GFX7 ||
2397 device->physical_device->rad_info.chip_class == GFX6)
2398 --max_offchip_buffers_per_se;
2399
2400 max_offchip_buffers = max_offchip_buffers_per_se *
2401 device->physical_device->rad_info.max_se;
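	/* E.g. Vega10 with 4 SEs: (128 - 1) * 4 = 508 offchip buffers. */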
2402
2403 /* Hawaii has a bug with offchip buffers > 256 that can be worked
2404 * around by setting 4K granularity.
2405 */
2406 if (device->tess_offchip_block_dw_size == 4096) {
2407 assert(device->physical_device->rad_info.family == CHIP_HAWAII);
2408 offchip_granularity = V_03093C_X_4K_DWORDS;
2409 } else {
2410 assert(device->tess_offchip_block_dw_size == 8192);
2411 offchip_granularity = V_03093C_X_8K_DWORDS;
2412 }
2413
2414 switch (device->physical_device->rad_info.chip_class) {
2415 case GFX6:
2416 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
2417 break;
2418 case GFX7:
2419 case GFX8:
2420 case GFX9:
2421 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
2422 break;
2423 case GFX10:
2424 break;
2425 default:
2426 break;
2427 }
2428
2429 *max_offchip_buffers_p = max_offchip_buffers;
2430 if (device->physical_device->rad_info.chip_class >= GFX7) {
2431 if (device->physical_device->rad_info.chip_class >= GFX8)
2432 --max_offchip_buffers;
2433 hs_offchip_param =
2434 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
2435 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
2436 } else {
2437 hs_offchip_param =
2438 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
2439 }
2440 return hs_offchip_param;
2441 }
2442
2443 static void
2444 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2445 struct radeon_winsys_bo *esgs_ring_bo,
2446 uint32_t esgs_ring_size,
2447 struct radeon_winsys_bo *gsvs_ring_bo,
2448 uint32_t gsvs_ring_size)
2449 {
2450 if (!esgs_ring_bo && !gsvs_ring_bo)
2451 return;
2452
2453 if (esgs_ring_bo)
2454 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
2455
2456 if (gsvs_ring_bo)
2457 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
2458
2459 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
2460 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
2461 radeon_emit(cs, esgs_ring_size >> 8);
2462 radeon_emit(cs, gsvs_ring_size >> 8);
2463 } else {
2464 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
2465 radeon_emit(cs, esgs_ring_size >> 8);
2466 radeon_emit(cs, gsvs_ring_size >> 8);
2467 }
2468 }
2469
2470 static void
2471 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2472 unsigned hs_offchip_param, unsigned tf_ring_size,
2473 struct radeon_winsys_bo *tess_rings_bo)
2474 {
2475 uint64_t tf_va;
2476
2477 if (!tess_rings_bo)
2478 return;
2479
2480 tf_va = radv_buffer_get_va(tess_rings_bo);
2481
2482 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
2483
2484 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
2485 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
2486 S_030938_SIZE(tf_ring_size / 4));
2487 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
2488 tf_va >> 8);
2489
2490 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2491 radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
2492 S_030984_BASE_HI(tf_va >> 40));
2493 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
2494 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
2495 S_030944_BASE_HI(tf_va >> 40));
2496 }
2497 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
2498 hs_offchip_param);
2499 } else {
2500 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
2501 S_008988_SIZE(tf_ring_size / 4));
2502 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
2503 tf_va >> 8);
2504 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
2505 hs_offchip_param);
2506 }
2507 }
2508
2509 static void
2510 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
2511 struct radeon_winsys_bo *compute_scratch_bo)
2512 {
2513 uint64_t scratch_va;
2514
2515 if (!compute_scratch_bo)
2516 return;
2517
2518 scratch_va = radv_buffer_get_va(compute_scratch_bo);
2519
2520 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
2521
2522 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
2523 radeon_emit(cs, scratch_va);
2524 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2525 S_008F04_SWIZZLE_ENABLE(1));
2526 }
2527
2528 static void
2529 radv_emit_global_shader_pointers(struct radv_queue *queue,
2530 struct radeon_cmdbuf *cs,
2531 struct radeon_winsys_bo *descriptor_bo)
2532 {
2533 uint64_t va;
2534
2535 if (!descriptor_bo)
2536 return;
2537
2538 va = radv_buffer_get_va(descriptor_bo);
2539
2540 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
2541
2542 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
2543 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2544 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2545 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2546 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2547
2548 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2549 radv_emit_shader_pointer(queue->device, cs, regs[i],
2550 va, true);
2551 }
2552 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
2553 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2554 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2555 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
2556 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
2557
2558 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2559 radv_emit_shader_pointer(queue->device, cs, regs[i],
2560 va, true);
2561 }
2562 } else {
2563 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
2564 R_00B130_SPI_SHADER_USER_DATA_VS_0,
2565 R_00B230_SPI_SHADER_USER_DATA_GS_0,
2566 R_00B330_SPI_SHADER_USER_DATA_ES_0,
2567 R_00B430_SPI_SHADER_USER_DATA_HS_0,
2568 R_00B530_SPI_SHADER_USER_DATA_LS_0};
2569
2570 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
2571 radv_emit_shader_pointer(queue->device, cs, regs[i],
2572 va, true);
2573 }
2574 }
2575 }
2576
2577 static void
2578 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2579 {
2580 struct radv_device *device = queue->device;
2581
2582 if (device->gfx_init) {
2583 uint64_t va = radv_buffer_get_va(device->gfx_init);
2584
2585 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
2586 radeon_emit(cs, va);
2587 radeon_emit(cs, va >> 32);
2588 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
2589
2590 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
2591 } else {
2592 struct radv_physical_device *physical_device = device->physical_device;
2593 si_emit_graphics(physical_device, cs);
2594 }
2595 }
2596
2597 static void
2598 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
2599 {
2600 struct radv_physical_device *physical_device = queue->device->physical_device;
2601 si_emit_compute(physical_device, cs);
2602 }
2603
2604 static VkResult
2605 radv_get_preamble_cs(struct radv_queue *queue,
2606 uint32_t scratch_size,
2607 uint32_t compute_scratch_size,
2608 uint32_t esgs_ring_size,
2609 uint32_t gsvs_ring_size,
2610 bool needs_tess_rings,
2611 bool needs_sample_positions,
2612 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
2613 struct radeon_cmdbuf **initial_preamble_cs,
2614 struct radeon_cmdbuf **continue_preamble_cs)
2615 {
2616 struct radeon_winsys_bo *scratch_bo = NULL;
2617 struct radeon_winsys_bo *descriptor_bo = NULL;
2618 struct radeon_winsys_bo *compute_scratch_bo = NULL;
2619 struct radeon_winsys_bo *esgs_ring_bo = NULL;
2620 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
2621 struct radeon_winsys_bo *tess_rings_bo = NULL;
2622 struct radeon_cmdbuf *dest_cs[3] = {0};
2623 bool add_tess_rings = false, add_sample_positions = false;
2624 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
2625 unsigned max_offchip_buffers;
2626 unsigned hs_offchip_param = 0;
2627 unsigned tess_offchip_ring_offset;
2628 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
2629 if (!queue->has_tess_rings) {
2630 if (needs_tess_rings)
2631 add_tess_rings = true;
2632 }
2633 if (!queue->has_sample_positions) {
2634 if (needs_sample_positions)
2635 add_sample_positions = true;
2636 }
2637 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
2638 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
2639 &max_offchip_buffers);
2640 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
2641 tess_offchip_ring_size = max_offchip_buffers *
2642 queue->device->tess_offchip_block_dw_size * 4;
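	/* E.g. with 4 SEs: the factor ring is 32768 * 4 = 128 KiB, so the
	 * offchip ring starts at the already 64 KiB-aligned offset of 128 KiB. */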
2643
2644 if (scratch_size <= queue->scratch_size &&
2645 compute_scratch_size <= queue->compute_scratch_size &&
2646 esgs_ring_size <= queue->esgs_ring_size &&
2647 gsvs_ring_size <= queue->gsvs_ring_size &&
2648 !add_tess_rings && !add_sample_positions &&
2649 queue->initial_preamble_cs) {
2650 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2651 *initial_preamble_cs = queue->initial_preamble_cs;
2652 *continue_preamble_cs = queue->continue_preamble_cs;
2653 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2654 *continue_preamble_cs = NULL;
2655 return VK_SUCCESS;
2656 }
2657
2658 if (scratch_size > queue->scratch_size) {
2659 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2660 scratch_size,
2661 4096,
2662 RADEON_DOMAIN_VRAM,
2663 ring_bo_flags,
2664 RADV_BO_PRIORITY_SCRATCH);
2665 if (!scratch_bo)
2666 goto fail;
2667 } else
2668 scratch_bo = queue->scratch_bo;
2669
2670 if (compute_scratch_size > queue->compute_scratch_size) {
2671 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
2672 compute_scratch_size,
2673 4096,
2674 RADEON_DOMAIN_VRAM,
2675 ring_bo_flags,
2676 RADV_BO_PRIORITY_SCRATCH);
2677 if (!compute_scratch_bo)
2678 goto fail;
2679
2680 } else
2681 compute_scratch_bo = queue->compute_scratch_bo;
2682
2683 if (esgs_ring_size > queue->esgs_ring_size) {
2684 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2685 esgs_ring_size,
2686 4096,
2687 RADEON_DOMAIN_VRAM,
2688 ring_bo_flags,
2689 RADV_BO_PRIORITY_SCRATCH);
2690 if (!esgs_ring_bo)
2691 goto fail;
2692 } else {
2693 esgs_ring_bo = queue->esgs_ring_bo;
2694 esgs_ring_size = queue->esgs_ring_size;
2695 }
2696
2697 if (gsvs_ring_size > queue->gsvs_ring_size) {
2698 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
2699 gsvs_ring_size,
2700 4096,
2701 RADEON_DOMAIN_VRAM,
2702 ring_bo_flags,
2703 RADV_BO_PRIORITY_SCRATCH);
2704 if (!gsvs_ring_bo)
2705 goto fail;
2706 } else {
2707 gsvs_ring_bo = queue->gsvs_ring_bo;
2708 gsvs_ring_size = queue->gsvs_ring_size;
2709 }
2710
2711 if (add_tess_rings) {
2712 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
2713 tess_offchip_ring_offset + tess_offchip_ring_size,
2714 256,
2715 RADEON_DOMAIN_VRAM,
2716 ring_bo_flags,
2717 RADV_BO_PRIORITY_SCRATCH);
2718 if (!tess_rings_bo)
2719 goto fail;
2720 } else {
2721 tess_rings_bo = queue->tess_rings_bo;
2722 }
2723
2724 if (scratch_bo != queue->scratch_bo ||
2725 esgs_ring_bo != queue->esgs_ring_bo ||
2726 gsvs_ring_bo != queue->gsvs_ring_bo ||
2727 tess_rings_bo != queue->tess_rings_bo ||
2728 add_sample_positions) {
2729 uint32_t size = 0;
2730 if (gsvs_ring_bo || esgs_ring_bo ||
2731 tess_rings_bo || add_sample_positions) {
2732 		if (gsvs_ring_bo || esgs_ring_bo ||
2733 		    tess_rings_bo || add_sample_positions) {
2734 			size = 112; /* 2 dwords scratch + 2 dwords padding + 6 ring descriptors of 4 dwords each */
2735 			if (add_sample_positions)
2736 				size += 128; /* 8+16+32+64 = 120 bytes of sample positions, padded to 128 */
2737 		}
2738 		else if (scratch_bo)
2739 			size = 8; /* scratch descriptor: 2 dwords */
2738
2739 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
2740 size,
2741 4096,
2742 RADEON_DOMAIN_VRAM,
2743 RADEON_FLAG_CPU_ACCESS |
2744 RADEON_FLAG_NO_INTERPROCESS_SHARING |
2745 RADEON_FLAG_READ_ONLY,
2746 RADV_BO_PRIORITY_DESCRIPTOR);
2747 if (!descriptor_bo)
2748 goto fail;
2749 } else
2750 descriptor_bo = queue->descriptor_bo;
2751
2752 if (descriptor_bo != queue->descriptor_bo) {
2753 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
2754
2755 if (scratch_bo) {
2756 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
2757 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
2758 S_008F04_SWIZZLE_ENABLE(1);
2759 map[0] = scratch_va;
2760 map[1] = rsrc1;
2761 }
2762
2763 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
2764 fill_geom_tess_rings(queue, map, add_sample_positions,
2765 esgs_ring_size, esgs_ring_bo,
2766 gsvs_ring_size, gsvs_ring_bo,
2767 tess_factor_ring_size,
2768 tess_offchip_ring_offset,
2769 tess_offchip_ring_size,
2770 tess_rings_bo);
2771
2772 queue->device->ws->buffer_unmap(descriptor_bo);
2773 }
2774
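	/* Build the three preamble variants: dest_cs[0] is the initial preamble
	 * with a full cache flush, dest_cs[1] the initial preamble with a
	 * lighter flush, and dest_cs[2] the continue preamble with no flush. */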
2775 for(int i = 0; i < 3; ++i) {
2776 struct radeon_cmdbuf *cs = NULL;
2777 cs = queue->device->ws->cs_create(queue->device->ws,
2778 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
2779 if (!cs)
2780 goto fail;
2781
2782 dest_cs[i] = cs;
2783
2784 if (scratch_bo)
2785 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
2786
2787 /* Emit initial configuration. */
2788 switch (queue->queue_family_index) {
2789 case RADV_QUEUE_GENERAL:
2790 radv_init_graphics_state(cs, queue);
2791 break;
2792 case RADV_QUEUE_COMPUTE:
2793 radv_init_compute_state(cs, queue);
2794 break;
2795 case RADV_QUEUE_TRANSFER:
2796 break;
2797 }
2798
2799 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
2800 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2801 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
2802
2803 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
2804 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
2805 }
2806
2807 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
2808 gsvs_ring_bo, gsvs_ring_size);
2809 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
2810 tess_factor_ring_size, tess_rings_bo);
2811 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
2812 radv_emit_compute_scratch(queue, cs, compute_scratch_bo);
2813
2814 if (i == 0) {
2815 si_cs_emit_cache_flush(cs,
2816 queue->device->physical_device->rad_info.chip_class,
2817 NULL, 0,
2818 queue->queue_family_index == RING_COMPUTE &&
2819 queue->device->physical_device->rad_info.chip_class >= GFX7,
2820 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
2821 RADV_CMD_FLAG_INV_ICACHE |
2822 RADV_CMD_FLAG_INV_SCACHE |
2823 RADV_CMD_FLAG_INV_VCACHE |
2824 RADV_CMD_FLAG_INV_L2 |
2825 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2826 } else if (i == 1) {
2827 si_cs_emit_cache_flush(cs,
2828 queue->device->physical_device->rad_info.chip_class,
2829 NULL, 0,
2830 queue->queue_family_index == RING_COMPUTE &&
2831 queue->device->physical_device->rad_info.chip_class >= GFX7,
2832 RADV_CMD_FLAG_INV_ICACHE |
2833 RADV_CMD_FLAG_INV_SCACHE |
2834 RADV_CMD_FLAG_INV_VCACHE |
2835 RADV_CMD_FLAG_INV_L2 |
2836 RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
2837 }
2838
2839 if (!queue->device->ws->cs_finalize(cs))
2840 goto fail;
2841 }
2842
2843 if (queue->initial_full_flush_preamble_cs)
2844 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2845
2846 if (queue->initial_preamble_cs)
2847 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2848
2849 if (queue->continue_preamble_cs)
2850 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2851
2852 queue->initial_full_flush_preamble_cs = dest_cs[0];
2853 queue->initial_preamble_cs = dest_cs[1];
2854 queue->continue_preamble_cs = dest_cs[2];
2855
2856 if (scratch_bo != queue->scratch_bo) {
2857 if (queue->scratch_bo)
2858 queue->device->ws->buffer_destroy(queue->scratch_bo);
2859 queue->scratch_bo = scratch_bo;
2860 queue->scratch_size = scratch_size;
2861 }
2862
2863 if (compute_scratch_bo != queue->compute_scratch_bo) {
2864 if (queue->compute_scratch_bo)
2865 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2866 queue->compute_scratch_bo = compute_scratch_bo;
2867 queue->compute_scratch_size = compute_scratch_size;
2868 }
2869
2870 if (esgs_ring_bo != queue->esgs_ring_bo) {
2871 if (queue->esgs_ring_bo)
2872 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2873 queue->esgs_ring_bo = esgs_ring_bo;
2874 queue->esgs_ring_size = esgs_ring_size;
2875 }
2876
2877 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
2878 if (queue->gsvs_ring_bo)
2879 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2880 queue->gsvs_ring_bo = gsvs_ring_bo;
2881 queue->gsvs_ring_size = gsvs_ring_size;
2882 }
2883
2884 if (tess_rings_bo != queue->tess_rings_bo) {
2885 queue->tess_rings_bo = tess_rings_bo;
2886 queue->has_tess_rings = true;
2887 }
2888
2889 if (descriptor_bo != queue->descriptor_bo) {
2890 if (queue->descriptor_bo)
2891 queue->device->ws->buffer_destroy(queue->descriptor_bo);
2892
2893 queue->descriptor_bo = descriptor_bo;
2894 }
2895
2896 if (add_sample_positions)
2897 queue->has_sample_positions = true;
2898
2899 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
2900 *initial_preamble_cs = queue->initial_preamble_cs;
2901 *continue_preamble_cs = queue->continue_preamble_cs;
2902 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
2903 *continue_preamble_cs = NULL;
2904 return VK_SUCCESS;
2905 fail:
2906 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
2907 if (dest_cs[i])
2908 queue->device->ws->cs_destroy(dest_cs[i]);
2909 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
2910 queue->device->ws->buffer_destroy(descriptor_bo);
2911 if (scratch_bo && scratch_bo != queue->scratch_bo)
2912 queue->device->ws->buffer_destroy(scratch_bo);
2913 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
2914 queue->device->ws->buffer_destroy(compute_scratch_bo);
2915 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
2916 queue->device->ws->buffer_destroy(esgs_ring_bo);
2917 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
2918 queue->device->ws->buffer_destroy(gsvs_ring_bo);
2919 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
2920 queue->device->ws->buffer_destroy(tess_rings_bo);
2921 return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2922 }
2923
2924 static VkResult radv_alloc_sem_counts(struct radv_instance *instance,
2925 struct radv_winsys_sem_counts *counts,
2926 int num_sems,
2927 const VkSemaphore *sems,
2928 VkFence _fence,
2929 bool reset_temp)
2930 {
2931 int syncobj_idx = 0, sem_idx = 0;
2932
2933 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
2934 return VK_SUCCESS;
2935
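	/* First pass: count syncobj-backed vs. legacy winsys semaphores so the
	 * arrays below can be allocated exactly; a second pass fills them in. */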
2936 for (uint32_t i = 0; i < num_sems; i++) {
2937 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2938
2939 if (sem->temp_syncobj || sem->syncobj)
2940 counts->syncobj_count++;
2941 else
2942 counts->sem_count++;
2943 }
2944
2945 if (_fence != VK_NULL_HANDLE) {
2946 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2947 if (fence->temp_syncobj || fence->syncobj)
2948 counts->syncobj_count++;
2949 }
2950
2951 if (counts->syncobj_count) {
2952 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
2953 if (!counts->syncobj)
2954 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2955 }
2956
2957 if (counts->sem_count) {
2958 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
2959 if (!counts->sem) {
2960 free(counts->syncobj);
2961 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2962 }
2963 }
2964
2965 for (uint32_t i = 0; i < num_sems; i++) {
2966 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
2967
2968 if (sem->temp_syncobj) {
2969 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
2970 }
2971 else if (sem->syncobj)
2972 counts->syncobj[syncobj_idx++] = sem->syncobj;
2973 else {
2974 assert(sem->sem);
2975 counts->sem[sem_idx++] = sem->sem;
2976 }
2977 }
2978
2979 if (_fence != VK_NULL_HANDLE) {
2980 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2981 if (fence->temp_syncobj)
2982 counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
2983 else if (fence->syncobj)
2984 counts->syncobj[syncobj_idx++] = fence->syncobj;
2985 }
2986
2987 return VK_SUCCESS;
2988 }
2989
2990 static void
2991 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
2992 {
2993 free(sem_info->wait.syncobj);
2994 free(sem_info->wait.sem);
2995 free(sem_info->signal.syncobj);
2996 free(sem_info->signal.sem);
2997 }
2998
2999
3000 static void radv_free_temp_syncobjs(struct radv_device *device,
3001 int num_sems,
3002 const VkSemaphore *sems)
3003 {
3004 for (uint32_t i = 0; i < num_sems; i++) {
3005 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
3006
3007 if (sem->temp_syncobj) {
3008 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
3009 sem->temp_syncobj = 0;
3010 }
3011 }
3012 }
3013
3014 static VkResult
3015 radv_alloc_sem_info(struct radv_instance *instance,
3016 struct radv_winsys_sem_info *sem_info,
3017 int num_wait_sems,
3018 const VkSemaphore *wait_sems,
3019 int num_signal_sems,
3020 const VkSemaphore *signal_sems,
3021 VkFence fence)
3022 {
3023 VkResult ret;
3024 memset(sem_info, 0, sizeof(*sem_info));
3025
3026 ret = radv_alloc_sem_counts(instance, &sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
3027 if (ret)
3028 return ret;
3029 ret = radv_alloc_sem_counts(instance, &sem_info->signal, num_signal_sems, signal_sems, fence, false);
3030 if (ret)
3031 radv_free_sem_info(sem_info);
3032
3033 /* caller can override these */
3034 sem_info->cs_emit_wait = true;
3035 sem_info->cs_emit_signal = true;
3036 return ret;
3037 }
3038
3039 /* Signals the fence as soon as all the work currently queued is done. */
3040 static VkResult radv_signal_fence(struct radv_queue *queue,
3041 struct radv_fence *fence)
3042 {
3043 int ret;
3044 VkResult result;
3045 struct radv_winsys_sem_info sem_info;
3046
3047 result = radv_alloc_sem_info(queue->device->instance, &sem_info, 0, NULL, 0, NULL,
3048 radv_fence_to_handle(fence));
3049 if (result != VK_SUCCESS)
3050 return result;
3051
3052 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3053 &queue->device->empty_cs[queue->queue_family_index],
3054 1, NULL, NULL, &sem_info, NULL,
3055 false, fence->fence);
3056 radv_free_sem_info(&sem_info);
3057
3058 if (ret)
3059 return vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
3060
3061 return VK_SUCCESS;
3062 }
3063
3064 VkResult radv_QueueSubmit(
3065 VkQueue _queue,
3066 uint32_t submitCount,
3067 const VkSubmitInfo* pSubmits,
3068 VkFence _fence)
3069 {
3070 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3071 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3072 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3073 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
3074 int ret;
3075 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
3076 uint32_t scratch_size = 0;
3077 uint32_t compute_scratch_size = 0;
3078 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
3079 struct radeon_cmdbuf *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
3080 VkResult result;
3081 bool fence_emitted = false;
3082 bool tess_rings_needed = false;
3083 bool sample_positions_needed = false;
3084
3085 /* Do this first so failing to allocate scratch buffers can't result in
3086 * partially executed submissions. */
3087 for (uint32_t i = 0; i < submitCount; i++) {
3088 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
3089 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
3090 pSubmits[i].pCommandBuffers[j]);
3091
3092 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
3093 compute_scratch_size = MAX2(compute_scratch_size,
3094 cmd_buffer->compute_scratch_size_needed);
3095 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
3096 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
3097 tess_rings_needed |= cmd_buffer->tess_rings_needed;
3098 sample_positions_needed |= cmd_buffer->sample_positions_needed;
3099 }
3100 }
3101
3102 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
3103 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
3104 sample_positions_needed, &initial_flush_preamble_cs,
3105 &initial_preamble_cs, &continue_preamble_cs);
3106 if (result != VK_SUCCESS)
3107 return result;
3108
3109 for (uint32_t i = 0; i < submitCount; i++) {
3110 struct radeon_cmdbuf **cs_array;
3111 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
3112 bool can_patch = true;
3113 uint32_t advance;
3114 struct radv_winsys_sem_info sem_info;
3115
3116 result = radv_alloc_sem_info(queue->device->instance,
3117 &sem_info,
3118 pSubmits[i].waitSemaphoreCount,
3119 pSubmits[i].pWaitSemaphores,
3120 pSubmits[i].signalSemaphoreCount,
3121 pSubmits[i].pSignalSemaphores,
3122 _fence);
3123 if (result != VK_SUCCESS)
3124 return result;
3125
3126 if (!pSubmits[i].commandBufferCount) {
3127 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
3128 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
3129 &queue->device->empty_cs[queue->queue_family_index],
3130 1, NULL, NULL,
3131 &sem_info, NULL,
3132 false, base_fence);
3133 if (ret) {
3134 radv_loge("failed to submit CS %d\n", i);
3135 abort();
3136 }
3137 fence_emitted = true;
3138 }
3139 radv_free_sem_info(&sem_info);
3140 continue;
3141 }
3142
3143 		cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
3144 					(pSubmits[i].commandBufferCount));
		if (!cs_array) {
			radv_free_sem_info(&sem_info);
			return vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
3145
3146 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
3147 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
3148 pSubmits[i].pCommandBuffers[j]);
3149 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3150
3151 cs_array[j] = cmd_buffer->cs;
3152 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
3153 can_patch = false;
3154
3155 cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
3156 }
3157
3158 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
3159 struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
3160 const struct radv_winsys_bo_list *bo_list = NULL;
3161
3162 advance = MIN2(max_cs_submission,
3163 pSubmits[i].commandBufferCount - j);
3164
3165 if (queue->device->trace_bo)
3166 *queue->device->trace_id_ptr = 0;
3167
3168 sem_info.cs_emit_wait = j == 0;
3169 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
3170
3171 if (unlikely(queue->device->use_global_bo_list)) {
3172 pthread_mutex_lock(&queue->device->bo_list.mutex);
3173 bo_list = &queue->device->bo_list.list;
3174 }
3175
3176 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
3177 advance, initial_preamble, continue_preamble_cs,
3178 &sem_info, bo_list,
3179 can_patch, base_fence);
3180
3181 if (unlikely(queue->device->use_global_bo_list))
3182 pthread_mutex_unlock(&queue->device->bo_list.mutex);
3183
3184 if (ret) {
3185 radv_loge("failed to submit CS %d\n", i);
3186 abort();
3187 }
3188 fence_emitted = true;
3189 if (queue->device->trace_bo) {
3190 radv_check_gpu_hangs(queue, cs_array[j]);
3191 }
3192 }
3193
3194 radv_free_temp_syncobjs(queue->device,
3195 pSubmits[i].waitSemaphoreCount,
3196 pSubmits[i].pWaitSemaphores);
3197 radv_free_sem_info(&sem_info);
3198 free(cs_array);
3199 }
3200
3201 if (fence) {
3202 if (!fence_emitted) {
3203 result = radv_signal_fence(queue, fence);
3204 if (result != VK_SUCCESS)
3205 return result;
3206 }
3207 }
3208
3209 return VK_SUCCESS;
3210 }
3211
3212 VkResult radv_QueueWaitIdle(
3213 VkQueue _queue)
3214 {
3215 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3216
3217 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
3218 radv_queue_family_to_ring(queue->queue_family_index),
3219 queue->queue_idx);
3220 return VK_SUCCESS;
3221 }
3222
3223 VkResult radv_DeviceWaitIdle(
3224 VkDevice _device)
3225 {
3226 RADV_FROM_HANDLE(radv_device, device, _device);
3227
3228 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3229 for (unsigned q = 0; q < device->queue_count[i]; q++) {
3230 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
3231 }
3232 }
3233 return VK_SUCCESS;
3234 }
3235
3236 VkResult radv_EnumerateInstanceExtensionProperties(
3237 const char* pLayerName,
3238 uint32_t* pPropertyCount,
3239 VkExtensionProperties* pProperties)
3240 {
3241 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
3242
3243 for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
3244 if (radv_supported_instance_extensions.extensions[i]) {
3245 vk_outarray_append(&out, prop) {
3246 *prop = radv_instance_extensions[i];
3247 }
3248 }
3249 }
3250
3251 return vk_outarray_status(&out);
3252 }
3253
3254 VkResult radv_EnumerateDeviceExtensionProperties(
3255 VkPhysicalDevice physicalDevice,
3256 const char* pLayerName,
3257 uint32_t* pPropertyCount,
3258 VkExtensionProperties* pProperties)
3259 {
3260 RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
3261 VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
3262
3263 for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
3264 if (device->supported_extensions.extensions[i]) {
3265 vk_outarray_append(&out, prop) {
3266 *prop = radv_device_extensions[i];
3267 }
3268 }
3269 }
3270
3271 return vk_outarray_status(&out);
3272 }
3273
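/* When RADV_DEBUG_ALL_ENTRYPOINTS is set, entrypoints are returned without
 * checking the instance version or the enabled extensions, which helps when
 * debugging applications or layers that query entrypoints they never
 * enabled.
 */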
3274 PFN_vkVoidFunction radv_GetInstanceProcAddr(
3275 VkInstance _instance,
3276 const char* pName)
3277 {
3278 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3279 bool unchecked = instance ? instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS : false;
3280
3281 if (unchecked) {
3282 return radv_lookup_entrypoint_unchecked(pName);
3283 } else {
3284 return radv_lookup_entrypoint_checked(pName,
3285 instance ? instance->apiVersion : 0,
3286 instance ? &instance->enabled_extensions : NULL,
3287 NULL);
3288 }
3289 }
3290
3291 /* The loader wants us to expose a second GetInstanceProcAddr function
3292 * to work around certain LD_PRELOAD issues seen in apps.
3293 */
3294 PUBLIC
3295 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
3296 VkInstance instance,
3297 const char* pName);
3298
3299 PUBLIC
3300 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
3301 VkInstance instance,
3302 const char* pName)
3303 {
3304 return radv_GetInstanceProcAddr(instance, pName);
3305 }
3306
3307 PUBLIC
3308 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
3309 VkInstance _instance,
3310 const char* pName);
3311
3312 PUBLIC
3313 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
3314 VkInstance _instance,
3315 const char* pName)
3316 {
3317 RADV_FROM_HANDLE(radv_instance, instance, _instance);
3318
3319 return radv_lookup_physical_device_entrypoint_checked(pName,
3320 instance ? instance->apiVersion : 0,
3321 instance ? &instance->enabled_extensions : NULL);
3322 }
3323
3324 PFN_vkVoidFunction radv_GetDeviceProcAddr(
3325 VkDevice _device,
3326 const char* pName)
3327 {
3328 RADV_FROM_HANDLE(radv_device, device, _device);
3329 bool unchecked = device ? device->instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS : false;
3330
3331 if (unchecked) {
3332 return radv_lookup_entrypoint_unchecked(pName);
3333 } else {
3334 return radv_lookup_entrypoint_checked(pName,
3335 device->instance->apiVersion,
3336 &device->instance->enabled_extensions,
3337 &device->enabled_extensions);
3338 }
3339 }
3340
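/* Exports a memory object as a file descriptor. For dedicated image
 * allocations the tiling metadata is written to the BO first so that the
 * importing process can interpret its contents.
 */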
3341 bool radv_get_memory_fd(struct radv_device *device,
3342 struct radv_device_memory *memory,
3343 int *pFD)
3344 {
3345 struct radeon_bo_metadata metadata;
3346
3347 if (memory->image) {
3348 radv_init_metadata(device, memory->image, &metadata);
3349 device->ws->buffer_set_metadata(memory->bo, &metadata);
3350 }
3351
3352 return device->ws->buffer_get_fd(device->ws, memory->bo,
3353 pFD);
3354 }
3355
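/* Common path behind vkAllocateMemory. A BO is obtained in one of three
 * ways: imported from a file descriptor, wrapped around a host pointer
 * (VK_EXT_external_memory_host), or freshly created with the domain and
 * flags derived from the requested memory type.
 */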
3356 static VkResult radv_alloc_memory(struct radv_device *device,
3357 const VkMemoryAllocateInfo* pAllocateInfo,
3358 const VkAllocationCallbacks* pAllocator,
3359 VkDeviceMemory* pMem)
3360 {
3361 struct radv_device_memory *mem;
3362 VkResult result;
3363 enum radeon_bo_domain domain;
3364 uint32_t flags = 0;
3365 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
3366
3367 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3368
3369 if (pAllocateInfo->allocationSize == 0) {
3370 /* Apparently, this is allowed */
3371 *pMem = VK_NULL_HANDLE;
3372 return VK_SUCCESS;
3373 }
3374
3375 const VkImportMemoryFdInfoKHR *import_info =
3376 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
3377 const VkMemoryDedicatedAllocateInfo *dedicate_info =
3378 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
3379 const VkExportMemoryAllocateInfo *export_info =
3380 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
3381 const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
3382 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
3383
3384 const struct wsi_memory_allocate_info *wsi_info =
3385 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
3386
3387 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
3388 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3389 if (mem == NULL)
3390 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3391
3392 if (wsi_info && wsi_info->implicit_sync)
3393 flags |= RADEON_FLAG_IMPLICIT_SYNC;
3394
3395 if (dedicate_info) {
3396 mem->image = radv_image_from_handle(dedicate_info->image);
3397 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
3398 } else {
3399 mem->image = NULL;
3400 mem->buffer = NULL;
3401 }
3402
3403 	float priority_float = 0.5f;
3404 const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
3405 vk_find_struct_const(pAllocateInfo->pNext,
3406 MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
3407 if (priority_ext)
3408 priority_float = priority_ext->priority;
3409
3410 unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
3411 (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
3412
3413 mem->user_ptr = NULL;
3414
3415 if (import_info) {
3416 assert(import_info->handleType ==
3417 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3418 import_info->handleType ==
3419 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3420 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
3421 priority, NULL, NULL);
3422 if (!mem->bo) {
3423 result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3424 goto fail;
3425 } else {
3426 close(import_info->fd);
3427 }
3428 } else if (host_ptr_info) {
3429 assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3430 assert(mem_type_index == RADV_MEM_TYPE_GTT_CACHED);
3431 mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
3432 pAllocateInfo->allocationSize,
3433 priority);
3434 if (!mem->bo) {
3435 result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
3436 goto fail;
3437 } else {
3438 mem->user_ptr = host_ptr_info->pHostPointer;
3439 }
3440 } else {
3441 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
3442 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
3443 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
3444 domain = RADEON_DOMAIN_GTT;
3445 else
3446 domain = RADEON_DOMAIN_VRAM;
3447
3448 if (mem_type_index == RADV_MEM_TYPE_VRAM)
3449 flags |= RADEON_FLAG_NO_CPU_ACCESS;
3450 else
3451 flags |= RADEON_FLAG_CPU_ACCESS;
3452
3453 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
3454 flags |= RADEON_FLAG_GTT_WC;
3455
3456 if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
3457 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
3458 if (device->use_global_bo_list) {
3459 flags |= RADEON_FLAG_PREFER_LOCAL_BO;
3460 }
3461 }
3462
3463 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
3464 domain, flags, priority);
3465
3466 if (!mem->bo) {
3467 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
3468 goto fail;
3469 }
3470 mem->type_index = mem_type_index;
3471 }
3472
3473 result = radv_bo_list_add(device, mem->bo);
3474 if (result != VK_SUCCESS)
3475 goto fail_bo;
3476
3477 *pMem = radv_device_memory_to_handle(mem);
3478
3479 return VK_SUCCESS;
3480
3481 fail_bo:
3482 device->ws->buffer_destroy(mem->bo);
3483 fail:
3484 vk_free2(&device->alloc, pAllocator, mem);
3485
3486 return result;
3487 }
3488
3489 VkResult radv_AllocateMemory(
3490 VkDevice _device,
3491 const VkMemoryAllocateInfo* pAllocateInfo,
3492 const VkAllocationCallbacks* pAllocator,
3493 VkDeviceMemory* pMem)
3494 {
3495 RADV_FROM_HANDLE(radv_device, device, _device);
3496 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
3497 }
3498
3499 void radv_FreeMemory(
3500 VkDevice _device,
3501 VkDeviceMemory _mem,
3502 const VkAllocationCallbacks* pAllocator)
3503 {
3504 RADV_FROM_HANDLE(radv_device, device, _device);
3505 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
3506
3507 if (mem == NULL)
3508 return;
3509
3510 radv_bo_list_remove(device, mem->bo);
3511 device->ws->buffer_destroy(mem->bo);
3512 mem->bo = NULL;
3513
3514 vk_free2(&device->alloc, pAllocator, mem);
3515 }
3516
3517 VkResult radv_MapMemory(
3518 VkDevice _device,
3519 VkDeviceMemory _memory,
3520 VkDeviceSize offset,
3521 VkDeviceSize size,
3522 VkMemoryMapFlags flags,
3523 void** ppData)
3524 {
3525 RADV_FROM_HANDLE(radv_device, device, _device);
3526 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3527
3528 if (mem == NULL) {
3529 *ppData = NULL;
3530 return VK_SUCCESS;
3531 }
3532
3533 if (mem->user_ptr)
3534 *ppData = mem->user_ptr;
3535 else
3536 *ppData = device->ws->buffer_map(mem->bo);
3537
3538 if (*ppData) {
3539 *ppData += offset;
3540 return VK_SUCCESS;
3541 }
3542
3543 return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
3544 }
3545
3546 void radv_UnmapMemory(
3547 VkDevice _device,
3548 VkDeviceMemory _memory)
3549 {
3550 RADV_FROM_HANDLE(radv_device, device, _device);
3551 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
3552
3553 if (mem == NULL)
3554 return;
3555
3556 if (mem->user_ptr == NULL)
3557 device->ws->buffer_unmap(mem->bo);
3558 }
3559
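/* Every host-visible memory type radv exposes is also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops.
 */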
3560 VkResult radv_FlushMappedMemoryRanges(
3561 VkDevice _device,
3562 uint32_t memoryRangeCount,
3563 const VkMappedMemoryRange* pMemoryRanges)
3564 {
3565 return VK_SUCCESS;
3566 }
3567
3568 VkResult radv_InvalidateMappedMemoryRanges(
3569 VkDevice _device,
3570 uint32_t memoryRangeCount,
3571 const VkMappedMemoryRange* pMemoryRanges)
3572 {
3573 return VK_SUCCESS;
3574 }
3575
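/* Buffers can be placed in any memory type. Sparse buffers are bound at
 * page granularity and therefore need 4096-byte alignment; everything else
 * only needs 16 bytes.
 */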
3576 void radv_GetBufferMemoryRequirements(
3577 VkDevice _device,
3578 VkBuffer _buffer,
3579 VkMemoryRequirements* pMemoryRequirements)
3580 {
3581 RADV_FROM_HANDLE(radv_device, device, _device);
3582 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
3583
3584 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3585
3586 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
3587 pMemoryRequirements->alignment = 4096;
3588 else
3589 pMemoryRequirements->alignment = 16;
3590
3591 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
3592 }
3593
3594 void radv_GetBufferMemoryRequirements2(
3595 VkDevice device,
3596 const VkBufferMemoryRequirementsInfo2 *pInfo,
3597 VkMemoryRequirements2 *pMemoryRequirements)
3598 {
3599 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
3600 &pMemoryRequirements->memoryRequirements);
3601 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
3602 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3603 switch (ext->sType) {
3604 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3605 VkMemoryDedicatedRequirements *req =
3606 (VkMemoryDedicatedRequirements *) ext;
3607 req->requiresDedicatedAllocation = buffer->shareable;
3608 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3609 break;
3610 }
3611 default:
3612 break;
3613 }
3614 }
3615 }
3616
3617 void radv_GetImageMemoryRequirements(
3618 VkDevice _device,
3619 VkImage _image,
3620 VkMemoryRequirements* pMemoryRequirements)
3621 {
3622 RADV_FROM_HANDLE(radv_device, device, _device);
3623 RADV_FROM_HANDLE(radv_image, image, _image);
3624
3625 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
3626
3627 pMemoryRequirements->size = image->size;
3628 pMemoryRequirements->alignment = image->alignment;
3629 }
3630
3631 void radv_GetImageMemoryRequirements2(
3632 VkDevice device,
3633 const VkImageMemoryRequirementsInfo2 *pInfo,
3634 VkMemoryRequirements2 *pMemoryRequirements)
3635 {
3636 radv_GetImageMemoryRequirements(device, pInfo->image,
3637 &pMemoryRequirements->memoryRequirements);
3638
3639 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
3640
3641 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3642 switch (ext->sType) {
3643 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3644 VkMemoryDedicatedRequirements *req =
3645 (VkMemoryDedicatedRequirements *) ext;
3646 req->requiresDedicatedAllocation = image->shareable;
3647 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
3648 break;
3649 }
3650 default:
3651 break;
3652 }
3653 }
3654 }
3655
3656 void radv_GetImageSparseMemoryRequirements(
3657 VkDevice device,
3658 VkImage image,
3659 uint32_t* pSparseMemoryRequirementCount,
3660 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
3661 {
3662 stub();
3663 }
3664
3665 void radv_GetImageSparseMemoryRequirements2(
3666 VkDevice device,
3667 const VkImageSparseMemoryRequirementsInfo2 *pInfo,
3668 uint32_t* pSparseMemoryRequirementCount,
3669 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
3670 {
3671 stub();
3672 }
3673
3674 void radv_GetDeviceMemoryCommitment(
3675 VkDevice device,
3676 VkDeviceMemory memory,
3677 VkDeviceSize* pCommittedMemoryInBytes)
3678 {
3679 *pCommittedMemoryInBytes = 0;
3680 }
3681
3682 VkResult radv_BindBufferMemory2(VkDevice device,
3683 uint32_t bindInfoCount,
3684 const VkBindBufferMemoryInfo *pBindInfos)
3685 {
3686 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3687 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3688 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
3689
3690 if (mem) {
3691 buffer->bo = mem->bo;
3692 buffer->offset = pBindInfos[i].memoryOffset;
3693 } else {
3694 buffer->bo = NULL;
3695 }
3696 }
3697 return VK_SUCCESS;
3698 }
3699
3700 VkResult radv_BindBufferMemory(
3701 VkDevice device,
3702 VkBuffer buffer,
3703 VkDeviceMemory memory,
3704 VkDeviceSize memoryOffset)
3705 {
3706 const VkBindBufferMemoryInfo info = {
3707 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
3708 .buffer = buffer,
3709 .memory = memory,
3710 .memoryOffset = memoryOffset
3711 };
3712
3713 return radv_BindBufferMemory2(device, 1, &info);
3714 }
3715
3716 VkResult radv_BindImageMemory2(VkDevice device,
3717 uint32_t bindInfoCount,
3718 const VkBindImageMemoryInfo *pBindInfos)
3719 {
3720 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3721 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
3722 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
3723
3724 if (mem) {
3725 image->bo = mem->bo;
3726 image->offset = pBindInfos[i].memoryOffset;
3727 } else {
3728 image->bo = NULL;
3729 image->offset = 0;
3730 }
3731 }
3732 return VK_SUCCESS;
3733 }
3734
3735
3736 VkResult radv_BindImageMemory(
3737 VkDevice device,
3738 VkImage image,
3739 VkDeviceMemory memory,
3740 VkDeviceSize memoryOffset)
3741 {
3742 const VkBindImageMemoryInfo info = {
3743 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
3744 .image = image,
3745 .memory = memory,
3746 .memoryOffset = memoryOffset
3747 };
3748
3749 return radv_BindImageMemory2(device, 1, &info);
3750 }
3751
3752
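/* Sparse binding is implemented on top of the winsys' virtual buffers:
 * each bind range is remapped to the backing BO, or to NULL to unbind it.
 */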
3753 static void
3754 radv_sparse_buffer_bind_memory(struct radv_device *device,
3755 const VkSparseBufferMemoryBindInfo *bind)
3756 {
3757 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
3758
3759 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3760 struct radv_device_memory *mem = NULL;
3761
3762 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3763 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3764
3765 device->ws->buffer_virtual_bind(buffer->bo,
3766 bind->pBinds[i].resourceOffset,
3767 bind->pBinds[i].size,
3768 mem ? mem->bo : NULL,
3769 bind->pBinds[i].memoryOffset);
3770 }
3771 }
3772
3773 static void
3774 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
3775 const VkSparseImageOpaqueMemoryBindInfo *bind)
3776 {
3777 RADV_FROM_HANDLE(radv_image, image, bind->image);
3778
3779 for (uint32_t i = 0; i < bind->bindCount; ++i) {
3780 struct radv_device_memory *mem = NULL;
3781
3782 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
3783 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
3784
3785 device->ws->buffer_virtual_bind(image->bo,
3786 bind->pBinds[i].resourceOffset,
3787 bind->pBinds[i].size,
3788 mem ? mem->bo : NULL,
3789 bind->pBinds[i].memoryOffset);
3790 }
3791 }
3792
3793 VkResult radv_QueueBindSparse(
3794 VkQueue _queue,
3795 uint32_t bindInfoCount,
3796 const VkBindSparseInfo* pBindInfo,
3797 VkFence _fence)
3798 {
3799 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3800 RADV_FROM_HANDLE(radv_queue, queue, _queue);
3801 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
3802 bool fence_emitted = false;
3803 VkResult result;
3804 int ret;
3805
3806 for (uint32_t i = 0; i < bindInfoCount; ++i) {
3807 struct radv_winsys_sem_info sem_info;
3808 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
3809 radv_sparse_buffer_bind_memory(queue->device,
3810 pBindInfo[i].pBufferBinds + j);
3811 }
3812
3813 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
3814 radv_sparse_image_opaque_bind_memory(queue->device,
3815 pBindInfo[i].pImageOpaqueBinds + j);
3816 }
3817
3819 		result = radv_alloc_sem_info(queue->device->instance,
3820 &sem_info,
3821 pBindInfo[i].waitSemaphoreCount,
3822 pBindInfo[i].pWaitSemaphores,
3823 pBindInfo[i].signalSemaphoreCount,
3824 pBindInfo[i].pSignalSemaphores,
3825 _fence);
3826 if (result != VK_SUCCESS)
3827 return result;
3828
3829 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
3830 ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
3831 &queue->device->empty_cs[queue->queue_family_index],
3832 1, NULL, NULL,
3833 &sem_info, NULL,
3834 false, base_fence);
3835 if (ret) {
3836 radv_loge("failed to submit CS %d\n", i);
3837 abort();
3838 }
3839
3840 fence_emitted = true;
3841 }
3842
3843 radv_free_sem_info(&sem_info);
3844
3845 }
3846
3847 if (fence) {
3848 if (!fence_emitted) {
3849 result = radv_signal_fence(queue, fence);
3850 if (result != VK_SUCCESS)
3851 return result;
3852 }
3853 }
3854
3855 return VK_SUCCESS;
3856 }
3857
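/* Fences are backed by a DRM syncobj when they may be exported, or when
 * the winsys always uses syncobjs; otherwise a plain winsys fence is used.
 */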
3858 VkResult radv_CreateFence(
3859 VkDevice _device,
3860 const VkFenceCreateInfo* pCreateInfo,
3861 const VkAllocationCallbacks* pAllocator,
3862 VkFence* pFence)
3863 {
3864 RADV_FROM_HANDLE(radv_device, device, _device);
3865 const VkExportFenceCreateInfo *export =
3866 vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
3867 VkExternalFenceHandleTypeFlags handleTypes =
3868 export ? export->handleTypes : 0;
3869
3870 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
3871 sizeof(*fence), 8,
3872 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3873
3874 if (!fence)
3875 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3876
3877 fence->fence_wsi = NULL;
3878 fence->temp_syncobj = 0;
3879 if (device->always_use_syncobj || handleTypes) {
3880 int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
3881 if (ret) {
3882 vk_free2(&device->alloc, pAllocator, fence);
3883 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3884 }
3885 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
3886 device->ws->signal_syncobj(device->ws, fence->syncobj);
3887 }
3888 fence->fence = NULL;
3889 } else {
3890 fence->fence = device->ws->create_fence();
3891 if (!fence->fence) {
3892 vk_free2(&device->alloc, pAllocator, fence);
3893 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3894 }
3895 fence->syncobj = 0;
3896 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
3897 device->ws->signal_fence(fence->fence);
3898 }
3899
3900 *pFence = radv_fence_to_handle(fence);
3901
3902 return VK_SUCCESS;
3903 }
3904
3905 void radv_DestroyFence(
3906 VkDevice _device,
3907 VkFence _fence,
3908 const VkAllocationCallbacks* pAllocator)
3909 {
3910 RADV_FROM_HANDLE(radv_device, device, _device);
3911 RADV_FROM_HANDLE(radv_fence, fence, _fence);
3912
3913 if (!fence)
3914 return;
3915
3916 if (fence->temp_syncobj)
3917 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
3918 if (fence->syncobj)
3919 device->ws->destroy_syncobj(device->ws, fence->syncobj);
3920 if (fence->fence)
3921 device->ws->destroy_fence(fence->fence);
3922 if (fence->fence_wsi)
3923 fence->fence_wsi->destroy(fence->fence_wsi);
3924 vk_free2(&device->alloc, pAllocator, fence);
3925 }
3926
3927
3928 uint64_t radv_get_current_time(void)
3929 {
3930 struct timespec tv;
3931 clock_gettime(CLOCK_MONOTONIC, &tv);
3932 return tv.tv_nsec + tv.tv_sec*1000000000ull;
3933 }
3934
3935 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
3936 {
3937 uint64_t current_time = radv_get_current_time();
3938
3939 timeout = MIN2(UINT64_MAX - current_time, timeout);
3940
3941 return current_time + timeout;
3942 }
3943
3944
3945 static bool radv_all_fences_plain_and_submitted(struct radv_device *device,
3946 uint32_t fenceCount, const VkFence *pFences)
3947 {
3948 for (uint32_t i = 0; i < fenceCount; ++i) {
3949 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3950 if (fence->fence == NULL || fence->syncobj ||
3951 fence->temp_syncobj || fence->fence_wsi ||
3952 (!device->ws->is_fence_waitable(fence->fence)))
3953 return false;
3954 }
3955 return true;
3956 }
3957
3958 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
3959 {
3960 for (uint32_t i = 0; i < fenceCount; ++i) {
3961 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3962 if (fence->syncobj == 0 && fence->temp_syncobj == 0)
3963 return false;
3964 }
3965 return true;
3966 }
3967
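/* vkWaitForFences takes one of three paths: a single kernel wait when all
 * fences are syncobj-backed, a batched winsys wait for the "wait any" case,
 * and a sequential per-fence wait as the fallback.
 */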
3968 VkResult radv_WaitForFences(
3969 VkDevice _device,
3970 uint32_t fenceCount,
3971 const VkFence* pFences,
3972 VkBool32 waitAll,
3973 uint64_t timeout)
3974 {
3975 RADV_FROM_HANDLE(radv_device, device, _device);
3976 timeout = radv_get_absolute_timeout(timeout);
3977
3978 if (device->always_use_syncobj &&
3979 radv_all_fences_syncobj(fenceCount, pFences))
3980 {
3981 uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
3982 if (!handles)
3983 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3984
3985 for (uint32_t i = 0; i < fenceCount; ++i) {
3986 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
3987 handles[i] = fence->temp_syncobj ? fence->temp_syncobj : fence->syncobj;
3988 }
3989
3990 bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
3991
3992 free(handles);
3993 return success ? VK_SUCCESS : VK_TIMEOUT;
3994 }
3995
3996 if (!waitAll && fenceCount > 1) {
3997 		/* Not doing this by default for waitAll, because it would need to allocate twice. */
3998 if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(device, fenceCount, pFences)) {
3999 uint32_t wait_count = 0;
4000 struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
4001 if (!fences)
4002 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4003
4004 for (uint32_t i = 0; i < fenceCount; ++i) {
4005 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
4006
4007 if (device->ws->fence_wait(device->ws, fence->fence, false, 0)) {
4008 free(fences);
4009 return VK_SUCCESS;
4010 }
4011
4012 fences[wait_count++] = fence->fence;
4013 }
4014
4015 bool success = device->ws->fences_wait(device->ws, fences, wait_count,
4016 waitAll, timeout - radv_get_current_time());
4017
4018 free(fences);
4019 return success ? VK_SUCCESS : VK_TIMEOUT;
4020 }
4021
		/* Fallback: busy-poll each fence until one signals or the
		 * timeout expires. */
4022 		while (radv_get_current_time() <= timeout) {
4023 for (uint32_t i = 0; i < fenceCount; ++i) {
4024 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
4025 return VK_SUCCESS;
4026 }
4027 }
4028 return VK_TIMEOUT;
4029 }
4030
4031 for (uint32_t i = 0; i < fenceCount; ++i) {
4032 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
4033 bool expired = false;
4034
4035 if (fence->temp_syncobj) {
4036 if (!device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, timeout))
4037 return VK_TIMEOUT;
4038 continue;
4039 }
4040
4041 if (fence->syncobj) {
4042 if (!device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, timeout))
4043 return VK_TIMEOUT;
4044 continue;
4045 }
4046
4047 if (fence->fence) {
4048 			if (!device->ws->is_fence_waitable(fence->fence)) {
				/* Busy-wait until the CS carrying the fence
				 * has actually been submitted. */
4049 				while (!device->ws->is_fence_waitable(fence->fence) &&
4050 				       radv_get_current_time() <= timeout)
4051 					/* Do nothing */;
4052 			}
4053
4054 expired = device->ws->fence_wait(device->ws,
4055 fence->fence,
4056 true, timeout);
4057 if (!expired)
4058 return VK_TIMEOUT;
4059 }
4060
4061 if (fence->fence_wsi) {
4062 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, timeout);
4063 if (result != VK_SUCCESS)
4064 return result;
4065 }
4066 }
4067
4068 return VK_SUCCESS;
4069 }
4070
4071 VkResult radv_ResetFences(VkDevice _device,
4072 uint32_t fenceCount,
4073 const VkFence *pFences)
4074 {
4075 RADV_FROM_HANDLE(radv_device, device, _device);
4076
4077 for (unsigned i = 0; i < fenceCount; ++i) {
4078 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
4079 if (fence->fence)
4080 device->ws->reset_fence(fence->fence);
4081
4082 /* Per spec, we first restore the permanent payload, and then reset, so
4083 * having a temp syncobj should not skip resetting the permanent syncobj. */
4084 if (fence->temp_syncobj) {
4085 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
4086 fence->temp_syncobj = 0;
4087 }
4088
4089 if (fence->syncobj) {
4090 device->ws->reset_syncobj(device->ws, fence->syncobj);
4091 }
4092 }
4093
4094 return VK_SUCCESS;
4095 }
4096
4097 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
4098 {
4099 RADV_FROM_HANDLE(radv_device, device, _device);
4100 RADV_FROM_HANDLE(radv_fence, fence, _fence);
4101
4102 if (fence->temp_syncobj) {
4103 bool success = device->ws->wait_syncobj(device->ws, &fence->temp_syncobj, 1, true, 0);
4104 return success ? VK_SUCCESS : VK_NOT_READY;
4105 }
4106
4107 if (fence->syncobj) {
4108 bool success = device->ws->wait_syncobj(device->ws, &fence->syncobj, 1, true, 0);
4109 return success ? VK_SUCCESS : VK_NOT_READY;
4110 }
4111
4112 if (fence->fence) {
4113 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
4114 return VK_NOT_READY;
4115 }
4116 if (fence->fence_wsi) {
4117 VkResult result = fence->fence_wsi->wait(fence->fence_wsi, 0);
4118
4119 if (result != VK_SUCCESS) {
4120 if (result == VK_TIMEOUT)
4121 return VK_NOT_READY;
4122 return result;
4123 }
4124 }
4125 return VK_SUCCESS;
4126 }
4127
4128
4129 // Queue semaphore functions
4130
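/* As with fences, semaphores are backed by a DRM syncobj when they may be
 * exported and by a plain winsys semaphore otherwise.
 */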
4131 VkResult radv_CreateSemaphore(
4132 VkDevice _device,
4133 const VkSemaphoreCreateInfo* pCreateInfo,
4134 const VkAllocationCallbacks* pAllocator,
4135 VkSemaphore* pSemaphore)
4136 {
4137 RADV_FROM_HANDLE(radv_device, device, _device);
4138 const VkExportSemaphoreCreateInfo *export =
4139 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
4140 VkExternalSemaphoreHandleTypeFlags handleTypes =
4141 export ? export->handleTypes : 0;
4142
4143 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
4144 sizeof(*sem), 8,
4145 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4146 if (!sem)
4147 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4148
4149 sem->temp_syncobj = 0;
4150 	/* Create a syncobj if this semaphore may be exported, or if syncobjs are always used. */
4151 if (device->always_use_syncobj || handleTypes) {
4152 assert (device->physical_device->rad_info.has_syncobj);
4153 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
4154 if (ret) {
4155 vk_free2(&device->alloc, pAllocator, sem);
4156 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4157 }
4158 sem->sem = NULL;
4159 } else {
4160 sem->sem = device->ws->create_sem(device->ws);
4161 if (!sem->sem) {
4162 vk_free2(&device->alloc, pAllocator, sem);
4163 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4164 }
4165 sem->syncobj = 0;
4166 }
4167
4168 *pSemaphore = radv_semaphore_to_handle(sem);
4169 return VK_SUCCESS;
4170 }
4171
4172 void radv_DestroySemaphore(
4173 VkDevice _device,
4174 VkSemaphore _semaphore,
4175 const VkAllocationCallbacks* pAllocator)
4176 {
4177 RADV_FROM_HANDLE(radv_device, device, _device);
4178 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
4179 if (!_semaphore)
4180 return;
4181
4182 if (sem->syncobj)
4183 device->ws->destroy_syncobj(device->ws, sem->syncobj);
4184 else
4185 device->ws->destroy_sem(sem->sem);
4186 vk_free2(&device->alloc, pAllocator, sem);
4187 }
4188
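/* Events are backed by an 8-byte CPU-visible GTT buffer: the host sets and
 * resets the value through the persistent map, while command buffers access
 * the same dword from the GPU.
 */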
4189 VkResult radv_CreateEvent(
4190 VkDevice _device,
4191 const VkEventCreateInfo* pCreateInfo,
4192 const VkAllocationCallbacks* pAllocator,
4193 VkEvent* pEvent)
4194 {
4195 RADV_FROM_HANDLE(radv_device, device, _device);
4196 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
4197 sizeof(*event), 8,
4198 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4199
4200 if (!event)
4201 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4202
4203 event->bo = device->ws->buffer_create(device->ws, 8, 8,
4204 RADEON_DOMAIN_GTT,
4205 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
4206 RADV_BO_PRIORITY_FENCE);
4207 if (!event->bo) {
4208 vk_free2(&device->alloc, pAllocator, event);
4209 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4210 }
4211
4212 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
4213
4214 *pEvent = radv_event_to_handle(event);
4215
4216 return VK_SUCCESS;
4217 }
4218
4219 void radv_DestroyEvent(
4220 VkDevice _device,
4221 VkEvent _event,
4222 const VkAllocationCallbacks* pAllocator)
4223 {
4224 RADV_FROM_HANDLE(radv_device, device, _device);
4225 RADV_FROM_HANDLE(radv_event, event, _event);
4226
4227 if (!event)
4228 return;
4229 device->ws->buffer_destroy(event->bo);
4230 vk_free2(&device->alloc, pAllocator, event);
4231 }
4232
4233 VkResult radv_GetEventStatus(
4234 VkDevice _device,
4235 VkEvent _event)
4236 {
4237 RADV_FROM_HANDLE(radv_event, event, _event);
4238
4239 if (*event->map == 1)
4240 return VK_EVENT_SET;
4241 return VK_EVENT_RESET;
4242 }
4243
4244 VkResult radv_SetEvent(
4245 VkDevice _device,
4246 VkEvent _event)
4247 {
4248 RADV_FROM_HANDLE(radv_event, event, _event);
4249 *event->map = 1;
4250
4251 return VK_SUCCESS;
4252 }
4253
4254 VkResult radv_ResetEvent(
4255 VkDevice _device,
4256 VkEvent _event)
4257 {
4258 RADV_FROM_HANDLE(radv_event, event, _event);
4259 *event->map = 0;
4260
4261 return VK_SUCCESS;
4262 }
4263
4264 VkResult radv_CreateBuffer(
4265 VkDevice _device,
4266 const VkBufferCreateInfo* pCreateInfo,
4267 const VkAllocationCallbacks* pAllocator,
4268 VkBuffer* pBuffer)
4269 {
4270 RADV_FROM_HANDLE(radv_device, device, _device);
4271 struct radv_buffer *buffer;
4272
4273 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
4274
4275 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
4276 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4277 if (buffer == NULL)
4278 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4279
4280 buffer->size = pCreateInfo->size;
4281 buffer->usage = pCreateInfo->usage;
4282 buffer->bo = NULL;
4283 buffer->offset = 0;
4284 buffer->flags = pCreateInfo->flags;
4285
4286 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
4287 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
4288
4289 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
4290 buffer->bo = device->ws->buffer_create(device->ws,
4291 align64(buffer->size, 4096),
4292 4096, 0, RADEON_FLAG_VIRTUAL,
4293 RADV_BO_PRIORITY_VIRTUAL);
4294 if (!buffer->bo) {
4295 vk_free2(&device->alloc, pAllocator, buffer);
4296 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4297 }
4298 }
4299
4300 *pBuffer = radv_buffer_to_handle(buffer);
4301
4302 return VK_SUCCESS;
4303 }
4304
4305 void radv_DestroyBuffer(
4306 VkDevice _device,
4307 VkBuffer _buffer,
4308 const VkAllocationCallbacks* pAllocator)
4309 {
4310 RADV_FROM_HANDLE(radv_device, device, _device);
4311 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
4312
4313 if (!buffer)
4314 return;
4315
4316 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
4317 device->ws->buffer_destroy(buffer->bo);
4318
4319 vk_free2(&device->alloc, pAllocator, buffer);
4320 }
4321
4322 VkDeviceAddress radv_GetBufferDeviceAddressEXT(
4323 VkDevice device,
4324 const VkBufferDeviceAddressInfoEXT* pInfo)
4325 {
4326 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
4327 return radv_buffer_get_va(buffer->bo) + buffer->offset;
4328 }
4329
4330
4331 static inline unsigned
4332 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
4333 {
4334 if (stencil)
4335 return plane->surface.u.legacy.stencil_tiling_index[level];
4336 else
4337 return plane->surface.u.legacy.tiling_index[level];
4338 }
4339
4340 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
4341 {
4342 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
4343 }
4344
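/* Computes the CB_DCC_CONTROL value for a color surface. The block size
 * limits trade compression ratio against decompression cost; surfaces that
 * may be read by the texture unit need independent blocks so DCC data can
 * be fetched without a decompression pass.
 */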
4345 static uint32_t
4346 radv_init_dcc_control_reg(struct radv_device *device,
4347 struct radv_image_view *iview)
4348 {
4349 unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
4350 unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
4351 unsigned max_compressed_block_size;
4352 unsigned independent_128b_blocks;
4353 unsigned independent_64b_blocks;
4354
4355 if (!radv_dcc_enabled(iview->image, iview->base_mip))
4356 return 0;
4357
4358 if (!device->physical_device->rad_info.has_dedicated_vram) {
4359 /* amdvlk: [min-compressed-block-size] should be set to 32 for
4360 * dGPU and 64 for APU because all of our APUs to date use
4361 * DIMMs which have a request granularity size of 64B while all
4362 * other chips have a 32B request size.
4363 */
4364 min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
4365 }
4366
4367 if (device->physical_device->rad_info.chip_class >= GFX10) {
4368 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
4369 independent_64b_blocks = 0;
4370 independent_128b_blocks = 1;
4371 } else {
4372 independent_128b_blocks = 0;
4373
4374 if (iview->image->info.samples > 1) {
4375 if (iview->image->planes[0].surface.bpe == 1)
4376 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4377 else if (iview->image->planes[0].surface.bpe == 2)
4378 max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
4379 }
4380
4381 if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
4382 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
4383 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
4384 /* If this DCC image is potentially going to be used in texture
4385 * fetches, we need some special settings.
4386 */
4387 independent_64b_blocks = 1;
4388 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
4389 } else {
4390 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
4391 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
4392 * big as possible for better compression state.
4393 */
4394 independent_64b_blocks = 0;
4395 max_compressed_block_size = max_uncompressed_block_size;
4396 }
4397 }
4398
4399 return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
4400 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
4401 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
4402 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
4403 S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
4404 }
4405
4406 void
4407 radv_initialise_color_surface(struct radv_device *device,
4408 struct radv_color_buffer_info *cb,
4409 struct radv_image_view *iview)
4410 {
4411 const struct vk_format_description *desc;
4412 unsigned ntype, format, swap, endian;
4413 unsigned blend_clamp = 0, blend_bypass = 0;
4414 uint64_t va;
4415 const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
4416 const struct radeon_surf *surf = &plane->surface;
4417
4418 desc = vk_format_description(iview->vk_format);
4419
4420 memset(cb, 0, sizeof(*cb));
4421
4422 /* Intensity is implemented as Red, so treat it that way. */
4423 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
4424
4425 va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;
4426
4427 cb->cb_color_base = va >> 8;
4428
4429 if (device->physical_device->rad_info.chip_class >= GFX9) {
4430 struct gfx9_surf_meta_flags meta;
4431 if (iview->image->dcc_offset)
4432 meta = surf->u.gfx9.dcc;
4433 else
4434 meta = surf->u.gfx9.cmask;
4435
4436 if (device->physical_device->rad_info.chip_class >= GFX10) {
4437 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
4438 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
4439 S_028EE0_CMASK_PIPE_ALIGNED(surf->u.gfx9.cmask.pipe_aligned) |
4440 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
4441 } else {
4442 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
4443 S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
4444 S_028C74_RB_ALIGNED(meta.rb_aligned) |
4445 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
4446 cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
4447 }
4448
4449 cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
4450 cb->cb_color_base |= surf->tile_swizzle;
4451 } else {
4452 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
4453 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
4454
4455 cb->cb_color_base += level_info->offset >> 8;
4456 if (level_info->mode == RADEON_SURF_MODE_2D)
4457 cb->cb_color_base |= surf->tile_swizzle;
4458
4459 pitch_tile_max = level_info->nblk_x / 8 - 1;
4460 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
4461 tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
4462
4463 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
4464 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
4465 cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
4466
4467 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
4468
4469 if (radv_image_has_fmask(iview->image)) {
4470 if (device->physical_device->rad_info.chip_class >= GFX7)
4471 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
4472 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
4473 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
4474 } else {
4475 /* This must be set for fast clear to work without FMASK. */
4476 if (device->physical_device->rad_info.chip_class >= GFX7)
4477 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
4478 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
4479 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
4480 }
4481 }
4482
4483 /* CMASK variables */
4484 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4485 va += iview->image->cmask_offset;
4486 cb->cb_color_cmask = va >> 8;
4487
4488 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4489 va += iview->image->dcc_offset;
4490
4491 if (radv_dcc_enabled(iview->image, iview->base_mip) &&
4492 device->physical_device->rad_info.chip_class <= GFX8)
4493 va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
4494
4495 unsigned dcc_tile_swizzle = surf->tile_swizzle;
4496 dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
4497
4498 cb->cb_dcc_base = va >> 8;
4499 cb->cb_dcc_base |= dcc_tile_swizzle;
4500
4501 /* GFX10 field has the same base shift as the GFX6 field. */
4502 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4503 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
4504 S_028C6C_SLICE_MAX_GFX10(max_slice);
4505
4506 if (iview->image->info.samples > 1) {
4507 unsigned log_samples = util_logbase2(iview->image->info.samples);
4508
4509 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
4510 S_028C74_NUM_FRAGMENTS(log_samples);
4511 }
4512
4513 if (radv_image_has_fmask(iview->image)) {
4514 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask_offset;
4515 cb->cb_color_fmask = va >> 8;
4516 cb->cb_color_fmask |= surf->fmask_tile_swizzle;
4517 } else {
4518 cb->cb_color_fmask = cb->cb_color_base;
4519 }
4520
4521 ntype = radv_translate_color_numformat(iview->vk_format,
4522 desc,
4523 vk_format_get_first_non_void_channel(iview->vk_format));
4524 format = radv_translate_colorformat(iview->vk_format);
4525 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
4526 radv_finishme("Illegal color\n");
4527 swap = radv_translate_colorswap(iview->vk_format, false);
4528 endian = radv_colorformat_endian_swap(format);
4529
4530 /* blend clamp should be set for all NORM/SRGB types */
4531 if (ntype == V_028C70_NUMBER_UNORM ||
4532 ntype == V_028C70_NUMBER_SNORM ||
4533 ntype == V_028C70_NUMBER_SRGB)
4534 blend_clamp = 1;
4535
4536 /* set blend bypass according to docs if SINT/UINT or
4537 8/24 COLOR variants */
4538 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
4539 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
4540 format == V_028C70_COLOR_X24_8_32_FLOAT) {
4541 blend_clamp = 0;
4542 blend_bypass = 1;
4543 }
4544 #if 0
4545 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
4546 (format == V_028C70_COLOR_8 ||
4547 format == V_028C70_COLOR_8_8 ||
4548 format == V_028C70_COLOR_8_8_8_8))
4549 ->color_is_int8 = true;
4550 #endif
4551 cb->cb_color_info = S_028C70_FORMAT(format) |
4552 S_028C70_COMP_SWAP(swap) |
4553 S_028C70_BLEND_CLAMP(blend_clamp) |
4554 S_028C70_BLEND_BYPASS(blend_bypass) |
4555 S_028C70_SIMPLE_FLOAT(1) |
4556 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
4557 ntype != V_028C70_NUMBER_SNORM &&
4558 ntype != V_028C70_NUMBER_SRGB &&
4559 format != V_028C70_COLOR_8_24 &&
4560 format != V_028C70_COLOR_24_8) |
4561 S_028C70_NUMBER_TYPE(ntype) |
4562 S_028C70_ENDIAN(endian);
4563 if (radv_image_has_fmask(iview->image)) {
4564 cb->cb_color_info |= S_028C70_COMPRESSION(1);
4565 if (device->physical_device->rad_info.chip_class == GFX6) {
4566 unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
4567 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
4568 }
4569
4570 if (radv_image_is_tc_compat_cmask(iview->image)) {
4571 /* Allow the texture block to read FMASK directly
4572 * without decompressing it. This bit must be cleared
4573 * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
4574 * otherwise the operation doesn't happen.
4575 */
4576 cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
4577
4578 /* Set CMASK into a tiling format that allows the
4579 * texture block to read it.
4580 */
4581 cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
4582 }
4583 }
4584
4585 if (radv_image_has_cmask(iview->image) &&
4586 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
4587 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
4588
4589 if (radv_dcc_enabled(iview->image, iview->base_mip))
4590 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
4591
4592 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
4593
4594 /* This must be set for fast clear to work without FMASK. */
4595 if (!radv_image_has_fmask(iview->image) &&
4596 device->physical_device->rad_info.chip_class == GFX6) {
4597 unsigned bankh = util_logbase2(surf->u.legacy.bankh);
4598 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
4599 }
4600
4601 if (device->physical_device->rad_info.chip_class >= GFX9) {
4602 const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);
4603
4604 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
4605 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
4606 unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
4607 unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);
4608
4609 if (device->physical_device->rad_info.chip_class >= GFX10) {
4610 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
4611
4612 cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
4613 S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
4614 S_028EE0_RESOURCE_LEVEL(1);
4615 } else {
4616 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
4617 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
4618 S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
4619 }
4620
4621 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
4622 S_028C68_MIP0_HEIGHT(height - 1) |
4623 S_028C68_MAX_MIP(iview->image->info.levels - 1);
4624 }
4625 }
4626
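/* Returns the DECOMPRESS_ON_N_ZPLANES value for TC-compatible HTILE, i.e.
 * the Z-plane count threshold above which the DB writes decompressed values
 * so the texture unit can still read the surface directly.
 */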
4627 static unsigned
4628 radv_calc_decompress_on_z_planes(struct radv_device *device,
4629 struct radv_image_view *iview)
4630 {
4631 unsigned max_zplanes = 0;
4632
4633 assert(radv_image_is_tc_compat_htile(iview->image));
4634
4635 if (device->physical_device->rad_info.chip_class >= GFX9) {
4636 /* Default value for 32-bit depth surfaces. */
4637 max_zplanes = 4;
4638
4639 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
4640 iview->image->info.samples > 1)
4641 max_zplanes = 2;
4642
4643 max_zplanes = max_zplanes + 1;
4644 } else {
4645 if (iview->vk_format == VK_FORMAT_D16_UNORM) {
4646 			/* Do not enable Z plane compression for 16-bit depth
4647 			 * surfaces because it isn't supported on GFX8; only
4648 			 * 32-bit depth surfaces are supported by the hardware.
4649 			 * This maintains shader compatibility and reduces the
4650 			 * number of depth decompressions.
4651 */
4652 max_zplanes = 1;
4653 } else {
4654 if (iview->image->info.samples <= 1)
4655 max_zplanes = 5;
4656 else if (iview->image->info.samples <= 4)
4657 max_zplanes = 3;
4658 else
4659 max_zplanes = 2;
4660 }
4661 }
4662
4663 return max_zplanes;
4664 }
4665
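/* Fills the depth/stencil surface state (DB_* registers) for an image
 * view, including the HTILE setup when HTILE is enabled for the selected
 * level.
 */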
4666 void
4667 radv_initialise_ds_surface(struct radv_device *device,
4668 struct radv_ds_buffer_info *ds,
4669 struct radv_image_view *iview)
4670 {
4671 unsigned level = iview->base_mip;
4672 unsigned format, stencil_format;
4673 uint64_t va, s_offs, z_offs;
4674 bool stencil_only = false;
4675 const struct radv_image_plane *plane = &iview->image->planes[0];
4676 const struct radeon_surf *surf = &plane->surface;
4677
4678 assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
4679
4680 memset(ds, 0, sizeof(*ds));
4681 switch (iview->image->vk_format) {
4682 case VK_FORMAT_D24_UNORM_S8_UINT:
4683 case VK_FORMAT_X8_D24_UNORM_PACK32:
4684 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
4685 ds->offset_scale = 2.0f;
4686 break;
4687 case VK_FORMAT_D16_UNORM:
4688 case VK_FORMAT_D16_UNORM_S8_UINT:
4689 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
4690 ds->offset_scale = 4.0f;
4691 break;
4692 case VK_FORMAT_D32_SFLOAT:
4693 case VK_FORMAT_D32_SFLOAT_S8_UINT:
4694 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
4695 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
4696 ds->offset_scale = 1.0f;
4697 break;
4698 case VK_FORMAT_S8_UINT:
4699 stencil_only = true;
4700 break;
4701 default:
4702 break;
4703 }
4704
4705 format = radv_translate_dbformat(iview->image->vk_format);
4706 stencil_format = surf->has_stencil ?
4707 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
4708
4709 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
4710 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
4711 S_028008_SLICE_MAX(max_slice);
4712 if (device->physical_device->rad_info.chip_class >= GFX10) {
4713 ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
4714 S_028008_SLICE_MAX_HI(max_slice >> 11);
4715 }
4716
4717 ds->db_htile_data_base = 0;
4718 ds->db_htile_surface = 0;
4719
4720 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
4721 s_offs = z_offs = va;
4722
4723 if (device->physical_device->rad_info.chip_class >= GFX9) {
4724 assert(surf->u.gfx9.surf_offset == 0);
4725 s_offs += surf->u.gfx9.stencil_offset;
4726
4727 ds->db_z_info = S_028038_FORMAT(format) |
4728 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
4729 S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
4730 S_028038_MAXMIP(iview->image->info.levels - 1) |
4731 S_028038_ZRANGE_PRECISION(1);
4732 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
4733 S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
4734
4735 if (device->physical_device->rad_info.chip_class == GFX9) {
4736 ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
4737 ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
4738 }
4739
4740 ds->db_depth_view |= S_028008_MIPID(level);
4741 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
4742 S_02801C_Y_MAX(iview->image->info.height - 1);
4743
4744 if (radv_htile_enabled(iview->image, level)) {
4745 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
4746
4747 if (radv_image_is_tc_compat_htile(iview->image)) {
4748 unsigned max_zplanes =
4749 radv_calc_decompress_on_z_planes(device, iview);
4750
4751 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4752
4753 if (device->physical_device->rad_info.chip_class >= GFX10) {
4754 ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
4755 ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
4756 } else {
4757 ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
4758 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
4759 }
4760 }
4761
4762 if (!surf->has_stencil)
4763 /* Use all of the htile_buffer for depth if there's no stencil. */
4764 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
4765 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4766 iview->image->htile_offset;
4767 ds->db_htile_data_base = va >> 8;
4768 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
4769 S_028ABC_PIPE_ALIGNED(surf->u.gfx9.htile.pipe_aligned);
4770
4771 if (device->physical_device->rad_info.chip_class == GFX9) {
4772 ds->db_htile_surface |= S_028ABC_RB_ALIGNED(surf->u.gfx9.htile.rb_aligned);
4773 }
4774 }
4775 } else {
4776 const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
4777
4778 if (stencil_only)
4779 level_info = &surf->u.legacy.stencil_level[level];
4780
4781 z_offs += surf->u.legacy.level[level].offset;
4782 s_offs += surf->u.legacy.stencil_level[level].offset;
4783
4784 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
4785 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
4786 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
4787
4788 if (iview->image->info.samples > 1)
4789 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
4790
4791 if (device->physical_device->rad_info.chip_class >= GFX7) {
4792 struct radeon_info *info = &device->physical_device->rad_info;
4793 unsigned tiling_index = surf->u.legacy.tiling_index[level];
4794 unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
4795 unsigned macro_index = surf->u.legacy.macro_tile_index;
4796 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
4797 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
4798 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
4799
4800 if (stencil_only)
4801 tile_mode = stencil_tile_mode;
4802
4803 ds->db_depth_info |=
4804 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
4805 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
4806 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
4807 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
4808 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
4809 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
4810 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
4811 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
4812 } else {
4813 unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
4814 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4815 tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
4816 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
4817 if (stencil_only)
4818 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
4819 }
4820
4821 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
4822 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
4823 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
4824
4825 if (radv_htile_enabled(iview->image, level)) {
4826 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
4827
4828 if (!surf->has_stencil &&
4829 !radv_image_is_tc_compat_htile(iview->image))
4830 /* Use all of the htile_buffer for depth if there's no stencil. */
4831 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
4832
4833 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
4834 iview->image->htile_offset;
4835 ds->db_htile_data_base = va >> 8;
4836 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
4837
4838 if (radv_image_is_tc_compat_htile(iview->image)) {
4839 unsigned max_zplanes =
4840 radv_calc_decompress_on_z_planes(device, iview);
4841
4842 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
4843 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
4844 }
4845 }
4846 }
4847
4848 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
4849 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
4850 }
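/* The base-address registers written above (db_z_read_base, db_htile_data_base,
 * etc.) hold GPU virtual addresses shifted right by 8 bits, so every surface
 * base must be 256-byte aligned. A minimal sketch of that invariant; the
 * helper name is illustrative and not part of the driver:
 */
static inline uint64_t
radv_db_base_from_va_sketch(uint64_t va)
{
	/* The low 8 bits are dropped by the >> 8, so they must already be zero. */
	assert((va & 0xffull) == 0);
	return va >> 8;
}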
4851
4852 VkResult radv_CreateFramebuffer(
4853 VkDevice _device,
4854 const VkFramebufferCreateInfo* pCreateInfo,
4855 const VkAllocationCallbacks* pAllocator,
4856 VkFramebuffer* pFramebuffer)
4857 {
4858 RADV_FROM_HANDLE(radv_device, device, _device);
4859 struct radv_framebuffer *framebuffer;
4860 const VkFramebufferAttachmentsCreateInfoKHR *imageless_create_info =
4861 vk_find_struct_const(pCreateInfo->pNext,
4862 FRAMEBUFFER_ATTACHMENTS_CREATE_INFO_KHR);
4863
4864 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
4865
4866 size_t size = sizeof(*framebuffer);
4867 if (!imageless_create_info)
4868 size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
4869 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
4870 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4871 if (framebuffer == NULL)
4872 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4873
4874 framebuffer->attachment_count = pCreateInfo->attachmentCount;
4875 framebuffer->width = pCreateInfo->width;
4876 framebuffer->height = pCreateInfo->height;
4877 framebuffer->layers = pCreateInfo->layers;
4878 if (imageless_create_info) {
4879 for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
4880 const VkFramebufferAttachmentImageInfoKHR *attachment =
4881 imageless_create_info->pAttachmentImageInfos + i;
4882 framebuffer->width = MIN2(framebuffer->width, attachment->width);
4883 framebuffer->height = MIN2(framebuffer->height, attachment->height);
4884 framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
4885 }
4886 } else {
4887 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
4888 VkImageView _iview = pCreateInfo->pAttachments[i];
4889 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
4890 framebuffer->attachments[i] = iview;
4891 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
4892 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
4893 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
4894 }
4895 }
4896
4897 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
4898 return VK_SUCCESS;
4899 }
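/* A minimal application-side sketch of the imageless path handled by
 * radv_CreateFramebuffer() above (VK_KHR_imageless_framebuffer): attachment
 * properties arrive through VkFramebufferAttachmentsCreateInfoKHR instead of
 * image views. The helper name and dimensions are illustrative only.
 */
static inline VkResult
radv_imageless_fb_sketch(VkDevice dev, VkRenderPass pass,
                         VkFormat color_format, VkFramebuffer *fb)
{
	const VkFramebufferAttachmentImageInfoKHR att_info = {
		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO_KHR,
		.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
		.width = 1920,
		.height = 1080,
		.layerCount = 1,
		.viewFormatCount = 1,
		.pViewFormats = &color_format,
	};
	const VkFramebufferAttachmentsCreateInfoKHR att_list = {
		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO_KHR,
		.attachmentImageInfoCount = 1,
		.pAttachmentImageInfos = &att_info,
	};
	const VkFramebufferCreateInfo fb_info = {
		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
		.pNext = &att_list,
		.flags = VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR,
		.renderPass = pass,
		.attachmentCount = 1, /* counted, but pAttachments is ignored */
		.width = 1920,
		.height = 1080,
		.layers = 1,
	};
	return radv_CreateFramebuffer(dev, &fb_info, NULL, fb);
}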
4900
4901 void radv_DestroyFramebuffer(
4902 VkDevice _device,
4903 VkFramebuffer _fb,
4904 const VkAllocationCallbacks* pAllocator)
4905 {
4906 RADV_FROM_HANDLE(radv_device, device, _device);
4907 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
4908
4909 if (!fb)
4910 return;
4911 vk_free2(&device->alloc, pAllocator, fb);
4912 }
4913
4914 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
4915 {
4916 switch (address_mode) {
4917 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
4918 return V_008F30_SQ_TEX_WRAP;
4919 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
4920 return V_008F30_SQ_TEX_MIRROR;
4921 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
4922 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
4923 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
4924 return V_008F30_SQ_TEX_CLAMP_BORDER;
4925 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
4926 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
4927 default:
4928 unreachable("illegal tex wrap mode");
4929 break;
4930 }
4931 }
4932
4933 static unsigned
4934 radv_tex_compare(VkCompareOp op)
4935 {
4936 switch (op) {
4937 case VK_COMPARE_OP_NEVER:
4938 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
4939 case VK_COMPARE_OP_LESS:
4940 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
4941 case VK_COMPARE_OP_EQUAL:
4942 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
4943 case VK_COMPARE_OP_LESS_OR_EQUAL:
4944 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
4945 case VK_COMPARE_OP_GREATER:
4946 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
4947 case VK_COMPARE_OP_NOT_EQUAL:
4948 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
4949 case VK_COMPARE_OP_GREATER_OR_EQUAL:
4950 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
4951 case VK_COMPARE_OP_ALWAYS:
4952 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
4953 default:
4954 unreachable("illegal compare mode");
4955 break;
4956 }
4957 }
4958
4959 static unsigned
4960 radv_tex_filter(VkFilter filter, unsigned max_aniso)
4961 {
4962 switch (filter) {
4963 case VK_FILTER_NEAREST:
4964 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
4965 V_008F38_SQ_TEX_XY_FILTER_POINT);
4966 case VK_FILTER_LINEAR:
4967 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
4968 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
4969 case VK_FILTER_CUBIC_IMG:
4970 default:
4971 fprintf(stderr, "illegal texture filter\n");
4972 return 0;
4973 }
4974 }
4975
4976 static unsigned
4977 radv_tex_mipfilter(VkSamplerMipmapMode mode)
4978 {
4979 switch (mode) {
4980 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
4981 return V_008F38_SQ_TEX_Z_FILTER_POINT;
4982 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
4983 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
4984 default:
4985 return V_008F38_SQ_TEX_Z_FILTER_NONE;
4986 }
4987 }
4988
4989 static unsigned
4990 radv_tex_bordercolor(VkBorderColor bcolor)
4991 {
4992 switch (bcolor) {
4993 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
4994 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
4995 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
4996 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
4997 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
4998 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
4999 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
5000 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
5001 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
5002 default:
5003 break;
5004 }
5005 return 0;
5006 }
5007
5008 static unsigned
5009 radv_tex_aniso_filter(unsigned filter)
5010 {
5011 if (filter < 2)
5012 return 0;
5013 if (filter < 4)
5014 return 1;
5015 if (filter < 8)
5016 return 2;
5017 if (filter < 16)
5018 return 3;
5019 return 4;
5020 }
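/* The ladder above is floor(log2(filter)) clamped to the four hardware
 * anisotropy ratios (2x..16x); an equivalent form, as a sketch only, would be
 * util_logbase2(CLAMP(filter, 1, 16)). */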
5021
5022 static unsigned
5023 radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
5024 {
5025 switch (mode) {
5026 case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
5027 return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
5028 case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
5029 return V_008F30_SQ_IMG_FILTER_MODE_MIN;
5030 case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
5031 return V_008F30_SQ_IMG_FILTER_MODE_MAX;
5032 default:
5033 break;
5034 }
5035 return 0;
5036 }
5037
5038 static uint32_t
5039 radv_get_max_anisotropy(struct radv_device *device,
5040 const VkSamplerCreateInfo *pCreateInfo)
5041 {
5042 if (device->force_aniso >= 0)
5043 return device->force_aniso;
5044
5045 if (pCreateInfo->anisotropyEnable &&
5046 pCreateInfo->maxAnisotropy > 1.0f)
5047 return (uint32_t)pCreateInfo->maxAnisotropy;
5048
5049 return 0;
5050 }
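/* device->force_aniso is a driver-wide override (set from the RADV_TEX_ANISO
 * debug option elsewhere in this file) that takes precedence over per-sampler
 * state; otherwise anisotropic filtering only applies when the app enables it
 * with a ratio above 1.0. */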
5051
5052 static void
5053 radv_init_sampler(struct radv_device *device,
5054 struct radv_sampler *sampler,
5055 const VkSamplerCreateInfo *pCreateInfo)
5056 {
5057 uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
5058 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
5059 bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
5060 device->physical_device->rad_info.chip_class == GFX9;
5061 unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
5062
5063 const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
5064 vk_find_struct_const(pCreateInfo->pNext,
5065 SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT);
5066 if (sampler_reduction)
5067 filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
5068
5069 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
5070 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
5071 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
5072 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
5073 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
5074 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
5075 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
5076 S_008F30_ANISO_BIAS(max_aniso_ratio) |
5077 S_008F30_DISABLE_CUBE_WRAP(0) |
5078 S_008F30_COMPAT_MODE(compat_mode) |
5079 S_008F30_FILTER_MODE(filter_mode));
5080 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
5081 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
5082 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
5083 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
5084 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
5085 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
5086 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
5087 S_008F38_MIP_POINT_PRECLAMP(0));
5088 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
5089 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
5090
5091 if (device->physical_device->rad_info.chip_class >= GFX10) {
5092 sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
5093 } else {
5094 sampler->state[2] |=
5095 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
5096 S_008F38_FILTER_PREC_FIX(1) |
5097 S_008F38_ANISO_OVERRIDE_GFX6(device->physical_device->rad_info.chip_class >= GFX8);
5098 }
5099 }
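/* The LOD and LOD-bias fields above are encoded with S_FIXED(value, 8), i.e.
 * fixed point with 8 fractional bits. A minimal sketch of that conversion;
 * the helper name is illustrative, not the actual macro:
 */
static inline int
radv_s_fixed_sketch(float value, unsigned frac_bits)
{
	/* e.g. maxLod = 12.5f with 8 fractional bits encodes as 3200 (0xC80) */
	return (int)(value * (1 << frac_bits));
}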
5100
5101 VkResult radv_CreateSampler(
5102 VkDevice _device,
5103 const VkSamplerCreateInfo* pCreateInfo,
5104 const VkAllocationCallbacks* pAllocator,
5105 VkSampler* pSampler)
5106 {
5107 RADV_FROM_HANDLE(radv_device, device, _device);
5108 struct radv_sampler *sampler;
5109
5110 const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
5111 vk_find_struct_const(pCreateInfo->pNext,
5112 SAMPLER_YCBCR_CONVERSION_INFO);
5113
5114 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
5115
5116 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
5117 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5118 if (!sampler)
5119 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5120
5121 radv_init_sampler(device, sampler, pCreateInfo);
5122
5123 sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
5124 *pSampler = radv_sampler_to_handle(sampler);
5125
5126 return VK_SUCCESS;
5127 }
5128
5129 void radv_DestroySampler(
5130 VkDevice _device,
5131 VkSampler _sampler,
5132 const VkAllocationCallbacks* pAllocator)
5133 {
5134 RADV_FROM_HANDLE(radv_device, device, _device);
5135 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
5136
5137 if (!sampler)
5138 return;
5139 vk_free2(&device->alloc, pAllocator, sampler);
5140 }
5141
5142 /* vk_icd.h does not declare this function, so we declare it here to
5143 * suppress -Wmissing-prototypes.
5144 */
5145 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
5146 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
5147
5148 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
5149 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
5150 {
5151 /* For the full details on loader interface versioning, see
5152 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
5153 * What follows is a condensed summary, to help you navigate the large and
5154 * confusing official doc.
5155 *
5156 * - Loader interface v0 is incompatible with later versions. We don't
5157 * support it.
5158 *
5159 * - In loader interface v1:
5160 * - The first ICD entrypoint called by the loader is
5161 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
5162 * entrypoint.
5163 * - The ICD must statically expose no other Vulkan symbol unless it is
5164 * linked with -Bsymbolic.
5165 * - Each dispatchable Vulkan handle created by the ICD must be
5166 * a pointer to a struct whose first member is VK_LOADER_DATA. The
5167 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
5168 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
5169 * vkDestroySurfaceKHR(). The ICD must be capable of working with
5170 * such loader-managed surfaces.
5171 *
5172 * - Loader interface v2 differs from v1 in:
5173 * - The first ICD entrypoint called by the loader is
5174 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
5175 * statically expose this entrypoint.
5176 *
5177 * - Loader interface v3 differs from v2 in:
5178 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
5179 * vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
5180 * because the loader no longer does so.
*
* - Loader interface v4 differs from v3 in:
*     - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
5181 */
5182 *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
5183 return VK_SUCCESS;
5184 }
5185
5186 VkResult radv_GetMemoryFdKHR(VkDevice _device,
5187 const VkMemoryGetFdInfoKHR *pGetFdInfo,
5188 int *pFD)
5189 {
5190 RADV_FROM_HANDLE(radv_device, device, _device);
5191 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
5192
5193 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
5194
5195 /* At the moment, we support only the below handle types. */
5196 assert(pGetFdInfo->handleType ==
5197 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5198 pGetFdInfo->handleType ==
5199 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5200
5201 bool ret = radv_get_memory_fd(device, memory, pFD);
5202 if (ret == false)
5203 return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
5204 return VK_SUCCESS;
5205 }
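/* A minimal application-side sketch of the export path above
 * (VK_KHR_external_memory_fd); variable names are illustrative. On success
 * the caller owns the fd and must eventually close(2) it:
 *
 *   const VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = mem,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *   };
 *   int fd = -1;
 *   VkResult result = vkGetMemoryFdKHR(device, &get_fd, &fd);
 */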
5206
5207 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
5208 VkExternalMemoryHandleTypeFlagBits handleType,
5209 int fd,
5210 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
5211 {
5212 RADV_FROM_HANDLE(radv_device, device, _device);
5213
5214 switch (handleType) {
5215 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
5216 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
5217 return VK_SUCCESS;
5218
5219 default:
5220 /* The valid usage section for this function says:
5221 *
5222 * "handleType must not be one of the handle types defined as
5223 * opaque."
5224 *
5225 * So opaque handle types fall into the default "unsupported" case.
5226 */
5227 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5228 }
5229 }
5230
5231 static VkResult radv_import_opaque_fd(struct radv_device *device,
5232 int fd,
5233 uint32_t *syncobj)
5234 {
5235 uint32_t syncobj_handle = 0;
5236 int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
5237 if (ret != 0)
5238 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5239
5240 if (*syncobj)
5241 device->ws->destroy_syncobj(device->ws, *syncobj);
5242
5243 *syncobj = syncobj_handle;
5244 close(fd);
5245
5246 return VK_SUCCESS;
5247 }
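/* Per the Vulkan external-handle rules, a successful opaque-fd import
 * transfers ownership of the descriptor to the implementation, which is why
 * the fd is close()d above once the syncobj has been imported. */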
5248
5249 static VkResult radv_import_sync_fd(struct radv_device *device,
5250 int fd,
5251 uint32_t *syncobj)
5252 {
5253 /* If we need to create a syncobj, create it locally first so that on
5254 * error we don't leave the fence's syncobj in an undetermined state. */
5255 uint32_t syncobj_handle = *syncobj;
5256 if (!syncobj_handle) {
5257 int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
5258 if (ret) {
5259 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5260 }
5261 }
5262
5263 if (fd == -1) {
5264 device->ws->signal_syncobj(device->ws, syncobj_handle);
5265 } else {
5266 int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
5267 if (ret != 0)
5268 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5269 }
5270
5271 *syncobj = syncobj_handle;
5272 if (fd != -1)
5273 close(fd);
5274
5275 return VK_SUCCESS;
5276 }
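/* The fd == -1 special case above follows the Vulkan spec for SYNC_FD handle
 * types: importing -1 is equivalent to importing an already-signaled sync
 * file, so the syncobj is signaled locally instead of going through the
 * kernel import path. */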
5277
5278 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
5279 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
5280 {
5281 RADV_FROM_HANDLE(radv_device, device, _device);
5282 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
5283 uint32_t *syncobj_dst = NULL;
5284
5285 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
5286 syncobj_dst = &sem->temp_syncobj;
5287 } else {
5288 syncobj_dst = &sem->syncobj;
5289 }
5290
5291 switch(pImportSemaphoreFdInfo->handleType) {
5292 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
5293 return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
5294 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
5295 return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
5296 default:
5297 unreachable("Unhandled semaphore handle type");
5298 }
5299 }
5300
5301 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
5302 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
5303 int *pFd)
5304 {
5305 RADV_FROM_HANDLE(radv_device, device, _device);
5306 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
5307 int ret;
5308 uint32_t syncobj_handle;
5309
5310 if (sem->temp_syncobj)
5311 syncobj_handle = sem->temp_syncobj;
5312 else
5313 syncobj_handle = sem->syncobj;
5314
5315 switch(pGetFdInfo->handleType) {
5316 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
5317 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
5318 break;
5319 case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
5320 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
5321 if (!ret) {
5322 if (sem->temp_syncobj) {
5323 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj); /* syncobj handles are not fds: destroy, don't close() */
5324 sem->temp_syncobj = 0;
5325 } else {
5326 device->ws->reset_syncobj(device->ws, syncobj_handle);
5327 }
5328 }
5329 break;
5330 default:
5331 unreachable("Unhandled semaphore handle type");
5332 }
5333
5334 if (ret)
5335 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5336 return VK_SUCCESS;
5337 }
5338
5339 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
5340 VkPhysicalDevice physicalDevice,
5341 const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
5342 VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
5343 {
5344 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
5345
5346 /* Require has_syncobj_wait_for_submit for the syncobj signal ioctl, which was introduced at virtually the same time. */
5347 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
5348 (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
5349 pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
5350 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
5351 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
5352 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
5353 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5354 } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
5355 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
5356 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
5357 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
5358 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
5359 } else {
5360 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
5361 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
5362 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
5363 }
5364 }
5365
5366 VkResult radv_ImportFenceFdKHR(VkDevice _device,
5367 const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
5368 {
5369 RADV_FROM_HANDLE(radv_device, device, _device);
5370 RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
5371 uint32_t *syncobj_dst = NULL;
5372
5374 if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
5375 syncobj_dst = &fence->temp_syncobj;
5376 } else {
5377 syncobj_dst = &fence->syncobj;
5378 }
5379
5380 switch(pImportFenceFdInfo->handleType) {
5381 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
5382 return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
5383 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
5384 return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
5385 default:
5386 unreachable("Unhandled fence handle type");
5387 }
5388 }
5389
5390 VkResult radv_GetFenceFdKHR(VkDevice _device,
5391 const VkFenceGetFdInfoKHR *pGetFdInfo,
5392 int *pFd)
5393 {
5394 RADV_FROM_HANDLE(radv_device, device, _device);
5395 RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
5396 int ret;
5397 uint32_t syncobj_handle;
5398
5399 if (fence->temp_syncobj)
5400 syncobj_handle = fence->temp_syncobj;
5401 else
5402 syncobj_handle = fence->syncobj;
5403
5404 switch(pGetFdInfo->handleType) {
5405 case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
5406 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
5407 break;
5408 case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
5409 ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
5410 if (!ret) {
5411 if (fence->temp_syncobj) {
5412 device->ws->destroy_syncobj(device->ws, fence->temp_syncobj); /* syncobj handles are not fds: destroy, don't close() */
5413 fence->temp_syncobj = 0;
5414 } else {
5415 device->ws->reset_syncobj(device->ws, syncobj_handle);
5416 }
5417 }
5418 break;
5419 default:
5420 unreachable("Unhandled fence handle type");
5421 }
5422
5423 if (ret)
5424 return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
5425 return VK_SUCCESS;
5426 }
5427
5428 void radv_GetPhysicalDeviceExternalFenceProperties(
5429 VkPhysicalDevice physicalDevice,
5430 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
5431 VkExternalFenceProperties *pExternalFenceProperties)
5432 {
5433 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
5434
5435 if (pdevice->rad_info.has_syncobj_wait_for_submit &&
5436 (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
5437 pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
5438 pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
5439 pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
5440 pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
5441 VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
5442 } else {
5443 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
5444 pExternalFenceProperties->compatibleHandleTypes = 0;
5445 pExternalFenceProperties->externalFenceFeatures = 0;
5446 }
5447 }
5448
5449 VkResult
5450 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
5451 const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
5452 const VkAllocationCallbacks* pAllocator,
5453 VkDebugReportCallbackEXT* pCallback)
5454 {
5455 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5456 return vk_create_debug_report_callback(&instance->debug_report_callbacks,
5457 pCreateInfo, pAllocator, &instance->alloc,
5458 pCallback);
5459 }
5460
5461 void
5462 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
5463 VkDebugReportCallbackEXT _callback,
5464 const VkAllocationCallbacks* pAllocator)
5465 {
5466 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5467 vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
5468 _callback, pAllocator, &instance->alloc);
5469 }
5470
5471 void
5472 radv_DebugReportMessageEXT(VkInstance _instance,
5473 VkDebugReportFlagsEXT flags,
5474 VkDebugReportObjectTypeEXT objectType,
5475 uint64_t object,
5476 size_t location,
5477 int32_t messageCode,
5478 const char* pLayerPrefix,
5479 const char* pMessage)
5480 {
5481 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5482 vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
5483 object, location, messageCode, pLayerPrefix, pMessage);
5484 }
5485
5486 void
5487 radv_GetDeviceGroupPeerMemoryFeatures(
5488 VkDevice device,
5489 uint32_t heapIndex,
5490 uint32_t localDeviceIndex,
5491 uint32_t remoteDeviceIndex,
5492 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
5493 {
5494 assert(localDeviceIndex == remoteDeviceIndex);
5495
5496 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
5497 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
5498 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
5499 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
5500 }
5501
5502 static const VkTimeDomainEXT radv_time_domains[] = {
5503 VK_TIME_DOMAIN_DEVICE_EXT,
5504 VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
5505 VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
5506 };
5507
5508 VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
5509 VkPhysicalDevice physicalDevice,
5510 uint32_t *pTimeDomainCount,
5511 VkTimeDomainEXT *pTimeDomains)
5512 {
5513 int d;
5514 VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
5515
5516 for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
5517 vk_outarray_append(&out, i) {
5518 *i = radv_time_domains[d];
5519 }
5520 }
5521
5522 return vk_outarray_status(&out);
5523 }
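/* VK_OUTARRAY_MAKE/vk_outarray_append implement the standard Vulkan two-call
 * idiom: when pTimeDomains is NULL only the count is written back; otherwise
 * up to *pTimeDomainCount entries are filled and VK_INCOMPLETE is returned if
 * the buffer was too small. A caller sketch, names illustrative:
 *
 *   uint32_t count = 0;
 *   vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(pdev, &count, NULL);
 *   VkTimeDomainEXT domains[3];
 *   count = MIN2(count, 3);
 *   vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(pdev, &count, domains);
 */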
5524
5525 static uint64_t
5526 radv_clock_gettime(clockid_t clock_id)
5527 {
5528 struct timespec current;
5529 int ret;
5530
5531 ret = clock_gettime(clock_id, &current);
5532 if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
5533 ret = clock_gettime(CLOCK_MONOTONIC, &current);
5534 if (ret < 0)
5535 return 0;
5536
5537 return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
5538 }
5539
5540 VkResult radv_GetCalibratedTimestampsEXT(
5541 VkDevice _device,
5542 uint32_t timestampCount,
5543 const VkCalibratedTimestampInfoEXT *pTimestampInfos,
5544 uint64_t *pTimestamps,
5545 uint64_t *pMaxDeviation)
5546 {
5547 RADV_FROM_HANDLE(radv_device, device, _device);
5548 uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
5549 int d;
5550 uint64_t begin, end;
5551 uint64_t max_clock_period = 0;
5552
5553 begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
5554
5555 for (d = 0; d < timestampCount; d++) {
5556 switch (pTimestampInfos[d].timeDomain) {
5557 case VK_TIME_DOMAIN_DEVICE_EXT:
5558 pTimestamps[d] = device->ws->query_value(device->ws,
5559 RADEON_TIMESTAMP);
5560 uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
5561 max_clock_period = MAX2(max_clock_period, device_period);
5562 break;
5563 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
5564 pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
5565 max_clock_period = MAX2(max_clock_period, 1);
5566 break;
5567
5568 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
5569 pTimestamps[d] = begin;
5570 break;
5571 default:
5572 pTimestamps[d] = 0;
5573 break;
5574 }
5575 }
5576
5577 end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
5578
5579 /*
5580 * The maximum deviation is the sum of the interval over which we
5581 * perform the sampling and the maximum period of any sampled
5582 * clock. That's because the maximum skew between any two sampled
5583 * clock edges is when the sampled clock with the largest period is
5584 * sampled at the end of that period but right at the beginning of the
5585 * sampling interval and some other clock is sampled right at the
5586 * beginning of its sampling period and right at the end of the
5587 * sampling interval. Let's assume the GPU has the longest clock
5588 * period and that the application is sampling GPU and monotonic:
5589 *
5590 * s e
5591 * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
5592 * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
5593 *
5594 * g
5595 * 0 1 2 3
5596 * GPU -----_____-----_____-----_____-----_____
5597 *
5598 * m
5599 * x y z 0 1 2 3 4 5 6 7 8 9 a b c
5600 * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
5601 *
5602 * Interval <----------------->
5603 * Deviation <-------------------------->
5604 *
5605 * s = read(raw) 2
5606 * g = read(GPU) 1
5607 * m = read(monotonic) 2
5608 * e = read(raw) b
5609 *
5610 * We round the sample interval up by one tick to cover sampling error
5611 * in the interval clock.
5612 */
5613
5614 uint64_t sample_interval = end - begin + 1;
5615
5616 *pMaxDeviation = sample_interval + max_clock_period;
5617
5618 return VK_SUCCESS;
5619 }
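/* A worked example of the bound above, assuming a 25 MHz crystal clock
 * (clock_crystal_freq = 25000 kHz, so the device period is
 * DIV_ROUND_UP(1000000, 25000) = 40 ns): if the two CLOCK_MONOTONIC_RAW reads
 * are 800 ns apart, sample_interval = 801 ns and the reported *pMaxDeviation
 * is 801 + 40 = 841 ns. */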
5620
5621 void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
5622 VkPhysicalDevice physicalDevice,
5623 VkSampleCountFlagBits samples,
5624 VkMultisamplePropertiesEXT* pMultisampleProperties)
5625 {
5626 if (samples & (VK_SAMPLE_COUNT_2_BIT |
5627 VK_SAMPLE_COUNT_4_BIT |
5628 VK_SAMPLE_COUNT_8_BIT)) {
5629 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
5630 } else {
5631 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
5632 }
5633 }