radv/wsi: drop allocate memory special case
mesa.git: src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_shader.h"
35 #include "radv_cs.h"
36 #include "util/disk_cache.h"
37 #include "util/strtod.h"
38 #include "vk_util.h"
39 #include <xf86drm.h>
40 #include <amdgpu.h>
41 #include <amdgpu_drm.h>
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "gfx9d.h"
47 #include "util/debug.h"
48
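/* Build the pipeline cache UUID from the Mesa and LLVM build timestamps
 * plus the chip family, so that on-disk caches are invalidated whenever
 * either the driver or the compiler changes. Resulting 16-byte layout:
 *   bytes 0-3   Mesa build timestamp
 *   bytes 4-7   LLVM build timestamp
 *   bytes 8-9   radeon_family, truncated to 16 bits
 *   bytes 10-15 the literal string "radv" (NUL-padded)
 */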
49 static int
50 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
51 {
52 uint32_t mesa_timestamp, llvm_timestamp;
53 uint16_t f = family;
54 memset(uuid, 0, VK_UUID_SIZE);
55 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
56 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
57 return -1;
58
59 memcpy(uuid, &mesa_timestamp, 4);
60 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
61 memcpy((char*)uuid + 8, &f, 2);
62 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
63 return 0;
64 }
65
66 static void
67 radv_get_driver_uuid(void *uuid)
68 {
69 ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
70 }
71
72 static void
73 radv_get_device_uuid(struct radeon_info *info, void *uuid)
74 {
75 ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
76 }
77
78 static const char *
79 get_chip_name(enum radeon_family family)
80 {
81 switch (family) {
82 case CHIP_TAHITI: return "AMD RADV TAHITI";
83 case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
84 case CHIP_VERDE: return "AMD RADV CAPE VERDE";
85 case CHIP_OLAND: return "AMD RADV OLAND";
86 case CHIP_HAINAN: return "AMD RADV HAINAN";
87 case CHIP_BONAIRE: return "AMD RADV BONAIRE";
88 case CHIP_KAVERI: return "AMD RADV KAVERI";
89 case CHIP_KABINI: return "AMD RADV KABINI";
90 case CHIP_HAWAII: return "AMD RADV HAWAII";
91 case CHIP_MULLINS: return "AMD RADV MULLINS";
92 case CHIP_TONGA: return "AMD RADV TONGA";
93 case CHIP_ICELAND: return "AMD RADV ICELAND";
94 case CHIP_CARRIZO: return "AMD RADV CARRIZO";
95 case CHIP_FIJI: return "AMD RADV FIJI";
96 case CHIP_POLARIS10: return "AMD RADV POLARIS10";
97 case CHIP_POLARIS11: return "AMD RADV POLARIS11";
98 case CHIP_POLARIS12: return "AMD RADV POLARIS12";
99 case CHIP_STONEY: return "AMD RADV STONEY";
100 case CHIP_VEGA10: return "AMD RADV VEGA";
101 case CHIP_RAVEN: return "AMD RADV RAVEN";
102 default: return "AMD RADV unknown";
103 }
104 }
105
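/* Advertise up to three memory heaps (CPU-invisible VRAM, CPU-visible
 * VRAM, GTT) and up to four memory types on top of them. Note that the
 * GTT heap backs two types (write-combined and cached) and that VRAM is
 * split so the CPU-visible slice is reported as its own heap. */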
106 static void
107 radv_physical_device_init_mem_types(struct radv_physical_device *device)
108 {
109 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
110 uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
111 device->rad_info.vram_vis_size);
112
113 int vram_index = -1, visible_vram_index = -1, gart_index = -1;
114 device->memory_properties.memoryHeapCount = 0;
115 if (device->rad_info.vram_size - visible_vram_size > 0) {
116 vram_index = device->memory_properties.memoryHeapCount++;
117 device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
118 .size = device->rad_info.vram_size - visible_vram_size,
119 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
120 };
121 }
122 if (visible_vram_size) {
123 visible_vram_index = device->memory_properties.memoryHeapCount++;
124 device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
125 .size = visible_vram_size,
126 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
127 };
128 }
129 if (device->rad_info.gart_size > 0) {
130 gart_index = device->memory_properties.memoryHeapCount++;
131 device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
132 .size = device->rad_info.gart_size,
133 .flags = 0,
134 };
135 }
136
137 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
138 unsigned type_count = 0;
139 if (vram_index >= 0) {
140 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
141 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
142 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
143 .heapIndex = vram_index,
144 };
145 }
146 if (gart_index >= 0) {
147 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
148 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
149 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
150 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
151 .heapIndex = gart_index,
152 };
153 }
154 if (visible_vram_index >= 0) {
155 device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
156 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
157 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
158 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
159 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
160 .heapIndex = visible_vram_index,
161 };
162 }
163 if (gart_index >= 0) {
164 device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
165 device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
166 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
167 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
168 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
169 .heapIndex = gart_index,
170 };
171 }
172 device->memory_properties.memoryTypeCount = type_count;
173 }
174
175 static VkResult
176 radv_physical_device_init(struct radv_physical_device *device,
177 struct radv_instance *instance,
178 drmDevicePtr drm_device)
179 {
180 const char *path = drm_device->nodes[DRM_NODE_RENDER];
181 VkResult result;
182 drmVersionPtr version;
183 int fd;
184
185 fd = open(path, O_RDWR | O_CLOEXEC);
186 if (fd < 0)
187 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
188
189 version = drmGetVersion(fd);
190 if (!version) {
191 close(fd);
192 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
193 "failed to get version %s: %m", path);
194 }
195
196 if (strcmp(version->name, "amdgpu")) {
197 drmFreeVersion(version);
198 close(fd);
199 return VK_ERROR_INCOMPATIBLE_DRIVER;
200 }
201 drmFreeVersion(version);
202
203 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
204 device->instance = instance;
205 assert(strlen(path) < ARRAY_SIZE(device->path));
206 strncpy(device->path, path, ARRAY_SIZE(device->path));
207
208 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
209 instance->perftest_flags);
210 if (!device->ws) {
211 result = VK_ERROR_INCOMPATIBLE_DRIVER;
212 goto fail;
213 }
214
215 device->local_fd = fd;
216 device->ws->query_info(device->ws, &device->rad_info);
217
218 device->name = get_chip_name(device->rad_info.family);
219
220 if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
221 device->ws->destroy(device->ws);
222 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
223 "cannot generate UUID");
224 goto fail;
225 }
226
227 /* These flags affect shader compilation. */
228 uint64_t shader_env_flags =
229 (device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
230 (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
231
232 /* The gpu id is already embedded in the uuid so we just pass "radv"
233 * when creating the cache.
234 */
235 char buf[VK_UUID_SIZE * 2 + 1];
236 disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
237 device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
238
239 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
240
241 radv_get_driver_uuid(&device->driver_uuid);
242 radv_get_device_uuid(&device->rad_info, &device->device_uuid);
243
244 if (device->rad_info.family == CHIP_STONEY ||
245 device->rad_info.chip_class >= GFX9) {
246 device->has_rbplus = true;
247 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
248 }
249
250 /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
251 * on SI.
252 */
253 device->has_clear_state = device->rad_info.chip_class >= CIK;
254
255 radv_physical_device_init_mem_types(device);
256
257 result = radv_init_wsi(device);
258 if (result != VK_SUCCESS) {
259 device->ws->destroy(device->ws);
260 goto fail;
261 }
262
263 return VK_SUCCESS;
264
265 fail:
266 close(fd);
267 return result;
268 }
269
270 static void
271 radv_physical_device_finish(struct radv_physical_device *device)
272 {
273 radv_finish_wsi(device);
274 device->ws->destroy(device->ws);
275 disk_cache_destroy(device->disk_cache);
276 close(device->local_fd);
277 }
278
279 static void *
280 default_alloc_func(void *pUserData, size_t size, size_t align,
281 VkSystemAllocationScope allocationScope)
282 {
283 return malloc(size);
284 }
285
286 static void *
287 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
288 size_t align, VkSystemAllocationScope allocationScope)
289 {
290 return realloc(pOriginal, size);
291 }
292
293 static void
294 default_free_func(void *pUserData, void *pMemory)
295 {
296 free(pMemory);
297 }
298
299 static const VkAllocationCallbacks default_alloc = {
300 .pUserData = NULL,
301 .pfnAllocation = default_alloc_func,
302 .pfnReallocation = default_realloc_func,
303 .pfnFree = default_free_func,
304 };
305
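/* Option table for the RADV_DEBUG environment variable; entries are
 * matched by parse_debug_string() against a comma-separated list, e.g.
 * RADV_DEBUG=nocache,shaders (illustrative invocation). */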
306 static const struct debug_control radv_debug_options[] = {
307 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
308 {"nodcc", RADV_DEBUG_NO_DCC},
309 {"shaders", RADV_DEBUG_DUMP_SHADERS},
310 {"nocache", RADV_DEBUG_NO_CACHE},
311 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
312 {"nohiz", RADV_DEBUG_NO_HIZ},
313 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
314 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
315 {"allbos", RADV_DEBUG_ALL_BOS},
316 {"noibs", RADV_DEBUG_NO_IBS},
317 {"spirv", RADV_DEBUG_DUMP_SPIRV},
318 {"vmfaults", RADV_DEBUG_VM_FAULTS},
319 {"zerovram", RADV_DEBUG_ZERO_VRAM},
320 {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
321 {"nosisched", RADV_DEBUG_NO_SISCHED},
322 {NULL, 0}
323 };
324
325 const char *
326 radv_get_debug_option_name(int id)
327 {
328 assert(id < ARRAY_SIZE(radv_debug_options) - 1);
329 return radv_debug_options[id].string;
330 }
331
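/* Option table for the RADV_PERFTEST environment variable, e.g.
 * RADV_PERFTEST=sisched to opt in to the LLVM SI machine scheduler
 * (illustrative invocation). */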
332 static const struct debug_control radv_perftest_options[] = {
333 {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
334 {"sisched", RADV_PERFTEST_SISCHED},
335 {NULL, 0}
336 };
337
338 const char *
339 radv_get_perftest_option_name(int id)
340 {
341 assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
342 return radv_perftest_options[id].string;
343 }
344
345 static void
346 radv_handle_per_app_options(struct radv_instance *instance,
347 const VkApplicationInfo *info)
348 {
349 const char *name = info ? info->pApplicationName : NULL;
350
351 if (!name)
352 return;
353
354 if (!strcmp(name, "Talos - Linux - 32bit") ||
355 !strcmp(name, "Talos - Linux - 64bit")) {
356 /* Force enable LLVM sisched for Talos because it looks safe
357 * and it gives a few more FPS.
358 */
359 instance->perftest_flags |= RADV_PERFTEST_SISCHED;
360 }
361 }
362
363 VkResult radv_CreateInstance(
364 const VkInstanceCreateInfo* pCreateInfo,
365 const VkAllocationCallbacks* pAllocator,
366 VkInstance* pInstance)
367 {
368 struct radv_instance *instance;
369
370 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
371
372 uint32_t client_version;
373 if (pCreateInfo->pApplicationInfo &&
374 pCreateInfo->pApplicationInfo->apiVersion != 0) {
375 client_version = pCreateInfo->pApplicationInfo->apiVersion;
376 } else {
377 client_version = VK_MAKE_VERSION(1, 0, 0);
378 }
379
380 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
381 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
382 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
383 "Client requested version %d.%d.%d",
384 VK_VERSION_MAJOR(client_version),
385 VK_VERSION_MINOR(client_version),
386 VK_VERSION_PATCH(client_version));
387 }
388
389 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
390 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
391 if (!radv_instance_extension_supported(ext_name))
392 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
393 }
394
395 instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
396 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
397 if (!instance)
398 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
399
400 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
401
402 if (pAllocator)
403 instance->alloc = *pAllocator;
404 else
405 instance->alloc = default_alloc;
406
407 instance->apiVersion = client_version;
408 instance->physicalDeviceCount = -1;
409
410 _mesa_locale_init();
411
412 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
413
414 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
415 radv_debug_options);
416
417 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
418 radv_perftest_options);
419
420 radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
421
422 if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
423 /* Disable sisched when the user requests it, this is mostly
424 * useful when the driver force-enables sisched for the given
425 * application.
426 */
427 instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
428 }
429
430 *pInstance = radv_instance_to_handle(instance);
431
432 return VK_SUCCESS;
433 }
434
435 void radv_DestroyInstance(
436 VkInstance _instance,
437 const VkAllocationCallbacks* pAllocator)
438 {
439 RADV_FROM_HANDLE(radv_instance, instance, _instance);
440
441 if (!instance)
442 return;
443
444 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
445 radv_physical_device_finish(instance->physicalDevices + i);
446 }
447
448 VG(VALGRIND_DESTROY_MEMPOOL(instance));
449
450 _mesa_locale_fini();
451
452 vk_free(&instance->alloc, instance);
453 }
454
455 static VkResult
456 radv_enumerate_devices(struct radv_instance *instance)
457 {
458 /* TODO: Check for more devices ? */
459 drmDevicePtr devices[8];
460 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
461 int max_devices;
462
463 instance->physicalDeviceCount = 0;
464
465 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
466 if (max_devices < 1)
467 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
468
469 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
470 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
471 devices[i]->bustype == DRM_BUS_PCI &&
472 devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
473
474 result = radv_physical_device_init(instance->physicalDevices +
475 instance->physicalDeviceCount,
476 instance,
477 devices[i]);
478 if (result == VK_SUCCESS)
479 ++instance->physicalDeviceCount;
480 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
481 break;
482 }
483 }
484 drmFreeDevices(devices, max_devices);
485
486 return result;
487 }
488
489 VkResult radv_EnumeratePhysicalDevices(
490 VkInstance _instance,
491 uint32_t* pPhysicalDeviceCount,
492 VkPhysicalDevice* pPhysicalDevices)
493 {
494 RADV_FROM_HANDLE(radv_instance, instance, _instance);
495 VkResult result;
496
497 if (instance->physicalDeviceCount < 0) {
498 result = radv_enumerate_devices(instance);
499 if (result != VK_SUCCESS &&
500 result != VK_ERROR_INCOMPATIBLE_DRIVER)
501 return result;
502 }
503
504 if (!pPhysicalDevices) {
505 *pPhysicalDeviceCount = instance->physicalDeviceCount;
506 } else {
507 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
508 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
509 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
510 }
511
512 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
513 : VK_SUCCESS;
514 }
515
516 void radv_GetPhysicalDeviceFeatures(
517 VkPhysicalDevice physicalDevice,
518 VkPhysicalDeviceFeatures* pFeatures)
519 {
520 memset(pFeatures, 0, sizeof(*pFeatures));
521
522 *pFeatures = (VkPhysicalDeviceFeatures) {
523 .robustBufferAccess = true,
524 .fullDrawIndexUint32 = true,
525 .imageCubeArray = true,
526 .independentBlend = true,
527 .geometryShader = true,
528 .tessellationShader = true,
529 .sampleRateShading = true,
530 .dualSrcBlend = true,
531 .logicOp = true,
532 .multiDrawIndirect = true,
533 .drawIndirectFirstInstance = true,
534 .depthClamp = true,
535 .depthBiasClamp = true,
536 .fillModeNonSolid = true,
537 .depthBounds = true,
538 .wideLines = true,
539 .largePoints = true,
540 .alphaToOne = true,
541 .multiViewport = true,
542 .samplerAnisotropy = true,
543 .textureCompressionETC2 = false,
544 .textureCompressionASTC_LDR = false,
545 .textureCompressionBC = true,
546 .occlusionQueryPrecise = true,
547 .pipelineStatisticsQuery = true,
548 .vertexPipelineStoresAndAtomics = true,
549 .fragmentStoresAndAtomics = true,
550 .shaderTessellationAndGeometryPointSize = true,
551 .shaderImageGatherExtended = true,
552 .shaderStorageImageExtendedFormats = true,
553 .shaderStorageImageMultisample = false,
554 .shaderUniformBufferArrayDynamicIndexing = true,
555 .shaderSampledImageArrayDynamicIndexing = true,
556 .shaderStorageBufferArrayDynamicIndexing = true,
557 .shaderStorageImageArrayDynamicIndexing = true,
558 .shaderStorageImageReadWithoutFormat = true,
559 .shaderStorageImageWriteWithoutFormat = true,
560 .shaderClipDistance = true,
561 .shaderCullDistance = true,
562 .shaderFloat64 = true,
563 .shaderInt64 = true,
564 .shaderInt16 = false,
565 .sparseBinding = true,
566 .variableMultisampleRate = true,
567 .inheritedQueries = true,
568 };
569 }
570
571 void radv_GetPhysicalDeviceFeatures2KHR(
572 VkPhysicalDevice physicalDevice,
573 VkPhysicalDeviceFeatures2KHR *pFeatures)
574 {
575 vk_foreach_struct(ext, pFeatures->pNext) {
576 switch (ext->sType) {
577 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
578 VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
579 features->variablePointersStorageBuffer = true;
580 features->variablePointers = false;
581 break;
582 }
583 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
584 VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
585 features->multiview = true;
586 features->multiviewGeometryShader = true;
587 features->multiviewTessellationShader = true;
588 break;
589 }
590 default:
591 break;
592 }
593 }
594 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
595 }
596
597 void radv_GetPhysicalDeviceProperties(
598 VkPhysicalDevice physicalDevice,
599 VkPhysicalDeviceProperties* pProperties)
600 {
601 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
602 VkSampleCountFlags sample_counts = 0xf;
603
604 /* Make sure that the entire descriptor set is addressable with a signed
605 * 32-bit int. So the sum of all limits scaled by descriptor size has to
606 * be at most 2 GiB. A combined image & sampler object counts as one of
607 * both. This limit is for the pipeline layout, not for the set layout, but
608 * there is no set limit, so we just set a pipeline limit. I don't think
609 * any app is going to hit this soon. */
610 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
611 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
612 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
613 32 /* sampler, largest when combined with image */ +
614 64 /* sampled image */ +
615 64 /* storage image */);
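/* Worked out: the per-descriptor sizes above sum to 32+32+32+64+64 =
 * 224 bytes, so this allows roughly (2^31 - 16*MAX_DYNAMIC_BUFFERS)/224
 * ~= 9.5 million descriptors per stage, far beyond what applications
 * bind in practice. */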
616
617 VkPhysicalDeviceLimits limits = {
618 .maxImageDimension1D = (1 << 14),
619 .maxImageDimension2D = (1 << 14),
620 .maxImageDimension3D = (1 << 11),
621 .maxImageDimensionCube = (1 << 14),
622 .maxImageArrayLayers = (1 << 11),
623 .maxTexelBufferElements = 128 * 1024 * 1024,
624 .maxUniformBufferRange = UINT32_MAX,
625 .maxStorageBufferRange = UINT32_MAX,
626 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
627 .maxMemoryAllocationCount = UINT32_MAX,
628 .maxSamplerAllocationCount = 64 * 1024,
629 .bufferImageGranularity = 64, /* A cache line */
630 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
631 .maxBoundDescriptorSets = MAX_SETS,
632 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
633 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
634 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
635 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
636 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
637 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
638 .maxPerStageResources = max_descriptor_set_size,
639 .maxDescriptorSetSamplers = max_descriptor_set_size,
640 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
641 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
642 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
643 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
644 .maxDescriptorSetSampledImages = max_descriptor_set_size,
645 .maxDescriptorSetStorageImages = max_descriptor_set_size,
646 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
647 .maxVertexInputAttributes = 32,
648 .maxVertexInputBindings = 32,
649 .maxVertexInputAttributeOffset = 2047,
650 .maxVertexInputBindingStride = 2048,
651 .maxVertexOutputComponents = 128,
652 .maxTessellationGenerationLevel = 64,
653 .maxTessellationPatchSize = 32,
654 .maxTessellationControlPerVertexInputComponents = 128,
655 .maxTessellationControlPerVertexOutputComponents = 128,
656 .maxTessellationControlPerPatchOutputComponents = 120,
657 .maxTessellationControlTotalOutputComponents = 4096,
658 .maxTessellationEvaluationInputComponents = 128,
659 .maxTessellationEvaluationOutputComponents = 128,
660 .maxGeometryShaderInvocations = 127,
661 .maxGeometryInputComponents = 64,
662 .maxGeometryOutputComponents = 128,
663 .maxGeometryOutputVertices = 256,
664 .maxGeometryTotalOutputComponents = 1024,
665 .maxFragmentInputComponents = 128,
666 .maxFragmentOutputAttachments = 8,
667 .maxFragmentDualSrcAttachments = 1,
668 .maxFragmentCombinedOutputResources = 8,
669 .maxComputeSharedMemorySize = 32768,
670 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
671 .maxComputeWorkGroupInvocations = 2048,
672 .maxComputeWorkGroupSize = {
673 2048,
674 2048,
675 2048
676 },
677 .subPixelPrecisionBits = 4 /* FIXME */,
678 .subTexelPrecisionBits = 4 /* FIXME */,
679 .mipmapPrecisionBits = 4 /* FIXME */,
680 .maxDrawIndexedIndexValue = UINT32_MAX,
681 .maxDrawIndirectCount = UINT32_MAX,
682 .maxSamplerLodBias = 16,
683 .maxSamplerAnisotropy = 16,
684 .maxViewports = MAX_VIEWPORTS,
685 .maxViewportDimensions = { (1 << 14), (1 << 14) },
686 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
687 .viewportSubPixelBits = 13, /* We take a float? */
688 .minMemoryMapAlignment = 4096, /* A page */
689 .minTexelBufferOffsetAlignment = 1,
690 .minUniformBufferOffsetAlignment = 4,
691 .minStorageBufferOffsetAlignment = 4,
692 .minTexelOffset = -32,
693 .maxTexelOffset = 31,
694 .minTexelGatherOffset = -32,
695 .maxTexelGatherOffset = 31,
696 .minInterpolationOffset = -2,
697 .maxInterpolationOffset = 2,
698 .subPixelInterpolationOffsetBits = 8,
699 .maxFramebufferWidth = (1 << 14),
700 .maxFramebufferHeight = (1 << 14),
701 .maxFramebufferLayers = (1 << 10),
702 .framebufferColorSampleCounts = sample_counts,
703 .framebufferDepthSampleCounts = sample_counts,
704 .framebufferStencilSampleCounts = sample_counts,
705 .framebufferNoAttachmentsSampleCounts = sample_counts,
706 .maxColorAttachments = MAX_RTS,
707 .sampledImageColorSampleCounts = sample_counts,
708 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
709 .sampledImageDepthSampleCounts = sample_counts,
710 .sampledImageStencilSampleCounts = sample_counts,
711 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
712 .maxSampleMaskWords = 1,
713 .timestampComputeAndGraphics = true,
714 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
715 .maxClipDistances = 8,
716 .maxCullDistances = 8,
717 .maxCombinedClipAndCullDistances = 8,
718 .discreteQueuePriorities = 1,
719 .pointSizeRange = { 0.125, 255.875 },
720 .lineWidthRange = { 0.0, 7.9921875 },
721 .pointSizeGranularity = (1.0 / 8.0),
722 .lineWidthGranularity = (1.0 / 128.0),
723 .strictLines = false, /* FINISHME */
724 .standardSampleLocations = true,
725 .optimalBufferCopyOffsetAlignment = 128,
726 .optimalBufferCopyRowPitchAlignment = 128,
727 .nonCoherentAtomSize = 64,
728 };
729
730 *pProperties = (VkPhysicalDeviceProperties) {
731 .apiVersion = radv_physical_device_api_version(pdevice),
732 .driverVersion = vk_get_driver_version(),
733 .vendorID = ATI_VENDOR_ID,
734 .deviceID = pdevice->rad_info.pci_id,
735 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
736 .limits = limits,
737 .sparseProperties = {0},
738 };
739
740 strcpy(pProperties->deviceName, pdevice->name);
741 memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
742 }
743
744 void radv_GetPhysicalDeviceProperties2KHR(
745 VkPhysicalDevice physicalDevice,
746 VkPhysicalDeviceProperties2KHR *pProperties)
747 {
748 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
749 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
750
751 vk_foreach_struct(ext, pProperties->pNext) {
752 switch (ext->sType) {
753 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
754 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
755 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
756 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
757 break;
758 }
759 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
760 VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
761 memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
762 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
763 properties->deviceLUIDValid = false;
764 break;
765 }
766 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
767 VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
768 properties->maxMultiviewViewCount = MAX_VIEWS;
769 properties->maxMultiviewInstanceIndex = INT_MAX;
770 break;
771 }
772 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
773 VkPhysicalDevicePointClippingPropertiesKHR *properties =
774 (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
775 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
776 break;
777 }
778 default:
779 break;
780 }
781 }
782 }
783
784 static void radv_get_physical_device_queue_family_properties(
785 struct radv_physical_device* pdevice,
786 uint32_t* pCount,
787 VkQueueFamilyProperties** pQueueFamilyProperties)
788 {
789 int num_queue_families = 1;
790 int idx;
791 if (pdevice->rad_info.num_compute_rings > 0 &&
792 pdevice->rad_info.chip_class >= CIK &&
793 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
794 num_queue_families++;
795
796 if (pQueueFamilyProperties == NULL) {
797 *pCount = num_queue_families;
798 return;
799 }
800
801 if (!*pCount)
802 return;
803
804 idx = 0;
805 if (*pCount >= 1) {
806 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
807 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
808 VK_QUEUE_COMPUTE_BIT |
809 VK_QUEUE_TRANSFER_BIT |
810 VK_QUEUE_SPARSE_BINDING_BIT,
811 .queueCount = 1,
812 .timestampValidBits = 64,
813 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
814 };
815 idx++;
816 }
817
818 if (pdevice->rad_info.num_compute_rings > 0 &&
819 pdevice->rad_info.chip_class >= CIK &&
820 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
821 if (*pCount > idx) {
822 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
823 .queueFlags = VK_QUEUE_COMPUTE_BIT |
824 VK_QUEUE_TRANSFER_BIT |
825 VK_QUEUE_SPARSE_BINDING_BIT,
826 .queueCount = pdevice->rad_info.num_compute_rings,
827 .timestampValidBits = 64,
828 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
829 };
830 idx++;
831 }
832 }
833 *pCount = idx;
834 }
835
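/* Both entry points below follow the standard Vulkan two-call idiom:
 * a NULL properties pointer queries the queue family count, and a
 * second call fills in at most *pCount entries. */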
836 void radv_GetPhysicalDeviceQueueFamilyProperties(
837 VkPhysicalDevice physicalDevice,
838 uint32_t* pCount,
839 VkQueueFamilyProperties* pQueueFamilyProperties)
840 {
841 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
842 if (!pQueueFamilyProperties) {
843 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
844 return;
845 }
846 VkQueueFamilyProperties *properties[] = {
847 pQueueFamilyProperties + 0,
848 pQueueFamilyProperties + 1,
849 pQueueFamilyProperties + 2,
850 };
851 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
852 assert(*pCount <= 3);
853 }
854
855 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
856 VkPhysicalDevice physicalDevice,
857 uint32_t* pCount,
858 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
859 {
860 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
861 if (!pQueueFamilyProperties) {
862 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
863 return;
864 }
865 VkQueueFamilyProperties *properties[] = {
866 &pQueueFamilyProperties[0].queueFamilyProperties,
867 &pQueueFamilyProperties[1].queueFamilyProperties,
868 &pQueueFamilyProperties[2].queueFamilyProperties,
869 };
870 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
871 assert(*pCount <= 3);
872 }
873
874 void radv_GetPhysicalDeviceMemoryProperties(
875 VkPhysicalDevice physicalDevice,
876 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
877 {
878 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
879
880 *pMemoryProperties = physical_device->memory_properties;
881 }
882
883 void radv_GetPhysicalDeviceMemoryProperties2KHR(
884 VkPhysicalDevice physicalDevice,
885 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
886 {
887 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
888 &pMemoryProperties->memoryProperties);
889 }
890
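/* Map VK_EXT_global_priority levels onto winsys context priorities;
 * queues created without the extension struct default to MEDIUM. */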
891 static enum radeon_ctx_priority
892 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
893 {
894 /* Default to MEDIUM when a specific global priority isn't requested */
895 if (!pObj)
896 return RADEON_CTX_PRIORITY_MEDIUM;
897
898 switch(pObj->globalPriority) {
899 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
900 return RADEON_CTX_PRIORITY_REALTIME;
901 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
902 return RADEON_CTX_PRIORITY_HIGH;
903 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
904 return RADEON_CTX_PRIORITY_MEDIUM;
905 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
906 return RADEON_CTX_PRIORITY_LOW;
907 default:
908 unreachable("Illegal global priority value");
909 return RADEON_CTX_PRIORITY_INVALID;
910 }
911 }
912
913 static VkResult
914 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
915 uint32_t queue_family_index, int idx,
916 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
917 {
918 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
919 queue->device = device;
920 queue->queue_family_index = queue_family_index;
921 queue->queue_idx = idx;
922 queue->priority = radv_get_queue_global_priority(global_priority);
923
924 queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
925 if (!queue->hw_ctx)
926 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
927
928 return VK_SUCCESS;
929 }
930
931 static void
932 radv_queue_finish(struct radv_queue *queue)
933 {
934 if (queue->hw_ctx)
935 queue->device->ws->ctx_destroy(queue->hw_ctx);
936
937 if (queue->initial_full_flush_preamble_cs)
938 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
939 if (queue->initial_preamble_cs)
940 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
941 if (queue->continue_preamble_cs)
942 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
943 if (queue->descriptor_bo)
944 queue->device->ws->buffer_destroy(queue->descriptor_bo);
945 if (queue->scratch_bo)
946 queue->device->ws->buffer_destroy(queue->scratch_bo);
947 if (queue->esgs_ring_bo)
948 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
949 if (queue->gsvs_ring_bo)
950 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
951 if (queue->tess_factor_ring_bo)
952 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
953 if (queue->tess_offchip_ring_bo)
954 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
955 if (queue->compute_scratch_bo)
956 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
957 }
958
959 static void
960 radv_device_init_gs_info(struct radv_device *device)
961 {
962 switch (device->physical_device->rad_info.family) {
963 case CHIP_OLAND:
964 case CHIP_HAINAN:
965 case CHIP_KAVERI:
966 case CHIP_KABINI:
967 case CHIP_MULLINS:
968 case CHIP_ICELAND:
969 case CHIP_CARRIZO:
970 case CHIP_STONEY:
971 device->gs_table_depth = 16;
972 return;
973 case CHIP_TAHITI:
974 case CHIP_PITCAIRN:
975 case CHIP_VERDE:
976 case CHIP_BONAIRE:
977 case CHIP_HAWAII:
978 case CHIP_TONGA:
979 case CHIP_FIJI:
980 case CHIP_POLARIS10:
981 case CHIP_POLARIS11:
982 case CHIP_POLARIS12:
983 case CHIP_VEGA10:
984 case CHIP_RAVEN:
985 device->gs_table_depth = 32;
986 return;
987 default:
988 unreachable("unknown GPU");
989 }
990 }
991
992 VkResult radv_CreateDevice(
993 VkPhysicalDevice physicalDevice,
994 const VkDeviceCreateInfo* pCreateInfo,
995 const VkAllocationCallbacks* pAllocator,
996 VkDevice* pDevice)
997 {
998 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
999 VkResult result;
1000 struct radv_device *device;
1001
1002 bool keep_shader_info = false;
1003
1004 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1005 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1006 if (!radv_physical_device_extension_supported(physical_device, ext_name))
1007 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1008
1009 if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
1010 keep_shader_info = true;
1011 }
1012
1013 /* Check enabled features */
1014 if (pCreateInfo->pEnabledFeatures) {
1015 VkPhysicalDeviceFeatures supported_features;
1016 radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1017 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1018 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1019 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1020 for (uint32_t i = 0; i < num_features; i++) {
1021 if (enabled_feature[i] && !supported_feature[i])
1022 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1023 }
1024 }
1025
1026 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
1027 sizeof(*device), 8,
1028 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1029 if (!device)
1030 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1031
1032 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1033 device->instance = physical_device->instance;
1034 device->physical_device = physical_device;
1035
1036 device->ws = physical_device->ws;
1037 if (pAllocator)
1038 device->alloc = *pAllocator;
1039 else
1040 device->alloc = physical_device->instance->alloc;
1041
1042 mtx_init(&device->shader_slab_mutex, mtx_plain);
1043 list_inithead(&device->shader_slabs);
1044
1045 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1046 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1047 uint32_t qfi = queue_create->queueFamilyIndex;
1048 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
1049 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
1050
1051 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
1052
1053 device->queues[qfi] = vk_alloc(&device->alloc,
1054 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1055 if (!device->queues[qfi]) {
1056 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1057 goto fail;
1058 }
1059
1060 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1061
1062 device->queue_count[qfi] = queue_create->queueCount;
1063
1064 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1065 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
1066 if (result != VK_SUCCESS)
1067 goto fail;
1068 }
1069 }
1070
1071 #if HAVE_LLVM < 0x0400
1072 device->llvm_supports_spill = false;
1073 #else
1074 device->llvm_supports_spill = true;
1075 #endif
1076
1077 /* The maximum number of scratch waves. Scratch space isn't divided
1078 * evenly between CUs. The number is only a function of the number of CUs.
1079 * We can decrease the constant to decrease the scratch buffer size.
1080 *
1081 * scratch_waves must be >= the maximum possible size of
1082 * 1 threadgroup, so that the hw doesn't hang from being unable
1083 * to start any.
1084 *
1085 * The recommended value is 4 per CU at most. Higher numbers don't
1086 * bring much benefit, but they still occupy chip resources (think
1087 * async compute). I've seen ~2% performance difference between 4 and 32.
1088 */
1089 uint32_t max_threads_per_block = 2048;
1090 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1091 max_threads_per_block / 64);
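/* Illustrative numbers: a chip with 36 CUs (e.g. Polaris10) gets
 * 32 * 36 = 1152 scratch waves; num_good_compute_units is reported by
 * the kernel, so the actual value varies per device. */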
1092
1093 radv_device_init_gs_info(device);
1094
1095 device->tess_offchip_block_dw_size =
1096 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1097 device->has_distributed_tess =
1098 device->physical_device->rad_info.chip_class >= VI &&
1099 device->physical_device->rad_info.max_se >= 2;
1100
1101 if (getenv("RADV_TRACE_FILE")) {
1102 keep_shader_info = true;
1103
1104 if (!radv_init_trace(device)) {
1105 result = VK_ERROR_INITIALIZATION_FAILED; goto fail; }
1106 }
1107
1108 device->keep_shader_info = keep_shader_info;
1109
1110 result = radv_device_init_meta(device);
1111 if (result != VK_SUCCESS)
1112 goto fail;
1113
1114 radv_device_init_msaa(device);
1115
1116 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1117 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1118 switch (family) {
1119 case RADV_QUEUE_GENERAL:
1120 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1121 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1122 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1123 break;
1124 case RADV_QUEUE_COMPUTE:
1125 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1126 radeon_emit(device->empty_cs[family], 0);
1127 break;
1128 }
1129 device->ws->cs_finalize(device->empty_cs[family]);
1130 }
1131
1132 if (device->physical_device->rad_info.chip_class >= CIK)
1133 cik_create_gfx_config(device);
1134
1135 VkPipelineCacheCreateInfo ci;
1136 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1137 ci.pNext = NULL;
1138 ci.flags = 0;
1139 ci.pInitialData = NULL;
1140 ci.initialDataSize = 0;
1141 VkPipelineCache pc;
1142 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1143 &ci, NULL, &pc);
1144 if (result != VK_SUCCESS)
1145 goto fail;
1146
1147 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1148
1149 *pDevice = radv_device_to_handle(device);
1150 return VK_SUCCESS;
1151
1152 fail:
1153 if (device->trace_bo)
1154 device->ws->buffer_destroy(device->trace_bo);
1155
1156 if (device->gfx_init)
1157 device->ws->buffer_destroy(device->gfx_init);
1158
1159 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1160 for (unsigned q = 0; q < device->queue_count[i]; q++)
1161 radv_queue_finish(&device->queues[i][q]);
1162 if (device->queue_count[i])
1163 vk_free(&device->alloc, device->queues[i]);
1164 }
1165
1166 vk_free(&device->alloc, device);
1167 return result;
1168 }
1169
1170 void radv_DestroyDevice(
1171 VkDevice _device,
1172 const VkAllocationCallbacks* pAllocator)
1173 {
1174 RADV_FROM_HANDLE(radv_device, device, _device);
1175
1176 if (!device)
1177 return;
1178
1179 if (device->trace_bo)
1180 device->ws->buffer_destroy(device->trace_bo);
1181
1182 if (device->gfx_init)
1183 device->ws->buffer_destroy(device->gfx_init);
1184
1185 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1186 for (unsigned q = 0; q < device->queue_count[i]; q++)
1187 radv_queue_finish(&device->queues[i][q]);
1188 if (device->queue_count[i])
1189 vk_free(&device->alloc, device->queues[i]);
1190 if (device->empty_cs[i])
1191 device->ws->cs_destroy(device->empty_cs[i]);
1192 }
1193 radv_device_finish_meta(device);
1194
1195 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1196 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1197
1198 radv_destroy_shader_slabs(device);
1199
1200 vk_free(&device->alloc, device);
1201 }
1202
1203 VkResult radv_EnumerateInstanceLayerProperties(
1204 uint32_t* pPropertyCount,
1205 VkLayerProperties* pProperties)
1206 {
1207 if (pProperties == NULL) {
1208 *pPropertyCount = 0;
1209 return VK_SUCCESS;
1210 }
1211
1212 /* None supported at this time */
1213 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1214 }
1215
1216 VkResult radv_EnumerateDeviceLayerProperties(
1217 VkPhysicalDevice physicalDevice,
1218 uint32_t* pPropertyCount,
1219 VkLayerProperties* pProperties)
1220 {
1221 if (pProperties == NULL) {
1222 *pPropertyCount = 0;
1223 return VK_SUCCESS;
1224 }
1225
1226 /* None supported at this time */
1227 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1228 }
1229
1230 void radv_GetDeviceQueue(
1231 VkDevice _device,
1232 uint32_t queueFamilyIndex,
1233 uint32_t queueIndex,
1234 VkQueue* pQueue)
1235 {
1236 RADV_FROM_HANDLE(radv_device, device, _device);
1237
1238 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1239 }
1240
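/* Fill the per-queue descriptor BO with the buffer descriptors (one V#,
 * i.e. 4 dwords, each) for the ES->GS and GS->VS rings and the two
 * tessellation rings, followed by the default sample positions. The
 * first four dwords of the map are reserved for the scratch descriptor
 * (2 dwords + 2 padding), so the ring descriptors start at map[4]. */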
1241 static void
1242 fill_geom_tess_rings(struct radv_queue *queue,
1243 uint32_t *map,
1244 bool add_sample_positions,
1245 uint32_t esgs_ring_size,
1246 struct radeon_winsys_bo *esgs_ring_bo,
1247 uint32_t gsvs_ring_size,
1248 struct radeon_winsys_bo *gsvs_ring_bo,
1249 uint32_t tess_factor_ring_size,
1250 struct radeon_winsys_bo *tess_factor_ring_bo,
1251 uint32_t tess_offchip_ring_size,
1252 struct radeon_winsys_bo *tess_offchip_ring_bo)
1253 {
1254 uint64_t esgs_va = 0, gsvs_va = 0;
1255 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1256 uint32_t *desc = &map[4];
1257
1258 if (esgs_ring_bo)
1259 esgs_va = radv_buffer_get_va(esgs_ring_bo);
1260 if (gsvs_ring_bo)
1261 gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
1262 if (tess_factor_ring_bo)
1263 tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
1264 if (tess_offchip_ring_bo)
1265 tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);
1266
1267 /* stride 0, num records - size, add tid, swizzle, elsize4,
1268 index stride 64 */
1269 desc[0] = esgs_va;
1270 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1271 S_008F04_STRIDE(0) |
1272 S_008F04_SWIZZLE_ENABLE(true);
1273 desc[2] = esgs_ring_size;
1274 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1275 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1276 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1277 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1278 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1279 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1280 S_008F0C_ELEMENT_SIZE(1) |
1281 S_008F0C_INDEX_STRIDE(3) |
1282 S_008F0C_ADD_TID_ENABLE(true);
1283
1284 desc += 4;
1285 /* GS entry for ES->GS ring */
1286 /* stride 0, num records - size, elsize0,
1287 index stride 0 */
1288 desc[0] = esgs_va;
1289 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1290 S_008F04_STRIDE(0) |
1291 S_008F04_SWIZZLE_ENABLE(false);
1292 desc[2] = esgs_ring_size;
1293 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1294 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1295 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1296 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1297 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1298 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1299 S_008F0C_ELEMENT_SIZE(0) |
1300 S_008F0C_INDEX_STRIDE(0) |
1301 S_008F0C_ADD_TID_ENABLE(false);
1302
1303 desc += 4;
1304 /* VS entry for GS->VS ring */
1305 /* stride 0, num records - size, elsize0,
1306 index stride 0 */
1307 desc[0] = gsvs_va;
1308 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1309 S_008F04_STRIDE(0) |
1310 S_008F04_SWIZZLE_ENABLE(false);
1311 desc[2] = gsvs_ring_size;
1312 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1313 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1314 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1315 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1316 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1317 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1318 S_008F0C_ELEMENT_SIZE(0) |
1319 S_008F0C_INDEX_STRIDE(0) |
1320 S_008F0C_ADD_TID_ENABLE(false);
1321 desc += 4;
1322
1323 /* stride gsvs_itemsize, num records 64
1324 elsize 4, index stride 16 */
1325 /* shader will patch stride and desc[2] */
1326 desc[0] = gsvs_va;
1327 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1328 S_008F04_STRIDE(0) |
1329 S_008F04_SWIZZLE_ENABLE(true);
1330 desc[2] = 0;
1331 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1332 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1333 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1334 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1335 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1336 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1337 S_008F0C_ELEMENT_SIZE(1) |
1338 S_008F0C_INDEX_STRIDE(1) |
1339 S_008F0C_ADD_TID_ENABLE(true);
1340 desc += 4;
1341
1342 desc[0] = tess_factor_va;
1343 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1344 S_008F04_STRIDE(0) |
1345 S_008F04_SWIZZLE_ENABLE(false);
1346 desc[2] = tess_factor_ring_size;
1347 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1348 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1349 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1350 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1351 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1352 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1353 S_008F0C_ELEMENT_SIZE(0) |
1354 S_008F0C_INDEX_STRIDE(0) |
1355 S_008F0C_ADD_TID_ENABLE(false);
1356 desc += 4;
1357
1358 desc[0] = tess_offchip_va;
1359 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1360 S_008F04_STRIDE(0) |
1361 S_008F04_SWIZZLE_ENABLE(false);
1362 desc[2] = tess_offchip_ring_size;
1363 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1364 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1365 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1366 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1367 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1368 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1369 S_008F0C_ELEMENT_SIZE(0) |
1370 S_008F0C_INDEX_STRIDE(0) |
1371 S_008F0C_ADD_TID_ENABLE(false);
1372 desc += 4;
1373
1374 /* add sample positions after all rings */
1375 memcpy(desc, queue->device->sample_locations_1x, 8);
1376 desc += 2;
1377 memcpy(desc, queue->device->sample_locations_2x, 16);
1378 desc += 4;
1379 memcpy(desc, queue->device->sample_locations_4x, 32);
1380 desc += 8;
1381 memcpy(desc, queue->device->sample_locations_8x, 64);
1382 desc += 16;
1383 memcpy(desc, queue->device->sample_locations_16x, 128);
1384 }
1385
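/* Compute the VGT_HS_OFFCHIP_PARAM register value: how many off-chip
 * tessellation buffers are available (the per-SE limit is doubled on
 * CIK+ except Carrizo/Stoney) and their granularity, 4K or 8K dwords. */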
1386 static unsigned
1387 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1388 {
1389 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1390 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1391 device->physical_device->rad_info.family != CHIP_STONEY;
1392 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1393 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1394 device->physical_device->rad_info.max_se;
1395 unsigned offchip_granularity;
1396 unsigned hs_offchip_param;
1397 switch (device->tess_offchip_block_dw_size) {
1398 default:
1399 assert(0);
1400 /* fall through */
1401 case 8192:
1402 offchip_granularity = V_03093C_X_8K_DWORDS;
1403 break;
1404 case 4096:
1405 offchip_granularity = V_03093C_X_4K_DWORDS;
1406 break;
1407 }
1408
1409 switch (device->physical_device->rad_info.chip_class) {
1410 case SI:
1411 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1412 break;
1413 case CIK:
1414 case VI:
1415 case GFX9:
1416 default:
1417 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1418 break;
1419 }
1420
1421 *max_offchip_buffers_p = max_offchip_buffers;
1422 if (device->physical_device->rad_info.chip_class >= CIK) {
1423 if (device->physical_device->rad_info.chip_class >= VI)
1424 --max_offchip_buffers;
1425 hs_offchip_param =
1426 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1427 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1428 } else {
1429 hs_offchip_param =
1430 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1431 }
1432 return hs_offchip_param;
1433 }
1434
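/* (Re)build the per-queue preamble command streams. Scratch and ring
 * buffers only ever grow, so if the cached sizes already satisfy this
 * request the existing preambles are simply reused. Otherwise three
 * variants are rebuilt: an initial preamble with a full cache flush,
 * a plain initial preamble, and a continue preamble for chained
 * submissions. */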
1435 static VkResult
1436 radv_get_preamble_cs(struct radv_queue *queue,
1437 uint32_t scratch_size,
1438 uint32_t compute_scratch_size,
1439 uint32_t esgs_ring_size,
1440 uint32_t gsvs_ring_size,
1441 bool needs_tess_rings,
1442 bool needs_sample_positions,
1443 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1444 struct radeon_winsys_cs **initial_preamble_cs,
1445 struct radeon_winsys_cs **continue_preamble_cs)
1446 {
1447 struct radeon_winsys_bo *scratch_bo = NULL;
1448 struct radeon_winsys_bo *descriptor_bo = NULL;
1449 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1450 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1451 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1452 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1453 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1454 struct radeon_winsys_cs *dest_cs[3] = {0};
1455 bool add_tess_rings = false, add_sample_positions = false;
1456 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1457 unsigned max_offchip_buffers;
1458 unsigned hs_offchip_param = 0;
1459 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1460 if (!queue->has_tess_rings) {
1461 if (needs_tess_rings)
1462 add_tess_rings = true;
1463 }
1464 if (!queue->has_sample_positions) {
1465 if (needs_sample_positions)
1466 add_sample_positions = true;
1467 }
1468 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1469 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1470 &max_offchip_buffers);
1471 tess_offchip_ring_size = max_offchip_buffers *
1472 queue->device->tess_offchip_block_dw_size * 4;
1473
1474 if (scratch_size <= queue->scratch_size &&
1475 compute_scratch_size <= queue->compute_scratch_size &&
1476 esgs_ring_size <= queue->esgs_ring_size &&
1477 gsvs_ring_size <= queue->gsvs_ring_size &&
1478 !add_tess_rings && !add_sample_positions &&
1479 queue->initial_preamble_cs) {
1480 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1481 *initial_preamble_cs = queue->initial_preamble_cs;
1482 *continue_preamble_cs = queue->continue_preamble_cs;
1483 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1484 *continue_preamble_cs = NULL;
1485 return VK_SUCCESS;
1486 }
1487
1488 if (scratch_size > queue->scratch_size) {
1489 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1490 scratch_size,
1491 4096,
1492 RADEON_DOMAIN_VRAM,
1493 ring_bo_flags);
1494 if (!scratch_bo)
1495 goto fail;
1496 } else
1497 scratch_bo = queue->scratch_bo;
1498
1499 if (compute_scratch_size > queue->compute_scratch_size) {
1500 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1501 compute_scratch_size,
1502 4096,
1503 RADEON_DOMAIN_VRAM,
1504 ring_bo_flags);
1505 if (!compute_scratch_bo)
1506 goto fail;
1507
1508 } else
1509 compute_scratch_bo = queue->compute_scratch_bo;
1510
1511 if (esgs_ring_size > queue->esgs_ring_size) {
1512 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1513 esgs_ring_size,
1514 4096,
1515 RADEON_DOMAIN_VRAM,
1516 ring_bo_flags);
1517 if (!esgs_ring_bo)
1518 goto fail;
1519 } else {
1520 esgs_ring_bo = queue->esgs_ring_bo;
1521 esgs_ring_size = queue->esgs_ring_size;
1522 }
1523
1524 if (gsvs_ring_size > queue->gsvs_ring_size) {
1525 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1526 gsvs_ring_size,
1527 4096,
1528 RADEON_DOMAIN_VRAM,
1529 ring_bo_flags);
1530 if (!gsvs_ring_bo)
1531 goto fail;
1532 } else {
1533 gsvs_ring_bo = queue->gsvs_ring_bo;
1534 gsvs_ring_size = queue->gsvs_ring_size;
1535 }
1536
1537 if (add_tess_rings) {
1538 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1539 tess_factor_ring_size,
1540 256,
1541 RADEON_DOMAIN_VRAM,
1542 ring_bo_flags);
1543 if (!tess_factor_ring_bo)
1544 goto fail;
1545 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1546 tess_offchip_ring_size,
1547 256,
1548 RADEON_DOMAIN_VRAM,
1549 ring_bo_flags);
1550 if (!tess_offchip_ring_bo)
1551 goto fail;
1552 } else {
1553 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1554 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1555 }
1556
1557 if (scratch_bo != queue->scratch_bo ||
1558 esgs_ring_bo != queue->esgs_ring_bo ||
1559 gsvs_ring_bo != queue->gsvs_ring_bo ||
1560 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1561 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1562 uint32_t size = 0;
1563 if (gsvs_ring_bo || esgs_ring_bo ||
1564 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1565 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1566 if (add_sample_positions)
1567 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1568 }
1569 else if (scratch_bo)
1570 size = 8; /* 2 dword */
1571
1572 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1573 size,
1574 4096,
1575 RADEON_DOMAIN_VRAM,
1576 RADEON_FLAG_CPU_ACCESS|RADEON_FLAG_NO_INTERPROCESS_SHARING);
1577 if (!descriptor_bo)
1578 goto fail;
1579 } else
1580 descriptor_bo = queue->descriptor_bo;
1581
1582 for(int i = 0; i < 3; ++i) {
1583 struct radeon_winsys_cs *cs = NULL;
1584 cs = queue->device->ws->cs_create(queue->device->ws,
1585 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1586 if (!cs)
1587 goto fail;
1588
1589 dest_cs[i] = cs;
1590
1591 if (scratch_bo)
1592 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1593
1594 if (esgs_ring_bo)
1595 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1596
1597 if (gsvs_ring_bo)
1598 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1599
1600 if (tess_factor_ring_bo)
1601 radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
1602
1603 if (tess_offchip_ring_bo)
1604 radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
1605
1606 if (descriptor_bo)
1607 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1608
1609 if (descriptor_bo != queue->descriptor_bo) {
1610 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1611
1612 if (scratch_bo) {
1613 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1614 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1615 S_008F04_SWIZZLE_ENABLE(1);
1616 map[0] = scratch_va;
1617 map[1] = rsrc1;
1618 }
1619
1620 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1621 add_sample_positions)
1622 fill_geom_tess_rings(queue, map, add_sample_positions,
1623 esgs_ring_size, esgs_ring_bo,
1624 gsvs_ring_size, gsvs_ring_bo,
1625 tess_factor_ring_size, tess_factor_ring_bo,
1626 tess_offchip_ring_size, tess_offchip_ring_bo);
1627
1628 queue->device->ws->buffer_unmap(descriptor_bo);
1629 }
1630
1631 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1632 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1633 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1634 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1635 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1636 }
1637
1638 if (esgs_ring_bo || gsvs_ring_bo) {
1639 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1640 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1641 radeon_emit(cs, esgs_ring_size >> 8);
1642 radeon_emit(cs, gsvs_ring_size >> 8);
1643 } else {
1644 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1645 radeon_emit(cs, esgs_ring_size >> 8);
1646 radeon_emit(cs, gsvs_ring_size >> 8);
1647 }
1648 }
1649
1650 if (tess_factor_ring_bo) {
1651 uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1652 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1653 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1654 S_030938_SIZE(tess_factor_ring_size / 4));
1655 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1656 tf_va >> 8);
1657 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1658 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1659 tf_va >> 40);
1660 }
1661 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1662 } else {
1663 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1664 S_008988_SIZE(tess_factor_ring_size / 4));
1665 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1666 tf_va >> 8);
1667 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1668 hs_offchip_param);
1669 }
1670 }
1671
1672 if (descriptor_bo) {
1673 uint64_t va = radv_buffer_get_va(descriptor_bo);
1674 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1675 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1676 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1677 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1678 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1679
1680 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1681 radeon_set_sh_reg_seq(cs, regs[i], 2);
1682 radeon_emit(cs, va);
1683 radeon_emit(cs, va >> 32);
1684 }
1685 } else {
1686 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1687 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1688 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1689 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1690 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1691 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1692
1693 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1694 radeon_set_sh_reg_seq(cs, regs[i], 2);
1695 radeon_emit(cs, va);
1696 radeon_emit(cs, va >> 32);
1697 }
1698 }
1699 }
1700
1701 if (compute_scratch_bo) {
1702 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1703 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1704 S_008F04_SWIZZLE_ENABLE(1);
1705
1706 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1707
1708 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1709 radeon_emit(cs, scratch_va);
1710 radeon_emit(cs, rsrc1);
1711 }
1712
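		/* The full-flush preamble (i == 0) also waits for outstanding
		 * PS/CS work; the lighter preamble (i == 1) only invalidates
		 * caches. */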
1713 if (i == 0) {
1714 si_cs_emit_cache_flush(cs,
1715 false,
1716 queue->device->physical_device->rad_info.chip_class,
1717 NULL, 0,
1718 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1719 queue->device->physical_device->rad_info.chip_class >= CIK,
1720 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1721 RADV_CMD_FLAG_INV_ICACHE |
1722 RADV_CMD_FLAG_INV_SMEM_L1 |
1723 RADV_CMD_FLAG_INV_VMEM_L1 |
1724 RADV_CMD_FLAG_INV_GLOBAL_L2);
1725 } else if (i == 1) {
1726 si_cs_emit_cache_flush(cs,
1727 false,
1728 queue->device->physical_device->rad_info.chip_class,
1729 NULL, 0,
1730 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1731 queue->device->physical_device->rad_info.chip_class >= CIK,
1732 RADV_CMD_FLAG_INV_ICACHE |
1733 RADV_CMD_FLAG_INV_SMEM_L1 |
1734 RADV_CMD_FLAG_INV_VMEM_L1 |
1735 RADV_CMD_FLAG_INV_GLOBAL_L2);
1736 }
1737
1738 if (!queue->device->ws->cs_finalize(cs))
1739 goto fail;
1740 }
1741
1742 if (queue->initial_full_flush_preamble_cs)
1743 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1744
1745 if (queue->initial_preamble_cs)
1746 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1747
1748 if (queue->continue_preamble_cs)
1749 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1750
1751 queue->initial_full_flush_preamble_cs = dest_cs[0];
1752 queue->initial_preamble_cs = dest_cs[1];
1753 queue->continue_preamble_cs = dest_cs[2];
1754
1755 if (scratch_bo != queue->scratch_bo) {
1756 if (queue->scratch_bo)
1757 queue->device->ws->buffer_destroy(queue->scratch_bo);
1758 queue->scratch_bo = scratch_bo;
1759 queue->scratch_size = scratch_size;
1760 }
1761
1762 if (compute_scratch_bo != queue->compute_scratch_bo) {
1763 if (queue->compute_scratch_bo)
1764 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1765 queue->compute_scratch_bo = compute_scratch_bo;
1766 queue->compute_scratch_size = compute_scratch_size;
1767 }
1768
1769 if (esgs_ring_bo != queue->esgs_ring_bo) {
1770 if (queue->esgs_ring_bo)
1771 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1772 queue->esgs_ring_bo = esgs_ring_bo;
1773 queue->esgs_ring_size = esgs_ring_size;
1774 }
1775
1776 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1777 if (queue->gsvs_ring_bo)
1778 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1779 queue->gsvs_ring_bo = gsvs_ring_bo;
1780 queue->gsvs_ring_size = gsvs_ring_size;
1781 }
1782
1783 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1784 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1785 }
1786
1787 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1788 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1789 queue->has_tess_rings = true;
1790 }
1791
1792 if (descriptor_bo != queue->descriptor_bo) {
1793 if (queue->descriptor_bo)
1794 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1795
1796 queue->descriptor_bo = descriptor_bo;
1797 }
1798
1799 if (add_sample_positions)
1800 queue->has_sample_positions = true;
1801
1802 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1803 *initial_preamble_cs = queue->initial_preamble_cs;
1804 *continue_preamble_cs = queue->continue_preamble_cs;
1805 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1806 *continue_preamble_cs = NULL;
1807 return VK_SUCCESS;
1808 fail:
1809 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1810 if (dest_cs[i])
1811 queue->device->ws->cs_destroy(dest_cs[i]);
1812 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1813 queue->device->ws->buffer_destroy(descriptor_bo);
1814 if (scratch_bo && scratch_bo != queue->scratch_bo)
1815 queue->device->ws->buffer_destroy(scratch_bo);
1816 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1817 queue->device->ws->buffer_destroy(compute_scratch_bo);
1818 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1819 queue->device->ws->buffer_destroy(esgs_ring_bo);
1820 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1821 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1822 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1823 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1824 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1825 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1826 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1827 }
1828
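/* Split the given semaphores into kernel syncobjs and legacy winsys
 * semaphores: one pass counts each kind, a second pass fills the arrays
 * allocated for them. */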
1829 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1830 int num_sems,
1831 const VkSemaphore *sems,
1832 bool reset_temp)
1833 {
1834 int syncobj_idx = 0, sem_idx = 0;
1835
1836 if (num_sems == 0)
1837 return VK_SUCCESS;
1838 for (uint32_t i = 0; i < num_sems; i++) {
1839 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1840
1841 if (sem->temp_syncobj || sem->syncobj)
1842 counts->syncobj_count++;
1843 else
1844 counts->sem_count++;
1845 }
1846
1847 if (counts->syncobj_count) {
1848 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
1849 if (!counts->syncobj)
1850 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1851 }
1852
1853 if (counts->sem_count) {
1854 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
1855 if (!counts->sem) {
1856 free(counts->syncobj);
1857 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1858 }
1859 }
1860
1861 for (uint32_t i = 0; i < num_sems; i++) {
1862 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1863
1864 if (sem->temp_syncobj) {
1865 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
1866 }
1867 else if (sem->syncobj)
1868 counts->syncobj[syncobj_idx++] = sem->syncobj;
1869 else {
1870 assert(sem->sem);
1871 counts->sem[sem_idx++] = sem->sem;
1872 }
1873 }
1874
1875 return VK_SUCCESS;
1876 }
1877
1878 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
1879 {
1880 free(sem_info->wait.syncobj);
1881 free(sem_info->wait.sem);
1882 free(sem_info->signal.syncobj);
1883 free(sem_info->signal.sem);
1884 }
1885
1886
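/* Temporary syncobjs are installed by semaphore imports with temporary
 * permanence; they are single-use, so destroy them once the submission
 * that waited on them has been queued. */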
1887 static void radv_free_temp_syncobjs(struct radv_device *device,
1888 int num_sems,
1889 const VkSemaphore *sems)
1890 {
1891 for (uint32_t i = 0; i < num_sems; i++) {
1892 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1893
1894 if (sem->temp_syncobj) {
1895 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
1896 sem->temp_syncobj = 0;
1897 }
1898 }
1899 }
1900
1901 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
1902 int num_wait_sems,
1903 const VkSemaphore *wait_sems,
1904 int num_signal_sems,
1905 const VkSemaphore *signal_sems)
1906 {
1907 VkResult ret;
1908 memset(sem_info, 0, sizeof(*sem_info));
1909
1910 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
1911 if (ret)
1912 return ret;
1913 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
1914 if (ret)
1915 radv_free_sem_info(sem_info);
1916
1917 /* caller can override these */
1918 sem_info->cs_emit_wait = true;
1919 sem_info->cs_emit_signal = true;
1920 return ret;
1921 }
1922
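/* Submissions are split into chunks of at most max_cs_submission command
 * buffers. Semaphore waits are only emitted with the first chunk and
 * signals with the last one; with a trace BO every CS is submitted on its
 * own so GPU hangs can be attributed to a single command buffer. */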
1923 VkResult radv_QueueSubmit(
1924 VkQueue _queue,
1925 uint32_t submitCount,
1926 const VkSubmitInfo* pSubmits,
1927 VkFence _fence)
1928 {
1929 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1930 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1931 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1932 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1933 int ret;
1934 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1935 uint32_t scratch_size = 0;
1936 uint32_t compute_scratch_size = 0;
1937 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1938 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
1939 VkResult result;
1940 bool fence_emitted = false;
1941 bool tess_rings_needed = false;
1942 bool sample_positions_needed = false;
1943
1944 /* Do this first so failing to allocate scratch buffers can't result in
1945 * partially executed submissions. */
1946 for (uint32_t i = 0; i < submitCount; i++) {
1947 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1948 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1949 pSubmits[i].pCommandBuffers[j]);
1950
1951 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1952 compute_scratch_size = MAX2(compute_scratch_size,
1953 cmd_buffer->compute_scratch_size_needed);
1954 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1955 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1956 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1957 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1958 }
1959 }
1960
1961 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1962 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1963 sample_positions_needed, &initial_flush_preamble_cs,
1964 &initial_preamble_cs, &continue_preamble_cs);
1965 if (result != VK_SUCCESS)
1966 return result;
1967
1968 for (uint32_t i = 0; i < submitCount; i++) {
1969 struct radeon_winsys_cs **cs_array;
1970 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1971 bool can_patch = true;
1972 uint32_t advance;
1973 struct radv_winsys_sem_info sem_info;
1974
1975 result = radv_alloc_sem_info(&sem_info,
1976 pSubmits[i].waitSemaphoreCount,
1977 pSubmits[i].pWaitSemaphores,
1978 pSubmits[i].signalSemaphoreCount,
1979 pSubmits[i].pSignalSemaphores);
1980 if (result != VK_SUCCESS)
1981 return result;
1982
1983 if (!pSubmits[i].commandBufferCount) {
1984 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1985 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1986 &queue->device->empty_cs[queue->queue_family_index],
1987 1, NULL, NULL,
1988 &sem_info,
1989 false, base_fence);
1990 if (ret) {
1991 radv_loge("failed to submit CS %d\n", i);
1992 abort();
1993 }
1994 fence_emitted = true;
1995 }
1996 radv_free_sem_info(&sem_info);
1997 continue;
1998 }
1999
2000 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
2001 (pSubmits[i].commandBufferCount));
		if (!cs_array) {
			radv_free_sem_info(&sem_info);
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
		}
2002
2003 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
2004 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
2005 pSubmits[i].pCommandBuffers[j]);
2006 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2007
2008 cs_array[j] = cmd_buffer->cs;
2009 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
2010 can_patch = false;
2011 }
2012
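		/* The first chunk of a flushing submission gets the preamble
		 * with the full cache flush; later chunks reuse the lighter
		 * initial preamble. */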
2013 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
2014 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
2015 advance = MIN2(max_cs_submission,
2016 pSubmits[i].commandBufferCount - j);
2017
2018 if (queue->device->trace_bo)
2019 *queue->device->trace_id_ptr = 0;
2020
2021 sem_info.cs_emit_wait = j == 0;
2022 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2023
2024 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2025 advance, initial_preamble, continue_preamble_cs,
2026 &sem_info,
2027 can_patch, base_fence);
2028
2029 if (ret) {
2030 radv_loge("failed to submit CS %d\n", i);
2031 abort();
2032 }
2033 fence_emitted = true;
2034 if (queue->device->trace_bo) {
2035 radv_check_gpu_hangs(queue, cs_array[j]);
2036 }
2037 }
2038
2039 radv_free_temp_syncobjs(queue->device,
2040 pSubmits[i].waitSemaphoreCount,
2041 pSubmits[i].pWaitSemaphores);
2042 radv_free_sem_info(&sem_info);
2043 free(cs_array);
2044 }
2045
2046 if (fence) {
2047 if (!fence_emitted) {
2048 struct radv_winsys_sem_info sem_info = {0};
2049 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2050 &queue->device->empty_cs[queue->queue_family_index],
2051 1, NULL, NULL, &sem_info,
2052 false, base_fence);
2053 }
2054 fence->submitted = true;
2055 }
2056
2057 return VK_SUCCESS;
2058 }
2059
2060 VkResult radv_QueueWaitIdle(
2061 VkQueue _queue)
2062 {
2063 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2064
2065 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2066 radv_queue_family_to_ring(queue->queue_family_index),
2067 queue->queue_idx);
2068 return VK_SUCCESS;
2069 }
2070
2071 VkResult radv_DeviceWaitIdle(
2072 VkDevice _device)
2073 {
2074 RADV_FROM_HANDLE(radv_device, device, _device);
2075
2076 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2077 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2078 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2079 }
2080 }
2081 return VK_SUCCESS;
2082 }
2083
2084 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2085 VkInstance instance,
2086 const char* pName)
2087 {
2088 return radv_lookup_entrypoint(pName);
2089 }
2090
2091 /* The loader wants us to expose a second GetInstanceProcAddr function
2092 * to work around certain LD_PRELOAD issues seen in apps.
2093 */
2094 PUBLIC
2095 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2096 VkInstance instance,
2097 const char* pName);
2098
2099 PUBLIC
2100 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2101 VkInstance instance,
2102 const char* pName)
2103 {
2104 return radv_GetInstanceProcAddr(instance, pName);
2105 }
2106
2107 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2108 VkDevice device,
2109 const char* pName)
2110 {
2111 return radv_lookup_entrypoint(pName);
2112 }
2113
2114 bool radv_get_memory_fd(struct radv_device *device,
2115 struct radv_device_memory *memory,
2116 int *pFD)
2117 {
2118 struct radeon_bo_metadata metadata;
2119
2120 if (memory->image) {
2121 radv_init_metadata(device, memory->image, &metadata);
2122 device->ws->buffer_set_metadata(memory->bo, &metadata);
2123 }
2124
2125 return device->ws->buffer_get_fd(device->ws, memory->bo,
2126 pFD);
2127 }
2128
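/* Common allocation path: handles zero-sized allocations, opaque-fd and
 * dma-buf imports, dedicated allocations and the WSI implicit-sync hint
 * before falling through to a plain winsys buffer allocation. */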
2129 static VkResult radv_alloc_memory(struct radv_device *device,
2130 const VkMemoryAllocateInfo* pAllocateInfo,
2131 const VkAllocationCallbacks* pAllocator,
2132 VkDeviceMemory* pMem)
2133 {
2134 struct radv_device_memory *mem;
2135 VkResult result;
2136 enum radeon_bo_domain domain;
2137 uint32_t flags = 0;
2138 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2139
2140 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2141
2142 if (pAllocateInfo->allocationSize == 0) {
2143 /* Apparently, this is allowed */
2144 *pMem = VK_NULL_HANDLE;
2145 return VK_SUCCESS;
2146 }
2147
2148 const VkImportMemoryFdInfoKHR *import_info =
2149 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2150 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2151 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2152
2153 const struct wsi_memory_allocate_info *wsi_info =
2154 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2155
2156 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2157 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2158 if (mem == NULL)
2159 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2160
2161 if (wsi_info && wsi_info->implicit_sync)
2162 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2163
2164 if (dedicate_info) {
2165 mem->image = radv_image_from_handle(dedicate_info->image);
2166 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2167 } else {
2168 mem->image = NULL;
2169 mem->buffer = NULL;
2170 }
2171
2172 if (import_info) {
2173 assert(import_info->handleType ==
2174 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
2175 import_info->handleType ==
2176 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2177 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2178 NULL, NULL);
2179 if (!mem->bo) {
2180 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2181 goto fail;
2182 } else {
2183 close(import_info->fd);
2184 goto out_success;
2185 }
2186 }
2187
2188 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2189 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2190 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2191 domain = RADEON_DOMAIN_GTT;
2192 else
2193 domain = RADEON_DOMAIN_VRAM;
2194
2195 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2196 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2197 else
2198 flags |= RADEON_FLAG_CPU_ACCESS;
2199
2200 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2201 flags |= RADEON_FLAG_GTT_WC;
2202
2203 if (!dedicate_info && !import_info)
2204 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2205
2206 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2207 domain, flags);
2208
2209 if (!mem->bo) {
2210 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2211 goto fail;
2212 }
2213 mem->type_index = mem_type_index;
2214 out_success:
2215 *pMem = radv_device_memory_to_handle(mem);
2216
2217 return VK_SUCCESS;
2218
2219 fail:
2220 vk_free2(&device->alloc, pAllocator, mem);
2221
2222 return result;
2223 }
2224
2225 VkResult radv_AllocateMemory(
2226 VkDevice _device,
2227 const VkMemoryAllocateInfo* pAllocateInfo,
2228 const VkAllocationCallbacks* pAllocator,
2229 VkDeviceMemory* pMem)
2230 {
2231 RADV_FROM_HANDLE(radv_device, device, _device);
2232 return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
2233 }
2234
2235 void radv_FreeMemory(
2236 VkDevice _device,
2237 VkDeviceMemory _mem,
2238 const VkAllocationCallbacks* pAllocator)
2239 {
2240 RADV_FROM_HANDLE(radv_device, device, _device);
2241 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2242
2243 if (mem == NULL)
2244 return;
2245
2246 device->ws->buffer_destroy(mem->bo);
2247 mem->bo = NULL;
2248
2249 vk_free2(&device->alloc, pAllocator, mem);
2250 }
2251
2252 VkResult radv_MapMemory(
2253 VkDevice _device,
2254 VkDeviceMemory _memory,
2255 VkDeviceSize offset,
2256 VkDeviceSize size,
2257 VkMemoryMapFlags flags,
2258 void** ppData)
2259 {
2260 RADV_FROM_HANDLE(radv_device, device, _device);
2261 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2262
2263 if (mem == NULL) {
2264 *ppData = NULL;
2265 return VK_SUCCESS;
2266 }
2267
2268 *ppData = device->ws->buffer_map(mem->bo);
2269 if (*ppData) {
2270 *ppData += offset;
2271 return VK_SUCCESS;
2272 }
2273
2274 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2275 }
2276
2277 void radv_UnmapMemory(
2278 VkDevice _device,
2279 VkDeviceMemory _memory)
2280 {
2281 RADV_FROM_HANDLE(radv_device, device, _device);
2282 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2283
2284 if (mem == NULL)
2285 return;
2286
2287 device->ws->buffer_unmap(mem->bo);
2288 }
2289
2290 VkResult radv_FlushMappedMemoryRanges(
2291 VkDevice _device,
2292 uint32_t memoryRangeCount,
2293 const VkMappedMemoryRange* pMemoryRanges)
2294 {
2295 return VK_SUCCESS;
2296 }
2297
2298 VkResult radv_InvalidateMappedMemoryRanges(
2299 VkDevice _device,
2300 uint32_t memoryRangeCount,
2301 const VkMappedMemoryRange* pMemoryRanges)
2302 {
2303 return VK_SUCCESS;
2304 }
2305
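/* Sparse buffers get page (4096) alignment to match the virtual binding
 * granularity; everything else uses a conservative 16-byte alignment. */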
2306 void radv_GetBufferMemoryRequirements(
2307 VkDevice _device,
2308 VkBuffer _buffer,
2309 VkMemoryRequirements* pMemoryRequirements)
2310 {
2311 RADV_FROM_HANDLE(radv_device, device, _device);
2312 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2313
2314 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2315
2316 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2317 pMemoryRequirements->alignment = 4096;
2318 else
2319 pMemoryRequirements->alignment = 16;
2320
2321 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2322 }
2323
2324 void radv_GetBufferMemoryRequirements2KHR(
2325 VkDevice device,
2326 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2327 VkMemoryRequirements2KHR* pMemoryRequirements)
2328 {
2329 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2330 &pMemoryRequirements->memoryRequirements);
2331 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2332 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2333 switch (ext->sType) {
2334 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2335 VkMemoryDedicatedRequirementsKHR *req =
2336 (VkMemoryDedicatedRequirementsKHR *) ext;
2337 req->requiresDedicatedAllocation = buffer->shareable;
2338 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2339 break;
2340 }
2341 default:
2342 break;
2343 }
2344 }
2345 }
2346
2347 void radv_GetImageMemoryRequirements(
2348 VkDevice _device,
2349 VkImage _image,
2350 VkMemoryRequirements* pMemoryRequirements)
2351 {
2352 RADV_FROM_HANDLE(radv_device, device, _device);
2353 RADV_FROM_HANDLE(radv_image, image, _image);
2354
2355 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2356
2357 pMemoryRequirements->size = image->size;
2358 pMemoryRequirements->alignment = image->alignment;
2359 }
2360
2361 void radv_GetImageMemoryRequirements2KHR(
2362 VkDevice device,
2363 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2364 VkMemoryRequirements2KHR* pMemoryRequirements)
2365 {
2366 radv_GetImageMemoryRequirements(device, pInfo->image,
2367 &pMemoryRequirements->memoryRequirements);
2368
2369 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2370
2371 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2372 switch (ext->sType) {
2373 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2374 VkMemoryDedicatedRequirementsKHR *req =
2375 (VkMemoryDedicatedRequirementsKHR *) ext;
2376 req->requiresDedicatedAllocation = image->shareable;
2377 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2378 break;
2379 }
2380 default:
2381 break;
2382 }
2383 }
2384 }
2385
2386 void radv_GetImageSparseMemoryRequirements(
2387 VkDevice device,
2388 VkImage image,
2389 uint32_t* pSparseMemoryRequirementCount,
2390 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2391 {
2392 stub();
2393 }
2394
2395 void radv_GetImageSparseMemoryRequirements2KHR(
2396 VkDevice device,
2397 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2398 uint32_t* pSparseMemoryRequirementCount,
2399 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2400 {
2401 stub();
2402 }
2403
2404 void radv_GetDeviceMemoryCommitment(
2405 VkDevice device,
2406 VkDeviceMemory memory,
2407 VkDeviceSize* pCommittedMemoryInBytes)
2408 {
2409 *pCommittedMemoryInBytes = 0;
2410 }
2411
2412 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2413 uint32_t bindInfoCount,
2414 const VkBindBufferMemoryInfoKHR *pBindInfos)
2415 {
2416 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2417 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2418 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2419
2420 if (mem) {
2421 buffer->bo = mem->bo;
2422 buffer->offset = pBindInfos[i].memoryOffset;
2423 } else {
2424 buffer->bo = NULL;
2425 }
2426 }
2427 return VK_SUCCESS;
2428 }
2429
2430 VkResult radv_BindBufferMemory(
2431 VkDevice device,
2432 VkBuffer buffer,
2433 VkDeviceMemory memory,
2434 VkDeviceSize memoryOffset)
2435 {
2436 const VkBindBufferMemoryInfoKHR info = {
2437 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2438 .buffer = buffer,
2439 .memory = memory,
2440 .memoryOffset = memoryOffset
2441 };
2442
2443 return radv_BindBufferMemory2KHR(device, 1, &info);
2444 }
2445
2446 VkResult radv_BindImageMemory2KHR(VkDevice device,
2447 uint32_t bindInfoCount,
2448 const VkBindImageMemoryInfoKHR *pBindInfos)
2449 {
2450 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2451 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2452 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2453
2454 if (mem) {
2455 image->bo = mem->bo;
2456 image->offset = pBindInfos[i].memoryOffset;
2457 } else {
2458 image->bo = NULL;
2459 image->offset = 0;
2460 }
2461 }
2462 return VK_SUCCESS;
2463 }
2464
2465
2466 VkResult radv_BindImageMemory(
2467 VkDevice device,
2468 VkImage image,
2469 VkDeviceMemory memory,
2470 VkDeviceSize memoryOffset)
2471 {
2472 const VkBindImageMemoryInfoKHR info = {
2473 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2474 .image = image,
2475 .memory = memory,
2476 .memoryOffset = memoryOffset
2477 };
2478
2479 return radv_BindImageMemory2KHR(device, 1, &info);
2480 }
2481
2482
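/* Bind (or unbind, when memory is VK_NULL_HANDLE) ranges of a sparse
 * buffer's virtual address space to the backing BOs. */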
2483 static void
2484 radv_sparse_buffer_bind_memory(struct radv_device *device,
2485 const VkSparseBufferMemoryBindInfo *bind)
2486 {
2487 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2488
2489 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2490 struct radv_device_memory *mem = NULL;
2491
2492 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2493 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2494
2495 device->ws->buffer_virtual_bind(buffer->bo,
2496 bind->pBinds[i].resourceOffset,
2497 bind->pBinds[i].size,
2498 mem ? mem->bo : NULL,
2499 bind->pBinds[i].memoryOffset);
2500 }
2501 }
2502
2503 static void
2504 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2505 const VkSparseImageOpaqueMemoryBindInfo *bind)
2506 {
2507 RADV_FROM_HANDLE(radv_image, image, bind->image);
2508
2509 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2510 struct radv_device_memory *mem = NULL;
2511
2512 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2513 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2514
2515 device->ws->buffer_virtual_bind(image->bo,
2516 bind->pBinds[i].resourceOffset,
2517 bind->pBinds[i].size,
2518 mem ? mem->bo : NULL,
2519 bind->pBinds[i].memoryOffset);
2520 }
2521 }
2522
2523 VkResult radv_QueueBindSparse(
2524 VkQueue _queue,
2525 uint32_t bindInfoCount,
2526 const VkBindSparseInfo* pBindInfo,
2527 VkFence _fence)
2528 {
2529 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2530 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2531 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2532 bool fence_emitted = false;
2533
2534 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2535 struct radv_winsys_sem_info sem_info;
2536 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2537 radv_sparse_buffer_bind_memory(queue->device,
2538 pBindInfo[i].pBufferBinds + j);
2539 }
2540
2541 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2542 radv_sparse_image_opaque_bind_memory(queue->device,
2543 pBindInfo[i].pImageOpaqueBinds + j);
2544 }
2545
2546 VkResult result;
2547 result = radv_alloc_sem_info(&sem_info,
2548 pBindInfo[i].waitSemaphoreCount,
2549 pBindInfo[i].pWaitSemaphores,
2550 pBindInfo[i].signalSemaphoreCount,
2551 pBindInfo[i].pSignalSemaphores);
2552 if (result != VK_SUCCESS)
2553 return result;
2554
2555 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2556 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2557 &queue->device->empty_cs[queue->queue_family_index],
2558 1, NULL, NULL,
2559 &sem_info,
2560 false, base_fence);
2561 fence_emitted = true;
2562 if (fence)
2563 fence->submitted = true;
2564 }
2565
2566 radv_free_sem_info(&sem_info);
2567
2568 }
2569
2570 if (fence && !fence_emitted) {
2571 fence->signalled = true;
2572 }
2573
2574 return VK_SUCCESS;
2575 }
2576
2577 VkResult radv_CreateFence(
2578 VkDevice _device,
2579 const VkFenceCreateInfo* pCreateInfo,
2580 const VkAllocationCallbacks* pAllocator,
2581 VkFence* pFence)
2582 {
2583 RADV_FROM_HANDLE(radv_device, device, _device);
2584 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2585 sizeof(*fence), 8,
2586 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2587
2588 if (!fence)
2589 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2590
2591 fence->submitted = false;
2592 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2593 fence->fence = device->ws->create_fence();
2594 if (!fence->fence) {
2595 vk_free2(&device->alloc, pAllocator, fence);
2596 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2597 }
2598
2599 *pFence = radv_fence_to_handle(fence);
2600
2601 return VK_SUCCESS;
2602 }
2603
2604 void radv_DestroyFence(
2605 VkDevice _device,
2606 VkFence _fence,
2607 const VkAllocationCallbacks* pAllocator)
2608 {
2609 RADV_FROM_HANDLE(radv_device, device, _device);
2610 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2611
2612 if (!fence)
2613 return;
2614 device->ws->destroy_fence(fence->fence);
2615 vk_free2(&device->alloc, pAllocator, fence);
2616 }
2617
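/* Convert a relative timeout into an absolute CLOCK_MONOTONIC deadline,
 * clamping so the addition cannot overflow UINT64_MAX. */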
2618 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2619 {
2620 uint64_t current_time;
2621 struct timespec tv;
2622
2623 clock_gettime(CLOCK_MONOTONIC, &tv);
2624 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2625
2626 timeout = MIN2(UINT64_MAX - current_time, timeout);
2627
2628 return current_time + timeout;
2629 }
2630
2631 VkResult radv_WaitForFences(
2632 VkDevice _device,
2633 uint32_t fenceCount,
2634 const VkFence* pFences,
2635 VkBool32 waitAll,
2636 uint64_t timeout)
2637 {
2638 RADV_FROM_HANDLE(radv_device, device, _device);
2639 timeout = radv_get_absolute_timeout(timeout);
2640
2641 if (!waitAll && fenceCount > 1) {
2642 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2643 }
2644
2645 for (uint32_t i = 0; i < fenceCount; ++i) {
2646 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2647 bool expired = false;
2648
2649 if (fence->signalled)
2650 continue;
2651
2652 if (!fence->submitted)
2653 return VK_TIMEOUT;
2654
2655 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2656 if (!expired)
2657 return VK_TIMEOUT;
2658
2659 fence->signalled = true;
2660 }
2661
2662 return VK_SUCCESS;
2663 }
2664
2665 VkResult radv_ResetFences(VkDevice device,
2666 uint32_t fenceCount,
2667 const VkFence *pFences)
2668 {
2669 for (unsigned i = 0; i < fenceCount; ++i) {
2670 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2671 fence->submitted = fence->signalled = false;
2672 }
2673
2674 return VK_SUCCESS;
2675 }
2676
2677 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2678 {
2679 RADV_FROM_HANDLE(radv_device, device, _device);
2680 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2681
2682 if (fence->signalled)
2683 return VK_SUCCESS;
2684 if (!fence->submitted)
2685 return VK_NOT_READY;
2686
2687 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2688 return VK_NOT_READY;
2689
2690 return VK_SUCCESS;
2691 }
2692
2693
2694 // Queue semaphore functions
2695
2696 VkResult radv_CreateSemaphore(
2697 VkDevice _device,
2698 const VkSemaphoreCreateInfo* pCreateInfo,
2699 const VkAllocationCallbacks* pAllocator,
2700 VkSemaphore* pSemaphore)
2701 {
2702 RADV_FROM_HANDLE(radv_device, device, _device);
2703 const VkExportSemaphoreCreateInfoKHR *export =
2704 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
2705 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
2706 export ? export->handleTypes : 0;
2707
2708 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
2709 sizeof(*sem), 8,
2710 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2711 if (!sem)
2712 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2713
2714 sem->temp_syncobj = 0;
2715 /* create a syncobject if we are going to export this semaphore */
2716 if (handleTypes) {
2717 assert (device->physical_device->rad_info.has_syncobj);
2718 assert (handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2719 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
2720 if (ret) {
2721 vk_free2(&device->alloc, pAllocator, sem);
2722 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2723 }
2724 sem->sem = NULL;
2725 } else {
2726 sem->sem = device->ws->create_sem(device->ws);
2727 if (!sem->sem) {
2728 vk_free2(&device->alloc, pAllocator, sem);
2729 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2730 }
2731 sem->syncobj = 0;
2732 }
2733
2734 *pSemaphore = radv_semaphore_to_handle(sem);
2735 return VK_SUCCESS;
2736 }
2737
2738 void radv_DestroySemaphore(
2739 VkDevice _device,
2740 VkSemaphore _semaphore,
2741 const VkAllocationCallbacks* pAllocator)
2742 {
2743 RADV_FROM_HANDLE(radv_device, device, _device);
2744 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
2745 if (!_semaphore)
2746 return;
2747
2748 if (sem->syncobj)
2749 device->ws->destroy_syncobj(device->ws, sem->syncobj);
2750 else
2751 device->ws->destroy_sem(sem->sem);
2752 vk_free2(&device->alloc, pAllocator, sem);
2753 }
2754
2755 VkResult radv_CreateEvent(
2756 VkDevice _device,
2757 const VkEventCreateInfo* pCreateInfo,
2758 const VkAllocationCallbacks* pAllocator,
2759 VkEvent* pEvent)
2760 {
2761 RADV_FROM_HANDLE(radv_device, device, _device);
2762 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2763 sizeof(*event), 8,
2764 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2765
2766 if (!event)
2767 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2768
2769 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2770 RADEON_DOMAIN_GTT,
2771 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
2772 if (!event->bo) {
2773 vk_free2(&device->alloc, pAllocator, event);
2774 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2775 }
2776
2777 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
	if (!event->map) {
		device->ws->buffer_destroy(event->bo);
		vk_free2(&device->alloc, pAllocator, event);
		return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
	}
2778
2779 *pEvent = radv_event_to_handle(event);
2780
2781 return VK_SUCCESS;
2782 }
2783
2784 void radv_DestroyEvent(
2785 VkDevice _device,
2786 VkEvent _event,
2787 const VkAllocationCallbacks* pAllocator)
2788 {
2789 RADV_FROM_HANDLE(radv_device, device, _device);
2790 RADV_FROM_HANDLE(radv_event, event, _event);
2791
2792 if (!event)
2793 return;
2794 device->ws->buffer_destroy(event->bo);
2795 vk_free2(&device->alloc, pAllocator, event);
2796 }
2797
2798 VkResult radv_GetEventStatus(
2799 VkDevice _device,
2800 VkEvent _event)
2801 {
2802 RADV_FROM_HANDLE(radv_event, event, _event);
2803
2804 if (*event->map == 1)
2805 return VK_EVENT_SET;
2806 return VK_EVENT_RESET;
2807 }
2808
2809 VkResult radv_SetEvent(
2810 VkDevice _device,
2811 VkEvent _event)
2812 {
2813 RADV_FROM_HANDLE(radv_event, event, _event);
2814 *event->map = 1;
2815
2816 return VK_SUCCESS;
2817 }
2818
2819 VkResult radv_ResetEvent(
2820 VkDevice _device,
2821 VkEvent _event)
2822 {
2823 RADV_FROM_HANDLE(radv_event, event, _event);
2824 *event->map = 0;
2825
2826 return VK_SUCCESS;
2827 }
2828
2829 VkResult radv_CreateBuffer(
2830 VkDevice _device,
2831 const VkBufferCreateInfo* pCreateInfo,
2832 const VkAllocationCallbacks* pAllocator,
2833 VkBuffer* pBuffer)
2834 {
2835 RADV_FROM_HANDLE(radv_device, device, _device);
2836 struct radv_buffer *buffer;
2837
2838 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2839
2840 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2841 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2842 if (buffer == NULL)
2843 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2844
2845 buffer->size = pCreateInfo->size;
2846 buffer->usage = pCreateInfo->usage;
2847 buffer->bo = NULL;
2848 buffer->offset = 0;
2849 buffer->flags = pCreateInfo->flags;
2850
2851 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
2852 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
2853
2854 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2855 buffer->bo = device->ws->buffer_create(device->ws,
2856 align64(buffer->size, 4096),
2857 4096, 0, RADEON_FLAG_VIRTUAL);
2858 if (!buffer->bo) {
2859 vk_free2(&device->alloc, pAllocator, buffer);
2860 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2861 }
2862 }
2863
2864 *pBuffer = radv_buffer_to_handle(buffer);
2865
2866 return VK_SUCCESS;
2867 }
2868
2869 void radv_DestroyBuffer(
2870 VkDevice _device,
2871 VkBuffer _buffer,
2872 const VkAllocationCallbacks* pAllocator)
2873 {
2874 RADV_FROM_HANDLE(radv_device, device, _device);
2875 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2876
2877 if (!buffer)
2878 return;
2879
2880 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2881 device->ws->buffer_destroy(buffer->bo);
2882
2883 vk_free2(&device->alloc, pAllocator, buffer);
2884 }
2885
2886 static inline unsigned
2887 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2888 {
2889 if (stencil)
2890 return image->surface.u.legacy.stencil_tiling_index[level];
2891 else
2892 return image->surface.u.legacy.tiling_index[level];
2893 }
2894
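/* For 3D image views the effective layer count is the depth extent. */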
2895 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2896 {
2897 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2898 }
2899
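/* Fill the CB_COLOR* register values for a color attachment: base
 * address, tiling, CMASK/FMASK/DCC metadata and the number-format
 * dependent blend controls. */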
2900 static void
2901 radv_initialise_color_surface(struct radv_device *device,
2902 struct radv_color_buffer_info *cb,
2903 struct radv_image_view *iview)
2904 {
2905 const struct vk_format_description *desc;
2906 unsigned ntype, format, swap, endian;
2907 unsigned blend_clamp = 0, blend_bypass = 0;
2908 uint64_t va;
2909 const struct radeon_surf *surf = &iview->image->surface;
2910
2911 desc = vk_format_description(iview->vk_format);
2912
2913 memset(cb, 0, sizeof(*cb));
2914
2915 /* Intensity is implemented as Red, so treat it that way. */
2916 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2917
2918 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2919
2920 cb->cb_color_base = va >> 8;
2921
2922 if (device->physical_device->rad_info.chip_class >= GFX9) {
2923 struct gfx9_surf_meta_flags meta;
2924 if (iview->image->dcc_offset)
2925 meta = iview->image->surface.u.gfx9.dcc;
2926 else
2927 meta = iview->image->surface.u.gfx9.cmask;
2928
2929 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2930 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2931 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2932 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2933
2934 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
2935 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2936 } else {
2937 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2938 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2939
2940 cb->cb_color_base += level_info->offset >> 8;
2941 if (level_info->mode == RADEON_SURF_MODE_2D)
2942 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2943
2944 pitch_tile_max = level_info->nblk_x / 8 - 1;
2945 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2946 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2947
2948 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2949 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2950 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2951
2952 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2953
2954 if (iview->image->fmask.size) {
2955 if (device->physical_device->rad_info.chip_class >= CIK)
2956 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2957 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2958 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2959 } else {
2960 /* This must be set for fast clear to work without FMASK. */
2961 if (device->physical_device->rad_info.chip_class >= CIK)
2962 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2963 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2964 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2965 }
2966 }
2967
2968 /* CMASK variables */
2969 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2970 va += iview->image->cmask.offset;
2971 cb->cb_color_cmask = va >> 8;
2972
2973 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2974 va += iview->image->dcc_offset;
2975 cb->cb_dcc_base = va >> 8;
2976 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
2977
2978 uint32_t max_slice = radv_surface_layer_count(iview);
2979 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2980 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2981
2982 if (iview->image->info.samples > 1) {
2983 unsigned log_samples = util_logbase2(iview->image->info.samples);
2984
2985 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2986 S_028C74_NUM_FRAGMENTS(log_samples);
2987 }
2988
2989 if (iview->image->fmask.size) {
2990 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2991 cb->cb_color_fmask = va >> 8;
2992 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
2993 } else {
2994 cb->cb_color_fmask = cb->cb_color_base;
2995 }
2996
2997 ntype = radv_translate_color_numformat(iview->vk_format,
2998 desc,
2999 vk_format_get_first_non_void_channel(iview->vk_format));
3000 format = radv_translate_colorformat(iview->vk_format);
3001 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
3002 radv_finishme("Illegal color\n");
3003 swap = radv_translate_colorswap(iview->vk_format, FALSE);
3004 endian = radv_colorformat_endian_swap(format);
3005
3006 /* blend clamp should be set for all NORM/SRGB types */
3007 if (ntype == V_028C70_NUMBER_UNORM ||
3008 ntype == V_028C70_NUMBER_SNORM ||
3009 ntype == V_028C70_NUMBER_SRGB)
3010 blend_clamp = 1;
3011
3012 /* Set blend bypass according to the docs for SINT/UINT and the
3013 8/24 COLOR variants. */
3014 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
3015 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
3016 format == V_028C70_COLOR_X24_8_32_FLOAT) {
3017 blend_clamp = 0;
3018 blend_bypass = 1;
3019 }
3020 #if 0
3021 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
3022 (format == V_028C70_COLOR_8 ||
3023 format == V_028C70_COLOR_8_8 ||
3024 format == V_028C70_COLOR_8_8_8_8))
3025 ->color_is_int8 = true;
3026 #endif
3027 cb->cb_color_info = S_028C70_FORMAT(format) |
3028 S_028C70_COMP_SWAP(swap) |
3029 S_028C70_BLEND_CLAMP(blend_clamp) |
3030 S_028C70_BLEND_BYPASS(blend_bypass) |
3031 S_028C70_SIMPLE_FLOAT(1) |
3032 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3033 ntype != V_028C70_NUMBER_SNORM &&
3034 ntype != V_028C70_NUMBER_SRGB &&
3035 format != V_028C70_COLOR_8_24 &&
3036 format != V_028C70_COLOR_24_8) |
3037 S_028C70_NUMBER_TYPE(ntype) |
3038 S_028C70_ENDIAN(endian);
3039 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3040 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3041 if (device->physical_device->rad_info.chip_class == SI) {
3042 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3043 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3044 }
3045 }
3046
3047 if (iview->image->cmask.size &&
3048 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3049 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3050
3051 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3052 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3053
3054 if (device->physical_device->rad_info.chip_class >= VI) {
3055 unsigned max_uncompressed_block_size = 2;
3056 if (iview->image->info.samples > 1) {
3057 if (iview->image->surface.bpe == 1)
3058 max_uncompressed_block_size = 0;
3059 else if (iview->image->surface.bpe == 2)
3060 max_uncompressed_block_size = 1;
3061 }
3062
3063 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3064 S_028C78_INDEPENDENT_64B_BLOCKS(1);
3065 }
3066
3067 /* This must be set for fast clear to work without FMASK. */
3068 if (!iview->image->fmask.size &&
3069 device->physical_device->rad_info.chip_class == SI) {
3070 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3071 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3072 }
3073
3074 if (device->physical_device->rad_info.chip_class >= GFX9) {
3075 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3076 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3077
3078 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3079 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3080 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3081 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3082 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3083 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3084 }
3085 }
3086
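/* Fill the DB_* register values for a depth/stencil attachment,
 * including HTILE state when the given level has HTILE enabled. */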
3087 static void
3088 radv_initialise_ds_surface(struct radv_device *device,
3089 struct radv_ds_buffer_info *ds,
3090 struct radv_image_view *iview)
3091 {
3092 unsigned level = iview->base_mip;
3093 unsigned format, stencil_format;
3094 uint64_t va, s_offs, z_offs;
3095 bool stencil_only = false;
3096 memset(ds, 0, sizeof(*ds));
3097 switch (iview->image->vk_format) {
3098 case VK_FORMAT_D24_UNORM_S8_UINT:
3099 case VK_FORMAT_X8_D24_UNORM_PACK32:
3100 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3101 ds->offset_scale = 2.0f;
3102 break;
3103 case VK_FORMAT_D16_UNORM:
3104 case VK_FORMAT_D16_UNORM_S8_UINT:
3105 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3106 ds->offset_scale = 4.0f;
3107 break;
3108 case VK_FORMAT_D32_SFLOAT:
3109 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3110 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3111 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3112 ds->offset_scale = 1.0f;
3113 break;
3114 case VK_FORMAT_S8_UINT:
3115 stencil_only = true;
3116 break;
3117 default:
3118 break;
3119 }
3120
3121 format = radv_translate_dbformat(iview->image->vk_format);
3122 stencil_format = iview->image->surface.has_stencil ?
3123 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3124
3125 uint32_t max_slice = radv_surface_layer_count(iview);
3126 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3127 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
3128
3129 ds->db_htile_data_base = 0;
3130 ds->db_htile_surface = 0;
3131
3132 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3133 s_offs = z_offs = va;
3134
3135 if (device->physical_device->rad_info.chip_class >= GFX9) {
3136 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3137 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3138
3139 ds->db_z_info = S_028038_FORMAT(format) |
3140 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3141 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3142 S_028038_MAXMIP(iview->image->info.levels - 1);
3143 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3144 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3145
3146 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3147 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3148 ds->db_depth_view |= S_028008_MIPID(level);
3149
3150 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3151 S_02801C_Y_MAX(iview->image->info.height - 1);
3152
3153 if (radv_htile_enabled(iview->image, level)) {
3154 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3155
3156 if (iview->image->tc_compatible_htile) {
3157 unsigned max_zplanes = 4;
3158
3159 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3160 iview->image->info.samples > 1)
3161 max_zplanes = 2;
3162
3163 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3164 S_028038_ITERATE_FLUSH(1);
3165 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3166 }
3167
3168 if (!iview->image->surface.has_stencil)
3169 /* Use all of the htile_buffer for depth if there's no stencil. */
3170 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3171 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3172 iview->image->htile_offset;
3173 ds->db_htile_data_base = va >> 8;
3174 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3175 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3176 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3177 }
3178 } else {
3179 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3180
3181 if (stencil_only)
3182 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3183
3184 z_offs += iview->image->surface.u.legacy.level[level].offset;
3185 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3186
3187 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3188 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3189 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3190
3191 if (iview->image->info.samples > 1)
3192 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3193
3194 if (device->physical_device->rad_info.chip_class >= CIK) {
3195 struct radeon_info *info = &device->physical_device->rad_info;
3196 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3197 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3198 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3199 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3200 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3201 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3202
3203 if (stencil_only)
3204 tile_mode = stencil_tile_mode;
3205
3206 ds->db_depth_info |=
3207 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3208 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3209 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3210 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3211 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3212 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3213 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3214 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3215 } else {
3216 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3217 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3218 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3219 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3220 if (stencil_only)
3221 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3222 }
3223
3224 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3225 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3226 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3227
3228 if (radv_htile_enabled(iview->image, level)) {
3229 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3230
3231 if (!iview->image->surface.has_stencil &&
3232 !iview->image->tc_compatible_htile)
3233 /* Use all of the htile_buffer for depth if there's no stencil. */
3234 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3235
3236 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3237 iview->image->htile_offset;
3238 ds->db_htile_data_base = va >> 8;
3239 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3240
3241 if (iview->image->tc_compatible_htile) {
3242 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3243
3244 if (iview->image->info.samples <= 1)
3245 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3246 else if (iview->image->info.samples <= 4)
3247 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3248 else
3249 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3250 }
3251 }
3252 }
3253
3254 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3255 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3256 }
3257
3258 VkResult radv_CreateFramebuffer(
3259 VkDevice _device,
3260 const VkFramebufferCreateInfo* pCreateInfo,
3261 const VkAllocationCallbacks* pAllocator,
3262 VkFramebuffer* pFramebuffer)
3263 {
3264 RADV_FROM_HANDLE(radv_device, device, _device);
3265 struct radv_framebuffer *framebuffer;
3266
3267 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3268
3269 size_t size = sizeof(*framebuffer) +
3270 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3271 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3272 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3273 if (framebuffer == NULL)
3274 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3275
3276 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3277 framebuffer->width = pCreateInfo->width;
3278 framebuffer->height = pCreateInfo->height;
3279 framebuffer->layers = pCreateInfo->layers;
3280 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3281 VkImageView _iview = pCreateInfo->pAttachments[i];
3282 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3283 framebuffer->attachments[i].attachment = iview;
3284 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3285 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3286 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3287 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3288 }
3289 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3290 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3291 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3292 }
3293
3294 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3295 return VK_SUCCESS;
3296 }
3297
3298 void radv_DestroyFramebuffer(
3299 VkDevice _device,
3300 VkFramebuffer _fb,
3301 const VkAllocationCallbacks* pAllocator)
3302 {
3303 RADV_FROM_HANDLE(radv_device, device, _device);
3304 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3305
3306 if (!fb)
3307 return;
3308 vk_free2(&device->alloc, pAllocator, fb);
3309 }
3310
3311 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3312 {
3313 switch (address_mode) {
3314 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3315 return V_008F30_SQ_TEX_WRAP;
3316 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3317 return V_008F30_SQ_TEX_MIRROR;
3318 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3319 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3320 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3321 return V_008F30_SQ_TEX_CLAMP_BORDER;
3322 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3323 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3324 default:
3325 unreachable("illegal tex wrap mode");
3326 break;
3327 }
3328 }
3329
3330 static unsigned
3331 radv_tex_compare(VkCompareOp op)
3332 {
3333 switch (op) {
3334 case VK_COMPARE_OP_NEVER:
3335 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3336 case VK_COMPARE_OP_LESS:
3337 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3338 case VK_COMPARE_OP_EQUAL:
3339 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3340 case VK_COMPARE_OP_LESS_OR_EQUAL:
3341 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3342 case VK_COMPARE_OP_GREATER:
3343 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3344 case VK_COMPARE_OP_NOT_EQUAL:
3345 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3346 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3347 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3348 case VK_COMPARE_OP_ALWAYS:
3349 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3350 default:
3351 unreachable("illegal compare mode");
3352 break;
3353 }
3354 }
3355
3356 static unsigned
3357 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3358 {
3359 switch (filter) {
3360 case VK_FILTER_NEAREST:
3361 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3362 V_008F38_SQ_TEX_XY_FILTER_POINT);
3363 case VK_FILTER_LINEAR:
3364 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3365 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3366 case VK_FILTER_CUBIC_IMG:
3367 default:
3368 fprintf(stderr, "illegal texture filter\n");
3369 return 0;
3370 }
3371 }
3372
3373 static unsigned
3374 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3375 {
3376 switch (mode) {
3377 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3378 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3379 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3380 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3381 default:
3382 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3383 }
3384 }
3385
3386 static unsigned
3387 radv_tex_bordercolor(VkBorderColor bcolor)
3388 {
3389 switch (bcolor) {
3390 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3391 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3392 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3393 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3394 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3395 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3396 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3397 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3398 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3399 default:
3400 break;
3401 }
3402 return 0;
3403 }
3404
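/* Map a maxAnisotropy value to the hardware's log2 encoding:
 * 1x -> 0, 2-3x -> 1, 4-7x -> 2, 8-15x -> 3, 16x and up -> 4.
 */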
3405 static unsigned
3406 radv_tex_aniso_filter(unsigned filter)
3407 {
3408 if (filter < 2)
3409 return 0;
3410 if (filter < 4)
3411 return 1;
3412 if (filter < 8)
3413 return 2;
3414 if (filter < 16)
3415 return 3;
3416 return 4;
3417 }
3418
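/* Pack the VkSamplerCreateInfo into the four 32-bit words of the
 * hardware sampler descriptor that the shader reads (the S_008F30..
 * S_008F3C fields below).
 */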
3419 static void
3420 radv_init_sampler(struct radv_device *device,
3421 struct radv_sampler *sampler,
3422 const VkSamplerCreateInfo *pCreateInfo)
3423 {
3424 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3425 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3426 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3427 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3428
3429 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3430 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3431 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3432 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3433 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3434 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3435 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3436 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3437 S_008F30_DISABLE_CUBE_WRAP(0) |
3438 S_008F30_COMPAT_MODE(is_vi));
3439 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3440 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3441 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3442 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3443 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3444 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3445 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3446 S_008F38_MIP_POINT_PRECLAMP(0) |
3447 S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
3448 S_008F38_FILTER_PREC_FIX(1) |
3449 S_008F38_ANISO_OVERRIDE(is_vi));
3450 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3451 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3452 }
3453
3454 VkResult radv_CreateSampler(
3455 VkDevice _device,
3456 const VkSamplerCreateInfo* pCreateInfo,
3457 const VkAllocationCallbacks* pAllocator,
3458 VkSampler* pSampler)
3459 {
3460 RADV_FROM_HANDLE(radv_device, device, _device);
3461 struct radv_sampler *sampler;
3462
3463 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3464
3465 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3466 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3467 if (!sampler)
3468 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3469
3470 radv_init_sampler(device, sampler, pCreateInfo);
3471 *pSampler = radv_sampler_to_handle(sampler);
3472
3473 return VK_SUCCESS;
3474 }
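/* Illustrative usage sketch (not driver code): a VkSamplerCreateInfo
 * that exercises the anisotropic path in radv_init_sampler(); "dev" is
 * a hypothetical application VkDevice.
 *
 *	VkSamplerCreateInfo info = {
 *		.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *		.magFilter = VK_FILTER_LINEAR,
 *		.minFilter = VK_FILTER_LINEAR,
 *		.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
 *		.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *		.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *		.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *		.anisotropyEnable = VK_TRUE,
 *		.maxAnisotropy = 16.0f,
 *		.maxLod = 15.0f,
 *	};
 *	VkSampler sampler;
 *	VkResult result = vkCreateSampler(dev, &info, NULL, &sampler);
 */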
3475
3476 void radv_DestroySampler(
3477 VkDevice _device,
3478 VkSampler _sampler,
3479 const VkAllocationCallbacks* pAllocator)
3480 {
3481 RADV_FROM_HANDLE(radv_device, device, _device);
3482 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3483
3484 if (!sampler)
3485 return;
3486 vk_free2(&device->alloc, pAllocator, sampler);
3487 }
3488
3489 /* vk_icd.h does not declare this function, so we declare it here to
3490 * suppress -Wmissing-prototypes.
3491 */
3492 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3493 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3494
3495 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3496 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3497 {
3498 /* For the full details on loader interface versioning, see
3499 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3500 * What follows is a condensed summary, to help you navigate the large and
3501 * confusing official doc.
3502 *
3503 * - Loader interface v0 is incompatible with later versions. We don't
3504 * support it.
3505 *
3506 * - In loader interface v1:
3507 * - The first ICD entrypoint called by the loader is
3508 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3509 * entrypoint.
3510 * - The ICD must statically expose no other Vulkan symbol unless it is
3511 * linked with -Bsymbolic.
3512 * - Each dispatchable Vulkan handle created by the ICD must be
3513 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3514 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3515 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3516 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3517 * such loader-managed surfaces.
3518 *
3519 * - Loader interface v2 differs from v1 in:
3520 * - The first ICD entrypoint called by the loader is
3521 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3522 * statically expose this entrypoint.
3523 *
3524 * - Loader interface v3 differs from v2 in:
3525 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3526 * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
3527 * because the loader no longer does so.
3528 */
3529 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3530 return VK_SUCCESS;
3531 }
3532
3533 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3534 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3535 int *pFD)
3536 {
3537 RADV_FROM_HANDLE(radv_device, device, _device);
3538 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3539
3540 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3541
3542 /* At the moment, we only support the handle types listed below. */
3543 assert(pGetFdInfo->handleType ==
3544 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
3545 pGetFdInfo->handleType ==
3546 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3547
3548 bool ret = radv_get_memory_fd(device, memory, pFD);
3549 if (!ret)
3550 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3551 return VK_SUCCESS;
3552 }
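/* Illustrative usage sketch (not driver code): exporting a dma-buf fd
 * from memory allocated for export; "dev" and "mem" are hypothetical
 * application handles.
 *
 *	VkMemoryGetFdInfoKHR fd_info = {
 *		.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *		.memory = mem,
 *		.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *	};
 *	int fd = -1;
 *	VkResult result = vkGetMemoryFdKHR(dev, &fd_info, &fd);
 */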
3553
3554 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3555 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3556 int fd,
3557 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3558 {
3559 switch (handleType) {
3560 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
3561 pMemoryFdProperties->memoryTypeBits = (1 << RADV_MEM_TYPE_COUNT) - 1;
3562 return VK_SUCCESS;
3563
3564 default:
3565 /* The valid usage section for this function says:
3566 *
3567 * "handleType must not be one of the handle types defined as
3568 * opaque."
3569 *
3570 * So opaque handle types fall into the default "unsupported" case.
3571 */
3572 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3573 }
3574 }
3575
3576 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
3577 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
3578 {
3579 RADV_FROM_HANDLE(radv_device, device, _device);
3580 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
3581 uint32_t syncobj_handle = 0;
3582 uint32_t *syncobj_dst = NULL;
3583 assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3584
3585 int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
3586 if (ret != 0)
3587 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3588
3589 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
3590 syncobj_dst = &sem->temp_syncobj;
3591 } else {
3592 syncobj_dst = &sem->syncobj;
3593 }
3594
3595 if (*syncobj_dst)
3596 device->ws->destroy_syncobj(device->ws, *syncobj_dst);
3597
3598 *syncobj_dst = syncobj_handle;
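/* A successful import transfers ownership of the fd to the driver, so
 * close it now that the syncobj holds the payload.
 */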
3599 close(pImportSemaphoreFdInfo->fd);
3600 return VK_SUCCESS;
3601 }
3602
3603 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
3604 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
3605 int *pFd)
3606 {
3607 RADV_FROM_HANDLE(radv_device, device, _device);
3608 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
3609 int ret;
3610 uint32_t syncobj_handle;
3611
3612 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3613 if (sem->temp_syncobj)
3614 syncobj_handle = sem->temp_syncobj;
3615 else
3616 syncobj_handle = sem->syncobj;
3617 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3618 if (ret)
3619 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3620 return VK_SUCCESS;
3621 }
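/* Illustrative usage sketch (not driver code): round-tripping a
 * semaphore payload through an opaque fd; "dev", "sem_a" and "sem_b"
 * are hypothetical application handles.
 *
 *	VkSemaphoreGetFdInfoKHR get_info = {
 *		.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
 *		.semaphore = sem_a,
 *		.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
 *	};
 *	int fd = -1;
 *	vkGetSemaphoreFdKHR(dev, &get_info, &fd);
 *
 *	VkImportSemaphoreFdInfoKHR import_info = {
 *		.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
 *		.semaphore = sem_b,
 *		.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
 *		.fd = fd,
 *	};
 *	vkImportSemaphoreFdKHR(dev, &import_info);
 *	(after which fd belongs to the driver and must not be used again)
 */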
3622
3623 void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
3624 VkPhysicalDevice physicalDevice,
3625 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
3626 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
3627 {
3628 if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
3629 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3630 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3631 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
3632 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
3633 } else {
3634 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
3635 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
3636 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
3637 }
3638 }