/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"

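/* Build the pipeline cache UUID from the Mesa and LLVM build timestamps
 * plus the chip family, so that on-disk caches are invalidated whenever
 * the compiler stack or the target GPU changes.
 */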
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static const char *
get_chip_name(enum radeon_family family)
{
	switch (family) {
	case CHIP_TAHITI: return "AMD RADV TAHITI";
	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
	case CHIP_VERDE: return "AMD RADV CAPE VERDE";
	case CHIP_OLAND: return "AMD RADV OLAND";
	case CHIP_HAINAN: return "AMD RADV HAINAN";
	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
	case CHIP_KAVERI: return "AMD RADV KAVERI";
	case CHIP_KABINI: return "AMD RADV KABINI";
	case CHIP_HAWAII: return "AMD RADV HAWAII";
	case CHIP_MULLINS: return "AMD RADV MULLINS";
	case CHIP_TONGA: return "AMD RADV TONGA";
	case CHIP_ICELAND: return "AMD RADV ICELAND";
	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
	case CHIP_FIJI: return "AMD RADV FIJI";
	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
	case CHIP_STONEY: return "AMD RADV STONEY";
	case CHIP_VEGA10: return "AMD RADV VEGA";
	case CHIP_RAVEN: return "AMD RADV RAVEN";
	default: return "AMD RADV unknown";
	}
}

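/* Advertise up to three heaps (CPU-invisible VRAM, CPU-visible VRAM and
 * GART) and the memory types backed by them: device-local, host-visible
 * write-combined, device-local host-visible, and host-visible cached.
 */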
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
	                                  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

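/* Open the DRM render node, check that it is driven by the amdgpu kernel
 * module, and set up the winsys, WSI, shader disk cache and per-chip
 * feature flags for this physical device.
 */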
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return VK_ERROR_INCOMPATIBLE_DRIVER;

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
		                 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
	                                       instance->perftest_flags);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}

	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	device->name = get_chip_name(device->rad_info.family);

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		radv_finish_wsi(device);
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
		                   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The gpu id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	radv_physical_device_init_mem_types(device);
	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

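/* Debug toggles understood by the RADV_DEBUG environment variable,
 * parsed with parse_debug_string() in radv_CreateInstance().
 */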
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
		                 "Client requested version %d.%d.%d",
		                 VK_VERSION_MAJOR(client_version),
		                 VK_VERSION_MINOR(client_version),
		                 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_instance_extension_supported(ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
	                     VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(instance, 0, sizeof(*instance));

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
	                                           radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
	                                              radv_perftest_options);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

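/* Scan the DRM devices for AMD PCI render nodes and initialize a
 * radv_physical_device for each one that comes up with the amdgpu driver.
 */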
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return VK_ERROR_INCOMPATIBLE_DRIVER;

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
			                                   instance->physicalDeviceCount,
			                                   instance,
			                                   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
	                                                             : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = false,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2KHR *pFeatures)
{
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
			VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		default:
			break;
		}
	}
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
	 * be at most 2 GiB. The combined image & sampler object counts as one of
	 * both. This limit is for the pipeline layout, not for the set layout, but
	 * there is no set limit, so we just set a pipeline limit. I don't think
	 * any app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties2KHR *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
			VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
				(VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		default:
			break;
		}
	}
}

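/* Report one graphics+compute+transfer queue family, plus a separate
 * compute-only family on CIK+ when compute rings are available and not
 * disabled with RADV_DEBUG=nocompute. A NULL pQueueFamilyProperties
 * queries the family count only.
 */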
static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device* pdevice,
	uint32_t* pCount,
	VkQueueFamilyProperties** pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
			              VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
				              VK_QUEUE_TRANSFER_BIT |
				              VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties* pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
	                                       &pMemoryProperties->memoryProperties);
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch(pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

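/* Initialize a queue and create its winsys hardware context with the
 * requested global priority.
 */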
static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                int queue_family_index, int idx,
                const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
	if (queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

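/* The GS vertex table depth is a fixed per-chip property: 16 entries on
 * the smaller parts, 32 on the rest.
 */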
static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice physicalDevice,
	const VkDeviceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDevice* pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	bool keep_shader_info = false;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_physical_device_extension_supported(physical_device, ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);

		if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
			keep_shader_info = true;
	}

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++) {
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
	                   sizeof(*device), 8,
	                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(device, 0, sizeof(*device));

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	mtx_init(&device->shader_slab_mutex, mtx_plain);
	list_inithead(&device->shader_slabs);

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

		device->queues[qfi] = vk_alloc(&device->alloc,
		                               queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
	                             max_threads_per_block / 64);

	radv_device_init_gs_info(device);

	device->tess_offchip_block_dw_size =
		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
	device->has_distributed_tess =
		device->physical_device->rad_info.chip_class >= VI &&
		device->physical_device->rad_info.max_se >= 2;

	if (getenv("RADV_TRACE_FILE")) {
		keep_shader_info = true;

		if (!radv_init_trace(device))
			goto fail;
	}

	device->keep_shader_info = keep_shader_info;

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

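	/* Record a trivial command stream for each queue family so that
	 * submissions without user command buffers still have a valid IB to
	 * execute.
	 */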
	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (device->physical_device->rad_info.chip_class >= CIK)
		cik_create_gfx_config(device);

	VkPipelineCacheCreateInfo ci;
	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
	ci.pNext = NULL;
	ci.flags = 0;
	ci.pInitialData = NULL;
	ci.initialDataSize = 0;
	VkPipelineCache pc;
	result = radv_CreatePipelineCache(radv_device_to_handle(device),
	                                  &ci, NULL, &pc);
	if (result != VK_SUCCESS)
		goto fail;

	device->mem_cache = radv_pipeline_cache_from_handle(pc);

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice _device,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice _device,
	uint32_t queueFamilyIndex,
	uint32_t queueIndex,
	VkQueue* pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

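/* Write the buffer descriptors for the ES->GS and GS->VS rings, the
 * tessellation factor and offchip rings, and optionally the default
 * sample positions into the queue's descriptor BO; map points at the BO
 * contents and the ring descriptors start at map[4].
 */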
static void
fill_geom_tess_rings(struct radv_queue *queue,
                     uint32_t *map,
                     bool add_sample_positions,
                     uint32_t esgs_ring_size,
                     struct radeon_winsys_bo *esgs_ring_bo,
                     uint32_t gsvs_ring_size,
                     struct radeon_winsys_bo *gsvs_ring_bo,
                     uint32_t tess_factor_ring_size,
                     struct radeon_winsys_bo *tess_factor_ring_bo,
                     uint32_t tess_offchip_ring_size,
                     struct radeon_winsys_bo *tess_offchip_ring_bo)
{
	uint64_t esgs_va = 0, gsvs_va = 0;
	uint64_t tess_factor_va = 0, tess_offchip_va = 0;
	uint32_t *desc = &map[4];

	if (esgs_ring_bo)
		esgs_va = radv_buffer_get_va(esgs_ring_bo);
	if (gsvs_ring_bo)
		gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
	if (tess_factor_ring_bo)
		tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
	if (tess_offchip_ring_bo)
		tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	   index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		S_008F04_STRIDE(0) |
		S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		S_008F0C_ELEMENT_SIZE(1) |
		S_008F0C_INDEX_STRIDE(3) |
		S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
		S_008F04_STRIDE(0) |
		S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		S_008F0C_ELEMENT_SIZE(0) |
		S_008F0C_INDEX_STRIDE(0) |
		S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
		S_008F04_STRIDE(0) |
		S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		S_008F0C_ELEMENT_SIZE(0) |
		S_008F0C_INDEX_STRIDE(0) |
		S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	   elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
		S_008F04_STRIDE(0) |
		S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		S_008F0C_ELEMENT_SIZE(1) |
		S_008F0C_INDEX_STRIDE(1) |
		S_008F0C_ADD_TID_ENABLE(true);
	desc += 4;

	desc[0] = tess_factor_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
		S_008F04_STRIDE(0) |
		S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_factor_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		S_008F0C_ELEMENT_SIZE(0) |
		S_008F0C_INDEX_STRIDE(0) |
		S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	desc[0] = tess_offchip_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
		S_008F04_STRIDE(0) |
		S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_offchip_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		S_008F0C_ELEMENT_SIZE(0) |
		S_008F0C_INDEX_STRIDE(0) |
		S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* The descriptor BO is only sized for the sample positions when they
	 * were requested, so guard the writes accordingly.
	 */
	if (add_sample_positions) {
		/* add sample positions after all rings */
		memcpy(desc, queue->device->sample_locations_1x, 8);
		desc += 2;
		memcpy(desc, queue->device->sample_locations_2x, 16);
		desc += 4;
		memcpy(desc, queue->device->sample_locations_4x, 32);
		desc += 8;
		memcpy(desc, queue->device->sample_locations_8x, 64);
		desc += 16;
		memcpy(desc, queue->device->sample_locations_16x, 128);
	}
}

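/* Compute the VGT_HS_OFFCHIP_PARAM register value: the number of offchip
 * tessellation buffers (doubled on CIK+ except on Carrizo/Stoney) and the
 * offchip granularity derived from tess_offchip_block_dw_size.
 */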
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
		device->physical_device->rad_info.family != CHIP_CARRIZO &&
		device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
		device->physical_device->rad_info.max_se;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;
	switch (device->tess_offchip_block_dw_size) {
	default:
		assert(0);
		/* fall through */
	case 8192:
		offchip_granularity = V_03093C_X_8K_DWORDS;
		break;
	case 4096:
		offchip_granularity = V_03093C_X_4K_DWORDS;
		break;
	}

	switch (device->physical_device->rad_info.chip_class) {
	case SI:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case CIK:
	case VI:
	case GFX9:
	default:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		if (device->physical_device->rad_info.chip_class >= VI)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}

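/* Build (or reuse) the three preamble command streams for a queue: one
 * that flushes and invalidates everything up front, one that only
 * invalidates caches, and one without any flush for continuing chained
 * submissions. Scratch, ring and descriptor BOs are (re)allocated here
 * whenever the requested sizes grow.
 */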
1409 static VkResult
1410 radv_get_preamble_cs(struct radv_queue *queue,
1411 uint32_t scratch_size,
1412 uint32_t compute_scratch_size,
1413 uint32_t esgs_ring_size,
1414 uint32_t gsvs_ring_size,
1415 bool needs_tess_rings,
1416 bool needs_sample_positions,
1417 struct radeon_winsys_cs **initial_full_flush_preamble_cs,
1418 struct radeon_winsys_cs **initial_preamble_cs,
1419 struct radeon_winsys_cs **continue_preamble_cs)
1420 {
1421 struct radeon_winsys_bo *scratch_bo = NULL;
1422 struct radeon_winsys_bo *descriptor_bo = NULL;
1423 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1424 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1425 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1426 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1427 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1428 struct radeon_winsys_cs *dest_cs[3] = {0};
1429 bool add_tess_rings = false, add_sample_positions = false;
1430 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1431 unsigned max_offchip_buffers;
1432 unsigned hs_offchip_param = 0;
1433 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
1434 if (!queue->has_tess_rings) {
1435 if (needs_tess_rings)
1436 add_tess_rings = true;
1437 }
1438 if (!queue->has_sample_positions) {
1439 if (needs_sample_positions)
1440 add_sample_positions = true;
1441 }
1442 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1443 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1444 &max_offchip_buffers);
1445 tess_offchip_ring_size = max_offchip_buffers *
1446 queue->device->tess_offchip_block_dw_size * 4;
1447
1448 if (scratch_size <= queue->scratch_size &&
1449 compute_scratch_size <= queue->compute_scratch_size &&
1450 esgs_ring_size <= queue->esgs_ring_size &&
1451 gsvs_ring_size <= queue->gsvs_ring_size &&
1452 !add_tess_rings && !add_sample_positions &&
1453 queue->initial_preamble_cs) {
1454 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1455 *initial_preamble_cs = queue->initial_preamble_cs;
1456 *continue_preamble_cs = queue->continue_preamble_cs;
1457 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1458 *continue_preamble_cs = NULL;
1459 return VK_SUCCESS;
1460 }
1461
1462 if (scratch_size > queue->scratch_size) {
1463 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1464 scratch_size,
1465 4096,
1466 RADEON_DOMAIN_VRAM,
1467 ring_bo_flags);
1468 if (!scratch_bo)
1469 goto fail;
1470 } else
1471 scratch_bo = queue->scratch_bo;
1472
1473 if (compute_scratch_size > queue->compute_scratch_size) {
1474 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1475 compute_scratch_size,
1476 4096,
1477 RADEON_DOMAIN_VRAM,
1478 ring_bo_flags);
1479 if (!compute_scratch_bo)
1480 goto fail;
1481
1482 } else
1483 compute_scratch_bo = queue->compute_scratch_bo;
1484
1485 if (esgs_ring_size > queue->esgs_ring_size) {
1486 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1487 esgs_ring_size,
1488 4096,
1489 RADEON_DOMAIN_VRAM,
1490 ring_bo_flags);
1491 if (!esgs_ring_bo)
1492 goto fail;
1493 } else {
1494 esgs_ring_bo = queue->esgs_ring_bo;
1495 esgs_ring_size = queue->esgs_ring_size;
1496 }
1497
1498 if (gsvs_ring_size > queue->gsvs_ring_size) {
1499 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1500 gsvs_ring_size,
1501 4096,
1502 RADEON_DOMAIN_VRAM,
1503 ring_bo_flags);
1504 if (!gsvs_ring_bo)
1505 goto fail;
1506 } else {
1507 gsvs_ring_bo = queue->gsvs_ring_bo;
1508 gsvs_ring_size = queue->gsvs_ring_size;
1509 }
1510
1511 if (add_tess_rings) {
1512 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1513 tess_factor_ring_size,
1514 256,
1515 RADEON_DOMAIN_VRAM,
1516 ring_bo_flags);
1517 if (!tess_factor_ring_bo)
1518 goto fail;
1519 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1520 tess_offchip_ring_size,
1521 256,
1522 RADEON_DOMAIN_VRAM,
1523 ring_bo_flags);
1524 if (!tess_offchip_ring_bo)
1525 goto fail;
1526 } else {
1527 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1528 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1529 }
1530
1531 if (scratch_bo != queue->scratch_bo ||
1532 esgs_ring_bo != queue->esgs_ring_bo ||
1533 gsvs_ring_bo != queue->gsvs_ring_bo ||
1534 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1535 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1536 uint32_t size = 0;
1537 if (gsvs_ring_bo || esgs_ring_bo ||
1538 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1539 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1540 if (add_sample_positions)
1541 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1542 }
1543 else if (scratch_bo)
1544 size = 8; /* 2 dword */
1545
1546 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1547 size,
1548 4096,
1549 RADEON_DOMAIN_VRAM,
1550 RADEON_FLAG_CPU_ACCESS|RADEON_FLAG_NO_INTERPROCESS_SHARING);
1551 if (!descriptor_bo)
1552 goto fail;
1553 } else
1554 descriptor_bo = queue->descriptor_bo;
1555
1556 for(int i = 0; i < 3; ++i) {
1557 struct radeon_winsys_cs *cs = NULL;
1558 cs = queue->device->ws->cs_create(queue->device->ws,
1559 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1560 if (!cs)
1561 goto fail;
1562
1563 dest_cs[i] = cs;
1564
1565 if (scratch_bo)
1566 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1567
1568 if (esgs_ring_bo)
1569 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1570
1571 if (gsvs_ring_bo)
1572 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1573
1574 if (tess_factor_ring_bo)
1575 radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
1576
1577 if (tess_offchip_ring_bo)
1578 radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
1579
1580 if (descriptor_bo)
1581 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1582
1583 if (descriptor_bo != queue->descriptor_bo) {
1584 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1585
1586 if (scratch_bo) {
1587 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1588 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1589 S_008F04_SWIZZLE_ENABLE(1);
1590 map[0] = scratch_va;
1591 map[1] = rsrc1;
1592 }
1593
1594 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1595 add_sample_positions)
1596 fill_geom_tess_rings(queue, map, add_sample_positions,
1597 esgs_ring_size, esgs_ring_bo,
1598 gsvs_ring_size, gsvs_ring_bo,
1599 tess_factor_ring_size, tess_factor_ring_bo,
1600 tess_offchip_ring_size, tess_offchip_ring_bo);
1601
1602 queue->device->ws->buffer_unmap(descriptor_bo);
1603 }
1604
1605 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1606 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1607 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1608 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1609 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1610 }
1611
1612 if (esgs_ring_bo || gsvs_ring_bo) {
1613 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1614 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1615 radeon_emit(cs, esgs_ring_size >> 8);
1616 radeon_emit(cs, gsvs_ring_size >> 8);
1617 } else {
1618 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1619 radeon_emit(cs, esgs_ring_size >> 8);
1620 radeon_emit(cs, gsvs_ring_size >> 8);
1621 }
1622 }
1623
1624 if (tess_factor_ring_bo) {
1625 uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1626 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1627 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1628 S_030938_SIZE(tess_factor_ring_size / 4));
1629 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1630 tf_va >> 8);
1631 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1632 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1633 tf_va >> 40);
1634 }
1635 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1636 } else {
1637 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1638 S_008988_SIZE(tess_factor_ring_size / 4));
1639 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1640 tf_va >> 8);
1641 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1642 hs_offchip_param);
1643 }
1644 }
1645
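/* Load the descriptor BO address into the first user-data SGPRs of each
 * hardware shader stage. GFX9 merges LS into HS and ES into GS, so it
 * has fewer per-stage user-data registers to program.
 */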
1646 if (descriptor_bo) {
1647 uint64_t va = radv_buffer_get_va(descriptor_bo);
1648 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1649 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1650 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1651 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1652 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1653
1654 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1655 radeon_set_sh_reg_seq(cs, regs[i], 2);
1656 radeon_emit(cs, va);
1657 radeon_emit(cs, va >> 32);
1658 }
1659 } else {
1660 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1661 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1662 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1663 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1664 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1665 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1666
1667 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1668 radeon_set_sh_reg_seq(cs, regs[i], 2);
1669 radeon_emit(cs, va);
1670 radeon_emit(cs, va >> 32);
1671 }
1672 }
1673 }
1674
1675 if (compute_scratch_bo) {
1676 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1677 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1678 S_008F04_SWIZZLE_ENABLE(1);
1679
1680 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1681
1682 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1683 radeon_emit(cs, scratch_va);
1684 radeon_emit(cs, rsrc1);
1685 }
1686
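/* What distinguishes the three preambles is the flush: i == 0 emits
 * partial-flush waits plus cache invalidations, i == 1 only invalidates
 * caches, and i == 2 (the continue preamble) emits no flush at all.
 */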
1687 if (i == 0) {
1688 si_cs_emit_cache_flush(cs,
1689 false,
1690 queue->device->physical_device->rad_info.chip_class,
1691 NULL, 0,
1692 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1693 queue->device->physical_device->rad_info.chip_class >= CIK,
1694 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1695 RADV_CMD_FLAG_INV_ICACHE |
1696 RADV_CMD_FLAG_INV_SMEM_L1 |
1697 RADV_CMD_FLAG_INV_VMEM_L1 |
1698 RADV_CMD_FLAG_INV_GLOBAL_L2);
1699 } else if (i == 1) {
1700 si_cs_emit_cache_flush(cs,
1701 false,
1702 queue->device->physical_device->rad_info.chip_class,
1703 NULL, 0,
1704 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1705 queue->device->physical_device->rad_info.chip_class >= CIK,
1706 RADV_CMD_FLAG_INV_ICACHE |
1707 RADV_CMD_FLAG_INV_SMEM_L1 |
1708 RADV_CMD_FLAG_INV_VMEM_L1 |
1709 RADV_CMD_FLAG_INV_GLOBAL_L2);
1710 }
1711
1712 if (!queue->device->ws->cs_finalize(cs))
1713 goto fail;
1714 }
1715
1716 if (queue->initial_full_flush_preamble_cs)
1717 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1718
1719 if (queue->initial_preamble_cs)
1720 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1721
1722 if (queue->continue_preamble_cs)
1723 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1724
1725 queue->initial_full_flush_preamble_cs = dest_cs[0];
1726 queue->initial_preamble_cs = dest_cs[1];
1727 queue->continue_preamble_cs = dest_cs[2];
1728
1729 if (scratch_bo != queue->scratch_bo) {
1730 if (queue->scratch_bo)
1731 queue->device->ws->buffer_destroy(queue->scratch_bo);
1732 queue->scratch_bo = scratch_bo;
1733 queue->scratch_size = scratch_size;
1734 }
1735
1736 if (compute_scratch_bo != queue->compute_scratch_bo) {
1737 if (queue->compute_scratch_bo)
1738 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1739 queue->compute_scratch_bo = compute_scratch_bo;
1740 queue->compute_scratch_size = compute_scratch_size;
1741 }
1742
1743 if (esgs_ring_bo != queue->esgs_ring_bo) {
1744 if (queue->esgs_ring_bo)
1745 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1746 queue->esgs_ring_bo = esgs_ring_bo;
1747 queue->esgs_ring_size = esgs_ring_size;
1748 }
1749
1750 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1751 if (queue->gsvs_ring_bo)
1752 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1753 queue->gsvs_ring_bo = gsvs_ring_bo;
1754 queue->gsvs_ring_size = gsvs_ring_size;
1755 }
1756
1757 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1758 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1759 }
1760
1761 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1762 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1763 queue->has_tess_rings = true;
1764 }
1765
1766 if (descriptor_bo != queue->descriptor_bo) {
1767 if (queue->descriptor_bo)
1768 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1769
1770 queue->descriptor_bo = descriptor_bo;
1771 }
1772
1773 if (add_sample_positions)
1774 queue->has_sample_positions = true;
1775
1776 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1777 *initial_preamble_cs = queue->initial_preamble_cs;
1778 *continue_preamble_cs = queue->continue_preamble_cs;
1779 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1780 *continue_preamble_cs = NULL;
1781 return VK_SUCCESS;
1782 fail:
1783 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1784 if (dest_cs[i])
1785 queue->device->ws->cs_destroy(dest_cs[i]);
1786 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1787 queue->device->ws->buffer_destroy(descriptor_bo);
1788 if (scratch_bo && scratch_bo != queue->scratch_bo)
1789 queue->device->ws->buffer_destroy(scratch_bo);
1790 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1791 queue->device->ws->buffer_destroy(compute_scratch_bo);
1792 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1793 queue->device->ws->buffer_destroy(esgs_ring_bo);
1794 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1795 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1796 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1797 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1798 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1799 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1800 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1801 }
1802
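/* Split a submission's semaphores into kernel syncobjs and legacy winsys
 * semaphores: the first pass counts each kind, the second allocates the
 * arrays and fills them in.
 */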
1803 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1804 int num_sems,
1805 const VkSemaphore *sems,
1806 bool reset_temp)
1807 {
1808 int syncobj_idx = 0, sem_idx = 0;
1809
1810 if (num_sems == 0)
1811 return VK_SUCCESS;
1812 for (uint32_t i = 0; i < num_sems; i++) {
1813 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1814
1815 if (sem->temp_syncobj || sem->syncobj)
1816 counts->syncobj_count++;
1817 else
1818 counts->sem_count++;
1819 }
1820
1821 if (counts->syncobj_count) {
1822 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
1823 if (!counts->syncobj)
1824 return VK_ERROR_OUT_OF_HOST_MEMORY;
1825 }
1826
1827 if (counts->sem_count) {
1828 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
1829 if (!counts->sem) {
1830 free(counts->syncobj);
1831 return VK_ERROR_OUT_OF_HOST_MEMORY;
1832 }
1833 }
1834
1835 for (uint32_t i = 0; i < num_sems; i++) {
1836 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1837
1838 if (sem->temp_syncobj) {
1839 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
1840 if (reset_temp) {
1841 /* After we wait on a temporary import, drop it. */
1842 sem->temp_syncobj = 0;
1843 }
1844 }
1845 else if (sem->syncobj)
1846 counts->syncobj[syncobj_idx++] = sem->syncobj;
1847 else {
1848 assert(sem->sem);
1849 counts->sem[sem_idx++] = sem->sem;
1850 }
1851 }
1852
1853 return VK_SUCCESS;
1854 }
1855
1856 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
1857 {
1858 free(sem_info->wait.syncobj);
1859 free(sem_info->wait.sem);
1860 free(sem_info->signal.syncobj);
1861 free(sem_info->signal.sem);
1862 }
1863
1864 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
1865 int num_wait_sems,
1866 const VkSemaphore *wait_sems,
1867 int num_signal_sems,
1868 const VkSemaphore *signal_sems)
1869 {
1870 VkResult ret;
1871 memset(sem_info, 0, sizeof(*sem_info));
1872
1873 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
1874 if (ret)
1875 return ret;
1876 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
1877 if (ret)
1878 radv_free_sem_info(sem_info);
1879
1880 /* caller can override these */
1881 sem_info->cs_emit_wait = true;
1882 sem_info->cs_emit_signal = true;
1883 return ret;
1884 }
1885
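/* Queue submission: scan all command buffers up front to size the
 * scratch buffers and rings, (re)build the preambles if anything grew,
 * then submit each VkSubmitInfo in chunks, emitting semaphore waits with
 * the first chunk and signals with the last.
 */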
1886 VkResult radv_QueueSubmit(
1887 VkQueue _queue,
1888 uint32_t submitCount,
1889 const VkSubmitInfo* pSubmits,
1890 VkFence _fence)
1891 {
1892 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1893 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1894 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1895 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1896 int ret;
1897 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1898 uint32_t scratch_size = 0;
1899 uint32_t compute_scratch_size = 0;
1900 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1901 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
1902 VkResult result;
1903 bool fence_emitted = false;
1904 bool tess_rings_needed = false;
1905 bool sample_positions_needed = false;
1906
1907 /* Do this first so failing to allocate scratch buffers can't result in
1908 * partially executed submissions. */
1909 for (uint32_t i = 0; i < submitCount; i++) {
1910 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1911 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1912 pSubmits[i].pCommandBuffers[j]);
1913
1914 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1915 compute_scratch_size = MAX2(compute_scratch_size,
1916 cmd_buffer->compute_scratch_size_needed);
1917 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1918 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1919 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1920 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1921 }
1922 }
1923
1924 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1925 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1926 sample_positions_needed, &initial_flush_preamble_cs,
1927 &initial_preamble_cs, &continue_preamble_cs);
1928 if (result != VK_SUCCESS)
1929 return result;
1930
1931 for (uint32_t i = 0; i < submitCount; i++) {
1932 struct radeon_winsys_cs **cs_array;
1933 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1934 bool can_patch = true;
1935 uint32_t advance;
1936 struct radv_winsys_sem_info sem_info;
1937
1938 result = radv_alloc_sem_info(&sem_info,
1939 pSubmits[i].waitSemaphoreCount,
1940 pSubmits[i].pWaitSemaphores,
1941 pSubmits[i].signalSemaphoreCount,
1942 pSubmits[i].pSignalSemaphores);
1943 if (result != VK_SUCCESS)
1944 return result;
1945
1946 if (!pSubmits[i].commandBufferCount) {
1947 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1948 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1949 &queue->device->empty_cs[queue->queue_family_index],
1950 1, NULL, NULL,
1951 &sem_info,
1952 false, base_fence);
1953 if (ret) {
1954 radv_loge("failed to submit CS %d\n", i);
1955 abort();
1956 }
1957 fence_emitted = true;
1958 }
1959 radv_free_sem_info(&sem_info);
1960 continue;
1961 }
1962
1963 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1964 (pSubmits[i].commandBufferCount));
1965
1966 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1967 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1968 pSubmits[i].pCommandBuffers[j]);
1969 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1970
1971 cs_array[j] = cmd_buffer->cs;
1972 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1973 can_patch = false;
1974 }
1975
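/* Submit in chunks of at most max_cs_submission command streams. With a
 * trace BO every chunk is a single CS, so radv_check_gpu_hangs() can
 * attribute a hang to a specific command buffer.
 */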
1976 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
1977 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
1978 advance = MIN2(max_cs_submission,
1979 pSubmits[i].commandBufferCount - j);
1980
1981 if (queue->device->trace_bo)
1982 *queue->device->trace_id_ptr = 0;
1983
1984 sem_info.cs_emit_wait = j == 0;
1985 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
1986
1987 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1988 advance, initial_preamble, continue_preamble_cs,
1989 &sem_info,
1990 can_patch, base_fence);
1991
1992 if (ret) {
1993 radv_loge("failed to submit CS %d\n", i);
1994 abort();
1995 }
1996 fence_emitted = true;
1997 if (queue->device->trace_bo) {
1998 radv_check_gpu_hangs(queue, cs_array[j]);
1999 }
2000 }
2001
2002 radv_free_sem_info(&sem_info);
2003 free(cs_array);
2004 }
2005
2006 if (fence) {
2007 if (!fence_emitted) {
2008 struct radv_winsys_sem_info sem_info = {0};
2009 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2010 &queue->device->empty_cs[queue->queue_family_index],
2011 1, NULL, NULL, &sem_info,
2012 false, base_fence);
2013 }
2014 fence->submitted = true;
2015 }
2016
2017 return VK_SUCCESS;
2018 }
2019
2020 VkResult radv_QueueWaitIdle(
2021 VkQueue _queue)
2022 {
2023 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2024
2025 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2026 radv_queue_family_to_ring(queue->queue_family_index),
2027 queue->queue_idx);
2028 return VK_SUCCESS;
2029 }
2030
2031 VkResult radv_DeviceWaitIdle(
2032 VkDevice _device)
2033 {
2034 RADV_FROM_HANDLE(radv_device, device, _device);
2035
2036 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2037 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2038 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2039 }
2040 }
2041 return VK_SUCCESS;
2042 }
2043
2044 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2045 VkInstance instance,
2046 const char* pName)
2047 {
2048 return radv_lookup_entrypoint(pName);
2049 }
2050
2051 /* The loader wants us to expose a second GetInstanceProcAddr function
2052 * to work around certain LD_PRELOAD issues seen in apps.
2053 */
2054 PUBLIC
2055 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2056 VkInstance instance,
2057 const char* pName);
2058
2059 PUBLIC
2060 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2061 VkInstance instance,
2062 const char* pName)
2063 {
2064 return radv_GetInstanceProcAddr(instance, pName);
2065 }
2066
2067 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2068 VkDevice device,
2069 const char* pName)
2070 {
2071 return radv_lookup_entrypoint(pName);
2072 }
2073
2074 bool radv_get_memory_fd(struct radv_device *device,
2075 struct radv_device_memory *memory,
2076 int *pFD)
2077 {
2078 struct radeon_bo_metadata metadata;
2079
2080 if (memory->image) {
2081 radv_init_metadata(device, memory->image, &metadata);
2082 device->ws->buffer_set_metadata(memory->bo, &metadata);
2083 }
2084
2085 return device->ws->buffer_get_fd(device->ws, memory->bo,
2086 pFD);
2087 }
2088
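/* Common allocation path: dedicated allocations and opaque-fd imports
 * are handled first; otherwise a fresh BO is created in the domain and
 * with the flags implied by the memory type.
 */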
2089 VkResult radv_alloc_memory(VkDevice _device,
2090 const VkMemoryAllocateInfo* pAllocateInfo,
2091 const VkAllocationCallbacks* pAllocator,
2092 enum radv_mem_flags_bits mem_flags,
2093 VkDeviceMemory* pMem)
2094 {
2095 RADV_FROM_HANDLE(radv_device, device, _device);
2096 struct radv_device_memory *mem;
2097 VkResult result;
2098 enum radeon_bo_domain domain;
2099 uint32_t flags = 0;
2100 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2101
2102 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2103
2104 if (pAllocateInfo->allocationSize == 0) {
2105 /* Apparently, this is allowed */
2106 *pMem = VK_NULL_HANDLE;
2107 return VK_SUCCESS;
2108 }
2109
2110 const VkImportMemoryFdInfoKHR *import_info =
2111 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2112 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2113 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2114
2115 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2116 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2117 if (mem == NULL)
2118 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2119
2120 if (dedicate_info) {
2121 mem->image = radv_image_from_handle(dedicate_info->image);
2122 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2123 } else {
2124 mem->image = NULL;
2125 mem->buffer = NULL;
2126 }
2127
2128 if (import_info) {
2129 assert(import_info->handleType ==
2130 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2131 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2132 NULL, NULL);
2133 if (!mem->bo) {
2134 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2135 goto fail;
2136 } else {
2137 close(import_info->fd);
2138 goto out_success;
2139 }
2140 }
2141
2142 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2143 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2144 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2145 domain = RADEON_DOMAIN_GTT;
2146 else
2147 domain = RADEON_DOMAIN_VRAM;
2148
2149 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2150 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2151 else
2152 flags |= RADEON_FLAG_CPU_ACCESS;
2153
2154 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2155 flags |= RADEON_FLAG_GTT_WC;
2156
2157 if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
2158 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2159
2160 if (!dedicate_info && !import_info)
2161 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2162
2163 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2164 domain, flags);
2165
2166 if (!mem->bo) {
2167 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2168 goto fail;
2169 }
2170 mem->type_index = mem_type_index;
2171 out_success:
2172 *pMem = radv_device_memory_to_handle(mem);
2173
2174 return VK_SUCCESS;
2175
2176 fail:
2177 vk_free2(&device->alloc, pAllocator, mem);
2178
2179 return result;
2180 }
2181
2182 VkResult radv_AllocateMemory(
2183 VkDevice _device,
2184 const VkMemoryAllocateInfo* pAllocateInfo,
2185 const VkAllocationCallbacks* pAllocator,
2186 VkDeviceMemory* pMem)
2187 {
2188 return radv_alloc_memory(_device, pAllocateInfo, pAllocator, 0, pMem);
2189 }
2190
2191 void radv_FreeMemory(
2192 VkDevice _device,
2193 VkDeviceMemory _mem,
2194 const VkAllocationCallbacks* pAllocator)
2195 {
2196 RADV_FROM_HANDLE(radv_device, device, _device);
2197 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2198
2199 if (mem == NULL)
2200 return;
2201
2202 device->ws->buffer_destroy(mem->bo);
2203 mem->bo = NULL;
2204
2205 vk_free2(&device->alloc, pAllocator, mem);
2206 }
2207
2208 VkResult radv_MapMemory(
2209 VkDevice _device,
2210 VkDeviceMemory _memory,
2211 VkDeviceSize offset,
2212 VkDeviceSize size,
2213 VkMemoryMapFlags flags,
2214 void** ppData)
2215 {
2216 RADV_FROM_HANDLE(radv_device, device, _device);
2217 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2218
2219 if (mem == NULL) {
2220 *ppData = NULL;
2221 return VK_SUCCESS;
2222 }
2223
2224 *ppData = device->ws->buffer_map(mem->bo);
2225 if (*ppData) {
2226 *ppData += offset;
2227 return VK_SUCCESS;
2228 }
2229
2230 return VK_ERROR_MEMORY_MAP_FAILED;
2231 }
2232
2233 void radv_UnmapMemory(
2234 VkDevice _device,
2235 VkDeviceMemory _memory)
2236 {
2237 RADV_FROM_HANDLE(radv_device, device, _device);
2238 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2239
2240 if (mem == NULL)
2241 return;
2242
2243 device->ws->buffer_unmap(mem->bo);
2244 }
2245
2246 VkResult radv_FlushMappedMemoryRanges(
2247 VkDevice _device,
2248 uint32_t memoryRangeCount,
2249 const VkMappedMemoryRange* pMemoryRanges)
2250 {
2251 return VK_SUCCESS;
2252 }
2253
2254 VkResult radv_InvalidateMappedMemoryRanges(
2255 VkDevice _device,
2256 uint32_t memoryRangeCount,
2257 const VkMappedMemoryRange* pMemoryRanges)
2258 {
2259 return VK_SUCCESS;
2260 }
2261
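/* Report all memory types as compatible with buffers. Sparse buffers
 * get 4096-byte alignment; everything else uses a conservative 16 bytes.
 */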
2262 void radv_GetBufferMemoryRequirements(
2263 VkDevice _device,
2264 VkBuffer _buffer,
2265 VkMemoryRequirements* pMemoryRequirements)
2266 {
2267 RADV_FROM_HANDLE(radv_device, device, _device);
2268 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2269
2270 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2271
2272 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2273 pMemoryRequirements->alignment = 4096;
2274 else
2275 pMemoryRequirements->alignment = 16;
2276
2277 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2278 }
2279
2280 void radv_GetBufferMemoryRequirements2KHR(
2281 VkDevice device,
2282 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2283 VkMemoryRequirements2KHR* pMemoryRequirements)
2284 {
2285 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2286 &pMemoryRequirements->memoryRequirements);
2287 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2288 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2289 switch (ext->sType) {
2290 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2291 VkMemoryDedicatedRequirementsKHR *req =
2292 (VkMemoryDedicatedRequirementsKHR *) ext;
2293 req->requiresDedicatedAllocation = buffer->shareable;
2294 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2295 break;
2296 }
2297 default:
2298 break;
2299 }
2300 }
2301 }
2302
2303 void radv_GetImageMemoryRequirements(
2304 VkDevice _device,
2305 VkImage _image,
2306 VkMemoryRequirements* pMemoryRequirements)
2307 {
2308 RADV_FROM_HANDLE(radv_device, device, _device);
2309 RADV_FROM_HANDLE(radv_image, image, _image);
2310
2311 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2312
2313 pMemoryRequirements->size = image->size;
2314 pMemoryRequirements->alignment = image->alignment;
2315 }
2316
2317 void radv_GetImageMemoryRequirements2KHR(
2318 VkDevice device,
2319 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2320 VkMemoryRequirements2KHR* pMemoryRequirements)
2321 {
2322 radv_GetImageMemoryRequirements(device, pInfo->image,
2323 &pMemoryRequirements->memoryRequirements);
2324
2325 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2326
2327 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2328 switch (ext->sType) {
2329 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2330 VkMemoryDedicatedRequirementsKHR *req =
2331 (VkMemoryDedicatedRequirementsKHR *) ext;
2332 req->requiresDedicatedAllocation = image->shareable;
2333 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2334 break;
2335 }
2336 default:
2337 break;
2338 }
2339 }
2340 }
2341
2342 void radv_GetImageSparseMemoryRequirements(
2343 VkDevice device,
2344 VkImage image,
2345 uint32_t* pSparseMemoryRequirementCount,
2346 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2347 {
2348 stub();
2349 }
2350
2351 void radv_GetImageSparseMemoryRequirements2KHR(
2352 VkDevice device,
2353 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2354 uint32_t* pSparseMemoryRequirementCount,
2355 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2356 {
2357 stub();
2358 }
2359
2360 void radv_GetDeviceMemoryCommitment(
2361 VkDevice device,
2362 VkDeviceMemory memory,
2363 VkDeviceSize* pCommittedMemoryInBytes)
2364 {
2365 *pCommittedMemoryInBytes = 0;
2366 }
2367
2368 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2369 uint32_t bindInfoCount,
2370 const VkBindBufferMemoryInfoKHR *pBindInfos)
2371 {
2372 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2373 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2374 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2375
2376 if (mem) {
2377 buffer->bo = mem->bo;
2378 buffer->offset = pBindInfos[i].memoryOffset;
2379 } else {
2380 buffer->bo = NULL;
2381 }
2382 }
2383 return VK_SUCCESS;
2384 }
2385
2386 VkResult radv_BindBufferMemory(
2387 VkDevice device,
2388 VkBuffer buffer,
2389 VkDeviceMemory memory,
2390 VkDeviceSize memoryOffset)
2391 {
2392 const VkBindBufferMemoryInfoKHR info = {
2393 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2394 .buffer = buffer,
2395 .memory = memory,
2396 .memoryOffset = memoryOffset
2397 };
2398
2399 return radv_BindBufferMemory2KHR(device, 1, &info);
2400 }
2401
2402 VkResult radv_BindImageMemory2KHR(VkDevice device,
2403 uint32_t bindInfoCount,
2404 const VkBindImageMemoryInfoKHR *pBindInfos)
2405 {
2406 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2407 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2408 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2409
2410 if (mem) {
2411 image->bo = mem->bo;
2412 image->offset = pBindInfos[i].memoryOffset;
2413 } else {
2414 image->bo = NULL;
2415 image->offset = 0;
2416 }
2417 }
2418 return VK_SUCCESS;
2419 }
2420
2421
2422 VkResult radv_BindImageMemory(
2423 VkDevice device,
2424 VkImage image,
2425 VkDeviceMemory memory,
2426 VkDeviceSize memoryOffset)
2427 {
2428 const VkBindImageMemoryInfoKHR info = {
2429 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2430 .image = image,
2431 .memory = memory,
2432 .memoryOffset = memoryOffset
2433 };
2434
2435 return radv_BindImageMemory2KHR(device, 1, &info);
2436 }
2437
2438
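/* Each VkSparseMemoryBind maps directly onto a winsys virtual-bind
 * call; a VK_NULL_HANDLE memory unbinds the range.
 */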
2439 static void
2440 radv_sparse_buffer_bind_memory(struct radv_device *device,
2441 const VkSparseBufferMemoryBindInfo *bind)
2442 {
2443 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2444
2445 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2446 struct radv_device_memory *mem = NULL;
2447
2448 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2449 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2450
2451 device->ws->buffer_virtual_bind(buffer->bo,
2452 bind->pBinds[i].resourceOffset,
2453 bind->pBinds[i].size,
2454 mem ? mem->bo : NULL,
2455 bind->pBinds[i].memoryOffset);
2456 }
2457 }
2458
2459 static void
2460 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2461 const VkSparseImageOpaqueMemoryBindInfo *bind)
2462 {
2463 RADV_FROM_HANDLE(radv_image, image, bind->image);
2464
2465 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2466 struct radv_device_memory *mem = NULL;
2467
2468 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2469 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2470
2471 device->ws->buffer_virtual_bind(image->bo,
2472 bind->pBinds[i].resourceOffset,
2473 bind->pBinds[i].size,
2474 mem ? mem->bo : NULL,
2475 bind->pBinds[i].memoryOffset);
2476 }
2477 }
2478
2479 VkResult radv_QueueBindSparse(
2480 VkQueue _queue,
2481 uint32_t bindInfoCount,
2482 const VkBindSparseInfo* pBindInfo,
2483 VkFence _fence)
2484 {
2485 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2486 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2487 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2488 bool fence_emitted = false;
2489
2490 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2491 struct radv_winsys_sem_info sem_info;
2492 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2493 radv_sparse_buffer_bind_memory(queue->device,
2494 pBindInfo[i].pBufferBinds + j);
2495 }
2496
2497 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2498 radv_sparse_image_opaque_bind_memory(queue->device,
2499 pBindInfo[i].pImageOpaqueBinds + j);
2500 }
2501
2502 VkResult result;
2503 result = radv_alloc_sem_info(&sem_info,
2504 pBindInfo[i].waitSemaphoreCount,
2505 pBindInfo[i].pWaitSemaphores,
2506 pBindInfo[i].signalSemaphoreCount,
2507 pBindInfo[i].pSignalSemaphores);
2508 if (result != VK_SUCCESS)
2509 return result;
2510
2511 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2512 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2513 &queue->device->empty_cs[queue->queue_family_index],
2514 1, NULL, NULL,
2515 &sem_info,
2516 false, base_fence);
2517 fence_emitted = true;
2518 if (fence)
2519 fence->submitted = true;
2520 }
2521
2522 radv_free_sem_info(&sem_info);
2523
2524 }
2525
2526 if (fence && !fence_emitted) {
2527 fence->signalled = true;
2528 }
2529
2530 return VK_SUCCESS;
2531 }
2532
2533 VkResult radv_CreateFence(
2534 VkDevice _device,
2535 const VkFenceCreateInfo* pCreateInfo,
2536 const VkAllocationCallbacks* pAllocator,
2537 VkFence* pFence)
2538 {
2539 RADV_FROM_HANDLE(radv_device, device, _device);
2540 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2541 sizeof(*fence), 8,
2542 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2543
2544 if (!fence)
2545 return VK_ERROR_OUT_OF_HOST_MEMORY;
2546
2547 memset(fence, 0, sizeof(*fence));
2548 fence->submitted = false;
2549 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2550 fence->fence = device->ws->create_fence();
2551 if (!fence->fence) {
2552 vk_free2(&device->alloc, pAllocator, fence);
2553 return VK_ERROR_OUT_OF_HOST_MEMORY;
2554 }
2555
2556 *pFence = radv_fence_to_handle(fence);
2557
2558 return VK_SUCCESS;
2559 }
2560
2561 void radv_DestroyFence(
2562 VkDevice _device,
2563 VkFence _fence,
2564 const VkAllocationCallbacks* pAllocator)
2565 {
2566 RADV_FROM_HANDLE(radv_device, device, _device);
2567 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2568
2569 if (!fence)
2570 return;
2571 device->ws->destroy_fence(fence->fence);
2572 vk_free2(&device->alloc, pAllocator, fence);
2573 }
2574
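/* Convert the caller's relative timeout into an absolute CLOCK_MONOTONIC
 * deadline, clamping so that the addition cannot overflow UINT64_MAX.
 */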
2575 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2576 {
2577 uint64_t current_time;
2578 struct timespec tv;
2579
2580 clock_gettime(CLOCK_MONOTONIC, &tv);
2581 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2582
2583 timeout = MIN2(UINT64_MAX - current_time, timeout);
2584
2585 return current_time + timeout;
2586 }
2587
2588 VkResult radv_WaitForFences(
2589 VkDevice _device,
2590 uint32_t fenceCount,
2591 const VkFence* pFences,
2592 VkBool32 waitAll,
2593 uint64_t timeout)
2594 {
2595 RADV_FROM_HANDLE(radv_device, device, _device);
2596 timeout = radv_get_absolute_timeout(timeout);
2597
2598 if (!waitAll && fenceCount > 1) {
2599 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2600 }
2601
2602 for (uint32_t i = 0; i < fenceCount; ++i) {
2603 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2604 bool expired = false;
2605
2606 if (fence->signalled)
2607 continue;
2608
2609 if (!fence->submitted)
2610 return VK_TIMEOUT;
2611
2612 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2613 if (!expired)
2614 return VK_TIMEOUT;
2615
2616 fence->signalled = true;
2617 }
2618
2619 return VK_SUCCESS;
2620 }
2621
2622 VkResult radv_ResetFences(VkDevice device,
2623 uint32_t fenceCount,
2624 const VkFence *pFences)
2625 {
2626 for (unsigned i = 0; i < fenceCount; ++i) {
2627 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2628 fence->submitted = fence->signalled = false;
2629 }
2630
2631 return VK_SUCCESS;
2632 }
2633
2634 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2635 {
2636 RADV_FROM_HANDLE(radv_device, device, _device);
2637 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2638
2639 if (fence->signalled)
2640 return VK_SUCCESS;
2641 if (!fence->submitted)
2642 return VK_NOT_READY;
2643
2644 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2645 return VK_NOT_READY;
2646
2647 return VK_SUCCESS;
2648 }
2649
2650
2651 // Queue semaphore functions
2652
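/* Semaphores come in two flavours: exportable ones are backed by a
 * kernel syncobj, the rest by a legacy winsys semaphore.
 */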
2653 VkResult radv_CreateSemaphore(
2654 VkDevice _device,
2655 const VkSemaphoreCreateInfo* pCreateInfo,
2656 const VkAllocationCallbacks* pAllocator,
2657 VkSemaphore* pSemaphore)
2658 {
2659 RADV_FROM_HANDLE(radv_device, device, _device);
2660 const VkExportSemaphoreCreateInfoKHR *export =
2661 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
2662 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
2663 export ? export->handleTypes : 0;
2664
2665 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
2666 sizeof(*sem), 8,
2667 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2668 if (!sem)
2669 return VK_ERROR_OUT_OF_HOST_MEMORY;
2670
2671 sem->temp_syncobj = 0;
2672 /* create a syncobject if we are going to export this semaphore */
2673 if (handleTypes) {
2674 assert (device->physical_device->rad_info.has_syncobj);
2675 assert (handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2676 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
2677 if (ret) {
2678 vk_free2(&device->alloc, pAllocator, sem);
2679 return VK_ERROR_OUT_OF_HOST_MEMORY;
2680 }
2681 sem->sem = NULL;
2682 } else {
2683 sem->sem = device->ws->create_sem(device->ws);
2684 if (!sem->sem) {
2685 vk_free2(&device->alloc, pAllocator, sem);
2686 return VK_ERROR_OUT_OF_HOST_MEMORY;
2687 }
2688 sem->syncobj = 0;
2689 }
2690
2691 *pSemaphore = radv_semaphore_to_handle(sem);
2692 return VK_SUCCESS;
2693 }
2694
2695 void radv_DestroySemaphore(
2696 VkDevice _device,
2697 VkSemaphore _semaphore,
2698 const VkAllocationCallbacks* pAllocator)
2699 {
2700 RADV_FROM_HANDLE(radv_device, device, _device);
2701 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
2702 if (!_semaphore)
2703 return;
2704
2705 if (sem->syncobj)
2706 device->ws->destroy_syncobj(device->ws, sem->syncobj);
2707 else
2708 device->ws->destroy_sem(sem->sem);
2709 vk_free2(&device->alloc, pAllocator, sem);
2710 }
2711
2712 VkResult radv_CreateEvent(
2713 VkDevice _device,
2714 const VkEventCreateInfo* pCreateInfo,
2715 const VkAllocationCallbacks* pAllocator,
2716 VkEvent* pEvent)
2717 {
2718 RADV_FROM_HANDLE(radv_device, device, _device);
2719 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2720 sizeof(*event), 8,
2721 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2722
2723 if (!event)
2724 return VK_ERROR_OUT_OF_HOST_MEMORY;
2725
2726 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2727 RADEON_DOMAIN_GTT,
2728 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
2729 if (!event->bo) {
2730 vk_free2(&device->alloc, pAllocator, event);
2731 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2732 }
2733
2734 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2735
2736 *pEvent = radv_event_to_handle(event);
2737
2738 return VK_SUCCESS;
2739 }
2740
2741 void radv_DestroyEvent(
2742 VkDevice _device,
2743 VkEvent _event,
2744 const VkAllocationCallbacks* pAllocator)
2745 {
2746 RADV_FROM_HANDLE(radv_device, device, _device);
2747 RADV_FROM_HANDLE(radv_event, event, _event);
2748
2749 if (!event)
2750 return;
2751 device->ws->buffer_destroy(event->bo);
2752 vk_free2(&device->alloc, pAllocator, event);
2753 }
2754
2755 VkResult radv_GetEventStatus(
2756 VkDevice _device,
2757 VkEvent _event)
2758 {
2759 RADV_FROM_HANDLE(radv_event, event, _event);
2760
2761 if (*event->map == 1)
2762 return VK_EVENT_SET;
2763 return VK_EVENT_RESET;
2764 }
2765
2766 VkResult radv_SetEvent(
2767 VkDevice _device,
2768 VkEvent _event)
2769 {
2770 RADV_FROM_HANDLE(radv_event, event, _event);
2771 *event->map = 1;
2772
2773 return VK_SUCCESS;
2774 }
2775
2776 VkResult radv_ResetEvent(
2777 VkDevice _device,
2778 VkEvent _event)
2779 {
2780 RADV_FROM_HANDLE(radv_event, event, _event);
2781 *event->map = 0;
2782
2783 return VK_SUCCESS;
2784 }
2785
2786 VkResult radv_CreateBuffer(
2787 VkDevice _device,
2788 const VkBufferCreateInfo* pCreateInfo,
2789 const VkAllocationCallbacks* pAllocator,
2790 VkBuffer* pBuffer)
2791 {
2792 RADV_FROM_HANDLE(radv_device, device, _device);
2793 struct radv_buffer *buffer;
2794
2795 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2796
2797 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2798 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2799 if (buffer == NULL)
2800 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2801
2802 buffer->size = pCreateInfo->size;
2803 buffer->usage = pCreateInfo->usage;
2804 buffer->bo = NULL;
2805 buffer->offset = 0;
2806 buffer->flags = pCreateInfo->flags;
2807
2808 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
2809 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
2810
2811 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2812 buffer->bo = device->ws->buffer_create(device->ws,
2813 align64(buffer->size, 4096),
2814 4096, 0, RADEON_FLAG_VIRTUAL);
2815 if (!buffer->bo) {
2816 vk_free2(&device->alloc, pAllocator, buffer);
2817 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2818 }
2819 }
2820
2821 *pBuffer = radv_buffer_to_handle(buffer);
2822
2823 return VK_SUCCESS;
2824 }
2825
2826 void radv_DestroyBuffer(
2827 VkDevice _device,
2828 VkBuffer _buffer,
2829 const VkAllocationCallbacks* pAllocator)
2830 {
2831 RADV_FROM_HANDLE(radv_device, device, _device);
2832 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2833
2834 if (!buffer)
2835 return;
2836
2837 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2838 device->ws->buffer_destroy(buffer->bo);
2839
2840 vk_free2(&device->alloc, pAllocator, buffer);
2841 }
2842
2843 static inline unsigned
2844 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2845 {
2846 if (stencil)
2847 return image->surface.u.legacy.stencil_tiling_index[level];
2848 else
2849 return image->surface.u.legacy.tiling_index[level];
2850 }
2851
2852 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2853 {
2854 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2855 }
2856
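/* Fill the CB_COLOR* register values for a color attachment. GFX9
 * derives the layout from the swizzle mode; older chips go through the
 * legacy tiling tables.
 */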
2857 static void
2858 radv_initialise_color_surface(struct radv_device *device,
2859 struct radv_color_buffer_info *cb,
2860 struct radv_image_view *iview)
2861 {
2862 const struct vk_format_description *desc;
2863 unsigned ntype, format, swap, endian;
2864 unsigned blend_clamp = 0, blend_bypass = 0;
2865 uint64_t va;
2866 const struct radeon_surf *surf = &iview->image->surface;
2867
2868 desc = vk_format_description(iview->vk_format);
2869
2870 memset(cb, 0, sizeof(*cb));
2871
2872 /* Intensity is implemented as Red, so treat it that way. */
2873 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2874
2875 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2876
2877 cb->cb_color_base = va >> 8;
2878
2879 if (device->physical_device->rad_info.chip_class >= GFX9) {
2880 struct gfx9_surf_meta_flags meta;
2881 if (iview->image->dcc_offset)
2882 meta = iview->image->surface.u.gfx9.dcc;
2883 else
2884 meta = iview->image->surface.u.gfx9.cmask;
2885
2886 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2887 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2888 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2889 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2890
2891 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
2892 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2893 } else {
2894 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2895 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2896
2897 cb->cb_color_base += level_info->offset >> 8;
2898 if (level_info->mode == RADEON_SURF_MODE_2D)
2899 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2900
2901 pitch_tile_max = level_info->nblk_x / 8 - 1;
2902 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2903 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2904
2905 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2906 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2907 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2908
2909 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2910 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2911
2912 if (iview->image->fmask.size) {
2913 if (device->physical_device->rad_info.chip_class >= CIK)
2914 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2915 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2916 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2917 } else {
2918 /* This must be set for fast clear to work without FMASK. */
2919 if (device->physical_device->rad_info.chip_class >= CIK)
2920 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2921 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2922 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2923 }
2924 }
2925
2926 /* CMASK variables */
2927 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2928 va += iview->image->cmask.offset;
2929 cb->cb_color_cmask = va >> 8;
2930
2931 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2932 va += iview->image->dcc_offset;
2933 cb->cb_dcc_base = va >> 8;
2934 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
2935
2936 uint32_t max_slice = radv_surface_layer_count(iview);
2937 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2938 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2939
2940 if (iview->image->info.samples > 1) {
2941 unsigned log_samples = util_logbase2(iview->image->info.samples);
2942
2943 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2944 S_028C74_NUM_FRAGMENTS(log_samples);
2945 }
2946
2947 if (iview->image->fmask.size) {
2948 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2949 cb->cb_color_fmask = va >> 8;
2950 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
2951 } else {
2952 cb->cb_color_fmask = cb->cb_color_base;
2953 }
2954
2955 ntype = radv_translate_color_numformat(iview->vk_format,
2956 desc,
2957 vk_format_get_first_non_void_channel(iview->vk_format));
2958 format = radv_translate_colorformat(iview->vk_format);
2959 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2960 radv_finishme("Illegal color\n");
2961 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2962 endian = radv_colorformat_endian_swap(format);
2963
2964 /* blend clamp should be set for all NORM/SRGB types */
2965 if (ntype == V_028C70_NUMBER_UNORM ||
2966 ntype == V_028C70_NUMBER_SNORM ||
2967 ntype == V_028C70_NUMBER_SRGB)
2968 blend_clamp = 1;
2969
2970 /* set blend bypass according to docs if SINT/UINT or
2971 8/24 COLOR variants */
2972 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2973 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2974 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2975 blend_clamp = 0;
2976 blend_bypass = 1;
2977 }
2978 #if 0
2979 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2980 (format == V_028C70_COLOR_8 ||
2981 format == V_028C70_COLOR_8_8 ||
2982 format == V_028C70_COLOR_8_8_8_8))
2983 ->color_is_int8 = true;
2984 #endif
2985 cb->cb_color_info = S_028C70_FORMAT(format) |
2986 S_028C70_COMP_SWAP(swap) |
2987 S_028C70_BLEND_CLAMP(blend_clamp) |
2988 S_028C70_BLEND_BYPASS(blend_bypass) |
2989 S_028C70_SIMPLE_FLOAT(1) |
2990 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2991 ntype != V_028C70_NUMBER_SNORM &&
2992 ntype != V_028C70_NUMBER_SRGB &&
2993 format != V_028C70_COLOR_8_24 &&
2994 format != V_028C70_COLOR_24_8) |
2995 S_028C70_NUMBER_TYPE(ntype) |
2996 S_028C70_ENDIAN(endian);
2997 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
2998 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2999 if (device->physical_device->rad_info.chip_class == SI) {
3000 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3001 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3002 }
3003 }
3004
3005 if (iview->image->cmask.size &&
3006 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3007 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3008
3009 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3010 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3011
3012 if (device->physical_device->rad_info.chip_class >= VI) {
3013 unsigned max_uncompressed_block_size = 2;
3014 if (iview->image->info.samples > 1) {
3015 if (iview->image->surface.bpe == 1)
3016 max_uncompressed_block_size = 0;
3017 else if (iview->image->surface.bpe == 2)
3018 max_uncompressed_block_size = 1;
3019 }
3020
3021 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3022 S_028C78_INDEPENDENT_64B_BLOCKS(1);
3023 }
3024
3025 /* This must be set for fast clear to work without FMASK. */
3026 if (!iview->image->fmask.size &&
3027 device->physical_device->rad_info.chip_class == SI) {
3028 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3029 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3030 }
3031
3032 if (device->physical_device->rad_info.chip_class >= GFX9) {
3033 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3034 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3035
3036 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3037 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3038 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3039 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3040 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3041 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3042
3043 cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3044
3045 }
3046 }
3047
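/* Fill the DB_* register values for a depth/stencil attachment,
 * including the polygon-offset scale derived from the depth format and
 * the HTILE metadata setup when HTILE is enabled for this level.
 */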
3048 static void
3049 radv_initialise_ds_surface(struct radv_device *device,
3050 struct radv_ds_buffer_info *ds,
3051 struct radv_image_view *iview)
3052 {
3053 unsigned level = iview->base_mip;
3054 unsigned format, stencil_format;
3055 uint64_t va, s_offs, z_offs;
3056 bool stencil_only = false;
3057 memset(ds, 0, sizeof(*ds));
3058 switch (iview->image->vk_format) {
3059 case VK_FORMAT_D24_UNORM_S8_UINT:
3060 case VK_FORMAT_X8_D24_UNORM_PACK32:
3061 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3062 ds->offset_scale = 2.0f;
3063 break;
3064 case VK_FORMAT_D16_UNORM:
3065 case VK_FORMAT_D16_UNORM_S8_UINT:
3066 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3067 ds->offset_scale = 4.0f;
3068 break;
3069 case VK_FORMAT_D32_SFLOAT:
3070 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3071 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3072 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3073 ds->offset_scale = 1.0f;
3074 break;
3075 case VK_FORMAT_S8_UINT:
3076 stencil_only = true;
3077 break;
3078 default:
3079 break;
3080 }
3081
3082 format = radv_translate_dbformat(iview->image->vk_format);
3083 stencil_format = iview->image->surface.has_stencil ?
3084 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3085
3086 uint32_t max_slice = radv_surface_layer_count(iview);
3087 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3088 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
3089
3090 ds->db_htile_data_base = 0;
3091 ds->db_htile_surface = 0;
3092
3093 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3094 s_offs = z_offs = va;
3095
3096 if (device->physical_device->rad_info.chip_class >= GFX9) {
3097 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3098 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3099
3100 ds->db_z_info = S_028038_FORMAT(format) |
3101 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3102 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3103 S_028038_MAXMIP(iview->image->info.levels - 1);
3104 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3105 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3106
3107 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3108 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3109 ds->db_depth_view |= S_028008_MIPID(level);
3110
3111 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3112 S_02801C_Y_MAX(iview->image->info.height - 1);
3113
3114 if (radv_htile_enabled(iview->image, level)) {
3115 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3116
3117 if (iview->image->tc_compatible_htile) {
3118 unsigned max_zplanes = 4;
3119
3120 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3121 iview->image->info.samples > 1)
3122 max_zplanes = 2;
3123
3124 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3125 S_028038_ITERATE_FLUSH(1);
3126 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3127 }
3128
3129 if (!iview->image->surface.has_stencil)
3130 /* Use all of the htile_buffer for depth if there's no stencil. */
3131 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3132 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3133 iview->image->htile_offset;
3134 ds->db_htile_data_base = va >> 8;
3135 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3136 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3137 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3138 }
3139 } else {
3140 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3141
3142 if (stencil_only)
3143 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3144
3145 z_offs += iview->image->surface.u.legacy.level[level].offset;
3146 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3147
3148 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3149 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3150 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3151
3152 if (iview->image->info.samples > 1)
3153 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3154
3155 if (device->physical_device->rad_info.chip_class >= CIK) {
3156 struct radeon_info *info = &device->physical_device->rad_info;
3157 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3158 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3159 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3160 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3161 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3162 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3163
3164 if (stencil_only)
3165 tile_mode = stencil_tile_mode;
3166
3167 ds->db_depth_info |=
3168 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3169 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3170 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3171 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3172 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3173 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3174 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3175 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3176 } else {
3177 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3178 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3179 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3180 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3181 if (stencil_only)
3182 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3183 }
3184
3185 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3186 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3187 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3188
3189 if (radv_htile_enabled(iview->image, level)) {
3190 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3191
3192 if (!iview->image->surface.has_stencil &&
3193 !iview->image->tc_compatible_htile)
3194 /* Use all of the htile_buffer for depth if there's no stencil. */
3195 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3196
3197 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3198 iview->image->htile_offset;
3199 ds->db_htile_data_base = va >> 8;
3200 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3201
3202 if (iview->image->tc_compatible_htile) {
3203 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3204
3205 if (iview->image->info.samples <= 1)
3206 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3207 else if (iview->image->info.samples <= 4)
3208 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3209 else
3210 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3211 }
3212 }
3213 }
3214
3215 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3216 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3217 }
3218
3219 VkResult radv_CreateFramebuffer(
3220 VkDevice _device,
3221 const VkFramebufferCreateInfo* pCreateInfo,
3222 const VkAllocationCallbacks* pAllocator,
3223 VkFramebuffer* pFramebuffer)
3224 {
3225 RADV_FROM_HANDLE(radv_device, device, _device);
3226 struct radv_framebuffer *framebuffer;
3227
3228 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3229
3230 size_t size = sizeof(*framebuffer) +
3231 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3232 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3233 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3234 if (framebuffer == NULL)
3235 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3236
3237 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3238 framebuffer->width = pCreateInfo->width;
3239 framebuffer->height = pCreateInfo->height;
3240 framebuffer->layers = pCreateInfo->layers;
3241 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3242 VkImageView _iview = pCreateInfo->pAttachments[i];
3243 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3244 framebuffer->attachments[i].attachment = iview;
3245 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3246 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3247 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3248 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3249 }
3250 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3251 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3252 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3253 }
3254
3255 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3256 return VK_SUCCESS;
3257 }
3258
3259 void radv_DestroyFramebuffer(
3260 VkDevice _device,
3261 VkFramebuffer _fb,
3262 const VkAllocationCallbacks* pAllocator)
3263 {
3264 RADV_FROM_HANDLE(radv_device, device, _device);
3265 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3266
3267 if (!fb)
3268 return;
3269 vk_free2(&device->alloc, pAllocator, fb);
3270 }
3271
3272 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3273 {
3274 switch (address_mode) {
3275 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3276 return V_008F30_SQ_TEX_WRAP;
3277 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3278 return V_008F30_SQ_TEX_MIRROR;
3279 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3280 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3281 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3282 return V_008F30_SQ_TEX_CLAMP_BORDER;
3283 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3284 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3285 default:
3286 unreachable("illegal tex wrap mode");
3287 break;
3288 }
3289 }
3290
3291 static unsigned
3292 radv_tex_compare(VkCompareOp op)
3293 {
3294 switch (op) {
3295 case VK_COMPARE_OP_NEVER:
3296 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3297 case VK_COMPARE_OP_LESS:
3298 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3299 case VK_COMPARE_OP_EQUAL:
3300 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3301 case VK_COMPARE_OP_LESS_OR_EQUAL:
3302 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3303 case VK_COMPARE_OP_GREATER:
3304 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3305 case VK_COMPARE_OP_NOT_EQUAL:
3306 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3307 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3308 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3309 case VK_COMPARE_OP_ALWAYS:
3310 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3311 default:
3312 unreachable("illegal compare mode");
3313 break;
3314 }
3315 }
3316
3317 static unsigned
3318 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3319 {
3320 switch (filter) {
3321 case VK_FILTER_NEAREST:
3322 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3323 V_008F38_SQ_TEX_XY_FILTER_POINT);
3324 case VK_FILTER_LINEAR:
3325 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3326 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3326 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3327 case VK_FILTER_CUBIC_IMG:
3328 default:
3329 fprintf(stderr, "illegal texture filter");
3330 return 0;
3331 }
3332 }
3333
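/* Translate a Vulkan mipmap mode to the hardware Z (mip) filter. */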
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
        switch (mode) {
        case VK_SAMPLER_MIPMAP_MODE_NEAREST:
                return V_008F38_SQ_TEX_Z_FILTER_POINT;
        case VK_SAMPLER_MIPMAP_MODE_LINEAR:
                return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
        default:
                return V_008F38_SQ_TEX_Z_FILTER_NONE;
        }
}

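/* Translate a Vulkan border color to the hardware encoding.  Unhandled
 * values fall back to 0.
 */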
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
        switch (bcolor) {
        case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
        case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
                return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
        case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
        case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
                return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
        case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
        case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
                return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
        default:
                break;
        }
        return 0;
}

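/* Convert a maximum anisotropy sample count to the log2-encoded ratio the
 * hardware expects: 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4.
 */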
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
        if (filter < 2)
                return 0;
        if (filter < 4)
                return 1;
        if (filter < 8)
                return 2;
        if (filter < 16)
                return 3;
        return 4;
}

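/* Pack the Vulkan sampler state into the four dwords of the hardware
 * sampler descriptor.
 */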
static void
radv_init_sampler(struct radv_device *device,
                  struct radv_sampler *sampler,
                  const VkSamplerCreateInfo *pCreateInfo)
{
        uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
                             (uint32_t) pCreateInfo->maxAnisotropy : 0;
        uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
        bool is_vi = (device->physical_device->rad_info.chip_class >= VI);

        sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
                             S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
                             S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
                             S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
                             S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
                             S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
                             S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
                             S_008F30_ANISO_BIAS(max_aniso_ratio) |
                             S_008F30_DISABLE_CUBE_WRAP(0) |
                             S_008F30_COMPAT_MODE(is_vi));
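        /* min/max LOD are clamped to [0, 15] and stored with 8 fractional
         * bits; assuming S_FIXED(x, 8) == x * 256, a maxLod of 2.5 would be
         * encoded as 0x280.
         */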
        sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
                             S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
                             S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
        sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
                             S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
                             S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
                             S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
                             S_008F38_MIP_POINT_PRECLAMP(0) |
                             S_008F38_DISABLE_LSB_CEIL(1) |
                             S_008F38_FILTER_PREC_FIX(1) |
                             S_008F38_ANISO_OVERRIDE(is_vi));
        sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
                             S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}

VkResult radv_CreateSampler(
        VkDevice                                    _device,
        const VkSamplerCreateInfo*                  pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkSampler*                                  pSampler)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_sampler *sampler;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

        sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (!sampler)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        radv_init_sampler(device, sampler, pCreateInfo);
        *pSampler = radv_sampler_to_handle(sampler);

        return VK_SUCCESS;
}

void radv_DestroySampler(
        VkDevice                                    _device,
        VkSampler                                   _sampler,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

        if (!sampler)
                return;
        vk_free2(&device->alloc, pAllocator, sampler);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
        /* For the full details on loader interface versioning, see
         * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
         * What follows is a condensed summary, to help you navigate the large
         * and confusing official doc.
         *
         *   - Loader interface v0 is incompatible with later versions. We
         *     don't support it.
         *
         *   - In loader interface v1:
         *       - The first ICD entrypoint called by the loader is
         *         vk_icdGetInstanceProcAddr(). The ICD must statically expose
         *         this entrypoint.
         *       - The ICD must statically expose no other Vulkan symbol
         *         unless it is linked with -Bsymbolic.
         *       - Each dispatchable Vulkan handle created by the ICD must be
         *         a pointer to a struct whose first member is VK_LOADER_DATA.
         *         The ICD must initialize VK_LOADER_DATA.loadMagic to
         *         ICD_LOADER_MAGIC.
         *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
         *         vkDestroySurfaceKHR(). The ICD must be capable of working
         *         with such loader-managed surfaces.
         *
         *   - Loader interface v2 differs from v1 in:
         *       - The first ICD entrypoint called by the loader is
         *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
         *         statically expose this entrypoint.
         *
         *   - Loader interface v3 differs from v2 in:
         *       - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
         *         vkDestroySurfaceKHR(), and all other APIs that use
         *         VkSurfaceKHR, because the loader no longer does so.
         */
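        /* radv supports up to v3; the negotiated version is the lower of what
         * the loader and the ICD each support.
         */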
        *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
        return VK_SUCCESS;
}

VkResult radv_GetMemoryFdKHR(VkDevice _device,
                             const VkMemoryGetFdInfoKHR *pGetFdInfo,
                             int *pFD)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

        assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

        /* We support only one handle type. */
        assert(pGetFdInfo->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);

        bool ret = radv_get_memory_fd(device, memory, pFD);
        if (!ret)
                return VK_ERROR_OUT_OF_DEVICE_MEMORY;
        return VK_SUCCESS;
}

VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
                                       VkExternalMemoryHandleTypeFlagBitsKHR handleType,
                                       int fd,
                                       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
        /* The valid usage section for this function says:
         *
         *    "handleType must not be one of the handle types defined as
         *    opaque."
         *
         * Since we only handle opaque handles for now, there are no FD
         * properties.
         */
        return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
}

VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
                                   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
        uint32_t syncobj_handle = 0;

        assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);

        int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
        if (ret != 0)
                return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;

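        /* A temporary import only overrides the semaphore's payload until the
         * next wait on it, so keep it separate from the permanent payload.
         */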
        if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
                sem->temp_syncobj = syncobj_handle;
        } else {
                sem->syncobj = syncobj_handle;
        }
        close(pImportSemaphoreFdInfo->fd);
        return VK_SUCCESS;
}

VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
                                const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
                                int *pFd)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
        int ret;
        uint32_t syncobj_handle;

        assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
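        /* Export the temporary payload if one is active, the permanent one
         * otherwise.
         */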
        if (sem->temp_syncobj)
                syncobj_handle = sem->temp_syncobj;
        else
                syncobj_handle = sem->syncobj;

        ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
        if (ret)
                return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
        return VK_SUCCESS;
}

void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
        VkPhysicalDevice                            physicalDevice,
        const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
        VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
{
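        /* Only opaque FDs are supported; they can be both exported and
         * imported, including re-export of an imported payload.
         */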
        if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
                pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
                pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
                        VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
        } else {
                pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
                pExternalSemaphoreProperties->compatibleHandleTypes = 0;
                pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
        }
}