radv: add nosisched debug option
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}
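
/* Resulting cache UUID layout (a sketch derived from the code above, not a
 * stable ABI): bytes 0-3 hold the Mesa build timestamp, bytes 4-7 the LLVM
 * build timestamp, bytes 8-9 the radeon_family, and the remainder the
 * literal string "radv". Changing any component invalidates on-disk
 * pipeline caches. */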

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static const char *
get_chip_name(enum radeon_family family)
{
	switch (family) {
	case CHIP_TAHITI: return "AMD RADV TAHITI";
	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
	case CHIP_VERDE: return "AMD RADV CAPE VERDE";
	case CHIP_OLAND: return "AMD RADV OLAND";
	case CHIP_HAINAN: return "AMD RADV HAINAN";
	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
	case CHIP_KAVERI: return "AMD RADV KAVERI";
	case CHIP_KABINI: return "AMD RADV KABINI";
	case CHIP_HAWAII: return "AMD RADV HAWAII";
	case CHIP_MULLINS: return "AMD RADV MULLINS";
	case CHIP_TONGA: return "AMD RADV TONGA";
	case CHIP_ICELAND: return "AMD RADV ICELAND";
	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
	case CHIP_FIJI: return "AMD RADV FIJI";
	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
	case CHIP_STONEY: return "AMD RADV STONEY";
	case CHIP_VEGA10: return "AMD RADV VEGA";
	case CHIP_RAVEN: return "AMD RADV RAVEN";
	default: return "AMD RADV unknown";
	}
}

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
	                                  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}
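
/* Summary (derived from the code above): up to three heaps -- CPU-invisible
 * VRAM, CPU-visible VRAM, and GTT -- and up to four memory types -- VRAM,
 * GTT write-combined, visible VRAM, and GTT cached -- each type referring
 * to its backing heap via heapIndex. */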

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}

	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	device->name = get_chip_name(device->rad_info.family);

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		radv_finish_wsi(device);
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
	/* The GPU ID is already embedded in the UUID, so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	radv_physical_device_init_mem_types(device);
	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{NULL, 0}
};
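
/* These names are matched against the comma-separated RADV_DEBUG
 * environment variable (see radv_CreateInstance below). A sketch of typical
 * usage, assuming an arbitrary Vulkan application binary:
 *
 *   RADV_DEBUG=nosisched,shaders ./my_vulkan_app
 *
 * parse_debug_string() ORs the flag of every recognized name into
 * instance->debug_flags. */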

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{NULL, 0}
};
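
/* Same pattern for RADV_PERFTEST, e.g. RADV_PERFTEST=sisched to opt in to
 * the LLVM SI machine scheduler; the nosisched debug flag above can veto
 * it again. */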

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_instance_extension_supported(ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
		/* Disable sisched when the user requests it; this is mostly
		 * useful when the driver force-enables sisched for the given
		 * application.
		 */
		instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
	}
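
	/* e.g. RADV_PERFTEST=sisched RADV_DEBUG=nosisched ./app ends up with
	 * sisched disabled: the debug flag always wins over the perftest
	 * flag. */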

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & (1 << DRM_NODE_RENDER) &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance,
							   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

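/* Standard Vulkan two-call idiom below: a first call with
 * pPhysicalDevices == NULL just reports the count, a second call fills the
 * array; VK_INCOMPLETE is returned when the caller's array is too small. */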
VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
	                                                             : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = true,
		.sampleRateShading = true,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = true,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = true,
		.shaderInt16 = false,
		.sparseBinding = true,
		.variableMultisampleRate = true,
		.inheritedQueries = true,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
			VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		default:
			break;
		}
	}
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a
	 * signed 32-bit int, so the sum of all limits scaled by descriptor
	 * size has to be at most 2 GiB. A combined image+sampler object
	 * counts against both the sampler and the sampled-image limit. This
	 * limit is for the pipeline layout, not for the set layout, but
	 * there is no set limit, so we just set a pipeline limit. No app is
	 * likely to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
		.maxPerStageResources = max_descriptor_set_size,
		.maxDescriptorSetSamplers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetStorageBuffers = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetSampledImages = max_descriptor_set_size,
		.maxDescriptorSetStorageImages = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments = max_descriptor_set_size,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 64,
		.maxTessellationPatchSize = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations = 127,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = true,
		.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2KHR             *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
			VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
				(VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		default:
			break;
		}
	}
}

static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
			              VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
				              VK_QUEUE_TRANSFER_BIT |
				              VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}
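
/* Queue family 0 is thus always the general GFX family (graphics + compute
 * + transfer); family 1, when exposed, is the dedicated compute family
 * backed by the hardware compute rings. */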

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
	return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
						      &pMemoryProperties->memoryProperties);
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch(pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

static VkResult
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		int queue_family_index, int idx,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_factor_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
	if (queue->tess_offchip_ring_bo)
		queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice                            physicalDevice,
	const VkDeviceCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDevice*                                   pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	bool keep_shader_info = false;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		if (!radv_physical_device_extension_supported(physical_device, ext_name))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);

		if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
			keep_shader_info = true;
	}

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++) {
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
			    sizeof(*device), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	mtx_init(&device->shader_slab_mutex, mtx_plain);
	list_inithead(&device->shader_slabs);

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * sctx->scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);
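
	/* Worked example (illustrative only, assuming a part with 64 CUs):
	 * 32 * 64 = 2048 scratch waves, versus 2048 / 64 = 32 waves needed
	 * for one maximally sized threadgroup, so the first term wins. */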

	radv_device_init_gs_info(device);

	device->tess_offchip_block_dw_size =
		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
	device->has_distributed_tess =
		device->physical_device->rad_info.chip_class >= VI &&
		device->physical_device->rad_info.max_se >= 2;

	if (getenv("RADV_TRACE_FILE")) {
		keep_shader_info = true;

		if (!radv_init_trace(device))
			goto fail;
	}

	device->keep_shader_info = keep_shader_info;

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (device->physical_device->rad_info.chip_class >= CIK)
		cik_create_gfx_config(device);

	VkPipelineCacheCreateInfo ci;
	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
	ci.pNext = NULL;
	ci.flags = 0;
	ci.pInitialData = NULL;
	ci.initialDataSize = 0;
	VkPipelineCache pc;
	result = radv_CreatePipelineCache(radv_device_to_handle(device),
					  &ci, NULL, &pc);
	if (result != VK_SUCCESS)
		goto fail;

	device->mem_cache = radv_pipeline_cache_from_handle(pc);

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice                                    _device,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
	}
	radv_device_finish_meta(device);

	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

	radv_destroy_shader_slabs(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t*                                   pPropertyCount,
	VkLayerProperties*                          pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pPropertyCount,
	VkLayerProperties*                          pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice                                    _device,
	uint32_t                                    queueFamilyIndex,
	uint32_t                                    queueIndex,
	VkQueue*                                    pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

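/* fill_geom_tess_rings() below writes the per-queue ring descriptors into
 * the mapped descriptor BO: the first four dwords are left for the scratch
 * descriptor written by the caller, followed by six V#-style buffer
 * descriptors (ESGS write for ES, ESGS read for GS, GSVS read for VS, the
 * shader-patched GSVS write, the tess factor ring, and the tess off-chip
 * ring) and finally the 1x/2x/4x/8x/16x sample position tables. */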
static void
fill_geom_tess_rings(struct radv_queue *queue,
		     uint32_t *map,
		     bool add_sample_positions,
		     uint32_t esgs_ring_size,
		     struct radeon_winsys_bo *esgs_ring_bo,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_bo *gsvs_ring_bo,
		     uint32_t tess_factor_ring_size,
		     struct radeon_winsys_bo *tess_factor_ring_bo,
		     uint32_t tess_offchip_ring_size,
		     struct radeon_winsys_bo *tess_offchip_ring_bo)
{
	uint64_t esgs_va = 0, gsvs_va = 0;
	uint64_t tess_factor_va = 0, tess_offchip_va = 0;
	uint32_t *desc = &map[4];

	if (esgs_ring_bo)
		esgs_va = radv_buffer_get_va(esgs_ring_bo);
	if (gsvs_ring_bo)
		gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
	if (tess_factor_ring_bo)
		tess_factor_va = radv_buffer_get_va(tess_factor_ring_bo);
	if (tess_offchip_ring_bo)
		tess_offchip_va = radv_buffer_get_va(tess_offchip_ring_bo);

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	   index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(3) |
		  S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	   elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(1) |
		  S_008F0C_ADD_TID_ENABLE(true);
	desc += 4;

	desc[0] = tess_factor_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_factor_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	desc[0] = tess_offchip_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = tess_offchip_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* add sample positions after all rings */
	memcpy(desc, queue->device->sample_locations_1x, 8);
	desc += 2;
	memcpy(desc, queue->device->sample_locations_2x, 16);
	desc += 4;
	memcpy(desc, queue->device->sample_locations_4x, 32);
	desc += 8;
	memcpy(desc, queue->device->sample_locations_8x, 64);
	desc += 16;
	memcpy(desc, queue->device->sample_locations_16x, 128);
}

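/* radv_get_hs_offchip_param() computes the VGT_HS_OFFCHIP_PARAM register
 * value: how many off-chip tessellation buffers the hardware may use and at
 * what granularity (4K or 8K dwords per buffer), and returns the buffer
 * count through max_offchip_buffers_p so the caller can size the off-chip
 * ring. */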
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
		device->physical_device->rad_info.family != CHIP_CARRIZO &&
		device->physical_device->rad_info.family != CHIP_STONEY;
	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
	unsigned max_offchip_buffers = max_offchip_buffers_per_se *
		device->physical_device->rad_info.max_se;
	unsigned offchip_granularity;
	unsigned hs_offchip_param;
	switch (device->tess_offchip_block_dw_size) {
	default:
		assert(0);
		/* fall through */
	case 8192:
		offchip_granularity = V_03093C_X_8K_DWORDS;
		break;
	case 4096:
		offchip_granularity = V_03093C_X_4K_DWORDS;
		break;
	}

	switch (device->physical_device->rad_info.chip_class) {
	case SI:
		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
		break;
	case CIK:
	case VI:
	case GFX9:
	default:
		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
		break;
	}

	*max_offchip_buffers_p = max_offchip_buffers;
	if (device->physical_device->rad_info.chip_class >= CIK) {
		if (device->physical_device->rad_info.chip_class >= VI)
			--max_offchip_buffers;
		hs_offchip_param =
			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
	} else {
		hs_offchip_param =
			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
	}
	return hs_offchip_param;
}

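/* radv_get_preamble_cs() builds (or reuses) the three per-queue preamble
 * command streams: the initial preamble with a full cache flush, the
 * initial preamble without one, and the "continue" preamble used for
 * chained submissions. The preambles bind the scratch, ring and descriptor
 * BOs so that every IB submitted on this queue sees consistent state. */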
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
		     uint32_t scratch_size,
		     uint32_t compute_scratch_size,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     bool needs_tess_rings,
		     bool needs_sample_positions,
		     struct radeon_winsys_cs **initial_full_flush_preamble_cs,
		     struct radeon_winsys_cs **initial_preamble_cs,
		     struct radeon_winsys_cs **continue_preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
	struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
	struct radeon_winsys_cs *dest_cs[3] = {0};
	bool add_tess_rings = false, add_sample_positions = false;
	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
	unsigned max_offchip_buffers;
	unsigned hs_offchip_param = 0;
	uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
	if (!queue->has_tess_rings) {
		if (needs_tess_rings)
			add_tess_rings = true;
	}
	if (!queue->has_sample_positions) {
		if (needs_sample_positions)
			add_sample_positions = true;
	}
	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
						     &max_offchip_buffers);
	tess_offchip_ring_size = max_offchip_buffers *
		queue->device->tess_offchip_block_dw_size * 4;

	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size &&
	    !add_tess_rings && !add_sample_positions &&
	    queue->initial_preamble_cs) {
		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
		*initial_preamble_cs = queue->initial_preamble_cs;
		*continue_preamble_cs = queue->continue_preamble_cs;
		if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
		return VK_SUCCESS;
	}

	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
							      scratch_size,
							      4096,
							      RADEON_DOMAIN_VRAM,
							      ring_bo_flags);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
								      compute_scratch_size,
								      4096,
								      RADEON_DOMAIN_VRAM,
								      ring_bo_flags);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								ring_bo_flags);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (add_tess_rings) {
		tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								       tess_factor_ring_size,
								       256,
								       RADEON_DOMAIN_VRAM,
								       ring_bo_flags);
		if (!tess_factor_ring_bo)
			goto fail;
		tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
									tess_offchip_ring_size,
									256,
									RADEON_DOMAIN_VRAM,
									ring_bo_flags);
		if (!tess_offchip_ring_bo)
			goto fail;
	} else {
		tess_factor_ring_bo = queue->tess_factor_ring_bo;
		tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
	}

	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo ||
	    tess_factor_ring_bo != queue->tess_factor_ring_bo ||
	    tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo ||
		    tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
			size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
			if (add_sample_positions)
				size += 256; /* (1+2+4+8+16) samples * 2 floats * 4 bytes = 248 bytes, rounded up. */
1547 }
1548 else if (scratch_bo)
1549 size = 8; /* 2 dword */
1550
1551 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1552 size,
1553 4096,
1554 RADEON_DOMAIN_VRAM,
1555 RADEON_FLAG_CPU_ACCESS|RADEON_FLAG_NO_INTERPROCESS_SHARING);
1556 if (!descriptor_bo)
1557 goto fail;
1558 } else
1559 descriptor_bo = queue->descriptor_bo;
1560
1561 for(int i = 0; i < 3; ++i) {
1562 struct radeon_winsys_cs *cs = NULL;
1563 cs = queue->device->ws->cs_create(queue->device->ws,
1564 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1565 if (!cs)
1566 goto fail;
1567
1568 dest_cs[i] = cs;
1569
1570 if (scratch_bo)
1571 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo, 8);
1572
1573 if (esgs_ring_bo)
1574 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo, 8);
1575
1576 if (gsvs_ring_bo)
1577 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo, 8);
1578
1579 if (tess_factor_ring_bo)
1580 radv_cs_add_buffer(queue->device->ws, cs, tess_factor_ring_bo, 8);
1581
1582 if (tess_offchip_ring_bo)
1583 radv_cs_add_buffer(queue->device->ws, cs, tess_offchip_ring_bo, 8);
1584
1585 if (descriptor_bo)
1586 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo, 8);
1587
1588 if (descriptor_bo != queue->descriptor_bo) {
1589 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1590
1591 if (scratch_bo) {
1592 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
1593 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1594 S_008F04_SWIZZLE_ENABLE(1);
1595 map[0] = scratch_va;
1596 map[1] = rsrc1;
1597 }
1598
1599 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1600 add_sample_positions)
1601 fill_geom_tess_rings(queue, map, add_sample_positions,
1602 esgs_ring_size, esgs_ring_bo,
1603 gsvs_ring_size, gsvs_ring_bo,
1604 tess_factor_ring_size, tess_factor_ring_bo,
1605 tess_offchip_ring_size, tess_offchip_ring_bo);
1606
1607 queue->device->ws->buffer_unmap(descriptor_bo);
1608 }
1609
1610 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1611 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1612 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1613 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1614 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1615 }
1616
1617 if (esgs_ring_bo || gsvs_ring_bo) {
1618 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1619 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1620 radeon_emit(cs, esgs_ring_size >> 8);
1621 radeon_emit(cs, gsvs_ring_size >> 8);
1622 } else {
1623 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1624 radeon_emit(cs, esgs_ring_size >> 8);
1625 radeon_emit(cs, gsvs_ring_size >> 8);
1626 }
1627 }
1628
1629 if (tess_factor_ring_bo) {
1630 uint64_t tf_va = radv_buffer_get_va(tess_factor_ring_bo);
1631 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1632 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1633 S_030938_SIZE(tess_factor_ring_size / 4));
1634 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1635 tf_va >> 8);
1636 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1637 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1638 tf_va >> 40);
1639 }
1640 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1641 } else {
1642 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1643 S_008988_SIZE(tess_factor_ring_size / 4));
1644 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1645 tf_va >> 8);
1646 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1647 hs_offchip_param);
1648 }
1649 }
1650
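		/* Point every shader stage's user-data SGPRs at the ring
		 * descriptors; GFX9 uses the merged-stage GS/HS address
		 * registers. */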
1651 if (descriptor_bo) {
1652 uint64_t va = radv_buffer_get_va(descriptor_bo);
1653 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1654 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1655 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1656 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
1657 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
1658
1659 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1660 radeon_set_sh_reg_seq(cs, regs[i], 2);
1661 radeon_emit(cs, va);
1662 radeon_emit(cs, va >> 32);
1663 }
1664 } else {
1665 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1666 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1667 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1668 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1669 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1670 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1671
1672 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1673 radeon_set_sh_reg_seq(cs, regs[i], 2);
1674 radeon_emit(cs, va);
1675 radeon_emit(cs, va >> 32);
1676 }
1677 }
1678 }
1679
1680 if (compute_scratch_bo) {
1681 uint64_t scratch_va = radv_buffer_get_va(compute_scratch_bo);
1682 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1683 S_008F04_SWIZZLE_ENABLE(1);
1684
1685 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo, 8);
1686
1687 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1688 radeon_emit(cs, scratch_va);
1689 radeon_emit(cs, rsrc1);
1690 }
1691
1692 if (i == 0) {
1693 si_cs_emit_cache_flush(cs,
1694 false,
1695 queue->device->physical_device->rad_info.chip_class,
1696 NULL, 0,
1697 			                       queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1698 queue->device->physical_device->rad_info.chip_class >= CIK,
1699 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1700 RADV_CMD_FLAG_INV_ICACHE |
1701 RADV_CMD_FLAG_INV_SMEM_L1 |
1702 RADV_CMD_FLAG_INV_VMEM_L1 |
1703 RADV_CMD_FLAG_INV_GLOBAL_L2);
1704 } else if (i == 1) {
1705 si_cs_emit_cache_flush(cs,
1706 false,
1707 queue->device->physical_device->rad_info.chip_class,
1708 NULL, 0,
1709 			                       queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1710 queue->device->physical_device->rad_info.chip_class >= CIK,
1711 RADV_CMD_FLAG_INV_ICACHE |
1712 RADV_CMD_FLAG_INV_SMEM_L1 |
1713 RADV_CMD_FLAG_INV_VMEM_L1 |
1714 RADV_CMD_FLAG_INV_GLOBAL_L2);
1715 }
1716
1717 if (!queue->device->ws->cs_finalize(cs))
1718 goto fail;
1719 }
1720
1721 if (queue->initial_full_flush_preamble_cs)
1722 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
1723
1724 if (queue->initial_preamble_cs)
1725 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1726
1727 if (queue->continue_preamble_cs)
1728 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1729
1730 queue->initial_full_flush_preamble_cs = dest_cs[0];
1731 queue->initial_preamble_cs = dest_cs[1];
1732 queue->continue_preamble_cs = dest_cs[2];
1733
1734 if (scratch_bo != queue->scratch_bo) {
1735 if (queue->scratch_bo)
1736 queue->device->ws->buffer_destroy(queue->scratch_bo);
1737 queue->scratch_bo = scratch_bo;
1738 queue->scratch_size = scratch_size;
1739 }
1740
1741 if (compute_scratch_bo != queue->compute_scratch_bo) {
1742 if (queue->compute_scratch_bo)
1743 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1744 queue->compute_scratch_bo = compute_scratch_bo;
1745 queue->compute_scratch_size = compute_scratch_size;
1746 }
1747
1748 if (esgs_ring_bo != queue->esgs_ring_bo) {
1749 if (queue->esgs_ring_bo)
1750 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1751 queue->esgs_ring_bo = esgs_ring_bo;
1752 queue->esgs_ring_size = esgs_ring_size;
1753 }
1754
1755 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1756 if (queue->gsvs_ring_bo)
1757 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1758 queue->gsvs_ring_bo = gsvs_ring_bo;
1759 queue->gsvs_ring_size = gsvs_ring_size;
1760 }
1761
1762 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1763 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1764 }
1765
1766 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1767 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1768 queue->has_tess_rings = true;
1769 }
1770
1771 if (descriptor_bo != queue->descriptor_bo) {
1772 if (queue->descriptor_bo)
1773 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1774
1775 queue->descriptor_bo = descriptor_bo;
1776 }
1777
1778 if (add_sample_positions)
1779 queue->has_sample_positions = true;
1780
1781 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
1782 *initial_preamble_cs = queue->initial_preamble_cs;
1783 *continue_preamble_cs = queue->continue_preamble_cs;
1784 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1785 *continue_preamble_cs = NULL;
1786 return VK_SUCCESS;
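	/* On failure, destroy only the objects created by this call; BOs the
	 * queue already owns are kept. */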
1787 fail:
1788 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1789 if (dest_cs[i])
1790 queue->device->ws->cs_destroy(dest_cs[i]);
1791 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1792 queue->device->ws->buffer_destroy(descriptor_bo);
1793 if (scratch_bo && scratch_bo != queue->scratch_bo)
1794 queue->device->ws->buffer_destroy(scratch_bo);
1795 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1796 queue->device->ws->buffer_destroy(compute_scratch_bo);
1797 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1798 queue->device->ws->buffer_destroy(esgs_ring_bo);
1799 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1800 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1801 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1802 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1803 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1804 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1805 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1806 }
1807
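/* Split the semaphore list into kernel syncobjs and legacy winsys
 * semaphores: one pass counts each kind, a second pass fills the arrays
 * allocated in between. */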
1808 static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
1809 int num_sems,
1810 const VkSemaphore *sems,
1811 bool reset_temp)
1812 {
1813 int syncobj_idx = 0, sem_idx = 0;
1814
1815 if (num_sems == 0)
1816 return VK_SUCCESS;
1817 for (uint32_t i = 0; i < num_sems; i++) {
1818 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1819
1820 if (sem->temp_syncobj || sem->syncobj)
1821 counts->syncobj_count++;
1822 else
1823 counts->sem_count++;
1824 }
1825
1826 if (counts->syncobj_count) {
1827 counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
1828 if (!counts->syncobj)
1829 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1830 }
1831
1832 if (counts->sem_count) {
1833 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
1834 if (!counts->sem) {
1835 free(counts->syncobj);
1836 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1837 }
1838 }
1839
1840 for (uint32_t i = 0; i < num_sems; i++) {
1841 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1842
1843 if (sem->temp_syncobj) {
1844 counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
1845 		} else if (sem->syncobj) {
1846 			counts->syncobj[syncobj_idx++] = sem->syncobj;
1847 		} else {
1849 assert(sem->sem);
1850 counts->sem[sem_idx++] = sem->sem;
1851 }
1852 }
1853
1854 return VK_SUCCESS;
1855 }
1856
1857 void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
1858 {
1859 free(sem_info->wait.syncobj);
1860 free(sem_info->wait.sem);
1861 free(sem_info->signal.syncobj);
1862 free(sem_info->signal.sem);
1863 }
1864
1865
1866 static void radv_free_temp_syncobjs(struct radv_device *device,
1867 int num_sems,
1868 const VkSemaphore *sems)
1869 {
1870 for (uint32_t i = 0; i < num_sems; i++) {
1871 RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
1872
1873 if (sem->temp_syncobj) {
1874 device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
1875 sem->temp_syncobj = 0;
1876 }
1877 }
1878 }
1879
1880 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
1881 int num_wait_sems,
1882 const VkSemaphore *wait_sems,
1883 int num_signal_sems,
1884 const VkSemaphore *signal_sems)
1885 {
1886 VkResult ret;
1887 memset(sem_info, 0, sizeof(*sem_info));
1888
1889 ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
1890 if (ret)
1891 return ret;
1892 ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
1893 if (ret)
1894 radv_free_sem_info(sem_info);
1895
1896 /* caller can override these */
1897 sem_info->cs_emit_wait = true;
1898 sem_info->cs_emit_signal = true;
1899 return ret;
1900 }
1901
1902 VkResult radv_QueueSubmit(
1903 VkQueue _queue,
1904 uint32_t submitCount,
1905 const VkSubmitInfo* pSubmits,
1906 VkFence _fence)
1907 {
1908 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1909 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1910 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1911 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1912 int ret;
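	/* With a trace BO present, submit one CS at a time so a hang can be
	 * pinned to a specific command stream. */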
1913 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1914 uint32_t scratch_size = 0;
1915 uint32_t compute_scratch_size = 0;
1916 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1917 struct radeon_winsys_cs *initial_preamble_cs = NULL, *initial_flush_preamble_cs = NULL, *continue_preamble_cs = NULL;
1918 VkResult result;
1919 bool fence_emitted = false;
1920 bool tess_rings_needed = false;
1921 bool sample_positions_needed = false;
1922
1923 /* Do this first so failing to allocate scratch buffers can't result in
1924 * partially executed submissions. */
1925 for (uint32_t i = 0; i < submitCount; i++) {
1926 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1927 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1928 pSubmits[i].pCommandBuffers[j]);
1929
1930 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1931 compute_scratch_size = MAX2(compute_scratch_size,
1932 cmd_buffer->compute_scratch_size_needed);
1933 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1934 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1935 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1936 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1937 }
1938 }
1939
1940 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1941 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1942 sample_positions_needed, &initial_flush_preamble_cs,
1943 &initial_preamble_cs, &continue_preamble_cs);
1944 if (result != VK_SUCCESS)
1945 return result;
1946
1947 for (uint32_t i = 0; i < submitCount; i++) {
1948 struct radeon_winsys_cs **cs_array;
1949 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1950 bool can_patch = true;
1951 uint32_t advance;
1952 struct radv_winsys_sem_info sem_info;
1953
1954 result = radv_alloc_sem_info(&sem_info,
1955 pSubmits[i].waitSemaphoreCount,
1956 pSubmits[i].pWaitSemaphores,
1957 pSubmits[i].signalSemaphoreCount,
1958 pSubmits[i].pSignalSemaphores);
1959 if (result != VK_SUCCESS)
1960 return result;
1961
1962 if (!pSubmits[i].commandBufferCount) {
1963 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1964 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1965 &queue->device->empty_cs[queue->queue_family_index],
1966 1, NULL, NULL,
1967 &sem_info,
1968 false, base_fence);
1969 if (ret) {
1970 radv_loge("failed to submit CS %d\n", i);
1971 abort();
1972 }
1973 fence_emitted = true;
1974 }
1975 radv_free_sem_info(&sem_info);
1976 continue;
1977 }
1978
1979 		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1980 				  (pSubmits[i].commandBufferCount));
		/* Bail out cleanly if the CS array allocation fails. */
		if (!cs_array) {
			radv_free_sem_info(&sem_info);
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
		}
1981 
1982 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1983 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1984 pSubmits[i].pCommandBuffers[j]);
1985 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1986
1987 cs_array[j] = cmd_buffer->cs;
1988 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1989 can_patch = false;
1990 }
1991
1992 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
1993 struct radeon_winsys_cs *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
1994 advance = MIN2(max_cs_submission,
1995 pSubmits[i].commandBufferCount - j);
1996
1997 if (queue->device->trace_bo)
1998 *queue->device->trace_id_ptr = 0;
1999
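			/* Semaphore waits apply only to the first chunk of the
			 * submission, semaphore signals only to the last. */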
2000 sem_info.cs_emit_wait = j == 0;
2001 sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount;
2002
2003 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
2004 advance, initial_preamble, continue_preamble_cs,
2005 &sem_info,
2006 can_patch, base_fence);
2007
2008 if (ret) {
2009 radv_loge("failed to submit CS %d\n", i);
2010 abort();
2011 }
2012 fence_emitted = true;
2013 if (queue->device->trace_bo) {
2014 radv_check_gpu_hangs(queue, cs_array[j]);
2015 }
2016 }
2017
2018 radv_free_temp_syncobjs(queue->device,
2019 pSubmits[i].waitSemaphoreCount,
2020 pSubmits[i].pWaitSemaphores);
2021 radv_free_sem_info(&sem_info);
2022 free(cs_array);
2023 }
2024
2025 if (fence) {
2026 if (!fence_emitted) {
2027 struct radv_winsys_sem_info sem_info = {0};
2028 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
2029 &queue->device->empty_cs[queue->queue_family_index],
2030 1, NULL, NULL, &sem_info,
2031 false, base_fence);
2032 }
2033 fence->submitted = true;
2034 }
2035
2036 return VK_SUCCESS;
2037 }
2038
2039 VkResult radv_QueueWaitIdle(
2040 VkQueue _queue)
2041 {
2042 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2043
2044 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
2045 radv_queue_family_to_ring(queue->queue_family_index),
2046 queue->queue_idx);
2047 return VK_SUCCESS;
2048 }
2049
2050 VkResult radv_DeviceWaitIdle(
2051 VkDevice _device)
2052 {
2053 RADV_FROM_HANDLE(radv_device, device, _device);
2054
2055 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2056 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2057 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2058 }
2059 }
2060 return VK_SUCCESS;
2061 }
2062
2063 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2064 VkInstance instance,
2065 const char* pName)
2066 {
2067 return radv_lookup_entrypoint(pName);
2068 }
2069
2070 /* The loader wants us to expose a second GetInstanceProcAddr function
2071 * to work around certain LD_PRELOAD issues seen in apps.
2072 */
2073 PUBLIC
2074 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2075 VkInstance instance,
2076 const char* pName);
2077
2078 PUBLIC
2079 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2080 VkInstance instance,
2081 const char* pName)
2082 {
2083 return radv_GetInstanceProcAddr(instance, pName);
2084 }
2085
2086 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2087 VkDevice device,
2088 const char* pName)
2089 {
2090 return radv_lookup_entrypoint(pName);
2091 }
2092
2093 bool radv_get_memory_fd(struct radv_device *device,
2094 struct radv_device_memory *memory,
2095 int *pFD)
2096 {
2097 struct radeon_bo_metadata metadata;
2098
2099 if (memory->image) {
2100 radv_init_metadata(device, memory->image, &metadata);
2101 device->ws->buffer_set_metadata(memory->bo, &metadata);
2102 }
2103
2104 return device->ws->buffer_get_fd(device->ws, memory->bo,
2105 pFD);
2106 }
2107
2108 VkResult radv_alloc_memory(VkDevice _device,
2109 const VkMemoryAllocateInfo* pAllocateInfo,
2110 const VkAllocationCallbacks* pAllocator,
2111 enum radv_mem_flags_bits mem_flags,
2112 VkDeviceMemory* pMem)
2113 {
2114 RADV_FROM_HANDLE(radv_device, device, _device);
2115 struct radv_device_memory *mem;
2116 VkResult result;
2117 enum radeon_bo_domain domain;
2118 uint32_t flags = 0;
2119 enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
2120
2121 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2122
2123 if (pAllocateInfo->allocationSize == 0) {
2124 /* Apparently, this is allowed */
2125 *pMem = VK_NULL_HANDLE;
2126 return VK_SUCCESS;
2127 }
2128
2129 const VkImportMemoryFdInfoKHR *import_info =
2130 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2131 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2132 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2133
2134 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2135 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2136 if (mem == NULL)
2137 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2138
2139 if (dedicate_info) {
2140 mem->image = radv_image_from_handle(dedicate_info->image);
2141 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2142 } else {
2143 mem->image = NULL;
2144 mem->buffer = NULL;
2145 }
2146
2147 if (import_info) {
2148 assert(import_info->handleType ==
2149 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2150 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2151 NULL, NULL);
2152 if (!mem->bo) {
2153 result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
2154 goto fail;
2155 } else {
2156 close(import_info->fd);
2157 goto out_success;
2158 }
2159 }
2160
2161 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
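	/* Translate the memory type into a winsys domain and flags: the GTT
	 * types map to the GTT domain (write-combined or cached), everything
	 * else goes to VRAM, and plain VRAM is not CPU-accessible. */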
2162 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2163 mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
2164 domain = RADEON_DOMAIN_GTT;
2165 else
2166 domain = RADEON_DOMAIN_VRAM;
2167
2168 if (mem_type_index == RADV_MEM_TYPE_VRAM)
2169 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2170 else
2171 flags |= RADEON_FLAG_CPU_ACCESS;
2172
2173 if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2174 flags |= RADEON_FLAG_GTT_WC;
2175
2176 if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
2177 flags |= RADEON_FLAG_IMPLICIT_SYNC;
2178
2179 if (!dedicate_info && !import_info)
2180 flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
2181
2182 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2183 domain, flags);
2184
2185 if (!mem->bo) {
2186 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2187 goto fail;
2188 }
2189 mem->type_index = mem_type_index;
2190 out_success:
2191 *pMem = radv_device_memory_to_handle(mem);
2192
2193 return VK_SUCCESS;
2194
2195 fail:
2196 vk_free2(&device->alloc, pAllocator, mem);
2197
2198 return result;
2199 }
2200
2201 VkResult radv_AllocateMemory(
2202 VkDevice _device,
2203 const VkMemoryAllocateInfo* pAllocateInfo,
2204 const VkAllocationCallbacks* pAllocator,
2205 VkDeviceMemory* pMem)
2206 {
2207 return radv_alloc_memory(_device, pAllocateInfo, pAllocator, 0, pMem);
2208 }
2209
2210 void radv_FreeMemory(
2211 VkDevice _device,
2212 VkDeviceMemory _mem,
2213 const VkAllocationCallbacks* pAllocator)
2214 {
2215 RADV_FROM_HANDLE(radv_device, device, _device);
2216 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2217
2218 if (mem == NULL)
2219 return;
2220
2221 device->ws->buffer_destroy(mem->bo);
2222 mem->bo = NULL;
2223
2224 vk_free2(&device->alloc, pAllocator, mem);
2225 }
2226
2227 VkResult radv_MapMemory(
2228 VkDevice _device,
2229 VkDeviceMemory _memory,
2230 VkDeviceSize offset,
2231 VkDeviceSize size,
2232 VkMemoryMapFlags flags,
2233 void** ppData)
2234 {
2235 RADV_FROM_HANDLE(radv_device, device, _device);
2236 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2237
2238 if (mem == NULL) {
2239 *ppData = NULL;
2240 return VK_SUCCESS;
2241 }
2242
2243 *ppData = device->ws->buffer_map(mem->bo);
2244 if (*ppData) {
2245 *ppData += offset;
2246 return VK_SUCCESS;
2247 }
2248
2249 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
2250 }
2251
2252 void radv_UnmapMemory(
2253 VkDevice _device,
2254 VkDeviceMemory _memory)
2255 {
2256 RADV_FROM_HANDLE(radv_device, device, _device);
2257 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2258
2259 if (mem == NULL)
2260 return;
2261
2262 device->ws->buffer_unmap(mem->bo);
2263 }
2264
2265 VkResult radv_FlushMappedMemoryRanges(
2266 VkDevice _device,
2267 uint32_t memoryRangeCount,
2268 const VkMappedMemoryRange* pMemoryRanges)
2269 {
2270 return VK_SUCCESS;
2271 }
2272
2273 VkResult radv_InvalidateMappedMemoryRanges(
2274 VkDevice _device,
2275 uint32_t memoryRangeCount,
2276 const VkMappedMemoryRange* pMemoryRanges)
2277 {
2278 return VK_SUCCESS;
2279 }
2280
2281 void radv_GetBufferMemoryRequirements(
2282 VkDevice _device,
2283 VkBuffer _buffer,
2284 VkMemoryRequirements* pMemoryRequirements)
2285 {
2286 RADV_FROM_HANDLE(radv_device, device, _device);
2287 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2288
2289 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2290
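	/* Sparse buffers are bound with page granularity and therefore need
	 * page alignment; ordinary buffers only need 16 bytes. */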
2291 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2292 pMemoryRequirements->alignment = 4096;
2293 else
2294 pMemoryRequirements->alignment = 16;
2295
2296 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2297 }
2298
2299 void radv_GetBufferMemoryRequirements2KHR(
2300 VkDevice device,
2301 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2302 VkMemoryRequirements2KHR* pMemoryRequirements)
2303 {
2304 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2305 &pMemoryRequirements->memoryRequirements);
2306 RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
2307 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2308 switch (ext->sType) {
2309 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2310 VkMemoryDedicatedRequirementsKHR *req =
2311 (VkMemoryDedicatedRequirementsKHR *) ext;
2312 req->requiresDedicatedAllocation = buffer->shareable;
2313 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2314 break;
2315 }
2316 default:
2317 break;
2318 }
2319 }
2320 }
2321
2322 void radv_GetImageMemoryRequirements(
2323 VkDevice _device,
2324 VkImage _image,
2325 VkMemoryRequirements* pMemoryRequirements)
2326 {
2327 RADV_FROM_HANDLE(radv_device, device, _device);
2328 RADV_FROM_HANDLE(radv_image, image, _image);
2329
2330 pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
2331
2332 pMemoryRequirements->size = image->size;
2333 pMemoryRequirements->alignment = image->alignment;
2334 }
2335
2336 void radv_GetImageMemoryRequirements2KHR(
2337 VkDevice device,
2338 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2339 VkMemoryRequirements2KHR* pMemoryRequirements)
2340 {
2341 radv_GetImageMemoryRequirements(device, pInfo->image,
2342 &pMemoryRequirements->memoryRequirements);
2343
2344 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
2345
2346 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2347 switch (ext->sType) {
2348 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2349 VkMemoryDedicatedRequirementsKHR *req =
2350 (VkMemoryDedicatedRequirementsKHR *) ext;
2351 req->requiresDedicatedAllocation = image->shareable;
2352 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2353 break;
2354 }
2355 default:
2356 break;
2357 }
2358 }
2359 }
2360
2361 void radv_GetImageSparseMemoryRequirements(
2362 VkDevice device,
2363 VkImage image,
2364 uint32_t* pSparseMemoryRequirementCount,
2365 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2366 {
2367 stub();
2368 }
2369
2370 void radv_GetImageSparseMemoryRequirements2KHR(
2371 VkDevice device,
2372 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2373 uint32_t* pSparseMemoryRequirementCount,
2374 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2375 {
2376 stub();
2377 }
2378
2379 void radv_GetDeviceMemoryCommitment(
2380 VkDevice device,
2381 VkDeviceMemory memory,
2382 VkDeviceSize* pCommittedMemoryInBytes)
2383 {
2384 *pCommittedMemoryInBytes = 0;
2385 }
2386
2387 VkResult radv_BindBufferMemory2KHR(VkDevice device,
2388 uint32_t bindInfoCount,
2389 const VkBindBufferMemoryInfoKHR *pBindInfos)
2390 {
2391 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2392 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2393 RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
2394
2395 if (mem) {
2396 buffer->bo = mem->bo;
2397 buffer->offset = pBindInfos[i].memoryOffset;
2398 } else {
2399 buffer->bo = NULL;
2400 }
2401 }
2402 return VK_SUCCESS;
2403 }
2404
2405 VkResult radv_BindBufferMemory(
2406 VkDevice device,
2407 VkBuffer buffer,
2408 VkDeviceMemory memory,
2409 VkDeviceSize memoryOffset)
2410 {
2411 const VkBindBufferMemoryInfoKHR info = {
2412 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2413 .buffer = buffer,
2414 .memory = memory,
2415 .memoryOffset = memoryOffset
2416 };
2417
2418 return radv_BindBufferMemory2KHR(device, 1, &info);
2419 }
2420
2421 VkResult radv_BindImageMemory2KHR(VkDevice device,
2422 uint32_t bindInfoCount,
2423 const VkBindImageMemoryInfoKHR *pBindInfos)
2424 {
2425 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2426 RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
2427 RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
2428
2429 if (mem) {
2430 image->bo = mem->bo;
2431 image->offset = pBindInfos[i].memoryOffset;
2432 } else {
2433 image->bo = NULL;
2434 image->offset = 0;
2435 }
2436 }
2437 return VK_SUCCESS;
2438 }
2439
2440
2441 VkResult radv_BindImageMemory(
2442 VkDevice device,
2443 VkImage image,
2444 VkDeviceMemory memory,
2445 VkDeviceSize memoryOffset)
2446 {
2447 const VkBindImageMemoryInfoKHR info = {
2448 		.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
2449 .image = image,
2450 .memory = memory,
2451 .memoryOffset = memoryOffset
2452 };
2453
2454 return radv_BindImageMemory2KHR(device, 1, &info);
2455 }
2456
2457
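/* Sparse binding forwards each range to the winsys virtual-bind hook;
 * binding a NULL backing BO unbinds the range. */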
2458 static void
2459 radv_sparse_buffer_bind_memory(struct radv_device *device,
2460 const VkSparseBufferMemoryBindInfo *bind)
2461 {
2462 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2463
2464 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2465 struct radv_device_memory *mem = NULL;
2466
2467 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2468 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2469
2470 device->ws->buffer_virtual_bind(buffer->bo,
2471 bind->pBinds[i].resourceOffset,
2472 bind->pBinds[i].size,
2473 mem ? mem->bo : NULL,
2474 bind->pBinds[i].memoryOffset);
2475 }
2476 }
2477
2478 static void
2479 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2480 const VkSparseImageOpaqueMemoryBindInfo *bind)
2481 {
2482 RADV_FROM_HANDLE(radv_image, image, bind->image);
2483
2484 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2485 struct radv_device_memory *mem = NULL;
2486
2487 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2488 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2489
2490 device->ws->buffer_virtual_bind(image->bo,
2491 bind->pBinds[i].resourceOffset,
2492 bind->pBinds[i].size,
2493 mem ? mem->bo : NULL,
2494 bind->pBinds[i].memoryOffset);
2495 }
2496 }
2497
2498 VkResult radv_QueueBindSparse(
2499 VkQueue _queue,
2500 uint32_t bindInfoCount,
2501 const VkBindSparseInfo* pBindInfo,
2502 VkFence _fence)
2503 {
2504 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2505 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2506 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2507 bool fence_emitted = false;
2508
2509 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2510 struct radv_winsys_sem_info sem_info;
2511 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2512 radv_sparse_buffer_bind_memory(queue->device,
2513 pBindInfo[i].pBufferBinds + j);
2514 }
2515
2516 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2517 radv_sparse_image_opaque_bind_memory(queue->device,
2518 pBindInfo[i].pImageOpaqueBinds + j);
2519 }
2520
2521 VkResult result;
2522 result = radv_alloc_sem_info(&sem_info,
2523 pBindInfo[i].waitSemaphoreCount,
2524 pBindInfo[i].pWaitSemaphores,
2525 pBindInfo[i].signalSemaphoreCount,
2526 pBindInfo[i].pSignalSemaphores);
2527 if (result != VK_SUCCESS)
2528 return result;
2529
2530 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2531 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2532 &queue->device->empty_cs[queue->queue_family_index],
2533 1, NULL, NULL,
2534 &sem_info,
2535 false, base_fence);
2536 fence_emitted = true;
2537 if (fence)
2538 fence->submitted = true;
2539 }
2540
2541 radv_free_sem_info(&sem_info);
2542
2543 }
2544
2545 if (fence && !fence_emitted) {
2546 fence->signalled = true;
2547 }
2548
2549 return VK_SUCCESS;
2550 }
2551
2552 VkResult radv_CreateFence(
2553 VkDevice _device,
2554 const VkFenceCreateInfo* pCreateInfo,
2555 const VkAllocationCallbacks* pAllocator,
2556 VkFence* pFence)
2557 {
2558 RADV_FROM_HANDLE(radv_device, device, _device);
2559 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2560 sizeof(*fence), 8,
2561 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2562
2563 if (!fence)
2564 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2565
2566 fence->submitted = false;
2567 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2568 fence->fence = device->ws->create_fence();
2569 if (!fence->fence) {
2570 vk_free2(&device->alloc, pAllocator, fence);
2571 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2572 }
2573
2574 *pFence = radv_fence_to_handle(fence);
2575
2576 return VK_SUCCESS;
2577 }
2578
2579 void radv_DestroyFence(
2580 VkDevice _device,
2581 VkFence _fence,
2582 const VkAllocationCallbacks* pAllocator)
2583 {
2584 RADV_FROM_HANDLE(radv_device, device, _device);
2585 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2586
2587 if (!fence)
2588 return;
2589 device->ws->destroy_fence(fence->fence);
2590 vk_free2(&device->alloc, pAllocator, fence);
2591 }
2592
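/* Convert a relative timeout to an absolute CLOCK_MONOTONIC deadline,
 * clamping so the addition cannot overflow UINT64_MAX. */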
2593 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2594 {
2595 uint64_t current_time;
2596 struct timespec tv;
2597
2598 clock_gettime(CLOCK_MONOTONIC, &tv);
2599 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2600
2601 timeout = MIN2(UINT64_MAX - current_time, timeout);
2602
2603 return current_time + timeout;
2604 }
2605
2606 VkResult radv_WaitForFences(
2607 VkDevice _device,
2608 uint32_t fenceCount,
2609 const VkFence* pFences,
2610 VkBool32 waitAll,
2611 uint64_t timeout)
2612 {
2613 RADV_FROM_HANDLE(radv_device, device, _device);
2614 timeout = radv_get_absolute_timeout(timeout);
2615
2616 if (!waitAll && fenceCount > 1) {
2617 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2618 }
2619
2620 for (uint32_t i = 0; i < fenceCount; ++i) {
2621 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2622 bool expired = false;
2623
2624 if (fence->signalled)
2625 continue;
2626
2627 if (!fence->submitted)
2628 return VK_TIMEOUT;
2629
2630 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2631 if (!expired)
2632 return VK_TIMEOUT;
2633
2634 fence->signalled = true;
2635 }
2636
2637 return VK_SUCCESS;
2638 }
2639
2640 VkResult radv_ResetFences(VkDevice device,
2641 uint32_t fenceCount,
2642 const VkFence *pFences)
2643 {
2644 for (unsigned i = 0; i < fenceCount; ++i) {
2645 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2646 fence->submitted = fence->signalled = false;
2647 }
2648
2649 return VK_SUCCESS;
2650 }
2651
2652 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2653 {
2654 RADV_FROM_HANDLE(radv_device, device, _device);
2655 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2656
2657 if (fence->signalled)
2658 return VK_SUCCESS;
2659 if (!fence->submitted)
2660 return VK_NOT_READY;
2661
2662 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2663 return VK_NOT_READY;
2664
2665 return VK_SUCCESS;
2666 }
2667
2668
2669 // Queue semaphore functions
2670
2671 VkResult radv_CreateSemaphore(
2672 VkDevice _device,
2673 const VkSemaphoreCreateInfo* pCreateInfo,
2674 const VkAllocationCallbacks* pAllocator,
2675 VkSemaphore* pSemaphore)
2676 {
2677 RADV_FROM_HANDLE(radv_device, device, _device);
2678 const VkExportSemaphoreCreateInfoKHR *export =
2679 vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
2680 VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
2681 export ? export->handleTypes : 0;
2682
2683 struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
2684 sizeof(*sem), 8,
2685 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2686 if (!sem)
2687 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2688
2689 sem->temp_syncobj = 0;
2690 /* create a syncobject if we are going to export this semaphore */
2691 if (handleTypes) {
2692 		assert(device->physical_device->rad_info.has_syncobj);
2693 		assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
2694 int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
2695 if (ret) {
2696 vk_free2(&device->alloc, pAllocator, sem);
2697 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2698 }
2699 sem->sem = NULL;
2700 } else {
2701 sem->sem = device->ws->create_sem(device->ws);
2702 if (!sem->sem) {
2703 vk_free2(&device->alloc, pAllocator, sem);
2704 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2705 }
2706 sem->syncobj = 0;
2707 }
2708
2709 *pSemaphore = radv_semaphore_to_handle(sem);
2710 return VK_SUCCESS;
2711 }
2712
2713 void radv_DestroySemaphore(
2714 VkDevice _device,
2715 VkSemaphore _semaphore,
2716 const VkAllocationCallbacks* pAllocator)
2717 {
2718 RADV_FROM_HANDLE(radv_device, device, _device);
2719 RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
2720 if (!_semaphore)
2721 return;
2722
2723 if (sem->syncobj)
2724 device->ws->destroy_syncobj(device->ws, sem->syncobj);
2725 else
2726 device->ws->destroy_sem(sem->sem);
2727 vk_free2(&device->alloc, pAllocator, sem);
2728 }
2729
2730 VkResult radv_CreateEvent(
2731 VkDevice _device,
2732 const VkEventCreateInfo* pCreateInfo,
2733 const VkAllocationCallbacks* pAllocator,
2734 VkEvent* pEvent)
2735 {
2736 RADV_FROM_HANDLE(radv_device, device, _device);
2737 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2738 sizeof(*event), 8,
2739 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2740
2741 if (!event)
2742 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2743
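	/* An event is a single uncached, CPU-mapped qword in GTT;
	 * SetEvent/ResetEvent write it and GetEventStatus polls it. */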
2744 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2745 RADEON_DOMAIN_GTT,
2746 RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
2747 if (!event->bo) {
2748 vk_free2(&device->alloc, pAllocator, event);
2749 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2750 }
2751
2752 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2753
2754 *pEvent = radv_event_to_handle(event);
2755
2756 return VK_SUCCESS;
2757 }
2758
2759 void radv_DestroyEvent(
2760 VkDevice _device,
2761 VkEvent _event,
2762 const VkAllocationCallbacks* pAllocator)
2763 {
2764 RADV_FROM_HANDLE(radv_device, device, _device);
2765 RADV_FROM_HANDLE(radv_event, event, _event);
2766
2767 if (!event)
2768 return;
2769 device->ws->buffer_destroy(event->bo);
2770 vk_free2(&device->alloc, pAllocator, event);
2771 }
2772
2773 VkResult radv_GetEventStatus(
2774 VkDevice _device,
2775 VkEvent _event)
2776 {
2777 RADV_FROM_HANDLE(radv_event, event, _event);
2778
2779 if (*event->map == 1)
2780 return VK_EVENT_SET;
2781 return VK_EVENT_RESET;
2782 }
2783
2784 VkResult radv_SetEvent(
2785 VkDevice _device,
2786 VkEvent _event)
2787 {
2788 RADV_FROM_HANDLE(radv_event, event, _event);
2789 *event->map = 1;
2790
2791 return VK_SUCCESS;
2792 }
2793
2794 VkResult radv_ResetEvent(
2795 VkDevice _device,
2796 VkEvent _event)
2797 {
2798 RADV_FROM_HANDLE(radv_event, event, _event);
2799 *event->map = 0;
2800
2801 return VK_SUCCESS;
2802 }
2803
2804 VkResult radv_CreateBuffer(
2805 VkDevice _device,
2806 const VkBufferCreateInfo* pCreateInfo,
2807 const VkAllocationCallbacks* pAllocator,
2808 VkBuffer* pBuffer)
2809 {
2810 RADV_FROM_HANDLE(radv_device, device, _device);
2811 struct radv_buffer *buffer;
2812
2813 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2814
2815 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2816 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2817 if (buffer == NULL)
2818 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2819
2820 buffer->size = pCreateInfo->size;
2821 buffer->usage = pCreateInfo->usage;
2822 buffer->bo = NULL;
2823 buffer->offset = 0;
2824 buffer->flags = pCreateInfo->flags;
2825
2826 buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
2827 EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR) != NULL;
2828
2829 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2830 buffer->bo = device->ws->buffer_create(device->ws,
2831 align64(buffer->size, 4096),
2832 4096, 0, RADEON_FLAG_VIRTUAL);
2833 if (!buffer->bo) {
2834 vk_free2(&device->alloc, pAllocator, buffer);
2835 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2836 }
2837 }
2838
2839 *pBuffer = radv_buffer_to_handle(buffer);
2840
2841 return VK_SUCCESS;
2842 }
2843
2844 void radv_DestroyBuffer(
2845 VkDevice _device,
2846 VkBuffer _buffer,
2847 const VkAllocationCallbacks* pAllocator)
2848 {
2849 RADV_FROM_HANDLE(radv_device, device, _device);
2850 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2851
2852 if (!buffer)
2853 return;
2854
2855 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2856 device->ws->buffer_destroy(buffer->bo);
2857
2858 vk_free2(&device->alloc, pAllocator, buffer);
2859 }
2860
2861 static inline unsigned
2862 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2863 {
2864 if (stencil)
2865 return image->surface.u.legacy.stencil_tiling_index[level];
2866 else
2867 return image->surface.u.legacy.tiling_index[level];
2868 }
2869
2870 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2871 {
2872 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2873 }
2874
2875 static void
2876 radv_initialise_color_surface(struct radv_device *device,
2877 struct radv_color_buffer_info *cb,
2878 struct radv_image_view *iview)
2879 {
2880 const struct vk_format_description *desc;
2881 unsigned ntype, format, swap, endian;
2882 unsigned blend_clamp = 0, blend_bypass = 0;
2883 uint64_t va;
2884 const struct radeon_surf *surf = &iview->image->surface;
2885
2886 desc = vk_format_description(iview->vk_format);
2887
2888 memset(cb, 0, sizeof(*cb));
2889
2890 /* Intensity is implemented as Red, so treat it that way. */
2891 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2892
2893 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2894
2895 cb->cb_color_base = va >> 8;
2896
2897 if (device->physical_device->rad_info.chip_class >= GFX9) {
2898 struct gfx9_surf_meta_flags meta;
2899 if (iview->image->dcc_offset)
2900 meta = iview->image->surface.u.gfx9.dcc;
2901 else
2902 meta = iview->image->surface.u.gfx9.cmask;
2903
2904 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2905 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2906 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2907 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2908
2909 cb->cb_color_base += iview->image->surface.u.gfx9.surf_offset >> 8;
2910 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2911 } else {
2912 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2913 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2914
2915 cb->cb_color_base += level_info->offset >> 8;
2916 if (level_info->mode == RADEON_SURF_MODE_2D)
2917 cb->cb_color_base |= iview->image->surface.tile_swizzle;
2918
2919 pitch_tile_max = level_info->nblk_x / 8 - 1;
2920 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2921 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2922
2923 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2924 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2925 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2926
2927 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2928 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2929
2930 if (iview->image->fmask.size) {
2931 if (device->physical_device->rad_info.chip_class >= CIK)
2932 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2933 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2934 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2935 } else {
2936 /* This must be set for fast clear to work without FMASK. */
2937 if (device->physical_device->rad_info.chip_class >= CIK)
2938 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2939 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2940 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2941 }
2942 }
2943
2944 /* CMASK variables */
2945 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2946 va += iview->image->cmask.offset;
2947 cb->cb_color_cmask = va >> 8;
2948
2949 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
2950 va += iview->image->dcc_offset;
2951 cb->cb_dcc_base = va >> 8;
2952 cb->cb_dcc_base |= iview->image->surface.tile_swizzle;
2953
2954 uint32_t max_slice = radv_surface_layer_count(iview);
2955 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2956 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2957
2958 if (iview->image->info.samples > 1) {
2959 unsigned log_samples = util_logbase2(iview->image->info.samples);
2960
2961 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2962 S_028C74_NUM_FRAGMENTS(log_samples);
2963 }
2964
2965 if (iview->image->fmask.size) {
2966 va = radv_buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2967 cb->cb_color_fmask = va >> 8;
2968 cb->cb_color_fmask |= iview->image->fmask.tile_swizzle;
2969 } else {
2970 cb->cb_color_fmask = cb->cb_color_base;
2971 }
2972
2973 ntype = radv_translate_color_numformat(iview->vk_format,
2974 desc,
2975 vk_format_get_first_non_void_channel(iview->vk_format));
2976 format = radv_translate_colorformat(iview->vk_format);
2977 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2978 radv_finishme("Illegal color\n");
2979 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2980 endian = radv_colorformat_endian_swap(format);
2981
2982 /* blend clamp should be set for all NORM/SRGB types */
2983 if (ntype == V_028C70_NUMBER_UNORM ||
2984 ntype == V_028C70_NUMBER_SNORM ||
2985 ntype == V_028C70_NUMBER_SRGB)
2986 blend_clamp = 1;
2987
2988 /* set blend bypass according to docs if SINT/UINT or
2989 8/24 COLOR variants */
2990 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2991 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2992 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2993 blend_clamp = 0;
2994 blend_bypass = 1;
2995 }
2996 #if 0
2997 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2998 (format == V_028C70_COLOR_8 ||
2999 format == V_028C70_COLOR_8_8 ||
3000 format == V_028C70_COLOR_8_8_8_8))
3001 ->color_is_int8 = true;
3002 #endif
3003 cb->cb_color_info = S_028C70_FORMAT(format) |
3004 S_028C70_COMP_SWAP(swap) |
3005 S_028C70_BLEND_CLAMP(blend_clamp) |
3006 S_028C70_BLEND_BYPASS(blend_bypass) |
3007 S_028C70_SIMPLE_FLOAT(1) |
3008 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
3009 ntype != V_028C70_NUMBER_SNORM &&
3010 ntype != V_028C70_NUMBER_SRGB &&
3011 format != V_028C70_COLOR_8_24 &&
3012 format != V_028C70_COLOR_24_8) |
3013 S_028C70_NUMBER_TYPE(ntype) |
3014 S_028C70_ENDIAN(endian);
3015 if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
3016 cb->cb_color_info |= S_028C70_COMPRESSION(1);
3017 if (device->physical_device->rad_info.chip_class == SI) {
3018 unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
3019 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
3020 }
3021 }
3022
3023 if (iview->image->cmask.size &&
3024 !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
3025 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
3026
3027 if (radv_vi_dcc_enabled(iview->image, iview->base_mip))
3028 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
3029
3030 if (device->physical_device->rad_info.chip_class >= VI) {
3031 unsigned max_uncompressed_block_size = 2;
3032 if (iview->image->info.samples > 1) {
3033 if (iview->image->surface.bpe == 1)
3034 max_uncompressed_block_size = 0;
3035 else if (iview->image->surface.bpe == 2)
3036 max_uncompressed_block_size = 1;
3037 }
3038
3039 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
3040 S_028C78_INDEPENDENT_64B_BLOCKS(1);
3041 }
3042
3043 /* This must be set for fast clear to work without FMASK. */
3044 if (!iview->image->fmask.size &&
3045 device->physical_device->rad_info.chip_class == SI) {
3046 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
3047 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
3048 }
3049
3050 if (device->physical_device->rad_info.chip_class >= GFX9) {
3051 unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
3052 (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
3053
3054 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
3055 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
3056 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
3057 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
3058 S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
3059 S_028C68_MAX_MIP(iview->image->info.levels - 1);
3060
3061 cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3062
3063 }
3064 }
3065
3066 static void
3067 radv_initialise_ds_surface(struct radv_device *device,
3068 struct radv_ds_buffer_info *ds,
3069 struct radv_image_view *iview)
3070 {
3071 unsigned level = iview->base_mip;
3072 unsigned format, stencil_format;
3073 uint64_t va, s_offs, z_offs;
3074 bool stencil_only = false;
3075 memset(ds, 0, sizeof(*ds));
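	/* The polygon-offset unit scale depends on the depth format's
	 * precision; float depth additionally sets the DB float-format bit. */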
3076 switch (iview->image->vk_format) {
3077 case VK_FORMAT_D24_UNORM_S8_UINT:
3078 case VK_FORMAT_X8_D24_UNORM_PACK32:
3079 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
3080 ds->offset_scale = 2.0f;
3081 break;
3082 case VK_FORMAT_D16_UNORM:
3083 case VK_FORMAT_D16_UNORM_S8_UINT:
3084 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
3085 ds->offset_scale = 4.0f;
3086 break;
3087 case VK_FORMAT_D32_SFLOAT:
3088 case VK_FORMAT_D32_SFLOAT_S8_UINT:
3089 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
3090 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
3091 ds->offset_scale = 1.0f;
3092 break;
3093 case VK_FORMAT_S8_UINT:
3094 stencil_only = true;
3095 break;
3096 default:
3097 break;
3098 }
3099
3100 format = radv_translate_dbformat(iview->image->vk_format);
3101 stencil_format = iview->image->surface.has_stencil ?
3102 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
3103
3104 uint32_t max_slice = radv_surface_layer_count(iview);
3105 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
3106 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
3107
3108 ds->db_htile_data_base = 0;
3109 ds->db_htile_surface = 0;
3110
3111 va = radv_buffer_get_va(iview->bo) + iview->image->offset;
3112 s_offs = z_offs = va;
3113
3114 if (device->physical_device->rad_info.chip_class >= GFX9) {
3115 assert(iview->image->surface.u.gfx9.surf_offset == 0);
3116 s_offs += iview->image->surface.u.gfx9.stencil_offset;
3117
3118 ds->db_z_info = S_028038_FORMAT(format) |
3119 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
3120 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
3121 S_028038_MAXMIP(iview->image->info.levels - 1);
3122 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
3123 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
3124
3125 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
3126 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
3127 ds->db_depth_view |= S_028008_MIPID(level);
3128
3129 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
3130 S_02801C_Y_MAX(iview->image->info.height - 1);
3131
3132 if (radv_htile_enabled(iview->image, level)) {
3133 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
3134
3135 if (iview->image->tc_compatible_htile) {
3136 unsigned max_zplanes = 4;
3137
3138 if (iview->vk_format == VK_FORMAT_D16_UNORM &&
3139 iview->image->info.samples > 1)
3140 max_zplanes = 2;
3141
3142 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1) |
3143 S_028038_ITERATE_FLUSH(1);
3144 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
3145 }
3146
3147 if (!iview->image->surface.has_stencil)
3148 /* Use all of the htile_buffer for depth if there's no stencil. */
3149 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
3150 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3151 iview->image->htile_offset;
3152 ds->db_htile_data_base = va >> 8;
3153 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
3154 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
3155 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
3156 }
3157 } else {
3158 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
3159
3160 if (stencil_only)
3161 level_info = &iview->image->surface.u.legacy.stencil_level[level];
3162
3163 z_offs += iview->image->surface.u.legacy.level[level].offset;
3164 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
3165
3166 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!iview->image->tc_compatible_htile);
3167 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
3168 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
3169
3170 if (iview->image->info.samples > 1)
3171 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
3172
3173 if (device->physical_device->rad_info.chip_class >= CIK) {
3174 struct radeon_info *info = &device->physical_device->rad_info;
3175 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3176 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3177 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3178 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3179 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3180 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3181
3182 if (stencil_only)
3183 tile_mode = stencil_tile_mode;
3184
3185 ds->db_depth_info |=
3186 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3187 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3188 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3189 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3190 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3191 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3192 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3193 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3194 } else {
3195 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3196 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3197 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3198 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3199 if (stencil_only)
3200 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3201 }
3202
3203 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3204 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3205 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3206
3207 if (radv_htile_enabled(iview->image, level)) {
3208 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3209
3210 if (!iview->image->surface.has_stencil &&
3211 !iview->image->tc_compatible_htile)
3212 /* Use all of the htile_buffer for depth if there's no stencil. */
3213 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3214
3215 va = radv_buffer_get_va(iview->bo) + iview->image->offset +
3216 iview->image->htile_offset;
3217 ds->db_htile_data_base = va >> 8;
3218 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3219
3220 if (iview->image->tc_compatible_htile) {
3221 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
3222
3223 if (iview->image->info.samples <= 1)
3224 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
3225 else if (iview->image->info.samples <= 4)
3226 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
3227 else
3228 				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
3229 }
3230 }
3231 }
3232
3233 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3234 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3235 }
3236
3237 VkResult radv_CreateFramebuffer(
3238 VkDevice _device,
3239 const VkFramebufferCreateInfo* pCreateInfo,
3240 const VkAllocationCallbacks* pAllocator,
3241 VkFramebuffer* pFramebuffer)
3242 {
3243 RADV_FROM_HANDLE(radv_device, device, _device);
3244 struct radv_framebuffer *framebuffer;
3245
3246 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3247
3248 size_t size = sizeof(*framebuffer) +
3249 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3250 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3251 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3252 if (framebuffer == NULL)
3253 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3254
3255 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3256 framebuffer->width = pCreateInfo->width;
3257 framebuffer->height = pCreateInfo->height;
3258 framebuffer->layers = pCreateInfo->layers;
3259 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3260 VkImageView _iview = pCreateInfo->pAttachments[i];
3261 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3262 framebuffer->attachments[i].attachment = iview;
3263 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3264 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3265 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3266 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3267 }
3268 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3269 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3270 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3271 }
3272
3273 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3274 return VK_SUCCESS;
3275 }
3276
3277 void radv_DestroyFramebuffer(
3278 VkDevice _device,
3279 VkFramebuffer _fb,
3280 const VkAllocationCallbacks* pAllocator)
3281 {
3282 RADV_FROM_HANDLE(radv_device, device, _device);
3283 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3284
3285 if (!fb)
3286 return;
3287 vk_free2(&device->alloc, pAllocator, fb);
3288 }
3289
3290 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3291 {
3292 switch (address_mode) {
3293 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3294 return V_008F30_SQ_TEX_WRAP;
3295 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3296 return V_008F30_SQ_TEX_MIRROR;
3297 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3298 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3299 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3300 return V_008F30_SQ_TEX_CLAMP_BORDER;
3301 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3302 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3303 default:
3304 unreachable("illegal tex wrap mode");
3305 break;
3306 }
3307 }
3308
3309 static unsigned
3310 radv_tex_compare(VkCompareOp op)
3311 {
3312 switch (op) {
3313 case VK_COMPARE_OP_NEVER:
3314 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3315 case VK_COMPARE_OP_LESS:
3316 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3317 case VK_COMPARE_OP_EQUAL:
3318 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3319 case VK_COMPARE_OP_LESS_OR_EQUAL:
3320 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3321 case VK_COMPARE_OP_GREATER:
3322 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3323 case VK_COMPARE_OP_NOT_EQUAL:
3324 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3325 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3326 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3327 case VK_COMPARE_OP_ALWAYS:
3328 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3329 default:
3330 unreachable("illegal compare mode");
3331 break;
3332 }
3333 }
3334
3335 static unsigned
3336 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3337 {
3338 switch (filter) {
3339 case VK_FILTER_NEAREST:
3340 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3341 V_008F38_SQ_TEX_XY_FILTER_POINT);
3342 case VK_FILTER_LINEAR:
3343 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3344 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3345 case VK_FILTER_CUBIC_IMG:
3346 default:
3347 fprintf(stderr, "illegal texture filter\n");
3348 return 0;
3349 }
3350 }
3351
3352 static unsigned
3353 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3354 {
3355 switch (mode) {
3356 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3357 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3358 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3359 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3360 default:
3361 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3362 }
3363 }
3364
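/* Border colors map onto the fixed hardware palette; unrecognized
 * values fall through to 0, which (assuming the usual sid.h encoding)
 * is the same value as SQ_TEX_BORDER_COLOR_TRANS_BLACK.
 */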
3365 static unsigned
3366 radv_tex_bordercolor(VkBorderColor bcolor)
3367 {
3368 switch (bcolor) {
3369 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3370 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3371 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3372 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3373 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3374 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3375 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3376 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3377 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3378 default:
3379 break;
3380 }
3381 return 0;
3382 }
3383
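/* Map a maximum anisotropy value onto the log2-encoded ratio field:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x and above -> 4.
 */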
3384 static unsigned
3385 radv_tex_aniso_filter(unsigned filter)
3386 {
3387 if (filter < 2)
3388 return 0;
3389 if (filter < 4)
3390 return 1;
3391 if (filter < 8)
3392 return 2;
3393 if (filter < 16)
3394 return 3;
3395 return 4;
3396 }
3397
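/* Pack the four sampler state dwords. LOD values are encoded as
 * unsigned 4.8 fixed point via S_FIXED(x, 8); assuming the usual
 * S_FIXED(value, frac) == value * (1 << frac) definition, e.g.
 * S_FIXED(CLAMP(3.5, 0, 15), 8) == 3.5 * 256 == 0x380.
 */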
3398 static void
3399 radv_init_sampler(struct radv_device *device,
3400 struct radv_sampler *sampler,
3401 const VkSamplerCreateInfo *pCreateInfo)
3402 {
3403 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3404 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3405 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3406 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3407
3408 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3409 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3410 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3411 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3412 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3413 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3414 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3415 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3416 S_008F30_DISABLE_CUBE_WRAP(0) |
3417 S_008F30_COMPAT_MODE(is_vi));
3418 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3419 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3420 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3421 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3422 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3423 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3424 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3425 S_008F38_MIP_POINT_PRECLAMP(0) |
3426 S_008F38_DISABLE_LSB_CEIL(1) |
3427 S_008F38_FILTER_PREC_FIX(1) |
3428 S_008F38_ANISO_OVERRIDE(is_vi));
3429 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3430 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3431 }
3432
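/* A minimal sketch of how an application reaches the entrypoint below
 * (application-side code, not part of the driver):
 *
 *   VkSamplerCreateInfo info = {
 *           .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *           .magFilter = VK_FILTER_LINEAR,
 *           .minFilter = VK_FILTER_LINEAR,
 *           .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
 *           .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *           .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *           .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *           .maxLod = 15.0f,
 *   };
 *   VkSampler sampler;
 *   vkCreateSampler(device, &info, NULL, &sampler);
 */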
3433 VkResult radv_CreateSampler(
3434 VkDevice _device,
3435 const VkSamplerCreateInfo* pCreateInfo,
3436 const VkAllocationCallbacks* pAllocator,
3437 VkSampler* pSampler)
3438 {
3439 RADV_FROM_HANDLE(radv_device, device, _device);
3440 struct radv_sampler *sampler;
3441
3442 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3443
3444 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3445 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3446 if (!sampler)
3447 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3448
3449 radv_init_sampler(device, sampler, pCreateInfo);
3450 *pSampler = radv_sampler_to_handle(sampler);
3451
3452 return VK_SUCCESS;
3453 }
3454
3455 void radv_DestroySampler(
3456 VkDevice _device,
3457 VkSampler _sampler,
3458 const VkAllocationCallbacks* pAllocator)
3459 {
3460 RADV_FROM_HANDLE(radv_device, device, _device);
3461 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3462
3463 if (!sampler)
3464 return;
3465 vk_free2(&device->alloc, pAllocator, sampler);
3466 }
3467
3468 /* vk_icd.h does not declare this function, so we declare it here to
3469 * suppress Wmissing-prototypes.
3470 */
3471 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3472 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3473
3474 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3475 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3476 {
3477 /* For the full details on loader interface versioning, see
3478 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3479 * What follows is a condensed summary, to help you navigate the large and
3480 * confusing official doc.
3481 *
3482 * - Loader interface v0 is incompatible with later versions. We don't
3483 * support it.
3484 *
3485 * - In loader interface v1:
3486 * - The first ICD entrypoint called by the loader is
3487 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3488 * entrypoint.
3489 * - The ICD must statically expose no other Vulkan symbol unless it is
3490 * linked with -Bsymbolic.
3491 * - Each dispatchable Vulkan handle created by the ICD must be
3492 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3493 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3494 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3495 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3496 * such loader-managed surfaces.
3497 *
3498 * - Loader interface v2 differs from v1 in:
3499 * - The first ICD entrypoint called by the loader is
3500 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3501 * statically expose this entrypoint.
3502 *
3503 * - Loader interface v3 differs from v2 in:
3504 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3505 * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
3506 * because the loader no longer does so.
3507 */
3508 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3509 return VK_SUCCESS;
3510 }
3511
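/* Export a memory object's underlying buffer as a file descriptor via
 * the winsys; per the assert below, the only handle type supported is
 * VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR.
 */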
3512 VkResult radv_GetMemoryFdKHR(VkDevice _device,
3513 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3514 int *pFD)
3515 {
3516 RADV_FROM_HANDLE(radv_device, device, _device);
3517 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
3518
3519 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3520
3521 /* We support only one handle type. */
3522 assert(pGetFdInfo->handleType ==
3523 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3524
3525 bool ret = radv_get_memory_fd(device, memory, pFD);
3526 if (!ret)
3527 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3528 return VK_SUCCESS;
3529 }
3530
3531 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
3532 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
3533 int fd,
3534 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3535 {
3536 /* The valid usage section for this function says:
3537 *
3538 * "handleType must not be one of the handle types defined as opaque."
3539 *
3540 * Since we only handle opaque handles for now, there are no FD properties.
3541 */
3542 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3543 }
3544
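/* Importing with VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR replaces the
 * semaphore's temporary payload (temp_syncobj); otherwise the permanent
 * syncobj is replaced. Any syncobj previously held in the chosen slot
 * is destroyed first, and the imported fd is consumed on success.
 */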
3545 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
3546 const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
3547 {
3548 RADV_FROM_HANDLE(radv_device, device, _device);
3549 RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
3550 uint32_t syncobj_handle = 0;
3551 uint32_t *syncobj_dst = NULL;
3552 assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3553
3554 int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
3555 if (ret != 0)
3556 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3557
3558 if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
3559 syncobj_dst = &sem->temp_syncobj;
3560 } else {
3561 syncobj_dst = &sem->syncobj;
3562 }
3563
3564 if (*syncobj_dst)
3565 device->ws->destroy_syncobj(device->ws, *syncobj_dst);
3566
3567 *syncobj_dst = syncobj_handle;
3568 close(pImportSemaphoreFdInfo->fd);
3569 return VK_SUCCESS;
3570 }
3571
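/* Export prefers the temporary payload when one is present, matching
 * the precedence the external-semaphore spec gives temporary imports.
 */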
3572 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
3573 const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
3574 int *pFd)
3575 {
3576 RADV_FROM_HANDLE(radv_device, device, _device);
3577 RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
3578 int ret;
3579 uint32_t syncobj_handle;
3580
3581 assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
3582 if (sem->temp_syncobj)
3583 syncobj_handle = sem->temp_syncobj;
3584 else
3585 syncobj_handle = sem->syncobj;
3586 ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
3587 if (ret)
3588 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
3589 return VK_SUCCESS;
3590 }
3591
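/* Only opaque fd handles are advertised; they are both importable and
 * exportable, including export from a previously imported handle.
 */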
3592 void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
3593 VkPhysicalDevice physicalDevice,
3594 const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
3595 VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
3596 {
3597 if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
3598 pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3599 pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
3600 pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
3601 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
3602 } else {
3603 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
3604 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
3605 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
3606 }
3607 }