radv/vulkan: Move radv_get_driver_version to src/vulkan/util
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "gfx9d.h"
46 #include "util/debug.h"
47
48 static int
49 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
50 {
51 uint32_t mesa_timestamp, llvm_timestamp;
52 uint16_t f = family;
53 memset(uuid, 0, VK_UUID_SIZE);
54 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
55 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
56 return -1;
57
58 memcpy(uuid, &mesa_timestamp, 4);
59 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
60 memcpy((char*)uuid + 8, &f, 2);
61 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
62 return 0;
63 }
64
65 static void
66 radv_get_device_uuid(drmDevicePtr device, void *uuid) {
67 memset(uuid, 0, VK_UUID_SIZE);
68 memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
69 memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
70 memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
71 memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
72 }
73
74 static const VkExtensionProperties instance_extensions[] = {
75 {
76 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
77 .specVersion = 25,
78 },
79 #ifdef VK_USE_PLATFORM_XCB_KHR
80 {
81 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
82 .specVersion = 6,
83 },
84 #endif
85 #ifdef VK_USE_PLATFORM_XLIB_KHR
86 {
87 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
88 .specVersion = 6,
89 },
90 #endif
91 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
92 {
93 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
94 .specVersion = 5,
95 },
96 #endif
97 {
98 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
99 .specVersion = 1,
100 },
101 {
102 .extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
103 .specVersion = 1,
104 },
105 };
106
107 static const VkExtensionProperties common_device_extensions[] = {
108 {
109 .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
110 .specVersion = 1,
111 },
112 {
113 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
114 .specVersion = 1,
115 },
116 {
117 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
118 .specVersion = 1,
119 },
120 {
121 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
122 .specVersion = 1,
123 },
124 {
125 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
126 .specVersion = 1,
127 },
128 {
129 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
130 .specVersion = 68,
131 },
132 {
133 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
134 .specVersion = 1,
135 },
136 {
137 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
138 .specVersion = 1,
139 },
140 {
141 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
142 .specVersion = 1,
143 },
144 {
145 .extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
146 .specVersion = 1,
147 },
148 {
149 .extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
150 .specVersion = 1,
151 },
152 };
153
154 static VkResult
155 radv_extensions_register(struct radv_instance *instance,
156 struct radv_extensions *extensions,
157 const VkExtensionProperties *new_ext,
158 uint32_t num_ext)
159 {
160 size_t new_size;
161 VkExtensionProperties *new_ptr;
162
163 assert(new_ext && num_ext > 0);
164
165 if (!new_ext)
166 return VK_ERROR_INITIALIZATION_FAILED;
167
168 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
169 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
170 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
171
172 /* Old array continues to be valid, update nothing */
173 if (!new_ptr)
174 return VK_ERROR_OUT_OF_HOST_MEMORY;
175
176 memcpy(&new_ptr[extensions->num_ext], new_ext,
177 num_ext * sizeof(VkExtensionProperties));
178 extensions->ext_array = new_ptr;
179 extensions->num_ext += num_ext;
180
181 return VK_SUCCESS;
182 }
183
184 static void
185 radv_extensions_finish(struct radv_instance *instance,
186 struct radv_extensions *extensions)
187 {
188 assert(extensions);
189
190 if (!extensions)
191 radv_loge("Attemted to free invalid extension struct\n");
192
193 if (extensions->ext_array)
194 vk_free(&instance->alloc, extensions->ext_array);
195 }
196
197 static bool
198 is_extension_enabled(const VkExtensionProperties *extensions,
199 size_t num_ext,
200 const char *name)
201 {
202 assert(extensions && name);
203
204 for (uint32_t i = 0; i < num_ext; i++) {
205 if (strcmp(name, extensions[i].extensionName) == 0)
206 return true;
207 }
208
209 return false;
210 }
211
212 static const char *
213 get_chip_name(enum radeon_family family)
214 {
215 switch (family) {
216 case CHIP_TAHITI: return "AMD RADV TAHITI";
217 case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
218 case CHIP_VERDE: return "AMD RADV CAPE VERDE";
219 case CHIP_OLAND: return "AMD RADV OLAND";
220 case CHIP_HAINAN: return "AMD RADV HAINAN";
221 case CHIP_BONAIRE: return "AMD RADV BONAIRE";
222 case CHIP_KAVERI: return "AMD RADV KAVERI";
223 case CHIP_KABINI: return "AMD RADV KABINI";
224 case CHIP_HAWAII: return "AMD RADV HAWAII";
225 case CHIP_MULLINS: return "AMD RADV MULLINS";
226 case CHIP_TONGA: return "AMD RADV TONGA";
227 case CHIP_ICELAND: return "AMD RADV ICELAND";
228 case CHIP_CARRIZO: return "AMD RADV CARRIZO";
229 case CHIP_FIJI: return "AMD RADV FIJI";
230 case CHIP_POLARIS10: return "AMD RADV POLARIS10";
231 case CHIP_POLARIS11: return "AMD RADV POLARIS11";
232 case CHIP_POLARIS12: return "AMD RADV POLARIS12";
233 case CHIP_STONEY: return "AMD RADV STONEY";
234 case CHIP_VEGA10: return "AMD RADV VEGA";
235 case CHIP_RAVEN: return "AMD RADV RAVEN";
236 default: return "AMD RADV unknown";
237 }
238 }
239
240 static VkResult
241 radv_physical_device_init(struct radv_physical_device *device,
242 struct radv_instance *instance,
243 drmDevicePtr drm_device)
244 {
245 const char *path = drm_device->nodes[DRM_NODE_RENDER];
246 VkResult result;
247 drmVersionPtr version;
248 int fd;
249
250 fd = open(path, O_RDWR | O_CLOEXEC);
251 if (fd < 0)
252 return VK_ERROR_INCOMPATIBLE_DRIVER;
253
254 version = drmGetVersion(fd);
255 if (!version) {
256 close(fd);
257 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
258 "failed to get version %s: %m", path);
259 }
260
261 if (strcmp(version->name, "amdgpu")) {
262 drmFreeVersion(version);
263 close(fd);
264 return VK_ERROR_INCOMPATIBLE_DRIVER;
265 }
266 drmFreeVersion(version);
267
268 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
269 device->instance = instance;
270 assert(strlen(path) < ARRAY_SIZE(device->path));
271 strncpy(device->path, path, ARRAY_SIZE(device->path));
272
273 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
274 if (!device->ws) {
275 result = VK_ERROR_INCOMPATIBLE_DRIVER;
276 goto fail;
277 }
278
279 device->local_fd = fd;
280 device->ws->query_info(device->ws, &device->rad_info);
281 result = radv_init_wsi(device);
282 if (result != VK_SUCCESS) {
283 device->ws->destroy(device->ws);
284 goto fail;
285 }
286
287 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
288 radv_finish_wsi(device);
289 device->ws->destroy(device->ws);
290 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
291 "cannot generate UUID");
292 goto fail;
293 }
294
295 result = radv_extensions_register(instance,
296 &device->extensions,
297 common_device_extensions,
298 ARRAY_SIZE(common_device_extensions));
299 if (result != VK_SUCCESS)
300 goto fail;
301
302 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
303 device->name = get_chip_name(device->rad_info.family);
304
305 radv_get_device_uuid(drm_device, device->device_uuid);
306
307 if (device->rad_info.family == CHIP_STONEY ||
308 device->rad_info.chip_class >= GFX9) {
309 device->has_rbplus = true;
310 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
311 }
312
313 return VK_SUCCESS;
314
315 fail:
316 close(fd);
317 return result;
318 }
319
320 static void
321 radv_physical_device_finish(struct radv_physical_device *device)
322 {
323 radv_extensions_finish(device->instance, &device->extensions);
324 radv_finish_wsi(device);
325 device->ws->destroy(device->ws);
326 close(device->local_fd);
327 }
328
329 static void *
330 default_alloc_func(void *pUserData, size_t size, size_t align,
331 VkSystemAllocationScope allocationScope)
332 {
333 return malloc(size);
334 }
335
336 static void *
337 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
338 size_t align, VkSystemAllocationScope allocationScope)
339 {
340 return realloc(pOriginal, size);
341 }
342
/* Default pfnFree callback: hands pMemory to free(); user data is ignored. */
static void
default_free_func(void *pUserData, void *pMemory)
{
	void *victim = pMemory;

	free(victim);
}
348
349 static const VkAllocationCallbacks default_alloc = {
350 .pUserData = NULL,
351 .pfnAllocation = default_alloc_func,
352 .pfnReallocation = default_realloc_func,
353 .pfnFree = default_free_func,
354 };
355
356 static const struct debug_control radv_debug_options[] = {
357 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
358 {"nodcc", RADV_DEBUG_NO_DCC},
359 {"shaders", RADV_DEBUG_DUMP_SHADERS},
360 {"nocache", RADV_DEBUG_NO_CACHE},
361 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
362 {"nohiz", RADV_DEBUG_NO_HIZ},
363 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
364 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
365 {"allbos", RADV_DEBUG_ALL_BOS},
366 {"noibs", RADV_DEBUG_NO_IBS},
367 {NULL, 0}
368 };
369
370 VkResult radv_CreateInstance(
371 const VkInstanceCreateInfo* pCreateInfo,
372 const VkAllocationCallbacks* pAllocator,
373 VkInstance* pInstance)
374 {
375 struct radv_instance *instance;
376
377 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
378
379 uint32_t client_version;
380 if (pCreateInfo->pApplicationInfo &&
381 pCreateInfo->pApplicationInfo->apiVersion != 0) {
382 client_version = pCreateInfo->pApplicationInfo->apiVersion;
383 } else {
384 client_version = VK_MAKE_VERSION(1, 0, 0);
385 }
386
387 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
388 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
389 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
390 "Client requested version %d.%d.%d",
391 VK_VERSION_MAJOR(client_version),
392 VK_VERSION_MINOR(client_version),
393 VK_VERSION_PATCH(client_version));
394 }
395
396 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
397 if (!is_extension_enabled(instance_extensions,
398 ARRAY_SIZE(instance_extensions),
399 pCreateInfo->ppEnabledExtensionNames[i]))
400 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
401 }
402
403 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
404 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
405 if (!instance)
406 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
407
408 memset(instance, 0, sizeof(*instance));
409
410 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
411
412 if (pAllocator)
413 instance->alloc = *pAllocator;
414 else
415 instance->alloc = default_alloc;
416
417 instance->apiVersion = client_version;
418 instance->physicalDeviceCount = -1;
419
420 _mesa_locale_init();
421
422 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
423
424 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
425 radv_debug_options);
426
427 *pInstance = radv_instance_to_handle(instance);
428
429 return VK_SUCCESS;
430 }
431
432 void radv_DestroyInstance(
433 VkInstance _instance,
434 const VkAllocationCallbacks* pAllocator)
435 {
436 RADV_FROM_HANDLE(radv_instance, instance, _instance);
437
438 if (!instance)
439 return;
440
441 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
442 radv_physical_device_finish(instance->physicalDevices + i);
443 }
444
445 VG(VALGRIND_DESTROY_MEMPOOL(instance));
446
447 _mesa_locale_fini();
448
449 vk_free(&instance->alloc, instance);
450 }
451
452 static VkResult
453 radv_enumerate_devices(struct radv_instance *instance)
454 {
455 /* TODO: Check for more devices ? */
456 drmDevicePtr devices[8];
457 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
458 int max_devices;
459
460 instance->physicalDeviceCount = 0;
461
462 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
463 if (max_devices < 1)
464 return VK_ERROR_INCOMPATIBLE_DRIVER;
465
466 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
467 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
468 devices[i]->bustype == DRM_BUS_PCI &&
469 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
470
471 result = radv_physical_device_init(instance->physicalDevices +
472 instance->physicalDeviceCount,
473 instance,
474 devices[i]);
475 if (result == VK_SUCCESS)
476 ++instance->physicalDeviceCount;
477 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
478 break;
479 }
480 }
481 drmFreeDevices(devices, max_devices);
482
483 return result;
484 }
485
486 VkResult radv_EnumeratePhysicalDevices(
487 VkInstance _instance,
488 uint32_t* pPhysicalDeviceCount,
489 VkPhysicalDevice* pPhysicalDevices)
490 {
491 RADV_FROM_HANDLE(radv_instance, instance, _instance);
492 VkResult result;
493
494 if (instance->physicalDeviceCount < 0) {
495 result = radv_enumerate_devices(instance);
496 if (result != VK_SUCCESS &&
497 result != VK_ERROR_INCOMPATIBLE_DRIVER)
498 return result;
499 }
500
501 if (!pPhysicalDevices) {
502 *pPhysicalDeviceCount = instance->physicalDeviceCount;
503 } else {
504 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
505 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
506 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
507 }
508
509 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
510 : VK_SUCCESS;
511 }
512
513 void radv_GetPhysicalDeviceFeatures(
514 VkPhysicalDevice physicalDevice,
515 VkPhysicalDeviceFeatures* pFeatures)
516 {
517 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
518 bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
519 memset(pFeatures, 0, sizeof(*pFeatures));
520
521 *pFeatures = (VkPhysicalDeviceFeatures) {
522 .robustBufferAccess = true,
523 .fullDrawIndexUint32 = true,
524 .imageCubeArray = true,
525 .independentBlend = true,
526 .geometryShader = !is_gfx9,
527 .tessellationShader = !is_gfx9,
528 .sampleRateShading = false,
529 .dualSrcBlend = true,
530 .logicOp = true,
531 .multiDrawIndirect = true,
532 .drawIndirectFirstInstance = true,
533 .depthClamp = true,
534 .depthBiasClamp = true,
535 .fillModeNonSolid = true,
536 .depthBounds = true,
537 .wideLines = true,
538 .largePoints = true,
539 .alphaToOne = true,
540 .multiViewport = true,
541 .samplerAnisotropy = true,
542 .textureCompressionETC2 = false,
543 .textureCompressionASTC_LDR = false,
544 .textureCompressionBC = true,
545 .occlusionQueryPrecise = true,
546 .pipelineStatisticsQuery = true,
547 .vertexPipelineStoresAndAtomics = true,
548 .fragmentStoresAndAtomics = true,
549 .shaderTessellationAndGeometryPointSize = true,
550 .shaderImageGatherExtended = true,
551 .shaderStorageImageExtendedFormats = true,
552 .shaderStorageImageMultisample = false,
553 .shaderUniformBufferArrayDynamicIndexing = true,
554 .shaderSampledImageArrayDynamicIndexing = true,
555 .shaderStorageBufferArrayDynamicIndexing = true,
556 .shaderStorageImageArrayDynamicIndexing = true,
557 .shaderStorageImageReadWithoutFormat = true,
558 .shaderStorageImageWriteWithoutFormat = true,
559 .shaderClipDistance = true,
560 .shaderCullDistance = true,
561 .shaderFloat64 = true,
562 .shaderInt64 = false,
563 .shaderInt16 = false,
564 .sparseBinding = true,
565 .variableMultisampleRate = true,
566 .inheritedQueries = true,
567 };
568 }
569
570 void radv_GetPhysicalDeviceFeatures2KHR(
571 VkPhysicalDevice physicalDevice,
572 VkPhysicalDeviceFeatures2KHR *pFeatures)
573 {
574 return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
575 }
576
577 void radv_GetPhysicalDeviceProperties(
578 VkPhysicalDevice physicalDevice,
579 VkPhysicalDeviceProperties* pProperties)
580 {
581 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
582 VkSampleCountFlags sample_counts = 0xf;
583
584 /* make sure that the entire descriptor set is addressable with a signed
585 * 32-bit int. So the sum of all limits scaled by descriptor size has to
586 * be at most 2 GiB. the combined image & samples object count as one of
587 * both. This limit is for the pipeline layout, not for the set layout, but
588 * there is no set limit, so we just set a pipeline limit. I don't think
589 * any app is going to hit this soon. */
590 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
591 (32 /* uniform buffer, 32 due to potential space wasted on alignement */ +
592 32 /* storage buffer, 32 due to potential space wasted on alignement */ +
593 32 /* sampler, largest when combined with image */ +
594 64 /* sampled image */ +
595 64 /* storage image */);
596
597 VkPhysicalDeviceLimits limits = {
598 .maxImageDimension1D = (1 << 14),
599 .maxImageDimension2D = (1 << 14),
600 .maxImageDimension3D = (1 << 11),
601 .maxImageDimensionCube = (1 << 14),
602 .maxImageArrayLayers = (1 << 11),
603 .maxTexelBufferElements = 128 * 1024 * 1024,
604 .maxUniformBufferRange = UINT32_MAX,
605 .maxStorageBufferRange = UINT32_MAX,
606 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
607 .maxMemoryAllocationCount = UINT32_MAX,
608 .maxSamplerAllocationCount = 64 * 1024,
609 .bufferImageGranularity = 64, /* A cache line */
610 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
611 .maxBoundDescriptorSets = MAX_SETS,
612 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
613 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
614 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
615 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
616 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
617 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
618 .maxPerStageResources = max_descriptor_set_size,
619 .maxDescriptorSetSamplers = max_descriptor_set_size,
620 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
621 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
622 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
623 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
624 .maxDescriptorSetSampledImages = max_descriptor_set_size,
625 .maxDescriptorSetStorageImages = max_descriptor_set_size,
626 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
627 .maxVertexInputAttributes = 32,
628 .maxVertexInputBindings = 32,
629 .maxVertexInputAttributeOffset = 2047,
630 .maxVertexInputBindingStride = 2048,
631 .maxVertexOutputComponents = 128,
632 .maxTessellationGenerationLevel = 64,
633 .maxTessellationPatchSize = 32,
634 .maxTessellationControlPerVertexInputComponents = 128,
635 .maxTessellationControlPerVertexOutputComponents = 128,
636 .maxTessellationControlPerPatchOutputComponents = 120,
637 .maxTessellationControlTotalOutputComponents = 4096,
638 .maxTessellationEvaluationInputComponents = 128,
639 .maxTessellationEvaluationOutputComponents = 128,
640 .maxGeometryShaderInvocations = 127,
641 .maxGeometryInputComponents = 64,
642 .maxGeometryOutputComponents = 128,
643 .maxGeometryOutputVertices = 256,
644 .maxGeometryTotalOutputComponents = 1024,
645 .maxFragmentInputComponents = 128,
646 .maxFragmentOutputAttachments = 8,
647 .maxFragmentDualSrcAttachments = 1,
648 .maxFragmentCombinedOutputResources = 8,
649 .maxComputeSharedMemorySize = 32768,
650 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
651 .maxComputeWorkGroupInvocations = 2048,
652 .maxComputeWorkGroupSize = {
653 2048,
654 2048,
655 2048
656 },
657 .subPixelPrecisionBits = 4 /* FIXME */,
658 .subTexelPrecisionBits = 4 /* FIXME */,
659 .mipmapPrecisionBits = 4 /* FIXME */,
660 .maxDrawIndexedIndexValue = UINT32_MAX,
661 .maxDrawIndirectCount = UINT32_MAX,
662 .maxSamplerLodBias = 16,
663 .maxSamplerAnisotropy = 16,
664 .maxViewports = MAX_VIEWPORTS,
665 .maxViewportDimensions = { (1 << 14), (1 << 14) },
666 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
667 .viewportSubPixelBits = 13, /* We take a float? */
668 .minMemoryMapAlignment = 4096, /* A page */
669 .minTexelBufferOffsetAlignment = 1,
670 .minUniformBufferOffsetAlignment = 4,
671 .minStorageBufferOffsetAlignment = 4,
672 .minTexelOffset = -32,
673 .maxTexelOffset = 31,
674 .minTexelGatherOffset = -32,
675 .maxTexelGatherOffset = 31,
676 .minInterpolationOffset = -2,
677 .maxInterpolationOffset = 2,
678 .subPixelInterpolationOffsetBits = 8,
679 .maxFramebufferWidth = (1 << 14),
680 .maxFramebufferHeight = (1 << 14),
681 .maxFramebufferLayers = (1 << 10),
682 .framebufferColorSampleCounts = sample_counts,
683 .framebufferDepthSampleCounts = sample_counts,
684 .framebufferStencilSampleCounts = sample_counts,
685 .framebufferNoAttachmentsSampleCounts = sample_counts,
686 .maxColorAttachments = MAX_RTS,
687 .sampledImageColorSampleCounts = sample_counts,
688 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
689 .sampledImageDepthSampleCounts = sample_counts,
690 .sampledImageStencilSampleCounts = sample_counts,
691 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
692 .maxSampleMaskWords = 1,
693 .timestampComputeAndGraphics = true,
694 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
695 .maxClipDistances = 8,
696 .maxCullDistances = 8,
697 .maxCombinedClipAndCullDistances = 8,
698 .discreteQueuePriorities = 1,
699 .pointSizeRange = { 0.125, 255.875 },
700 .lineWidthRange = { 0.0, 7.9921875 },
701 .pointSizeGranularity = (1.0 / 8.0),
702 .lineWidthGranularity = (1.0 / 128.0),
703 .strictLines = false, /* FINISHME */
704 .standardSampleLocations = true,
705 .optimalBufferCopyOffsetAlignment = 128,
706 .optimalBufferCopyRowPitchAlignment = 128,
707 .nonCoherentAtomSize = 64,
708 };
709
710 *pProperties = (VkPhysicalDeviceProperties) {
711 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
712 .driverVersion = vk_get_driver_version(),
713 .vendorID = 0x1002,
714 .deviceID = pdevice->rad_info.pci_id,
715 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
716 .limits = limits,
717 .sparseProperties = {0},
718 };
719
720 strcpy(pProperties->deviceName, pdevice->name);
721 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
722 }
723
724 void radv_GetPhysicalDeviceProperties2KHR(
725 VkPhysicalDevice physicalDevice,
726 VkPhysicalDeviceProperties2KHR *pProperties)
727 {
728 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
729 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
730
731 vk_foreach_struct(ext, pProperties->pNext) {
732 switch (ext->sType) {
733 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
734 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
735 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
736 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
737 break;
738 }
739 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
740 VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
741 radv_device_get_cache_uuid(0, properties->driverUUID);
742 memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
743 properties->deviceLUIDValid = false;
744 break;
745 }
746 default:
747 break;
748 }
749 }
750 }
751
752 static void radv_get_physical_device_queue_family_properties(
753 struct radv_physical_device* pdevice,
754 uint32_t* pCount,
755 VkQueueFamilyProperties** pQueueFamilyProperties)
756 {
757 int num_queue_families = 1;
758 int idx;
759 if (pdevice->rad_info.num_compute_rings > 0 &&
760 pdevice->rad_info.chip_class >= CIK &&
761 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
762 num_queue_families++;
763
764 if (pQueueFamilyProperties == NULL) {
765 *pCount = num_queue_families;
766 return;
767 }
768
769 if (!*pCount)
770 return;
771
772 idx = 0;
773 if (*pCount >= 1) {
774 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
775 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
776 VK_QUEUE_COMPUTE_BIT |
777 VK_QUEUE_TRANSFER_BIT |
778 VK_QUEUE_SPARSE_BINDING_BIT,
779 .queueCount = 1,
780 .timestampValidBits = 64,
781 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
782 };
783 idx++;
784 }
785
786 if (pdevice->rad_info.num_compute_rings > 0 &&
787 pdevice->rad_info.chip_class >= CIK &&
788 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
789 if (*pCount > idx) {
790 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
791 .queueFlags = VK_QUEUE_COMPUTE_BIT |
792 VK_QUEUE_TRANSFER_BIT |
793 VK_QUEUE_SPARSE_BINDING_BIT,
794 .queueCount = pdevice->rad_info.num_compute_rings,
795 .timestampValidBits = 64,
796 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
797 };
798 idx++;
799 }
800 }
801 *pCount = idx;
802 }
803
804 void radv_GetPhysicalDeviceQueueFamilyProperties(
805 VkPhysicalDevice physicalDevice,
806 uint32_t* pCount,
807 VkQueueFamilyProperties* pQueueFamilyProperties)
808 {
809 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
810 if (!pQueueFamilyProperties) {
811 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
812 return;
813 }
814 VkQueueFamilyProperties *properties[] = {
815 pQueueFamilyProperties + 0,
816 pQueueFamilyProperties + 1,
817 pQueueFamilyProperties + 2,
818 };
819 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
820 assert(*pCount <= 3);
821 }
822
823 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
824 VkPhysicalDevice physicalDevice,
825 uint32_t* pCount,
826 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
827 {
828 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
829 if (!pQueueFamilyProperties) {
830 return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
831 return;
832 }
833 VkQueueFamilyProperties *properties[] = {
834 &pQueueFamilyProperties[0].queueFamilyProperties,
835 &pQueueFamilyProperties[1].queueFamilyProperties,
836 &pQueueFamilyProperties[2].queueFamilyProperties,
837 };
838 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
839 assert(*pCount <= 3);
840 }
841
842 void radv_GetPhysicalDeviceMemoryProperties(
843 VkPhysicalDevice physicalDevice,
844 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
845 {
846 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
847
848 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
849
850 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
851 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
852 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
853 .heapIndex = RADV_MEM_HEAP_VRAM,
854 };
855 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
856 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
857 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
858 .heapIndex = RADV_MEM_HEAP_GTT,
859 };
860 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
861 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
862 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
863 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
864 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
865 };
866 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
867 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
868 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
869 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
870 .heapIndex = RADV_MEM_HEAP_GTT,
871 };
872
873 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
874
875 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
876 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
877 .size = physical_device->rad_info.vram_size -
878 physical_device->rad_info.vram_vis_size,
879 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
880 };
881 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
882 .size = physical_device->rad_info.vram_vis_size,
883 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
884 };
885 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
886 .size = physical_device->rad_info.gart_size,
887 .flags = 0,
888 };
889 }
890
891 void radv_GetPhysicalDeviceMemoryProperties2KHR(
892 VkPhysicalDevice physicalDevice,
893 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
894 {
895 return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
896 &pMemoryProperties->memoryProperties);
897 }
898
899 static int
900 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
901 int queue_family_index, int idx)
902 {
903 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
904 queue->device = device;
905 queue->queue_family_index = queue_family_index;
906 queue->queue_idx = idx;
907
908 queue->hw_ctx = device->ws->ctx_create(device->ws);
909 if (!queue->hw_ctx)
910 return VK_ERROR_OUT_OF_HOST_MEMORY;
911
912 return VK_SUCCESS;
913 }
914
915 static void
916 radv_queue_finish(struct radv_queue *queue)
917 {
918 if (queue->hw_ctx)
919 queue->device->ws->ctx_destroy(queue->hw_ctx);
920
921 if (queue->initial_preamble_cs)
922 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
923 if (queue->continue_preamble_cs)
924 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
925 if (queue->descriptor_bo)
926 queue->device->ws->buffer_destroy(queue->descriptor_bo);
927 if (queue->scratch_bo)
928 queue->device->ws->buffer_destroy(queue->scratch_bo);
929 if (queue->esgs_ring_bo)
930 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
931 if (queue->gsvs_ring_bo)
932 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
933 if (queue->tess_factor_ring_bo)
934 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
935 if (queue->tess_offchip_ring_bo)
936 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
937 if (queue->compute_scratch_bo)
938 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
939 }
940
941 static void
942 radv_device_init_gs_info(struct radv_device *device)
943 {
944 switch (device->physical_device->rad_info.family) {
945 case CHIP_OLAND:
946 case CHIP_HAINAN:
947 case CHIP_KAVERI:
948 case CHIP_KABINI:
949 case CHIP_MULLINS:
950 case CHIP_ICELAND:
951 case CHIP_CARRIZO:
952 case CHIP_STONEY:
953 device->gs_table_depth = 16;
954 return;
955 case CHIP_TAHITI:
956 case CHIP_PITCAIRN:
957 case CHIP_VERDE:
958 case CHIP_BONAIRE:
959 case CHIP_HAWAII:
960 case CHIP_TONGA:
961 case CHIP_FIJI:
962 case CHIP_POLARIS10:
963 case CHIP_POLARIS11:
964 case CHIP_POLARIS12:
965 case CHIP_VEGA10:
966 case CHIP_RAVEN:
967 device->gs_table_depth = 32;
968 return;
969 default:
970 unreachable("unknown GPU");
971 }
972 }
973
974 VkResult radv_CreateDevice(
975 VkPhysicalDevice physicalDevice,
976 const VkDeviceCreateInfo* pCreateInfo,
977 const VkAllocationCallbacks* pAllocator,
978 VkDevice* pDevice)
979 {
980 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
981 VkResult result;
982 struct radv_device *device;
983
984 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
985 if (!is_extension_enabled(physical_device->extensions.ext_array,
986 physical_device->extensions.num_ext,
987 pCreateInfo->ppEnabledExtensionNames[i]))
988 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
989 }
990
991 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
992 sizeof(*device), 8,
993 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
994 if (!device)
995 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
996
997 memset(device, 0, sizeof(*device));
998
999 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1000 device->instance = physical_device->instance;
1001 device->physical_device = physical_device;
1002
1003 device->debug_flags = device->instance->debug_flags;
1004
1005 device->ws = physical_device->ws;
1006 if (pAllocator)
1007 device->alloc = *pAllocator;
1008 else
1009 device->alloc = physical_device->instance->alloc;
1010
1011 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1012 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1013 uint32_t qfi = queue_create->queueFamilyIndex;
1014
1015 device->queues[qfi] = vk_alloc(&device->alloc,
1016 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1017 if (!device->queues[qfi]) {
1018 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1019 goto fail;
1020 }
1021
1022 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1023
1024 device->queue_count[qfi] = queue_create->queueCount;
1025
1026 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1027 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1028 if (result != VK_SUCCESS)
1029 goto fail;
1030 }
1031 }
1032
1033 #if HAVE_LLVM < 0x0400
1034 device->llvm_supports_spill = false;
1035 #else
1036 device->llvm_supports_spill = true;
1037 #endif
1038
1039 /* The maximum number of scratch waves. Scratch space isn't divided
1040 * evenly between CUs. The number is only a function of the number of CUs.
1041 * We can decrease the constant to decrease the scratch buffer size.
1042 *
1043 * sctx->scratch_waves must be >= the maximum posible size of
1044 * 1 threadgroup, so that the hw doesn't hang from being unable
1045 * to start any.
1046 *
1047 * The recommended value is 4 per CU at most. Higher numbers don't
1048 * bring much benefit, but they still occupy chip resources (think
1049 * async compute). I've seen ~2% performance difference between 4 and 32.
1050 */
1051 uint32_t max_threads_per_block = 2048;
1052 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1053 max_threads_per_block / 64);
1054
1055 radv_device_init_gs_info(device);
1056
1057 device->tess_offchip_block_dw_size =
1058 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1059 device->has_distributed_tess =
1060 device->physical_device->rad_info.chip_class >= VI &&
1061 device->physical_device->rad_info.max_se >= 2;
1062
1063 result = radv_device_init_meta(device);
1064 if (result != VK_SUCCESS)
1065 goto fail;
1066
1067 radv_device_init_msaa(device);
1068
1069 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1070 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1071 switch (family) {
1072 case RADV_QUEUE_GENERAL:
1073 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1074 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1075 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1076 break;
1077 case RADV_QUEUE_COMPUTE:
1078 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1079 radeon_emit(device->empty_cs[family], 0);
1080 break;
1081 }
1082 device->ws->cs_finalize(device->empty_cs[family]);
1083
1084 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1085 switch (family) {
1086 case RADV_QUEUE_GENERAL:
1087 case RADV_QUEUE_COMPUTE:
1088 si_cs_emit_cache_flush(device->flush_cs[family],
1089 device->physical_device->rad_info.chip_class,
1090 NULL, 0,
1091 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1092 RADV_CMD_FLAG_INV_ICACHE |
1093 RADV_CMD_FLAG_INV_SMEM_L1 |
1094 RADV_CMD_FLAG_INV_VMEM_L1 |
1095 RADV_CMD_FLAG_INV_GLOBAL_L2);
1096 break;
1097 }
1098 device->ws->cs_finalize(device->flush_cs[family]);
1099
1100 device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
1101 switch (family) {
1102 case RADV_QUEUE_GENERAL:
1103 case RADV_QUEUE_COMPUTE:
1104 si_cs_emit_cache_flush(device->flush_shader_cs[family],
1105 device->physical_device->rad_info.chip_class,
1106 NULL, 0,
1107 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1108 family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
1109 RADV_CMD_FLAG_INV_ICACHE |
1110 RADV_CMD_FLAG_INV_SMEM_L1 |
1111 RADV_CMD_FLAG_INV_VMEM_L1 |
1112 RADV_CMD_FLAG_INV_GLOBAL_L2);
1113 break;
1114 }
1115 device->ws->cs_finalize(device->flush_shader_cs[family]);
1116 }
1117
1118 if (getenv("RADV_TRACE_FILE")) {
1119 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1120 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1121 if (!device->trace_bo)
1122 goto fail;
1123
1124 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1125 if (!device->trace_id_ptr)
1126 goto fail;
1127 }
1128
1129 if (device->physical_device->rad_info.chip_class >= CIK)
1130 cik_create_gfx_config(device);
1131
1132 VkPipelineCacheCreateInfo ci;
1133 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1134 ci.pNext = NULL;
1135 ci.flags = 0;
1136 ci.pInitialData = NULL;
1137 ci.initialDataSize = 0;
1138 VkPipelineCache pc;
1139 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1140 &ci, NULL, &pc);
1141 if (result != VK_SUCCESS)
1142 goto fail;
1143
1144 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1145
1146 *pDevice = radv_device_to_handle(device);
1147 return VK_SUCCESS;
1148
1149 fail:
1150 if (device->trace_bo)
1151 device->ws->buffer_destroy(device->trace_bo);
1152
1153 if (device->gfx_init)
1154 device->ws->buffer_destroy(device->gfx_init);
1155
1156 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1157 for (unsigned q = 0; q < device->queue_count[i]; q++)
1158 radv_queue_finish(&device->queues[i][q]);
1159 if (device->queue_count[i])
1160 vk_free(&device->alloc, device->queues[i]);
1161 }
1162
1163 vk_free(&device->alloc, device);
1164 return result;
1165 }
1166
1167 void radv_DestroyDevice(
1168 VkDevice _device,
1169 const VkAllocationCallbacks* pAllocator)
1170 {
1171 RADV_FROM_HANDLE(radv_device, device, _device);
1172
1173 if (!device)
1174 return;
1175
1176 if (device->trace_bo)
1177 device->ws->buffer_destroy(device->trace_bo);
1178
1179 if (device->gfx_init)
1180 device->ws->buffer_destroy(device->gfx_init);
1181
1182 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1183 for (unsigned q = 0; q < device->queue_count[i]; q++)
1184 radv_queue_finish(&device->queues[i][q]);
1185 if (device->queue_count[i])
1186 vk_free(&device->alloc, device->queues[i]);
1187 if (device->empty_cs[i])
1188 device->ws->cs_destroy(device->empty_cs[i]);
1189 if (device->flush_cs[i])
1190 device->ws->cs_destroy(device->flush_cs[i]);
1191 if (device->flush_shader_cs[i])
1192 device->ws->cs_destroy(device->flush_shader_cs[i]);
1193 }
1194 radv_device_finish_meta(device);
1195
1196 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1197 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1198
1199 vk_free(&device->alloc, device);
1200 }
1201
1202 VkResult radv_EnumerateInstanceExtensionProperties(
1203 const char* pLayerName,
1204 uint32_t* pPropertyCount,
1205 VkExtensionProperties* pProperties)
1206 {
1207 if (pProperties == NULL) {
1208 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1209 return VK_SUCCESS;
1210 }
1211
1212 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1213 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1214
1215 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1216 return VK_INCOMPLETE;
1217
1218 return VK_SUCCESS;
1219 }
1220
1221 VkResult radv_EnumerateDeviceExtensionProperties(
1222 VkPhysicalDevice physicalDevice,
1223 const char* pLayerName,
1224 uint32_t* pPropertyCount,
1225 VkExtensionProperties* pProperties)
1226 {
1227 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1228
1229 if (pProperties == NULL) {
1230 *pPropertyCount = pdevice->extensions.num_ext;
1231 return VK_SUCCESS;
1232 }
1233
1234 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1235 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1236
1237 if (*pPropertyCount < pdevice->extensions.num_ext)
1238 return VK_INCOMPLETE;
1239
1240 return VK_SUCCESS;
1241 }
1242
1243 VkResult radv_EnumerateInstanceLayerProperties(
1244 uint32_t* pPropertyCount,
1245 VkLayerProperties* pProperties)
1246 {
1247 if (pProperties == NULL) {
1248 *pPropertyCount = 0;
1249 return VK_SUCCESS;
1250 }
1251
1252 /* None supported at this time */
1253 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1254 }
1255
1256 VkResult radv_EnumerateDeviceLayerProperties(
1257 VkPhysicalDevice physicalDevice,
1258 uint32_t* pPropertyCount,
1259 VkLayerProperties* pProperties)
1260 {
1261 if (pProperties == NULL) {
1262 *pPropertyCount = 0;
1263 return VK_SUCCESS;
1264 }
1265
1266 /* None supported at this time */
1267 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1268 }
1269
1270 void radv_GetDeviceQueue(
1271 VkDevice _device,
1272 uint32_t queueFamilyIndex,
1273 uint32_t queueIndex,
1274 VkQueue* pQueue)
1275 {
1276 RADV_FROM_HANDLE(radv_device, device, _device);
1277
1278 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1279 }
1280
1281 static void radv_dump_trace(struct radv_device *device,
1282 struct radeon_winsys_cs *cs)
1283 {
1284 const char *filename = getenv("RADV_TRACE_FILE");
1285 FILE *f = fopen(filename, "w");
1286 if (!f) {
1287 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1288 return;
1289 }
1290
1291 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1292 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1293 fclose(f);
1294 }
1295
1296 static void
1297 fill_geom_tess_rings(struct radv_queue *queue,
1298 uint32_t *map,
1299 bool add_sample_positions,
1300 uint32_t esgs_ring_size,
1301 struct radeon_winsys_bo *esgs_ring_bo,
1302 uint32_t gsvs_ring_size,
1303 struct radeon_winsys_bo *gsvs_ring_bo,
1304 uint32_t tess_factor_ring_size,
1305 struct radeon_winsys_bo *tess_factor_ring_bo,
1306 uint32_t tess_offchip_ring_size,
1307 struct radeon_winsys_bo *tess_offchip_ring_bo)
1308 {
1309 uint64_t esgs_va = 0, gsvs_va = 0;
1310 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1311 uint32_t *desc = &map[4];
1312
1313 if (esgs_ring_bo)
1314 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1315 if (gsvs_ring_bo)
1316 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1317 if (tess_factor_ring_bo)
1318 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1319 if (tess_offchip_ring_bo)
1320 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1321
1322 /* stride 0, num records - size, add tid, swizzle, elsize4,
1323 index stride 64 */
1324 desc[0] = esgs_va;
1325 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1326 S_008F04_STRIDE(0) |
1327 S_008F04_SWIZZLE_ENABLE(true);
1328 desc[2] = esgs_ring_size;
1329 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1330 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1331 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1332 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1333 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1334 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1335 S_008F0C_ELEMENT_SIZE(1) |
1336 S_008F0C_INDEX_STRIDE(3) |
1337 S_008F0C_ADD_TID_ENABLE(true);
1338
1339 desc += 4;
1340 /* GS entry for ES->GS ring */
1341 /* stride 0, num records - size, elsize0,
1342 index stride 0 */
1343 desc[0] = esgs_va;
1344 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1345 S_008F04_STRIDE(0) |
1346 S_008F04_SWIZZLE_ENABLE(false);
1347 desc[2] = esgs_ring_size;
1348 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1349 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1350 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1351 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1352 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1353 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1354 S_008F0C_ELEMENT_SIZE(0) |
1355 S_008F0C_INDEX_STRIDE(0) |
1356 S_008F0C_ADD_TID_ENABLE(false);
1357
1358 desc += 4;
1359 /* VS entry for GS->VS ring */
1360 /* stride 0, num records - size, elsize0,
1361 index stride 0 */
1362 desc[0] = gsvs_va;
1363 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1364 S_008F04_STRIDE(0) |
1365 S_008F04_SWIZZLE_ENABLE(false);
1366 desc[2] = gsvs_ring_size;
1367 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1368 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1369 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1370 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1371 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1372 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1373 S_008F0C_ELEMENT_SIZE(0) |
1374 S_008F0C_INDEX_STRIDE(0) |
1375 S_008F0C_ADD_TID_ENABLE(false);
1376 desc += 4;
1377
1378 /* stride gsvs_itemsize, num records 64
1379 elsize 4, index stride 16 */
1380 /* shader will patch stride and desc[2] */
1381 desc[0] = gsvs_va;
1382 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1383 S_008F04_STRIDE(0) |
1384 S_008F04_SWIZZLE_ENABLE(true);
1385 desc[2] = 0;
1386 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1387 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1388 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1389 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1390 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1391 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1392 S_008F0C_ELEMENT_SIZE(1) |
1393 S_008F0C_INDEX_STRIDE(1) |
1394 S_008F0C_ADD_TID_ENABLE(true);
1395 desc += 4;
1396
1397 desc[0] = tess_factor_va;
1398 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1399 S_008F04_STRIDE(0) |
1400 S_008F04_SWIZZLE_ENABLE(false);
1401 desc[2] = tess_factor_ring_size;
1402 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1403 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1404 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1405 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1406 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1407 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1408 S_008F0C_ELEMENT_SIZE(0) |
1409 S_008F0C_INDEX_STRIDE(0) |
1410 S_008F0C_ADD_TID_ENABLE(false);
1411 desc += 4;
1412
1413 desc[0] = tess_offchip_va;
1414 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1415 S_008F04_STRIDE(0) |
1416 S_008F04_SWIZZLE_ENABLE(false);
1417 desc[2] = tess_offchip_ring_size;
1418 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1419 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1420 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1421 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1422 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1423 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1424 S_008F0C_ELEMENT_SIZE(0) |
1425 S_008F0C_INDEX_STRIDE(0) |
1426 S_008F0C_ADD_TID_ENABLE(false);
1427 desc += 4;
1428
1429 /* add sample positions after all rings */
1430 memcpy(desc, queue->device->sample_locations_1x, 8);
1431 desc += 2;
1432 memcpy(desc, queue->device->sample_locations_2x, 16);
1433 desc += 4;
1434 memcpy(desc, queue->device->sample_locations_4x, 32);
1435 desc += 8;
1436 memcpy(desc, queue->device->sample_locations_8x, 64);
1437 desc += 16;
1438 memcpy(desc, queue->device->sample_locations_16x, 128);
1439 }
1440
1441 static unsigned
1442 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1443 {
1444 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1445 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1446 device->physical_device->rad_info.family != CHIP_STONEY;
1447 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1448 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1449 device->physical_device->rad_info.max_se;
1450 unsigned offchip_granularity;
1451 unsigned hs_offchip_param;
1452 switch (device->tess_offchip_block_dw_size) {
1453 default:
1454 assert(0);
1455 /* fall through */
1456 case 8192:
1457 offchip_granularity = V_03093C_X_8K_DWORDS;
1458 break;
1459 case 4096:
1460 offchip_granularity = V_03093C_X_4K_DWORDS;
1461 break;
1462 }
1463
1464 switch (device->physical_device->rad_info.chip_class) {
1465 case SI:
1466 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1467 break;
1468 case CIK:
1469 case VI:
1470 case GFX9:
1471 default:
1472 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1473 break;
1474 }
1475
1476 *max_offchip_buffers_p = max_offchip_buffers;
1477 if (device->physical_device->rad_info.chip_class >= CIK) {
1478 if (device->physical_device->rad_info.chip_class >= VI)
1479 --max_offchip_buffers;
1480 hs_offchip_param =
1481 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1482 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1483 } else {
1484 hs_offchip_param =
1485 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1486 }
1487 return hs_offchip_param;
1488 }
1489
1490 static VkResult
1491 radv_get_preamble_cs(struct radv_queue *queue,
1492 uint32_t scratch_size,
1493 uint32_t compute_scratch_size,
1494 uint32_t esgs_ring_size,
1495 uint32_t gsvs_ring_size,
1496 bool needs_tess_rings,
1497 bool needs_sample_positions,
1498 struct radeon_winsys_cs **initial_preamble_cs,
1499 struct radeon_winsys_cs **continue_preamble_cs)
1500 {
1501 struct radeon_winsys_bo *scratch_bo = NULL;
1502 struct radeon_winsys_bo *descriptor_bo = NULL;
1503 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1504 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1505 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1506 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1507 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1508 struct radeon_winsys_cs *dest_cs[2] = {0};
1509 bool add_tess_rings = false, add_sample_positions = false;
1510 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1511 unsigned max_offchip_buffers;
1512 unsigned hs_offchip_param = 0;
1513 if (!queue->has_tess_rings) {
1514 if (needs_tess_rings)
1515 add_tess_rings = true;
1516 }
1517 if (!queue->has_sample_positions) {
1518 if (needs_sample_positions)
1519 add_sample_positions = true;
1520 }
1521 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1522 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1523 &max_offchip_buffers);
1524 tess_offchip_ring_size = max_offchip_buffers *
1525 queue->device->tess_offchip_block_dw_size * 4;
1526
1527 if (scratch_size <= queue->scratch_size &&
1528 compute_scratch_size <= queue->compute_scratch_size &&
1529 esgs_ring_size <= queue->esgs_ring_size &&
1530 gsvs_ring_size <= queue->gsvs_ring_size &&
1531 !add_tess_rings && !add_sample_positions &&
1532 queue->initial_preamble_cs) {
1533 *initial_preamble_cs = queue->initial_preamble_cs;
1534 *continue_preamble_cs = queue->continue_preamble_cs;
1535 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1536 *continue_preamble_cs = NULL;
1537 return VK_SUCCESS;
1538 }
1539
1540 if (scratch_size > queue->scratch_size) {
1541 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1542 scratch_size,
1543 4096,
1544 RADEON_DOMAIN_VRAM,
1545 RADEON_FLAG_NO_CPU_ACCESS);
1546 if (!scratch_bo)
1547 goto fail;
1548 } else
1549 scratch_bo = queue->scratch_bo;
1550
1551 if (compute_scratch_size > queue->compute_scratch_size) {
1552 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1553 compute_scratch_size,
1554 4096,
1555 RADEON_DOMAIN_VRAM,
1556 RADEON_FLAG_NO_CPU_ACCESS);
1557 if (!compute_scratch_bo)
1558 goto fail;
1559
1560 } else
1561 compute_scratch_bo = queue->compute_scratch_bo;
1562
1563 if (esgs_ring_size > queue->esgs_ring_size) {
1564 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1565 esgs_ring_size,
1566 4096,
1567 RADEON_DOMAIN_VRAM,
1568 RADEON_FLAG_NO_CPU_ACCESS);
1569 if (!esgs_ring_bo)
1570 goto fail;
1571 } else {
1572 esgs_ring_bo = queue->esgs_ring_bo;
1573 esgs_ring_size = queue->esgs_ring_size;
1574 }
1575
1576 if (gsvs_ring_size > queue->gsvs_ring_size) {
1577 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1578 gsvs_ring_size,
1579 4096,
1580 RADEON_DOMAIN_VRAM,
1581 RADEON_FLAG_NO_CPU_ACCESS);
1582 if (!gsvs_ring_bo)
1583 goto fail;
1584 } else {
1585 gsvs_ring_bo = queue->gsvs_ring_bo;
1586 gsvs_ring_size = queue->gsvs_ring_size;
1587 }
1588
1589 if (add_tess_rings) {
1590 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1591 tess_factor_ring_size,
1592 256,
1593 RADEON_DOMAIN_VRAM,
1594 RADEON_FLAG_NO_CPU_ACCESS);
1595 if (!tess_factor_ring_bo)
1596 goto fail;
1597 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1598 tess_offchip_ring_size,
1599 256,
1600 RADEON_DOMAIN_VRAM,
1601 RADEON_FLAG_NO_CPU_ACCESS);
1602 if (!tess_offchip_ring_bo)
1603 goto fail;
1604 } else {
1605 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1606 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1607 }
1608
1609 if (scratch_bo != queue->scratch_bo ||
1610 esgs_ring_bo != queue->esgs_ring_bo ||
1611 gsvs_ring_bo != queue->gsvs_ring_bo ||
1612 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1613 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1614 uint32_t size = 0;
1615 if (gsvs_ring_bo || esgs_ring_bo ||
1616 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1617 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1618 if (add_sample_positions)
1619 size += 256; /* 32+16+8+4+2+1 samples * 4 * 2 = 248 bytes. */
1620 }
1621 else if (scratch_bo)
1622 size = 8; /* 2 dword */
1623
1624 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1625 size,
1626 4096,
1627 RADEON_DOMAIN_VRAM,
1628 RADEON_FLAG_CPU_ACCESS);
1629 if (!descriptor_bo)
1630 goto fail;
1631 } else
1632 descriptor_bo = queue->descriptor_bo;
1633
1634 for(int i = 0; i < 2; ++i) {
1635 struct radeon_winsys_cs *cs = NULL;
1636 cs = queue->device->ws->cs_create(queue->device->ws,
1637 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1638 if (!cs)
1639 goto fail;
1640
1641 dest_cs[i] = cs;
1642
1643 if (scratch_bo)
1644 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1645
1646 if (esgs_ring_bo)
1647 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1648
1649 if (gsvs_ring_bo)
1650 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1651
1652 if (tess_factor_ring_bo)
1653 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1654
1655 if (tess_offchip_ring_bo)
1656 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1657
1658 if (descriptor_bo)
1659 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1660
1661 if (descriptor_bo != queue->descriptor_bo) {
1662 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1663
1664 if (scratch_bo) {
1665 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1666 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1667 S_008F04_SWIZZLE_ENABLE(1);
1668 map[0] = scratch_va;
1669 map[1] = rsrc1;
1670 }
1671
1672 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1673 add_sample_positions)
1674 fill_geom_tess_rings(queue, map, add_sample_positions,
1675 esgs_ring_size, esgs_ring_bo,
1676 gsvs_ring_size, gsvs_ring_bo,
1677 tess_factor_ring_size, tess_factor_ring_bo,
1678 tess_offchip_ring_size, tess_offchip_ring_bo);
1679
1680 queue->device->ws->buffer_unmap(descriptor_bo);
1681 }
1682
1683 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1684 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1685 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1686 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1687 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1688 }
1689
1690 if (esgs_ring_bo || gsvs_ring_bo) {
1691 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1692 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1693 radeon_emit(cs, esgs_ring_size >> 8);
1694 radeon_emit(cs, gsvs_ring_size >> 8);
1695 } else {
1696 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1697 radeon_emit(cs, esgs_ring_size >> 8);
1698 radeon_emit(cs, gsvs_ring_size >> 8);
1699 }
1700 }
1701
1702 if (tess_factor_ring_bo) {
1703 uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1704 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1705 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1706 S_030938_SIZE(tess_factor_ring_size / 4));
1707 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1708 tf_va >> 8);
1709 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1710 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1711 tf_va >> 40);
1712 }
1713 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1714 } else {
1715 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1716 S_008988_SIZE(tess_factor_ring_size / 4));
1717 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1718 tf_va >> 8);
1719 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1720 hs_offchip_param);
1721 }
1722 }
1723
1724 if (descriptor_bo) {
1725 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1726 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1727 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1728 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1729 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1730 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1731
1732 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1733
1734 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1735 radeon_set_sh_reg_seq(cs, regs[i], 2);
1736 radeon_emit(cs, va);
1737 radeon_emit(cs, va >> 32);
1738 }
1739 }
1740
1741 if (compute_scratch_bo) {
1742 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1743 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1744 S_008F04_SWIZZLE_ENABLE(1);
1745
1746 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1747
1748 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1749 radeon_emit(cs, scratch_va);
1750 radeon_emit(cs, rsrc1);
1751 }
1752
1753 if (!i) {
1754 si_cs_emit_cache_flush(cs,
1755 queue->device->physical_device->rad_info.chip_class,
1756 NULL, 0,
1757 queue->queue_family_index == RING_COMPUTE &&
1758 queue->device->physical_device->rad_info.chip_class >= CIK,
1759 RADV_CMD_FLAG_INV_ICACHE |
1760 RADV_CMD_FLAG_INV_SMEM_L1 |
1761 RADV_CMD_FLAG_INV_VMEM_L1 |
1762 RADV_CMD_FLAG_INV_GLOBAL_L2);
1763 }
1764
1765 if (!queue->device->ws->cs_finalize(cs))
1766 goto fail;
1767 }
1768
1769 if (queue->initial_preamble_cs)
1770 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1771
1772 if (queue->continue_preamble_cs)
1773 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1774
1775 queue->initial_preamble_cs = dest_cs[0];
1776 queue->continue_preamble_cs = dest_cs[1];
1777
1778 if (scratch_bo != queue->scratch_bo) {
1779 if (queue->scratch_bo)
1780 queue->device->ws->buffer_destroy(queue->scratch_bo);
1781 queue->scratch_bo = scratch_bo;
1782 queue->scratch_size = scratch_size;
1783 }
1784
1785 if (compute_scratch_bo != queue->compute_scratch_bo) {
1786 if (queue->compute_scratch_bo)
1787 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1788 queue->compute_scratch_bo = compute_scratch_bo;
1789 queue->compute_scratch_size = compute_scratch_size;
1790 }
1791
1792 if (esgs_ring_bo != queue->esgs_ring_bo) {
1793 if (queue->esgs_ring_bo)
1794 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1795 queue->esgs_ring_bo = esgs_ring_bo;
1796 queue->esgs_ring_size = esgs_ring_size;
1797 }
1798
1799 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1800 if (queue->gsvs_ring_bo)
1801 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1802 queue->gsvs_ring_bo = gsvs_ring_bo;
1803 queue->gsvs_ring_size = gsvs_ring_size;
1804 }
1805
1806 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1807 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1808 }
1809
1810 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1811 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1812 queue->has_tess_rings = true;
1813 }
1814
1815 if (descriptor_bo != queue->descriptor_bo) {
1816 if (queue->descriptor_bo)
1817 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1818
1819 queue->descriptor_bo = descriptor_bo;
1820 }
1821
1822 if (add_sample_positions)
1823 queue->has_sample_positions = true;
1824
1825 *initial_preamble_cs = queue->initial_preamble_cs;
1826 *continue_preamble_cs = queue->continue_preamble_cs;
1827 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1828 *continue_preamble_cs = NULL;
1829 return VK_SUCCESS;
1830 fail:
1831 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1832 if (dest_cs[i])
1833 queue->device->ws->cs_destroy(dest_cs[i]);
1834 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1835 queue->device->ws->buffer_destroy(descriptor_bo);
1836 if (scratch_bo && scratch_bo != queue->scratch_bo)
1837 queue->device->ws->buffer_destroy(scratch_bo);
1838 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1839 queue->device->ws->buffer_destroy(compute_scratch_bo);
1840 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1841 queue->device->ws->buffer_destroy(esgs_ring_bo);
1842 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1843 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1844 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1845 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1846 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1847 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1848 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1849 }
1850
1851 VkResult radv_QueueSubmit(
1852 VkQueue _queue,
1853 uint32_t submitCount,
1854 const VkSubmitInfo* pSubmits,
1855 VkFence _fence)
1856 {
1857 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1858 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1859 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1860 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1861 int ret;
1862 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1863 uint32_t scratch_size = 0;
1864 uint32_t compute_scratch_size = 0;
1865 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1866 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1867 VkResult result;
1868 bool fence_emitted = false;
1869 bool tess_rings_needed = false;
1870 bool sample_positions_needed = false;
1871
1872 /* Do this first so failing to allocate scratch buffers can't result in
1873 * partially executed submissions. */
1874 for (uint32_t i = 0; i < submitCount; i++) {
1875 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1876 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1877 pSubmits[i].pCommandBuffers[j]);
1878
1879 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1880 compute_scratch_size = MAX2(compute_scratch_size,
1881 cmd_buffer->compute_scratch_size_needed);
1882 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1883 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1884 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1885 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1886 }
1887 }
1888
1889 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1890 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1891 sample_positions_needed,
1892 &initial_preamble_cs, &continue_preamble_cs);
1893 if (result != VK_SUCCESS)
1894 return result;
1895
1896 for (uint32_t i = 0; i < submitCount; i++) {
1897 struct radeon_winsys_cs **cs_array;
1898 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1899 bool can_patch = !do_flush;
1900 uint32_t advance;
1901
1902 if (!pSubmits[i].commandBufferCount) {
1903 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1904 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1905 &queue->device->empty_cs[queue->queue_family_index],
1906 1, NULL, NULL,
1907 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1908 pSubmits[i].waitSemaphoreCount,
1909 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1910 pSubmits[i].signalSemaphoreCount,
1911 false, base_fence);
1912 if (ret) {
1913 radv_loge("failed to submit CS %d\n", i);
1914 abort();
1915 }
1916 fence_emitted = true;
1917 }
1918 continue;
1919 }
1920
1921 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1922 (pSubmits[i].commandBufferCount + do_flush));
1923
1924 if(do_flush)
1925 cs_array[0] = pSubmits[i].waitSemaphoreCount ?
1926 queue->device->flush_shader_cs[queue->queue_family_index] :
1927 queue->device->flush_cs[queue->queue_family_index];
1928
1929 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1930 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1931 pSubmits[i].pCommandBuffers[j]);
1932 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1933
1934 cs_array[j + do_flush] = cmd_buffer->cs;
1935 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1936 can_patch = false;
1937 }
1938
1939 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1940 advance = MIN2(max_cs_submission,
1941 pSubmits[i].commandBufferCount + do_flush - j);
1942 bool b = j == 0;
1943 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1944
1945 if (queue->device->trace_bo)
1946 *queue->device->trace_id_ptr = 0;
1947
1948 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1949 advance, initial_preamble_cs, continue_preamble_cs,
1950 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1951 b ? pSubmits[i].waitSemaphoreCount : 0,
1952 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1953 e ? pSubmits[i].signalSemaphoreCount : 0,
1954 can_patch, base_fence);
1955
1956 if (ret) {
1957 radv_loge("failed to submit CS %d\n", i);
1958 abort();
1959 }
1960 fence_emitted = true;
1961 if (queue->device->trace_bo) {
1962 bool success = queue->device->ws->ctx_wait_idle(
1963 queue->hw_ctx,
1964 radv_queue_family_to_ring(
1965 queue->queue_family_index),
1966 queue->queue_idx);
1967
1968 if (!success) { /* Hang */
1969 radv_dump_trace(queue->device, cs_array[j]);
1970 abort();
1971 }
1972 }
1973 }
1974 free(cs_array);
1975 }
1976
1977 if (fence) {
1978 if (!fence_emitted)
1979 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1980 &queue->device->empty_cs[queue->queue_family_index],
1981 1, NULL, NULL, NULL, 0, NULL, 0,
1982 false, base_fence);
1983
1984 fence->submitted = true;
1985 }
1986
1987 return VK_SUCCESS;
1988 }
1989
1990 VkResult radv_QueueWaitIdle(
1991 VkQueue _queue)
1992 {
1993 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1994
1995 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1996 radv_queue_family_to_ring(queue->queue_family_index),
1997 queue->queue_idx);
1998 return VK_SUCCESS;
1999 }
2000
2001 VkResult radv_DeviceWaitIdle(
2002 VkDevice _device)
2003 {
2004 RADV_FROM_HANDLE(radv_device, device, _device);
2005
2006 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2007 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2008 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2009 }
2010 }
2011 return VK_SUCCESS;
2012 }
2013
2014 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2015 VkInstance instance,
2016 const char* pName)
2017 {
2018 return radv_lookup_entrypoint(pName);
2019 }
2020
2021 /* The loader wants us to expose a second GetInstanceProcAddr function
2022 * to work around certain LD_PRELOAD issues seen in apps.
2023 */
2024 PUBLIC
2025 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2026 VkInstance instance,
2027 const char* pName);
2028
2029 PUBLIC
2030 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2031 VkInstance instance,
2032 const char* pName)
2033 {
2034 return radv_GetInstanceProcAddr(instance, pName);
2035 }
2036
2037 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2038 VkDevice device,
2039 const char* pName)
2040 {
2041 return radv_lookup_entrypoint(pName);
2042 }
2043
2044 bool radv_get_memory_fd(struct radv_device *device,
2045 struct radv_device_memory *memory,
2046 int *pFD)
2047 {
2048 struct radeon_bo_metadata metadata;
2049
2050 if (memory->image) {
2051 radv_init_metadata(device, memory->image, &metadata);
2052 device->ws->buffer_set_metadata(memory->bo, &metadata);
2053 }
2054
2055 return device->ws->buffer_get_fd(device->ws, memory->bo,
2056 pFD);
2057 }
2058
2059 VkResult radv_AllocateMemory(
2060 VkDevice _device,
2061 const VkMemoryAllocateInfo* pAllocateInfo,
2062 const VkAllocationCallbacks* pAllocator,
2063 VkDeviceMemory* pMem)
2064 {
2065 RADV_FROM_HANDLE(radv_device, device, _device);
2066 struct radv_device_memory *mem;
2067 VkResult result;
2068 enum radeon_bo_domain domain;
2069 uint32_t flags = 0;
2070
2071 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2072
2073 if (pAllocateInfo->allocationSize == 0) {
2074 /* Apparently, this is allowed */
2075 *pMem = VK_NULL_HANDLE;
2076 return VK_SUCCESS;
2077 }
2078
2079 const VkImportMemoryFdInfoKHX *import_info =
2080 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
2081 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
2082 vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);
2083
2084 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2085 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2086 if (mem == NULL)
2087 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2088
2089 if (dedicate_info) {
2090 mem->image = radv_image_from_handle(dedicate_info->image);
2091 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2092 } else {
2093 mem->image = NULL;
2094 mem->buffer = NULL;
2095 }
2096
2097 if (import_info) {
2098 assert(import_info->handleType ==
2099 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
2100 mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
2101 NULL, NULL);
2102 if (!mem->bo)
2103 goto fail;
2104 else
2105 goto out_success;
2106 }
2107
2108 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2109 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2110 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2111 domain = RADEON_DOMAIN_GTT;
2112 else
2113 domain = RADEON_DOMAIN_VRAM;
2114
2115 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2116 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2117 else
2118 flags |= RADEON_FLAG_CPU_ACCESS;
2119
2120 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2121 flags |= RADEON_FLAG_GTT_WC;
2122
2123 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
2124 domain, flags);
2125
2126 if (!mem->bo) {
2127 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2128 goto fail;
2129 }
2130 mem->type_index = pAllocateInfo->memoryTypeIndex;
2131 out_success:
2132 *pMem = radv_device_memory_to_handle(mem);
2133
2134 return VK_SUCCESS;
2135
2136 fail:
2137 vk_free2(&device->alloc, pAllocator, mem);
2138
2139 return result;
2140 }
2141
2142 void radv_FreeMemory(
2143 VkDevice _device,
2144 VkDeviceMemory _mem,
2145 const VkAllocationCallbacks* pAllocator)
2146 {
2147 RADV_FROM_HANDLE(radv_device, device, _device);
2148 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2149
2150 if (mem == NULL)
2151 return;
2152
2153 device->ws->buffer_destroy(mem->bo);
2154 mem->bo = NULL;
2155
2156 vk_free2(&device->alloc, pAllocator, mem);
2157 }
2158
2159 VkResult radv_MapMemory(
2160 VkDevice _device,
2161 VkDeviceMemory _memory,
2162 VkDeviceSize offset,
2163 VkDeviceSize size,
2164 VkMemoryMapFlags flags,
2165 void** ppData)
2166 {
2167 RADV_FROM_HANDLE(radv_device, device, _device);
2168 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2169
2170 if (mem == NULL) {
2171 *ppData = NULL;
2172 return VK_SUCCESS;
2173 }
2174
2175 *ppData = device->ws->buffer_map(mem->bo);
2176 if (*ppData) {
2177 *ppData += offset;
2178 return VK_SUCCESS;
2179 }
2180
2181 return VK_ERROR_MEMORY_MAP_FAILED;
2182 }
2183
2184 void radv_UnmapMemory(
2185 VkDevice _device,
2186 VkDeviceMemory _memory)
2187 {
2188 RADV_FROM_HANDLE(radv_device, device, _device);
2189 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2190
2191 if (mem == NULL)
2192 return;
2193
2194 device->ws->buffer_unmap(mem->bo);
2195 }
2196
2197 VkResult radv_FlushMappedMemoryRanges(
2198 VkDevice _device,
2199 uint32_t memoryRangeCount,
2200 const VkMappedMemoryRange* pMemoryRanges)
2201 {
2202 return VK_SUCCESS;
2203 }
2204
2205 VkResult radv_InvalidateMappedMemoryRanges(
2206 VkDevice _device,
2207 uint32_t memoryRangeCount,
2208 const VkMappedMemoryRange* pMemoryRanges)
2209 {
2210 return VK_SUCCESS;
2211 }
2212
2213 void radv_GetBufferMemoryRequirements(
2214 VkDevice device,
2215 VkBuffer _buffer,
2216 VkMemoryRequirements* pMemoryRequirements)
2217 {
2218 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2219
2220 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2221
2222 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2223 pMemoryRequirements->alignment = 4096;
2224 else
2225 pMemoryRequirements->alignment = 16;
2226
2227 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2228 }
2229
2230 void radv_GetImageMemoryRequirements(
2231 VkDevice device,
2232 VkImage _image,
2233 VkMemoryRequirements* pMemoryRequirements)
2234 {
2235 RADV_FROM_HANDLE(radv_image, image, _image);
2236
2237 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2238
2239 pMemoryRequirements->size = image->size;
2240 pMemoryRequirements->alignment = image->alignment;
2241 }
2242
2243 void radv_GetImageSparseMemoryRequirements(
2244 VkDevice device,
2245 VkImage image,
2246 uint32_t* pSparseMemoryRequirementCount,
2247 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2248 {
2249 stub();
2250 }
2251
2252 void radv_GetDeviceMemoryCommitment(
2253 VkDevice device,
2254 VkDeviceMemory memory,
2255 VkDeviceSize* pCommittedMemoryInBytes)
2256 {
2257 *pCommittedMemoryInBytes = 0;
2258 }
2259
2260 VkResult radv_BindBufferMemory(
2261 VkDevice device,
2262 VkBuffer _buffer,
2263 VkDeviceMemory _memory,
2264 VkDeviceSize memoryOffset)
2265 {
2266 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2267 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2268
2269 if (mem) {
2270 buffer->bo = mem->bo;
2271 buffer->offset = memoryOffset;
2272 } else {
2273 buffer->bo = NULL;
2274 buffer->offset = 0;
2275 }
2276
2277 return VK_SUCCESS;
2278 }
2279
2280 VkResult radv_BindImageMemory(
2281 VkDevice device,
2282 VkImage _image,
2283 VkDeviceMemory _memory,
2284 VkDeviceSize memoryOffset)
2285 {
2286 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2287 RADV_FROM_HANDLE(radv_image, image, _image);
2288
2289 if (mem) {
2290 image->bo = mem->bo;
2291 image->offset = memoryOffset;
2292 } else {
2293 image->bo = NULL;
2294 image->offset = 0;
2295 }
2296
2297 return VK_SUCCESS;
2298 }
2299
2300
2301 static void
2302 radv_sparse_buffer_bind_memory(struct radv_device *device,
2303 const VkSparseBufferMemoryBindInfo *bind)
2304 {
2305 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2306
2307 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2308 struct radv_device_memory *mem = NULL;
2309
2310 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2311 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2312
2313 device->ws->buffer_virtual_bind(buffer->bo,
2314 bind->pBinds[i].resourceOffset,
2315 bind->pBinds[i].size,
2316 mem ? mem->bo : NULL,
2317 bind->pBinds[i].memoryOffset);
2318 }
2319 }
2320
2321 static void
2322 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2323 const VkSparseImageOpaqueMemoryBindInfo *bind)
2324 {
2325 RADV_FROM_HANDLE(radv_image, image, bind->image);
2326
2327 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2328 struct radv_device_memory *mem = NULL;
2329
2330 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2331 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2332
2333 device->ws->buffer_virtual_bind(image->bo,
2334 bind->pBinds[i].resourceOffset,
2335 bind->pBinds[i].size,
2336 mem ? mem->bo : NULL,
2337 bind->pBinds[i].memoryOffset);
2338 }
2339 }
2340
2341 VkResult radv_QueueBindSparse(
2342 VkQueue _queue,
2343 uint32_t bindInfoCount,
2344 const VkBindSparseInfo* pBindInfo,
2345 VkFence _fence)
2346 {
2347 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2348 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2349 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2350 bool fence_emitted = false;
2351
2352 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2353 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2354 radv_sparse_buffer_bind_memory(queue->device,
2355 pBindInfo[i].pBufferBinds + j);
2356 }
2357
2358 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2359 radv_sparse_image_opaque_bind_memory(queue->device,
2360 pBindInfo[i].pImageOpaqueBinds + j);
2361 }
2362
2363 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2364 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2365 &queue->device->empty_cs[queue->queue_family_index],
2366 1, NULL, NULL,
2367 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2368 pBindInfo[i].waitSemaphoreCount,
2369 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2370 pBindInfo[i].signalSemaphoreCount,
2371 false, base_fence);
2372 fence_emitted = true;
2373 if (fence)
2374 fence->submitted = true;
2375 }
2376 }
2377
2378 if (fence && !fence_emitted) {
2379 fence->signalled = true;
2380 }
2381
2382 return VK_SUCCESS;
2383 }
2384
2385 VkResult radv_CreateFence(
2386 VkDevice _device,
2387 const VkFenceCreateInfo* pCreateInfo,
2388 const VkAllocationCallbacks* pAllocator,
2389 VkFence* pFence)
2390 {
2391 RADV_FROM_HANDLE(radv_device, device, _device);
2392 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2393 sizeof(*fence), 8,
2394 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2395
2396 if (!fence)
2397 return VK_ERROR_OUT_OF_HOST_MEMORY;
2398
2399 memset(fence, 0, sizeof(*fence));
2400 fence->submitted = false;
2401 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2402 fence->fence = device->ws->create_fence();
2403 if (!fence->fence) {
2404 vk_free2(&device->alloc, pAllocator, fence);
2405 return VK_ERROR_OUT_OF_HOST_MEMORY;
2406 }
2407
2408 *pFence = radv_fence_to_handle(fence);
2409
2410 return VK_SUCCESS;
2411 }
2412
2413 void radv_DestroyFence(
2414 VkDevice _device,
2415 VkFence _fence,
2416 const VkAllocationCallbacks* pAllocator)
2417 {
2418 RADV_FROM_HANDLE(radv_device, device, _device);
2419 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2420
2421 if (!fence)
2422 return;
2423 device->ws->destroy_fence(fence->fence);
2424 vk_free2(&device->alloc, pAllocator, fence);
2425 }
2426
2427 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2428 {
2429 uint64_t current_time;
2430 struct timespec tv;
2431
2432 clock_gettime(CLOCK_MONOTONIC, &tv);
2433 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2434
2435 timeout = MIN2(UINT64_MAX - current_time, timeout);
2436
2437 return current_time + timeout;
2438 }
2439
2440 VkResult radv_WaitForFences(
2441 VkDevice _device,
2442 uint32_t fenceCount,
2443 const VkFence* pFences,
2444 VkBool32 waitAll,
2445 uint64_t timeout)
2446 {
2447 RADV_FROM_HANDLE(radv_device, device, _device);
2448 timeout = radv_get_absolute_timeout(timeout);
2449
2450 if (!waitAll && fenceCount > 1) {
2451 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2452 }
2453
2454 for (uint32_t i = 0; i < fenceCount; ++i) {
2455 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2456 bool expired = false;
2457
2458 if (fence->signalled)
2459 continue;
2460
2461 if (!fence->submitted)
2462 return VK_TIMEOUT;
2463
2464 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2465 if (!expired)
2466 return VK_TIMEOUT;
2467
2468 fence->signalled = true;
2469 }
2470
2471 return VK_SUCCESS;
2472 }
2473
2474 VkResult radv_ResetFences(VkDevice device,
2475 uint32_t fenceCount,
2476 const VkFence *pFences)
2477 {
2478 for (unsigned i = 0; i < fenceCount; ++i) {
2479 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2480 fence->submitted = fence->signalled = false;
2481 }
2482
2483 return VK_SUCCESS;
2484 }
2485
2486 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2487 {
2488 RADV_FROM_HANDLE(radv_device, device, _device);
2489 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2490
2491 if (fence->signalled)
2492 return VK_SUCCESS;
2493 if (!fence->submitted)
2494 return VK_NOT_READY;
2495
2496 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2497 return VK_NOT_READY;
2498
2499 return VK_SUCCESS;
2500 }
2501
2502
2503 // Queue semaphore functions
2504
2505 VkResult radv_CreateSemaphore(
2506 VkDevice _device,
2507 const VkSemaphoreCreateInfo* pCreateInfo,
2508 const VkAllocationCallbacks* pAllocator,
2509 VkSemaphore* pSemaphore)
2510 {
2511 RADV_FROM_HANDLE(radv_device, device, _device);
2512 struct radeon_winsys_sem *sem;
2513
2514 sem = device->ws->create_sem(device->ws);
2515 if (!sem)
2516 return VK_ERROR_OUT_OF_HOST_MEMORY;
2517
2518 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2519 return VK_SUCCESS;
2520 }
2521
2522 void radv_DestroySemaphore(
2523 VkDevice _device,
2524 VkSemaphore _semaphore,
2525 const VkAllocationCallbacks* pAllocator)
2526 {
2527 RADV_FROM_HANDLE(radv_device, device, _device);
2528 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2529 if (!_semaphore)
2530 return;
2531
2532 device->ws->destroy_sem(sem);
2533 }
2534
2535 VkResult radv_CreateEvent(
2536 VkDevice _device,
2537 const VkEventCreateInfo* pCreateInfo,
2538 const VkAllocationCallbacks* pAllocator,
2539 VkEvent* pEvent)
2540 {
2541 RADV_FROM_HANDLE(radv_device, device, _device);
2542 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2543 sizeof(*event), 8,
2544 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2545
2546 if (!event)
2547 return VK_ERROR_OUT_OF_HOST_MEMORY;
2548
2549 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2550 RADEON_DOMAIN_GTT,
2551 RADEON_FLAG_CPU_ACCESS);
2552 if (!event->bo) {
2553 vk_free2(&device->alloc, pAllocator, event);
2554 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2555 }
2556
2557 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2558
2559 *pEvent = radv_event_to_handle(event);
2560
2561 return VK_SUCCESS;
2562 }
2563
2564 void radv_DestroyEvent(
2565 VkDevice _device,
2566 VkEvent _event,
2567 const VkAllocationCallbacks* pAllocator)
2568 {
2569 RADV_FROM_HANDLE(radv_device, device, _device);
2570 RADV_FROM_HANDLE(radv_event, event, _event);
2571
2572 if (!event)
2573 return;
2574 device->ws->buffer_destroy(event->bo);
2575 vk_free2(&device->alloc, pAllocator, event);
2576 }
2577
2578 VkResult radv_GetEventStatus(
2579 VkDevice _device,
2580 VkEvent _event)
2581 {
2582 RADV_FROM_HANDLE(radv_event, event, _event);
2583
2584 if (*event->map == 1)
2585 return VK_EVENT_SET;
2586 return VK_EVENT_RESET;
2587 }
2588
2589 VkResult radv_SetEvent(
2590 VkDevice _device,
2591 VkEvent _event)
2592 {
2593 RADV_FROM_HANDLE(radv_event, event, _event);
2594 *event->map = 1;
2595
2596 return VK_SUCCESS;
2597 }
2598
2599 VkResult radv_ResetEvent(
2600 VkDevice _device,
2601 VkEvent _event)
2602 {
2603 RADV_FROM_HANDLE(radv_event, event, _event);
2604 *event->map = 0;
2605
2606 return VK_SUCCESS;
2607 }
2608
2609 VkResult radv_CreateBuffer(
2610 VkDevice _device,
2611 const VkBufferCreateInfo* pCreateInfo,
2612 const VkAllocationCallbacks* pAllocator,
2613 VkBuffer* pBuffer)
2614 {
2615 RADV_FROM_HANDLE(radv_device, device, _device);
2616 struct radv_buffer *buffer;
2617
2618 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2619
2620 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2621 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2622 if (buffer == NULL)
2623 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2624
2625 buffer->size = pCreateInfo->size;
2626 buffer->usage = pCreateInfo->usage;
2627 buffer->bo = NULL;
2628 buffer->offset = 0;
2629 buffer->flags = pCreateInfo->flags;
2630
2631 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2632 buffer->bo = device->ws->buffer_create(device->ws,
2633 align64(buffer->size, 4096),
2634 4096, 0, RADEON_FLAG_VIRTUAL);
2635 if (!buffer->bo) {
2636 vk_free2(&device->alloc, pAllocator, buffer);
2637 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2638 }
2639 }
2640
2641 *pBuffer = radv_buffer_to_handle(buffer);
2642
2643 return VK_SUCCESS;
2644 }
2645
2646 void radv_DestroyBuffer(
2647 VkDevice _device,
2648 VkBuffer _buffer,
2649 const VkAllocationCallbacks* pAllocator)
2650 {
2651 RADV_FROM_HANDLE(radv_device, device, _device);
2652 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2653
2654 if (!buffer)
2655 return;
2656
2657 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2658 device->ws->buffer_destroy(buffer->bo);
2659
2660 vk_free2(&device->alloc, pAllocator, buffer);
2661 }
2662
2663 static inline unsigned
2664 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2665 {
2666 if (stencil)
2667 return image->surface.u.legacy.stencil_tiling_index[level];
2668 else
2669 return image->surface.u.legacy.tiling_index[level];
2670 }
2671
2672 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2673 {
2674 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2675 }
2676
2677 static void
2678 radv_initialise_color_surface(struct radv_device *device,
2679 struct radv_color_buffer_info *cb,
2680 struct radv_image_view *iview)
2681 {
2682 const struct vk_format_description *desc;
2683 unsigned ntype, format, swap, endian;
2684 unsigned blend_clamp = 0, blend_bypass = 0;
2685 uint64_t va;
2686 const struct radeon_surf *surf = &iview->image->surface;
2687
2688 desc = vk_format_description(iview->vk_format);
2689
2690 memset(cb, 0, sizeof(*cb));
2691
2692 /* Intensity is implemented as Red, so treat it that way. */
2693 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2694
2695 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2696
2697 if (device->physical_device->rad_info.chip_class >= GFX9) {
2698 struct gfx9_surf_meta_flags meta;
2699 if (iview->image->dcc_offset)
2700 meta = iview->image->surface.u.gfx9.dcc;
2701 else
2702 meta = iview->image->surface.u.gfx9.cmask;
2703
2704 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2705 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2706 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2707 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2708
2709 va += iview->image->surface.u.gfx9.surf_offset >> 8;
2710 } else {
2711 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2712 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2713
2714 va += level_info->offset;
2715
2716 pitch_tile_max = level_info->nblk_x / 8 - 1;
2717 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2718 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2719
2720 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2721 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2722 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2723
2724 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2725 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2726
2727 if (iview->image->fmask.size) {
2728 if (device->physical_device->rad_info.chip_class >= CIK)
2729 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2730 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2731 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2732 } else {
2733 /* This must be set for fast clear to work without FMASK. */
2734 if (device->physical_device->rad_info.chip_class >= CIK)
2735 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2736 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2737 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2738 }
2739 }
2740
2741 cb->cb_color_base = va >> 8;
2742
2743 /* CMASK variables */
2744 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2745 va += iview->image->cmask.offset;
2746 cb->cb_color_cmask = va >> 8;
2747
2748 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2749 va += iview->image->dcc_offset;
2750 cb->cb_dcc_base = va >> 8;
2751
2752 uint32_t max_slice = radv_surface_layer_count(iview);
2753 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2754 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2755
2756 if (iview->image->info.samples > 1) {
2757 unsigned log_samples = util_logbase2(iview->image->info.samples);
2758
2759 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2760 S_028C74_NUM_FRAGMENTS(log_samples);
2761 }
2762
2763 if (iview->image->fmask.size) {
2764 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2765 cb->cb_color_fmask = va >> 8;
2766 } else {
2767 cb->cb_color_fmask = cb->cb_color_base;
2768 }
2769
2770 ntype = radv_translate_color_numformat(iview->vk_format,
2771 desc,
2772 vk_format_get_first_non_void_channel(iview->vk_format));
2773 format = radv_translate_colorformat(iview->vk_format);
2774 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2775 radv_finishme("Illegal color\n");
2776 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2777 endian = radv_colorformat_endian_swap(format);
2778
2779 /* blend clamp should be set for all NORM/SRGB types */
2780 if (ntype == V_028C70_NUMBER_UNORM ||
2781 ntype == V_028C70_NUMBER_SNORM ||
2782 ntype == V_028C70_NUMBER_SRGB)
2783 blend_clamp = 1;
2784
2785 /* set blend bypass according to docs if SINT/UINT or
2786 8/24 COLOR variants */
2787 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2788 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2789 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2790 blend_clamp = 0;
2791 blend_bypass = 1;
2792 }
2793 #if 0
2794 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2795 (format == V_028C70_COLOR_8 ||
2796 format == V_028C70_COLOR_8_8 ||
2797 format == V_028C70_COLOR_8_8_8_8))
2798 ->color_is_int8 = true;
2799 #endif
2800 cb->cb_color_info = S_028C70_FORMAT(format) |
2801 S_028C70_COMP_SWAP(swap) |
2802 S_028C70_BLEND_CLAMP(blend_clamp) |
2803 S_028C70_BLEND_BYPASS(blend_bypass) |
2804 S_028C70_SIMPLE_FLOAT(1) |
2805 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2806 ntype != V_028C70_NUMBER_SNORM &&
2807 ntype != V_028C70_NUMBER_SRGB &&
2808 format != V_028C70_COLOR_8_24 &&
2809 format != V_028C70_COLOR_24_8) |
2810 S_028C70_NUMBER_TYPE(ntype) |
2811 S_028C70_ENDIAN(endian);
2812 if (iview->image->info.samples > 1)
2813 if (iview->image->fmask.size)
2814 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2815
2816 if (iview->image->cmask.size &&
2817 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2818 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2819
2820 if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
2821 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2822
2823 if (device->physical_device->rad_info.chip_class >= VI) {
2824 unsigned max_uncompressed_block_size = 2;
2825 if (iview->image->info.samples > 1) {
2826 if (iview->image->surface.bpe == 1)
2827 max_uncompressed_block_size = 0;
2828 else if (iview->image->surface.bpe == 2)
2829 max_uncompressed_block_size = 1;
2830 }
2831
2832 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2833 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2834 }
2835
2836 /* This must be set for fast clear to work without FMASK. */
2837 if (!iview->image->fmask.size &&
2838 device->physical_device->rad_info.chip_class == SI) {
2839 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
2840 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2841 }
2842
2843 if (device->physical_device->rad_info.chip_class >= GFX9) {
2844 uint32_t max_slice = radv_surface_layer_count(iview);
2845 unsigned mip0_depth = iview->base_layer + max_slice - 1;
2846
2847 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
2848 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
2849 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
2850 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
2851 S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
2852 S_028C68_MAX_MIP(iview->image->info.levels);
2853
2854 cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
2855
2856 }
2857 }
2858
2859 static void
2860 radv_initialise_ds_surface(struct radv_device *device,
2861 struct radv_ds_buffer_info *ds,
2862 struct radv_image_view *iview)
2863 {
2864 unsigned level = iview->base_mip;
2865 unsigned format, stencil_format;
2866 uint64_t va, s_offs, z_offs;
2867 bool stencil_only = false;
2868 memset(ds, 0, sizeof(*ds));
2869 switch (iview->vk_format) {
2870 case VK_FORMAT_D24_UNORM_S8_UINT:
2871 case VK_FORMAT_X8_D24_UNORM_PACK32:
2872 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2873 ds->offset_scale = 2.0f;
2874 break;
2875 case VK_FORMAT_D16_UNORM:
2876 case VK_FORMAT_D16_UNORM_S8_UINT:
2877 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2878 ds->offset_scale = 4.0f;
2879 break;
2880 case VK_FORMAT_D32_SFLOAT:
2881 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2882 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2883 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2884 ds->offset_scale = 1.0f;
2885 break;
2886 case VK_FORMAT_S8_UINT:
2887 stencil_only = true;
2888 break;
2889 default:
2890 break;
2891 }
2892
2893 format = radv_translate_dbformat(iview->vk_format);
2894 stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
2895 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
2896
2897 uint32_t max_slice = radv_surface_layer_count(iview);
2898 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2899 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2900
2901 ds->db_htile_data_base = 0;
2902 ds->db_htile_surface = 0;
2903
2904 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2905 s_offs = z_offs = va;
2906
2907 if (device->physical_device->rad_info.chip_class >= GFX9) {
2908 assert(iview->image->surface.u.gfx9.surf_offset == 0);
2909 s_offs += iview->image->surface.u.gfx9.stencil_offset;
2910
2911 ds->db_z_info = S_028038_FORMAT(format) |
2912 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
2913 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2914 S_028038_MAXMIP(iview->image->info.levels - 1);
2915 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
2916 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
2917
2918 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
2919 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
2920 ds->db_depth_view |= S_028008_MIPID(level);
2921
2922 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
2923 S_02801C_Y_MAX(iview->image->info.height - 1);
2924
2925 /* Only use HTILE for the first level. */
2926 if (iview->image->surface.htile_size && !level) {
2927 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
2928
2929 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
2930 /* Use all of the htile_buffer for depth if there's no stencil. */
2931 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
2932 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2933 iview->image->htile_offset;
2934 ds->db_htile_data_base = va >> 8;
2935 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
2936 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
2937 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
2938 }
2939 } else {
2940 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
2941
2942 if (stencil_only)
2943 level_info = &iview->image->surface.u.legacy.stencil_level[level];
2944
2945 z_offs += iview->image->surface.u.legacy.level[level].offset;
2946 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
2947
2948 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2949 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2950 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
2951
2952 if (iview->image->info.samples > 1)
2953 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
2954
2955 if (device->physical_device->rad_info.chip_class >= CIK) {
2956 struct radeon_info *info = &device->physical_device->rad_info;
2957 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
2958 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
2959 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
2960 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2961 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2962 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2963
2964 if (stencil_only)
2965 tile_mode = stencil_tile_mode;
2966
2967 ds->db_depth_info |=
2968 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2969 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2970 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2971 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2972 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2973 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2974 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2975 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2976 } else {
2977 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2978 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2979 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2980 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2981 }
2982
2983 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2984 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2985 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2986
2987 if (iview->image->surface.htile_size && !level) {
2988 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
2989
2990 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
2991 /* Use all of the htile_buffer for depth if there's no stencil. */
2992 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2993
2994 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2995 iview->image->htile_offset;
2996 ds->db_htile_data_base = va >> 8;
2997 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2998 }
2999 }
3000
3001 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3002 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3003 }
3004
3005 VkResult radv_CreateFramebuffer(
3006 VkDevice _device,
3007 const VkFramebufferCreateInfo* pCreateInfo,
3008 const VkAllocationCallbacks* pAllocator,
3009 VkFramebuffer* pFramebuffer)
3010 {
3011 RADV_FROM_HANDLE(radv_device, device, _device);
3012 struct radv_framebuffer *framebuffer;
3013
3014 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3015
3016 size_t size = sizeof(*framebuffer) +
3017 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3018 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3019 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3020 if (framebuffer == NULL)
3021 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3022
3023 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3024 framebuffer->width = pCreateInfo->width;
3025 framebuffer->height = pCreateInfo->height;
3026 framebuffer->layers = pCreateInfo->layers;
3027 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3028 VkImageView _iview = pCreateInfo->pAttachments[i];
3029 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3030 framebuffer->attachments[i].attachment = iview;