radv: implement VK_KHR_push_descriptor
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46
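/* Build the 16-byte pipeline cache UUID from the Mesa and LLVM build
 * timestamps plus the GPU family, so cached shaders are invalidated
 * whenever the compiler stack or target device changes. Layout, as
 * written below: bytes 0-3 Mesa timestamp, 4-7 LLVM timestamp,
 * 8-9 family id, 10-15 the literal string "radv". */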
47 static int
48 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
49 {
50 uint32_t mesa_timestamp, llvm_timestamp;
51 uint16_t f = family;
52 memset(uuid, 0, VK_UUID_SIZE);
53 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
54 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
55 return -1;
56
57 memcpy(uuid, &mesa_timestamp, 4);
58 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
59 memcpy((char*)uuid + 8, &f, 2);
60 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
61 return 0;
62 }
63
64 static const VkExtensionProperties instance_extensions[] = {
65 {
66 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
67 .specVersion = 25,
68 },
69 #ifdef VK_USE_PLATFORM_XCB_KHR
70 {
71 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
72 .specVersion = 6,
73 },
74 #endif
75 #ifdef VK_USE_PLATFORM_XLIB_KHR
76 {
77 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
78 .specVersion = 6,
79 },
80 #endif
81 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
82 {
83 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
84 .specVersion = 5,
85 },
86 #endif
87 {
88 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
89 .specVersion = 1,
90 },
91 };
92
93 static const VkExtensionProperties common_device_extensions[] = {
94 {
95 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
96 .specVersion = 1,
97 },
98 {
99 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
100 .specVersion = 1,
101 },
102 {
103 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
104 .specVersion = 1,
105 },
106 {
107 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
112 .specVersion = 68,
113 },
114 {
115 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
116 .specVersion = 1,
117 },
118 {
119 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
120 .specVersion = 1,
121 },
122 {
123 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
124 .specVersion = 1,
125 },
126 };
127
128 static VkResult
129 radv_extensions_register(struct radv_instance *instance,
130 struct radv_extensions *extensions,
131 const VkExtensionProperties *new_ext,
132 uint32_t num_ext)
133 {
134 size_t new_size;
135 VkExtensionProperties *new_ptr;
136
137 assert(new_ext && num_ext > 0);
138
139 if (!new_ext)
140 return VK_ERROR_INITIALIZATION_FAILED;
141
142 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
143 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
144 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
145
146 /* Old array continues to be valid, update nothing */
147 if (!new_ptr)
148 return VK_ERROR_OUT_OF_HOST_MEMORY;
149
150 memcpy(&new_ptr[extensions->num_ext], new_ext,
151 num_ext * sizeof(VkExtensionProperties));
152 extensions->ext_array = new_ptr;
153 extensions->num_ext += num_ext;
154
155 return VK_SUCCESS;
156 }
157
158 static void
159 radv_extensions_finish(struct radv_instance *instance,
160 struct radv_extensions *extensions)
161 {
162 assert(extensions);
163
164 if (!extensions)
165 radv_loge("Attemted to free invalid extension struct\n");
166
167 if (extensions->ext_array)
168 vk_free(&instance->alloc, extensions->ext_array);
169 }
170
171 static bool
172 is_extension_enabled(const VkExtensionProperties *extensions,
173 size_t num_ext,
174 const char *name)
175 {
176 assert(extensions && name);
177
178 for (uint32_t i = 0; i < num_ext; i++) {
179 if (strcmp(name, extensions[i].extensionName) == 0)
180 return true;
181 }
182
183 return false;
184 }
185
186 static VkResult
187 radv_physical_device_init(struct radv_physical_device *device,
188 struct radv_instance *instance,
189 const char *path)
190 {
191 VkResult result;
192 drmVersionPtr version;
193 int fd;
194
195 fd = open(path, O_RDWR | O_CLOEXEC);
196 if (fd < 0)
197 return VK_ERROR_INCOMPATIBLE_DRIVER;
198
199 version = drmGetVersion(fd);
200 if (!version) {
201 close(fd);
202 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
203 "failed to get version %s: %m", path);
204 }
205
206 if (strcmp(version->name, "amdgpu")) {
207 drmFreeVersion(version);
208 close(fd);
209 return VK_ERROR_INCOMPATIBLE_DRIVER;
210 }
211 drmFreeVersion(version);
212
213 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
214 device->instance = instance;
215 assert(strlen(path) < ARRAY_SIZE(device->path));
216 strncpy(device->path, path, ARRAY_SIZE(device->path));
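/* The assert above guarantees strlen(path) < sizeof(device->path), so
 * this strncpy always NUL-terminates despite its usual caveat. */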
217
218 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
219 if (!device->ws) {
220 result = VK_ERROR_INCOMPATIBLE_DRIVER;
221 goto fail;
222 }
223
224 device->local_fd = fd;
225 device->ws->query_info(device->ws, &device->rad_info);
226 result = radv_init_wsi(device);
227 if (result != VK_SUCCESS) {
228 device->ws->destroy(device->ws);
229 goto fail;
230 }
231
232 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
233 radv_finish_wsi(device);
234 device->ws->destroy(device->ws);
235 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
236 "cannot generate UUID");
237 goto fail;
238 }
239
240 result = radv_extensions_register(instance,
241 &device->extensions,
242 common_device_extensions,
243 ARRAY_SIZE(common_device_extensions));
244 if (result != VK_SUCCESS)
245 goto fail;
246
247 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
248 device->name = device->rad_info.name;
249
250 return VK_SUCCESS;
251
252 fail:
253 close(fd);
254 return result;
255 }
256
257 static void
258 radv_physical_device_finish(struct radv_physical_device *device)
259 {
260 radv_extensions_finish(device->instance, &device->extensions);
261 radv_finish_wsi(device);
262 device->ws->destroy(device->ws);
263 close(device->local_fd);
264 }
265
266
267 static void *
268 default_alloc_func(void *pUserData, size_t size, size_t align,
269 VkSystemAllocationScope allocationScope)
270 {
271 return malloc(size);
272 }
273
274 static void *
275 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
276 size_t align, VkSystemAllocationScope allocationScope)
277 {
278 return realloc(pOriginal, size);
279 }
280
281 static void
282 default_free_func(void *pUserData, void *pMemory)
283 {
284 free(pMemory);
285 }
286
287 static const VkAllocationCallbacks default_alloc = {
288 .pUserData = NULL,
289 .pfnAllocation = default_alloc_func,
290 .pfnReallocation = default_realloc_func,
291 .pfnFree = default_free_func,
292 };
293
294 static const struct debug_control radv_debug_options[] = {
295 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
296 {"nodcc", RADV_DEBUG_NO_DCC},
297 {"shaders", RADV_DEBUG_DUMP_SHADERS},
298 {"nocache", RADV_DEBUG_NO_CACHE},
299 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
300 {"nohiz", RADV_DEBUG_NO_HIZ},
301 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
302 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
303 {"allbos", RADV_DEBUG_ALL_BOS},
304 {"noibs", RADV_DEBUG_NO_IBS},
305 {NULL, 0}
306 };
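/* These flags are parsed from the comma-separated RADV_DEBUG environment
 * variable in radv_CreateInstance, e.g. RADV_DEBUG=nocache,shaders. */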
307
308 VkResult radv_CreateInstance(
309 const VkInstanceCreateInfo* pCreateInfo,
310 const VkAllocationCallbacks* pAllocator,
311 VkInstance* pInstance)
312 {
313 struct radv_instance *instance;
314
315 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
316
317 uint32_t client_version;
318 if (pCreateInfo->pApplicationInfo &&
319 pCreateInfo->pApplicationInfo->apiVersion != 0) {
320 client_version = pCreateInfo->pApplicationInfo->apiVersion;
321 } else {
322 client_version = VK_MAKE_VERSION(1, 0, 0);
323 }
324
325 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
326 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
327 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
328 "Client requested version %d.%d.%d",
329 VK_VERSION_MAJOR(client_version),
330 VK_VERSION_MINOR(client_version),
331 VK_VERSION_PATCH(client_version));
332 }
333
334 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
335 if (!is_extension_enabled(instance_extensions,
336 ARRAY_SIZE(instance_extensions),
337 pCreateInfo->ppEnabledExtensionNames[i]))
338 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
339 }
340
341 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
342 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
343 if (!instance)
344 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
345
346 memset(instance, 0, sizeof(*instance));
347
348 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
349
350 if (pAllocator)
351 instance->alloc = *pAllocator;
352 else
353 instance->alloc = default_alloc;
354
355 instance->apiVersion = client_version;
356 instance->physicalDeviceCount = -1;
357
358 _mesa_locale_init();
359
360 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
361
362 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
363 radv_debug_options);
364
365 *pInstance = radv_instance_to_handle(instance);
366
367 return VK_SUCCESS;
368 }
369
370 void radv_DestroyInstance(
371 VkInstance _instance,
372 const VkAllocationCallbacks* pAllocator)
373 {
374 RADV_FROM_HANDLE(radv_instance, instance, _instance);
375
376 if (!instance)
377 return;
378
379 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
380 radv_physical_device_finish(instance->physicalDevices + i);
381 }
382
383 VG(VALGRIND_DESTROY_MEMPOOL(instance));
384
385 _mesa_locale_fini();
386
387 vk_free(&instance->alloc, instance);
388 }
389
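/* Probe up to 8 DRM render nodes and initialize every AMD
 * (PCI vendor id 0x1002) device found as a radv physical device. */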
390 static VkResult
391 radv_enumerate_devices(struct radv_instance *instance)
392 {
393 /* TODO: Check for more devices ? */
394 drmDevicePtr devices[8];
395 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
396 int max_devices;
397
398 instance->physicalDeviceCount = 0;
399
400 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); /* entry count, not byte size */
401 if (max_devices < 1)
402 return VK_ERROR_INCOMPATIBLE_DRIVER;
403
404 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
405 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
406 devices[i]->bustype == DRM_BUS_PCI &&
407 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
408
409 result = radv_physical_device_init(instance->physicalDevices +
410 instance->physicalDeviceCount,
411 instance,
412 devices[i]->nodes[DRM_NODE_RENDER]);
413 if (result == VK_SUCCESS)
414 ++instance->physicalDeviceCount;
415 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
416 return result;
417 }
418 }
419 return result;
420 }
421
422 VkResult radv_EnumeratePhysicalDevices(
423 VkInstance _instance,
424 uint32_t* pPhysicalDeviceCount,
425 VkPhysicalDevice* pPhysicalDevices)
426 {
427 RADV_FROM_HANDLE(radv_instance, instance, _instance);
428 VkResult result;
429
430 if (instance->physicalDeviceCount < 0) {
431 result = radv_enumerate_devices(instance);
432 if (result != VK_SUCCESS &&
433 result != VK_ERROR_INCOMPATIBLE_DRIVER)
434 return result;
435 }
436
437 if (!pPhysicalDevices) {
438 *pPhysicalDeviceCount = instance->physicalDeviceCount;
439 } else {
440 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
441 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
442 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
443 }
444
445 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
446 : VK_SUCCESS;
447 }
448
449 void radv_GetPhysicalDeviceFeatures(
450 VkPhysicalDevice physicalDevice,
451 VkPhysicalDeviceFeatures* pFeatures)
452 {
453 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
454
455 memset(pFeatures, 0, sizeof(*pFeatures));
456
457 *pFeatures = (VkPhysicalDeviceFeatures) {
458 .robustBufferAccess = true,
459 .fullDrawIndexUint32 = true,
460 .imageCubeArray = true,
461 .independentBlend = true,
462 .geometryShader = true,
463 .tessellationShader = true,
464 .sampleRateShading = false,
465 .dualSrcBlend = true,
466 .logicOp = true,
467 .multiDrawIndirect = true,
468 .drawIndirectFirstInstance = true,
469 .depthClamp = true,
470 .depthBiasClamp = true,
471 .fillModeNonSolid = true,
472 .depthBounds = true,
473 .wideLines = true,
474 .largePoints = true,
475 .alphaToOne = true,
476 .multiViewport = true,
477 .samplerAnisotropy = true,
478 .textureCompressionETC2 = false,
479 .textureCompressionASTC_LDR = false,
480 .textureCompressionBC = true,
481 .occlusionQueryPrecise = true,
482 .pipelineStatisticsQuery = false,
483 .vertexPipelineStoresAndAtomics = true,
484 .fragmentStoresAndAtomics = true,
485 .shaderTessellationAndGeometryPointSize = true,
486 .shaderImageGatherExtended = true,
487 .shaderStorageImageExtendedFormats = true,
488 .shaderStorageImageMultisample = false,
489 .shaderUniformBufferArrayDynamicIndexing = true,
490 .shaderSampledImageArrayDynamicIndexing = true,
491 .shaderStorageBufferArrayDynamicIndexing = true,
492 .shaderStorageImageArrayDynamicIndexing = true,
493 .shaderStorageImageReadWithoutFormat = true,
494 .shaderStorageImageWriteWithoutFormat = true,
495 .shaderClipDistance = true,
496 .shaderCullDistance = true,
497 .shaderFloat64 = true,
498 .shaderInt64 = false,
499 .shaderInt16 = false,
500 .sparseBinding = true,
501 .variableMultisampleRate = false,
502 .inheritedQueries = false,
503 };
504 }
505
506 void radv_GetPhysicalDeviceFeatures2KHR(
507 VkPhysicalDevice physicalDevice,
508 VkPhysicalDeviceFeatures2KHR *pFeatures)
509 {
510 return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
511 }
512
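/* Derive the packed driver version from the Mesa VERSION string. For
 * "-devel" builds the version is decremented so that a development
 * snapshot reports the previous release, e.g. "17.1.0-devel" -> 17.0.99. */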
513 static uint32_t radv_get_driver_version()
514 {
515 const char *minor_string = strchr(VERSION, '.');
516 const char *patch_string = minor_string ? strchr(minor_string + 1, '.'): NULL;
517 int major = atoi(VERSION);
518 int minor = minor_string ? atoi(minor_string + 1) : 0;
519 int patch = patch_string ? atoi(patch_string + 1) : 0;
520 if (strstr(VERSION, "devel")) {
521 if (patch == 0) {
522 patch = 99;
523 if (minor == 0) {
524 minor = 99;
525 --major;
526 } else
527 --minor;
528 } else
529 --patch;
530 }
531 uint32_t version = VK_MAKE_VERSION(major, minor, patch);
532 return version;
533 }
534
535 void radv_GetPhysicalDeviceProperties(
536 VkPhysicalDevice physicalDevice,
537 VkPhysicalDeviceProperties* pProperties)
538 {
539 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
540 VkSampleCountFlags sample_counts = 0xf;
541 VkPhysicalDeviceLimits limits = {
542 .maxImageDimension1D = (1 << 14),
543 .maxImageDimension2D = (1 << 14),
544 .maxImageDimension3D = (1 << 11),
545 .maxImageDimensionCube = (1 << 14),
546 .maxImageArrayLayers = (1 << 11),
547 .maxTexelBufferElements = 128 * 1024 * 1024,
548 .maxUniformBufferRange = UINT32_MAX,
549 .maxStorageBufferRange = UINT32_MAX,
550 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
551 .maxMemoryAllocationCount = UINT32_MAX,
552 .maxSamplerAllocationCount = 64 * 1024,
553 .bufferImageGranularity = 64, /* A cache line */
554 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
555 .maxBoundDescriptorSets = MAX_SETS,
556 .maxPerStageDescriptorSamplers = (1u << 31) / 16,
557 .maxPerStageDescriptorUniformBuffers = (1u << 31) / 16,
558 .maxPerStageDescriptorStorageBuffers = (1u << 31) / 16,
559 .maxPerStageDescriptorSampledImages = (1u << 31) / 96,
560 .maxPerStageDescriptorStorageImages = (1u << 31) / 64,
561 .maxPerStageDescriptorInputAttachments = (1u << 31) / 64,
562 .maxPerStageResources = (1u << 31) / 32,
563 .maxDescriptorSetSamplers = 256,
564 .maxDescriptorSetUniformBuffers = (1u << 31) / 16,
565 .maxDescriptorSetUniformBuffersDynamic = 8,
566 .maxDescriptorSetStorageBuffers = (1u << 31) / 16,
567 .maxDescriptorSetStorageBuffersDynamic = 8,
568 .maxDescriptorSetSampledImages = (1u << 31) / 96,
569 .maxDescriptorSetStorageImages = (1u << 31) / 64,
570 .maxDescriptorSetInputAttachments = (1u << 31) / 64,
571 .maxVertexInputAttributes = 32,
572 .maxVertexInputBindings = 32,
573 .maxVertexInputAttributeOffset = 2047,
574 .maxVertexInputBindingStride = 2048,
575 .maxVertexOutputComponents = 128,
576 .maxTessellationGenerationLevel = 64,
577 .maxTessellationPatchSize = 32,
578 .maxTessellationControlPerVertexInputComponents = 128,
579 .maxTessellationControlPerVertexOutputComponents = 128,
580 .maxTessellationControlPerPatchOutputComponents = 120,
581 .maxTessellationControlTotalOutputComponents = 4096,
582 .maxTessellationEvaluationInputComponents = 128,
583 .maxTessellationEvaluationOutputComponents = 128,
584 .maxGeometryShaderInvocations = 32,
585 .maxGeometryInputComponents = 64,
586 .maxGeometryOutputComponents = 128,
587 .maxGeometryOutputVertices = 256,
588 .maxGeometryTotalOutputComponents = 1024,
589 .maxFragmentInputComponents = 128,
590 .maxFragmentOutputAttachments = 8,
591 .maxFragmentDualSrcAttachments = 1,
592 .maxFragmentCombinedOutputResources = 8,
593 .maxComputeSharedMemorySize = 32768,
594 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
595 .maxComputeWorkGroupInvocations = 2048,
596 .maxComputeWorkGroupSize = {
597 2048,
598 2048,
599 2048
600 },
601 .subPixelPrecisionBits = 4 /* FIXME */,
602 .subTexelPrecisionBits = 4 /* FIXME */,
603 .mipmapPrecisionBits = 4 /* FIXME */,
604 .maxDrawIndexedIndexValue = UINT32_MAX,
605 .maxDrawIndirectCount = UINT32_MAX,
606 .maxSamplerLodBias = 16,
607 .maxSamplerAnisotropy = 16,
608 .maxViewports = MAX_VIEWPORTS,
609 .maxViewportDimensions = { (1 << 14), (1 << 14) },
610 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
611 .viewportSubPixelBits = 13, /* We take a float? */
612 .minMemoryMapAlignment = 4096, /* A page */
613 .minTexelBufferOffsetAlignment = 1,
614 .minUniformBufferOffsetAlignment = 4,
615 .minStorageBufferOffsetAlignment = 4,
616 .minTexelOffset = -32,
617 .maxTexelOffset = 31,
618 .minTexelGatherOffset = -32,
619 .maxTexelGatherOffset = 31,
620 .minInterpolationOffset = -2,
621 .maxInterpolationOffset = 2,
622 .subPixelInterpolationOffsetBits = 8,
623 .maxFramebufferWidth = (1 << 14),
624 .maxFramebufferHeight = (1 << 14),
625 .maxFramebufferLayers = (1 << 10),
626 .framebufferColorSampleCounts = sample_counts,
627 .framebufferDepthSampleCounts = sample_counts,
628 .framebufferStencilSampleCounts = sample_counts,
629 .framebufferNoAttachmentsSampleCounts = sample_counts,
630 .maxColorAttachments = MAX_RTS,
631 .sampledImageColorSampleCounts = sample_counts,
632 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
633 .sampledImageDepthSampleCounts = sample_counts,
634 .sampledImageStencilSampleCounts = sample_counts,
635 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
636 .maxSampleMaskWords = 1,
637 .timestampComputeAndGraphics = false,
638 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, /* ns per tick; freq is in kHz */
639 .maxClipDistances = 8,
640 .maxCullDistances = 8,
641 .maxCombinedClipAndCullDistances = 8,
642 .discreteQueuePriorities = 1,
643 .pointSizeRange = { 0.125, 255.875 },
644 .lineWidthRange = { 0.0, 7.9921875 },
645 .pointSizeGranularity = (1.0 / 8.0),
646 .lineWidthGranularity = (1.0 / 128.0),
647 .strictLines = false, /* FINISHME */
648 .standardSampleLocations = true,
649 .optimalBufferCopyOffsetAlignment = 128,
650 .optimalBufferCopyRowPitchAlignment = 128,
651 .nonCoherentAtomSize = 64,
652 };
653
654 *pProperties = (VkPhysicalDeviceProperties) {
655 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
656 .driverVersion = radv_get_driver_version(),
657 .vendorID = 0x1002,
658 .deviceID = pdevice->rad_info.pci_id,
659 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
660 .limits = limits,
661 .sparseProperties = {0}, /* FINISHME: sparse residency properties not reported yet. */
662 };
663
664 strcpy(pProperties->deviceName, pdevice->name);
665 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
666 }
667
668 void radv_GetPhysicalDeviceProperties2KHR(
669 VkPhysicalDevice physicalDevice,
670 VkPhysicalDeviceProperties2KHR *pProperties)
671 {
672 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
673
674 vk_foreach_struct(ext, pProperties->pNext) {
675 switch (ext->sType) {
676 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
677 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
678 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
679 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
680 break;
681 }
682 default:
683 break;
684 }
685 }
686 }
687
688 static void radv_get_physical_device_queue_family_properties(
689 struct radv_physical_device* pdevice,
690 uint32_t* pCount,
691 VkQueueFamilyProperties** pQueueFamilyProperties)
692 {
693 int num_queue_families = 1;
694 int idx;
695 if (pdevice->rad_info.compute_rings > 0 &&
696 pdevice->rad_info.chip_class >= CIK &&
697 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
698 num_queue_families++;
699
700 if (pQueueFamilyProperties == NULL) {
701 *pCount = num_queue_families;
702 return;
703 }
704
705 if (!*pCount)
706 return;
707
708 idx = 0;
709 if (*pCount >= 1) {
710 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
711 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
712 VK_QUEUE_COMPUTE_BIT |
713 VK_QUEUE_TRANSFER_BIT |
714 VK_QUEUE_SPARSE_BINDING_BIT,
715 .queueCount = 1,
716 .timestampValidBits = 64,
717 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
718 };
719 idx++;
720 }
721
722 if (pdevice->rad_info.compute_rings > 0 &&
723 pdevice->rad_info.chip_class >= CIK &&
724 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
725 if (*pCount > idx) {
726 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
727 .queueFlags = VK_QUEUE_COMPUTE_BIT |
728 VK_QUEUE_TRANSFER_BIT |
729 VK_QUEUE_SPARSE_BINDING_BIT,
730 .queueCount = pdevice->rad_info.compute_rings,
731 .timestampValidBits = 64,
732 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
733 };
734 idx++;
735 }
736 }
737 *pCount = idx;
738 }
739
740 void radv_GetPhysicalDeviceQueueFamilyProperties(
741 VkPhysicalDevice physicalDevice,
742 uint32_t* pCount,
743 VkQueueFamilyProperties* pQueueFamilyProperties)
744 {
745 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
746 if (!pQueueFamilyProperties) {
747 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
748 return;
749 }
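/* Build pointer slots for up to three families, although only two are
 * currently reported (GFX, plus async compute on CIK+ when enabled);
 * the helper fills at most *pCount of them. */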
750 VkQueueFamilyProperties *properties[] = {
751 pQueueFamilyProperties + 0,
752 pQueueFamilyProperties + 1,
753 pQueueFamilyProperties + 2,
754 };
755 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
756 assert(*pCount <= 3);
757 }
758
759 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
760 VkPhysicalDevice physicalDevice,
761 uint32_t* pCount,
762 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
763 {
764 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
765 if (!pQueueFamilyProperties) {
766 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
767 return;
768 }
769 VkQueueFamilyProperties *properties[] = {
770 &pQueueFamilyProperties[0].queueFamilyProperties,
771 &pQueueFamilyProperties[1].queueFamilyProperties,
772 &pQueueFamilyProperties[2].queueFamilyProperties,
773 };
774 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
775 assert(*pCount <= 3);
776 }
777
778 void radv_GetPhysicalDeviceMemoryProperties(
779 VkPhysicalDevice physicalDevice,
780 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
781 {
782 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
783
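/* Advertise four memory types over three heaps: CPU-invisible VRAM,
 * CPU-visible VRAM, and GTT as both write-combined and cached. */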
784 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
785
786 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
787 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
788 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
789 .heapIndex = RADV_MEM_HEAP_VRAM,
790 };
791 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
792 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
793 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
794 .heapIndex = RADV_MEM_HEAP_GTT,
795 };
796 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
797 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
798 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
799 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
800 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
801 };
802 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
803 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
804 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
805 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
806 .heapIndex = RADV_MEM_HEAP_GTT,
807 };
808
809 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
810
811 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
812 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
813 .size = physical_device->rad_info.vram_size -
814 physical_device->rad_info.visible_vram_size,
815 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
816 };
817 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
818 .size = physical_device->rad_info.visible_vram_size,
819 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
820 };
821 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
822 .size = physical_device->rad_info.gart_size,
823 .flags = 0,
824 };
825 }
826
827 void radv_GetPhysicalDeviceMemoryProperties2KHR(
828 VkPhysicalDevice physicalDevice,
829 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
830 {
831 return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
832 &pMemoryProperties->memoryProperties);
833 }
834
835 static int
836 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
837 int queue_family_index, int idx)
838 {
839 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
840 queue->device = device;
841 queue->queue_family_index = queue_family_index;
842 queue->queue_idx = idx;
843
844 queue->hw_ctx = device->ws->ctx_create(device->ws);
845 if (!queue->hw_ctx)
846 return VK_ERROR_OUT_OF_HOST_MEMORY;
847
848 return VK_SUCCESS;
849 }
850
851 static void
852 radv_queue_finish(struct radv_queue *queue)
853 {
854 if (queue->hw_ctx)
855 queue->device->ws->ctx_destroy(queue->hw_ctx);
856
857 if (queue->initial_preamble_cs)
858 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
859 if (queue->continue_preamble_cs)
860 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
861 if (queue->descriptor_bo)
862 queue->device->ws->buffer_destroy(queue->descriptor_bo);
863 if (queue->scratch_bo)
864 queue->device->ws->buffer_destroy(queue->scratch_bo);
865 if (queue->esgs_ring_bo)
866 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
867 if (queue->gsvs_ring_bo)
868 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
869 if (queue->tess_factor_ring_bo)
870 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
871 if (queue->tess_offchip_ring_bo)
872 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
873 if (queue->compute_scratch_bo)
874 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
875 }
876
877 static void
878 radv_device_init_gs_info(struct radv_device *device)
879 {
880 switch (device->physical_device->rad_info.family) {
881 case CHIP_OLAND:
882 case CHIP_HAINAN:
883 case CHIP_KAVERI:
884 case CHIP_KABINI:
885 case CHIP_MULLINS:
886 case CHIP_ICELAND:
887 case CHIP_CARRIZO:
888 case CHIP_STONEY:
889 device->gs_table_depth = 16;
890 return;
891 case CHIP_TAHITI:
892 case CHIP_PITCAIRN:
893 case CHIP_VERDE:
894 case CHIP_BONAIRE:
895 case CHIP_HAWAII:
896 case CHIP_TONGA:
897 case CHIP_FIJI:
898 case CHIP_POLARIS10:
899 case CHIP_POLARIS11:
900 device->gs_table_depth = 32;
901 return;
902 default:
903 unreachable("unknown GPU");
904 }
905 }
906
907 VkResult radv_CreateDevice(
908 VkPhysicalDevice physicalDevice,
909 const VkDeviceCreateInfo* pCreateInfo,
910 const VkAllocationCallbacks* pAllocator,
911 VkDevice* pDevice)
912 {
913 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
914 VkResult result;
915 struct radv_device *device;
916
917 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
918 if (!is_extension_enabled(physical_device->extensions.ext_array,
919 physical_device->extensions.num_ext,
920 pCreateInfo->ppEnabledExtensionNames[i]))
921 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
922 }
923
924 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
925 sizeof(*device), 8,
926 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
927 if (!device)
928 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
929
930 memset(device, 0, sizeof(*device));
931
932 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
933 device->instance = physical_device->instance;
934 device->physical_device = physical_device;
935
936 device->debug_flags = device->instance->debug_flags;
937
938 device->ws = physical_device->ws;
939 if (pAllocator)
940 device->alloc = *pAllocator;
941 else
942 device->alloc = physical_device->instance->alloc;
943
944 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
945 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
946 uint32_t qfi = queue_create->queueFamilyIndex;
947
948 device->queues[qfi] = vk_alloc(&device->alloc,
949 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
950 if (!device->queues[qfi]) {
951 result = VK_ERROR_OUT_OF_HOST_MEMORY;
952 goto fail;
953 }
954
955 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
956
957 device->queue_count[qfi] = queue_create->queueCount;
958
959 for (unsigned q = 0; q < queue_create->queueCount; q++) {
960 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
961 if (result != VK_SUCCESS)
962 goto fail;
963 }
964 }
965
966 #if HAVE_LLVM < 0x0400
967 device->llvm_supports_spill = false;
968 #else
969 device->llvm_supports_spill = true;
970 #endif
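/* Scratch (register spill) support in the AMDGPU LLVM backend requires
 * LLVM 4.0 or newer; without it shaders that exceed the register budget
 * cannot spill. */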
971
972 /* The maximum number of scratch waves. Scratch space isn't divided
973 * evenly between CUs. The number is only a function of the number of CUs.
974 * We can decrease the constant to decrease the scratch buffer size.
975 *
976 * scratch_waves must be >= the maximum possible size of
977 * 1 threadgroup, so that the hw doesn't hang from being unable
978 * to start any.
979 *
980 * The recommended value is 4 per CU at most. Higher numbers don't
981 * bring much benefit, but they still occupy chip resources (think
982 * async compute). I've seen ~2% performance difference between 4 and 32.
983 */
984 uint32_t max_threads_per_block = 2048;
985 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
986 max_threads_per_block / 64);
987
988 radv_device_init_gs_info(device);
989
990 device->tess_offchip_block_dw_size =
991 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
992 device->has_distributed_tess =
993 device->physical_device->rad_info.chip_class >= VI &&
994 device->physical_device->rad_info.max_se >= 2;
995
996 result = radv_device_init_meta(device);
997 if (result != VK_SUCCESS)
998 goto fail;
999
1000 radv_device_init_msaa(device);
1001
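/* Pre-build two tiny command streams per queue family: empty_cs is
 * submitted e.g. to signal a fence when there are no command buffers,
 * and flush_cs emits a full cache flush for queue synchronization. */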
1002 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1003 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1004 switch (family) {
1005 case RADV_QUEUE_GENERAL:
1006 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1007 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1008 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1009 break;
1010 case RADV_QUEUE_COMPUTE:
1011 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1012 radeon_emit(device->empty_cs[family], 0);
1013 break;
1014 }
1015 device->ws->cs_finalize(device->empty_cs[family]);
1016
1017 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1018 switch (family) {
1019 case RADV_QUEUE_GENERAL:
1020 case RADV_QUEUE_COMPUTE:
1021 si_cs_emit_cache_flush(device->flush_cs[family],
1022 device->physical_device->rad_info.chip_class,
1023 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1024 RADV_CMD_FLAG_INV_ICACHE |
1025 RADV_CMD_FLAG_INV_SMEM_L1 |
1026 RADV_CMD_FLAG_INV_VMEM_L1 |
1027 RADV_CMD_FLAG_INV_GLOBAL_L2);
1028 break;
1029 }
1030 device->ws->cs_finalize(device->flush_cs[family]);
1031 }
1032
1033 if (getenv("RADV_TRACE_FILE")) {
1034 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1035 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1036 if (!device->trace_bo) {
1037 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1038 goto fail;
1039 }
1038
1039 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1040 if (!device->trace_id_ptr) {
1041 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1042 goto fail;
1043 }
1042 }
1043
1044 if (device->physical_device->rad_info.chip_class >= CIK)
1045 cik_create_gfx_config(device);
1046
1047 VkPipelineCacheCreateInfo ci;
1048 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1049 ci.pNext = NULL;
1050 ci.flags = 0;
1051 ci.pInitialData = NULL;
1052 ci.initialDataSize = 0;
1053 VkPipelineCache pc;
1054 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1055 &ci, NULL, &pc);
1056 if (result != VK_SUCCESS)
1057 goto fail;
1058
1059 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1060
1061 *pDevice = radv_device_to_handle(device);
1062 return VK_SUCCESS;
1063
1064 fail:
1065 if (device->trace_bo)
1066 device->ws->buffer_destroy(device->trace_bo);
1067
1068 if (device->gfx_init)
1069 device->ws->buffer_destroy(device->gfx_init);
1070
1071 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1072 for (unsigned q = 0; q < device->queue_count[i]; q++)
1073 radv_queue_finish(&device->queues[i][q]);
1074 if (device->queue_count[i])
1075 vk_free(&device->alloc, device->queues[i]);
1076 }
1077
1078 vk_free(&device->alloc, device);
1079 return result;
1080 }
1081
1082 void radv_DestroyDevice(
1083 VkDevice _device,
1084 const VkAllocationCallbacks* pAllocator)
1085 {
1086 RADV_FROM_HANDLE(radv_device, device, _device);
1087
1088 if (!device)
1089 return;
1090
1091 if (device->trace_bo)
1092 device->ws->buffer_destroy(device->trace_bo);
1093
1094 if (device->gfx_init)
1095 device->ws->buffer_destroy(device->gfx_init);
1096
1097 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1098 for (unsigned q = 0; q < device->queue_count[i]; q++)
1099 radv_queue_finish(&device->queues[i][q]);
1100 if (device->queue_count[i])
1101 vk_free(&device->alloc, device->queues[i]);
1102 if (device->empty_cs[i])
1103 device->ws->cs_destroy(device->empty_cs[i]);
1104 if (device->flush_cs[i])
1105 device->ws->cs_destroy(device->flush_cs[i]);
1106 }
1107 radv_device_finish_meta(device);
1108
1109 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1110 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1111
1112 vk_free(&device->alloc, device);
1113 }
1114
1115 VkResult radv_EnumerateInstanceExtensionProperties(
1116 const char* pLayerName,
1117 uint32_t* pPropertyCount,
1118 VkExtensionProperties* pProperties)
1119 {
1120 if (pProperties == NULL) {
1121 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1122 return VK_SUCCESS;
1123 }
1124
1125 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1126 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1127
1128 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1129 return VK_INCOMPLETE;
1130
1131 return VK_SUCCESS;
1132 }
1133
1134 VkResult radv_EnumerateDeviceExtensionProperties(
1135 VkPhysicalDevice physicalDevice,
1136 const char* pLayerName,
1137 uint32_t* pPropertyCount,
1138 VkExtensionProperties* pProperties)
1139 {
1140 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1141
1142 if (pProperties == NULL) {
1143 *pPropertyCount = pdevice->extensions.num_ext;
1144 return VK_SUCCESS;
1145 }
1146
1147 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1148 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1149
1150 if (*pPropertyCount < pdevice->extensions.num_ext)
1151 return VK_INCOMPLETE;
1152
1153 return VK_SUCCESS;
1154 }
1155
1156 VkResult radv_EnumerateInstanceLayerProperties(
1157 uint32_t* pPropertyCount,
1158 VkLayerProperties* pProperties)
1159 {
1160 if (pProperties == NULL) {
1161 *pPropertyCount = 0;
1162 return VK_SUCCESS;
1163 }
1164
1165 /* None supported at this time */
1166 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1167 }
1168
1169 VkResult radv_EnumerateDeviceLayerProperties(
1170 VkPhysicalDevice physicalDevice,
1171 uint32_t* pPropertyCount,
1172 VkLayerProperties* pProperties)
1173 {
1174 if (pProperties == NULL) {
1175 *pPropertyCount = 0;
1176 return VK_SUCCESS;
1177 }
1178
1179 /* None supported at this time */
1180 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1181 }
1182
1183 void radv_GetDeviceQueue(
1184 VkDevice _device,
1185 uint32_t queueFamilyIndex,
1186 uint32_t queueIndex,
1187 VkQueue* pQueue)
1188 {
1189 RADV_FROM_HANDLE(radv_device, device, _device);
1190
1191 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1192 }
1193
1194 static void radv_dump_trace(struct radv_device *device,
1195 struct radeon_winsys_cs *cs)
1196 {
1197 const char *filename = getenv("RADV_TRACE_FILE");
1198 FILE *f = fopen(filename, "w");
1199 if (!f) {
1200 fprintf(stderr, "Failed to open trace file %s\n", filename);
1201 return;
1202 }
1203
1204 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1205 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1206 fclose(f);
1207 }
1208
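/* Fill the queue's descriptor BO with the buffer resources for the
 * geometry/tessellation rings. map[0..1] hold the scratch rsrc written
 * by the caller (plus two padding dwords); the ring descriptors start
 * at map[4], one 4-dword buffer descriptor per ring. */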
1209 static void
1210 fill_geom_tess_rings(struct radv_queue *queue,
1211 uint32_t *map,
1212 bool add_sample_positions,
1213 uint32_t esgs_ring_size,
1214 struct radeon_winsys_bo *esgs_ring_bo,
1215 uint32_t gsvs_ring_size,
1216 struct radeon_winsys_bo *gsvs_ring_bo,
1217 uint32_t tess_factor_ring_size,
1218 struct radeon_winsys_bo *tess_factor_ring_bo,
1219 uint32_t tess_offchip_ring_size,
1220 struct radeon_winsys_bo *tess_offchip_ring_bo)
1221 {
1222 uint64_t esgs_va = 0, gsvs_va = 0;
1223 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1224 uint32_t *desc = &map[4];
1225
1226 if (esgs_ring_bo)
1227 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1228 if (gsvs_ring_bo)
1229 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1230 if (tess_factor_ring_bo)
1231 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1232 if (tess_offchip_ring_bo)
1233 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1234
1235 /* ES entry for ES->GS ring: stride 0, num records = size, add tid,
1236 swizzle, elsize 4, index stride 64 */
1237 desc[0] = esgs_va;
1238 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1239 S_008F04_STRIDE(0) |
1240 S_008F04_SWIZZLE_ENABLE(true);
1241 desc[2] = esgs_ring_size;
1242 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1243 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1244 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1245 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1246 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1247 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1248 S_008F0C_ELEMENT_SIZE(1) |
1249 S_008F0C_INDEX_STRIDE(3) |
1250 S_008F0C_ADD_TID_ENABLE(true);
1251
1252 desc += 4;
1253 /* GS entry for ES->GS ring */
1254 /* stride 0, num records - size, elsize0,
1255 index stride 0 */
1256 desc[0] = esgs_va;
1257 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1258 S_008F04_STRIDE(0) |
1259 S_008F04_SWIZZLE_ENABLE(false);
1260 desc[2] = esgs_ring_size;
1261 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1262 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1263 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1264 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1265 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1266 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1267 S_008F0C_ELEMENT_SIZE(0) |
1268 S_008F0C_INDEX_STRIDE(0) |
1269 S_008F0C_ADD_TID_ENABLE(false);
1270
1271 desc += 4;
1272 /* VS entry for GS->VS ring */
1273 /* stride 0, num records - size, elsize0,
1274 index stride 0 */
1275 desc[0] = gsvs_va;
1276 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1277 S_008F04_STRIDE(0) |
1278 S_008F04_SWIZZLE_ENABLE(false);
1279 desc[2] = gsvs_ring_size;
1280 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1281 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1282 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1283 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1284 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1285 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1286 S_008F0C_ELEMENT_SIZE(0) |
1287 S_008F0C_INDEX_STRIDE(0) |
1288 S_008F0C_ADD_TID_ENABLE(false);
1289 desc += 4;
1290
1291 /* stride gsvs_itemsize, num records 64
1292 elsize 4, index stride 16 */
1293 /* shader will patch stride and desc[2] */
1294 desc[0] = gsvs_va;
1295 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1296 S_008F04_STRIDE(0) |
1297 S_008F04_SWIZZLE_ENABLE(true);
1298 desc[2] = 0;
1299 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1300 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1301 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1302 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1303 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1304 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1305 S_008F0C_ELEMENT_SIZE(1) |
1306 S_008F0C_INDEX_STRIDE(1) |
1307 S_008F0C_ADD_TID_ENABLE(true);
1308 desc += 4;
1309
1310 desc[0] = tess_factor_va;
1311 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1312 S_008F04_STRIDE(0) |
1313 S_008F04_SWIZZLE_ENABLE(false);
1314 desc[2] = tess_factor_ring_size;
1315 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1316 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1317 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1318 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1319 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1320 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1321 S_008F0C_ELEMENT_SIZE(0) |
1322 S_008F0C_INDEX_STRIDE(0) |
1323 S_008F0C_ADD_TID_ENABLE(false);
1324 desc += 4;
1325
1326 desc[0] = tess_offchip_va;
1327 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1328 S_008F04_STRIDE(0) |
1329 S_008F04_SWIZZLE_ENABLE(false);
1330 desc[2] = tess_offchip_ring_size;
1331 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1332 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1333 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1334 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1335 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1336 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1337 S_008F0C_ELEMENT_SIZE(0) |
1338 S_008F0C_INDEX_STRIDE(0) |
1339 S_008F0C_ADD_TID_ENABLE(false);
1340 desc += 4;
1341
1342 /* Add sample positions after all rings; the descriptor BO is only
1343 sized for them when add_sample_positions is set. */
1344 if (add_sample_positions) {
1345 memcpy(desc, queue->device->sample_locations_1x, 8);
1346 desc += 2;
1347 memcpy(desc, queue->device->sample_locations_2x, 16);
1348 desc += 4;
1349 memcpy(desc, queue->device->sample_locations_4x, 32);
1350 desc += 8;
1351 memcpy(desc, queue->device->sample_locations_8x, 64);
1352 desc += 16;
1353 memcpy(desc, queue->device->sample_locations_16x, 128);
1354 }
1352 }
1353
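/* Compute the VGT_HS_OFFCHIP_PARAM value: how many off-chip tess
 * buffers are available and at what dword granularity. CIK+ doubles
 * the per-SE buffer count (except on Carrizo/Stoney), and VI reserves
 * one slot, hence the decrement below. */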
1354 static unsigned
1355 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1356 {
1357 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1358 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1359 device->physical_device->rad_info.family != CHIP_STONEY;
1360 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1361 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1362 device->physical_device->rad_info.max_se;
1363 unsigned offchip_granularity;
1364 unsigned hs_offchip_param;
1365 switch (device->tess_offchip_block_dw_size) {
1366 default:
1367 assert(0);
1368 /* fall through */
1369 case 8192:
1370 offchip_granularity = V_03093C_X_8K_DWORDS;
1371 break;
1372 case 4096:
1373 offchip_granularity = V_03093C_X_4K_DWORDS;
1374 break;
1375 }
1376
1377 switch (device->physical_device->rad_info.chip_class) {
1378 case SI:
1379 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1380 break;
1381 case CIK:
1382 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1383 break;
1384 case VI:
1385 default:
1386 max_offchip_buffers = MIN2(max_offchip_buffers, 512);
1387 break;
1388 }
1389
1390 *max_offchip_buffers_p = max_offchip_buffers;
1391 if (device->physical_device->rad_info.chip_class >= CIK) {
1392 if (device->physical_device->rad_info.chip_class >= VI)
1393 --max_offchip_buffers;
1394 hs_offchip_param =
1395 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1396 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1397 } else {
1398 hs_offchip_param =
1399 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1400 }
1401 return hs_offchip_param;
1402 }
1403
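/* (Re)build the per-queue preamble command streams. The preamble binds
 * scratch, ring and descriptor BOs before any user IB runs, and is only
 * regenerated when a submission needs more than the queue already owns.
 * Two variants are produced: dest_cs[0] starts a submission (with a
 * full cache flush), dest_cs[1] continues one. */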
1404 static VkResult
1405 radv_get_preamble_cs(struct radv_queue *queue,
1406 uint32_t scratch_size,
1407 uint32_t compute_scratch_size,
1408 uint32_t esgs_ring_size,
1409 uint32_t gsvs_ring_size,
1410 bool needs_tess_rings,
1411 bool needs_sample_positions,
1412 struct radeon_winsys_cs **initial_preamble_cs,
1413 struct radeon_winsys_cs **continue_preamble_cs)
1414 {
1415 struct radeon_winsys_bo *scratch_bo = NULL;
1416 struct radeon_winsys_bo *descriptor_bo = NULL;
1417 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1418 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1419 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1420 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1421 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1422 struct radeon_winsys_cs *dest_cs[2] = {0};
1423 bool add_tess_rings = false, add_sample_positions = false;
1424 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1425 unsigned max_offchip_buffers;
1426 unsigned hs_offchip_param = 0;
1427 if (!queue->has_tess_rings) {
1428 if (needs_tess_rings)
1429 add_tess_rings = true;
1430 }
1431 if (!queue->has_sample_positions) {
1432 if (needs_sample_positions)
1433 add_sample_positions = true;
1434 }
1435 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1436 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1437 &max_offchip_buffers);
1438 tess_offchip_ring_size = max_offchip_buffers *
1439 queue->device->tess_offchip_block_dw_size * 4;
1440
1441 if (scratch_size <= queue->scratch_size &&
1442 compute_scratch_size <= queue->compute_scratch_size &&
1443 esgs_ring_size <= queue->esgs_ring_size &&
1444 gsvs_ring_size <= queue->gsvs_ring_size &&
1445 !add_tess_rings && !add_sample_positions &&
1446 queue->initial_preamble_cs) {
1447 *initial_preamble_cs = queue->initial_preamble_cs;
1448 *continue_preamble_cs = queue->continue_preamble_cs;
1449 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1450 *continue_preamble_cs = NULL;
1451 return VK_SUCCESS;
1452 }
1453
1454 if (scratch_size > queue->scratch_size) {
1455 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1456 scratch_size,
1457 4096,
1458 RADEON_DOMAIN_VRAM,
1459 RADEON_FLAG_NO_CPU_ACCESS);
1460 if (!scratch_bo)
1461 goto fail;
1462 } else
1463 scratch_bo = queue->scratch_bo;
1464
1465 if (compute_scratch_size > queue->compute_scratch_size) {
1466 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1467 compute_scratch_size,
1468 4096,
1469 RADEON_DOMAIN_VRAM,
1470 RADEON_FLAG_NO_CPU_ACCESS);
1471 if (!compute_scratch_bo)
1472 goto fail;
1473
1474 } else
1475 compute_scratch_bo = queue->compute_scratch_bo;
1476
1477 if (esgs_ring_size > queue->esgs_ring_size) {
1478 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1479 esgs_ring_size,
1480 4096,
1481 RADEON_DOMAIN_VRAM,
1482 RADEON_FLAG_NO_CPU_ACCESS);
1483 if (!esgs_ring_bo)
1484 goto fail;
1485 } else {
1486 esgs_ring_bo = queue->esgs_ring_bo;
1487 esgs_ring_size = queue->esgs_ring_size;
1488 }
1489
1490 if (gsvs_ring_size > queue->gsvs_ring_size) {
1491 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1492 gsvs_ring_size,
1493 4096,
1494 RADEON_DOMAIN_VRAM,
1495 RADEON_FLAG_NO_CPU_ACCESS);
1496 if (!gsvs_ring_bo)
1497 goto fail;
1498 } else {
1499 gsvs_ring_bo = queue->gsvs_ring_bo;
1500 gsvs_ring_size = queue->gsvs_ring_size;
1501 }
1502
1503 if (add_tess_rings) {
1504 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1505 tess_factor_ring_size,
1506 256,
1507 RADEON_DOMAIN_VRAM,
1508 RADEON_FLAG_NO_CPU_ACCESS);
1509 if (!tess_factor_ring_bo)
1510 goto fail;
1511 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1512 tess_offchip_ring_size,
1513 256,
1514 RADEON_DOMAIN_VRAM,
1515 RADEON_FLAG_NO_CPU_ACCESS);
1516 if (!tess_offchip_ring_bo)
1517 goto fail;
1518 } else {
1519 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1520 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1521 }
1522
1523 if (scratch_bo != queue->scratch_bo ||
1524 esgs_ring_bo != queue->esgs_ring_bo ||
1525 gsvs_ring_bo != queue->gsvs_ring_bo ||
1526 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1527 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1528 uint32_t size = 0;
1529 if (gsvs_ring_bo || esgs_ring_bo ||
1530 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1531 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1532 if (add_sample_positions)
1533 size += 256; /* (1+2+4+8+16) samples * 2 floats * 4 bytes = 248 bytes, padded to 256. */
1534 }
1535 else if (scratch_bo)
1536 size = 8; /* 2 dword */
1537
1538 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1539 size,
1540 4096,
1541 RADEON_DOMAIN_VRAM,
1542 RADEON_FLAG_CPU_ACCESS);
1543 if (!descriptor_bo)
1544 goto fail;
1545 } else
1546 descriptor_bo = queue->descriptor_bo;
1547
1548 for(int i = 0; i < 2; ++i) {
1549 struct radeon_winsys_cs *cs = NULL;
1550 cs = queue->device->ws->cs_create(queue->device->ws,
1551 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1552 if (!cs)
1553 goto fail;
1554
1555 dest_cs[i] = cs;
1556
1557 if (scratch_bo)
1558 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1559
1560 if (esgs_ring_bo)
1561 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1562
1563 if (gsvs_ring_bo)
1564 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1565
1566 if (tess_factor_ring_bo)
1567 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1568
1569 if (tess_offchip_ring_bo)
1570 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1571
1572 if (descriptor_bo)
1573 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1574
1575 if (descriptor_bo != queue->descriptor_bo) {
1576 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1577
1578 if (scratch_bo) {
1579 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1580 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1581 S_008F04_SWIZZLE_ENABLE(1);
1582 map[0] = scratch_va;
1583 map[1] = rsrc1;
1584 }
1585
1586 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1587 add_sample_positions)
1588 fill_geom_tess_rings(queue, map, add_sample_positions,
1589 esgs_ring_size, esgs_ring_bo,
1590 gsvs_ring_size, gsvs_ring_bo,
1591 tess_factor_ring_size, tess_factor_ring_bo,
1592 tess_offchip_ring_size, tess_offchip_ring_bo);
1593
1594 queue->device->ws->buffer_unmap(descriptor_bo);
1595 }
1596
1597 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1598 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1599 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1600 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1601 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1602 }
1603
1604 if (esgs_ring_bo || gsvs_ring_bo) {
1605 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1606 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1607 radeon_emit(cs, esgs_ring_size >> 8);
1608 radeon_emit(cs, gsvs_ring_size >> 8);
1609 } else {
1610 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1611 radeon_emit(cs, esgs_ring_size >> 8);
1612 radeon_emit(cs, gsvs_ring_size >> 8);
1613 }
1614 }
1615
1616 if (tess_factor_ring_bo) {
1617 uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1618 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1619 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1620 S_030938_SIZE(tess_factor_ring_size / 4));
1621 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1622 tf_va >> 8);
1623 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1624 } else {
1625 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1626 S_008988_SIZE(tess_factor_ring_size / 4));
1627 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1628 tf_va >> 8);
1629 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1630 hs_offchip_param);
1631 }
1632 }
1633
1634 if (descriptor_bo) {
1635 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1636 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1637 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1638 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1639 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1640 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1641
1642 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1643
1644 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1645 radeon_set_sh_reg_seq(cs, regs[i], 2);
1646 radeon_emit(cs, va);
1647 radeon_emit(cs, va >> 32);
1648 }
1649 }
1650
1651 if (compute_scratch_bo) {
1652 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1653 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1654 S_008F04_SWIZZLE_ENABLE(1);
1655
1656 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1657
1658 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1659 radeon_emit(cs, scratch_va);
1660 radeon_emit(cs, rsrc1);
1661 }
1662
1663 if (!i) {
1664 si_cs_emit_cache_flush(cs,
1665 queue->device->physical_device->rad_info.chip_class,
1666 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
1667 queue->device->physical_device->rad_info.chip_class >= CIK,
1668 RADV_CMD_FLAG_INV_ICACHE |
1669 RADV_CMD_FLAG_INV_SMEM_L1 |
1670 RADV_CMD_FLAG_INV_VMEM_L1 |
1671 RADV_CMD_FLAG_INV_GLOBAL_L2);
1672 }
1673
1674 if (!queue->device->ws->cs_finalize(cs))
1675 goto fail;
1676 }
1677
1678 if (queue->initial_preamble_cs)
1679 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1680
1681 if (queue->continue_preamble_cs)
1682 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1683
1684 queue->initial_preamble_cs = dest_cs[0];
1685 queue->continue_preamble_cs = dest_cs[1];
1686
1687 if (scratch_bo != queue->scratch_bo) {
1688 if (queue->scratch_bo)
1689 queue->device->ws->buffer_destroy(queue->scratch_bo);
1690 queue->scratch_bo = scratch_bo;
1691 queue->scratch_size = scratch_size;
1692 }
1693
1694 if (compute_scratch_bo != queue->compute_scratch_bo) {
1695 if (queue->compute_scratch_bo)
1696 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1697 queue->compute_scratch_bo = compute_scratch_bo;
1698 queue->compute_scratch_size = compute_scratch_size;
1699 }
1700
1701 if (esgs_ring_bo != queue->esgs_ring_bo) {
1702 if (queue->esgs_ring_bo)
1703 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1704 queue->esgs_ring_bo = esgs_ring_bo;
1705 queue->esgs_ring_size = esgs_ring_size;
1706 }
1707
1708 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1709 if (queue->gsvs_ring_bo)
1710 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1711 queue->gsvs_ring_bo = gsvs_ring_bo;
1712 queue->gsvs_ring_size = gsvs_ring_size;
1713 }
1714
1715 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1716 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1717 }
1718
1719 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1720 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1721 queue->has_tess_rings = true;
1722 }
1723
1724 if (descriptor_bo != queue->descriptor_bo) {
1725 if (queue->descriptor_bo)
1726 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1727
1728 queue->descriptor_bo = descriptor_bo;
1729 }
1730
1731 if (add_sample_positions)
1732 queue->has_sample_positions = true;
1733
1734 *initial_preamble_cs = queue->initial_preamble_cs;
1735 *continue_preamble_cs = queue->continue_preamble_cs;
1736 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1737 *continue_preamble_cs = NULL;
1738 return VK_SUCCESS;
1739 fail:
1740 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1741 if (dest_cs[i])
1742 queue->device->ws->cs_destroy(dest_cs[i]);
1743 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1744 queue->device->ws->buffer_destroy(descriptor_bo);
1745 if (scratch_bo && scratch_bo != queue->scratch_bo)
1746 queue->device->ws->buffer_destroy(scratch_bo);
1747 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1748 queue->device->ws->buffer_destroy(compute_scratch_bo);
1749 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1750 queue->device->ws->buffer_destroy(esgs_ring_bo);
1751 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1752 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1753 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1754 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1755 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1756 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1757 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1758 }
1759
1760 VkResult radv_QueueSubmit(
1761 VkQueue _queue,
1762 uint32_t submitCount,
1763 const VkSubmitInfo* pSubmits,
1764 VkFence _fence)
1765 {
1766 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1767 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1768 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1769 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1770 int ret;
1771 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1772 uint32_t scratch_size = 0;
1773 uint32_t compute_scratch_size = 0;
1774 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1775 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1776 VkResult result;
1777 bool fence_emitted = false;
1778 bool tess_rings_needed = false;
1779 bool sample_positions_needed = false;
1780
1781 /* Do this first so failing to allocate scratch buffers can't result in
1782 * partially executed submissions. */
1783 for (uint32_t i = 0; i < submitCount; i++) {
1784 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1785 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1786 pSubmits[i].pCommandBuffers[j]);
1787
1788 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1789 compute_scratch_size = MAX2(compute_scratch_size,
1790 cmd_buffer->compute_scratch_size_needed);
1791 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1792 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1793 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1794 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1795 }
1796 }
1797
1798 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1799 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1800 sample_positions_needed,
1801 &initial_preamble_cs, &continue_preamble_cs);
1802 if (result != VK_SUCCESS)
1803 return result;
1804
1805 for (uint32_t i = 0; i < submitCount; i++) {
1806 struct radeon_winsys_cs **cs_array;
1807 bool do_flush = !i;
1808 bool can_patch = !do_flush;
1809 uint32_t advance;
1810
1811 if (!pSubmits[i].commandBufferCount) {
1812 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1813 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1814 &queue->device->empty_cs[queue->queue_family_index],
1815 1, NULL, NULL,
1816 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1817 pSubmits[i].waitSemaphoreCount,
1818 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1819 pSubmits[i].signalSemaphoreCount,
1820 false, base_fence);
1821 if (ret) {
1822 radv_loge("failed to submit CS %d\n", i);
1823 abort();
1824 }
1825 fence_emitted = true;
1826 }
1827 continue;
1828 }
1829
1830 		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1831 				  (pSubmits[i].commandBufferCount + do_flush));
		if (!cs_array)
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1832
1833 		if (do_flush)
1834 cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1835
1836 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1837 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1838 pSubmits[i].pCommandBuffers[j]);
1839 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1840
1841 cs_array[j + do_flush] = cmd_buffer->cs;
1842 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1843 can_patch = false;
1844 }
1845
1846 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1847 advance = MIN2(max_cs_submission,
1848 pSubmits[i].commandBufferCount + do_flush - j);
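			/* Wait semaphores only apply to the first chunk of a
			 * split submission (b) and signal semaphores only to
			 * the last chunk (e). */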
1849 bool b = j == 0;
1850 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1851
1852 if (queue->device->trace_bo)
1853 *queue->device->trace_id_ptr = 0;
1854
1855 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1856 advance, initial_preamble_cs, continue_preamble_cs,
1857 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1858 b ? pSubmits[i].waitSemaphoreCount : 0,
1859 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1860 e ? pSubmits[i].signalSemaphoreCount : 0,
1861 can_patch, base_fence);
1862
1863 if (ret) {
1864 radv_loge("failed to submit CS %d\n", i);
1865 abort();
1866 }
1867 fence_emitted = true;
1868 if (queue->device->trace_bo) {
1869 bool success = queue->device->ws->ctx_wait_idle(
1870 queue->hw_ctx,
1871 radv_queue_family_to_ring(
1872 queue->queue_family_index),
1873 queue->queue_idx);
1874
1875 if (!success) { /* Hang */
1876 radv_dump_trace(queue->device, cs_array[j]);
1877 abort();
1878 }
1879 }
1880 }
1881 free(cs_array);
1882 }
1883
1884 if (fence) {
1885 if (!fence_emitted)
1886 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1887 &queue->device->empty_cs[queue->queue_family_index],
1888 1, NULL, NULL, NULL, 0, NULL, 0,
1889 false, base_fence);
1890
1891 fence->submitted = true;
1892 }
1893
1894 return VK_SUCCESS;
1895 }
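
/* Illustrative only, not driver code: a minimal sketch of the API usage this
 * entrypoint services, assuming `queue`, `cmd_buf` and `fence` are valid
 * handles owned by the application:
 *
 *    VkSubmitInfo submit = {
 *        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
 *        .commandBufferCount = 1,
 *        .pCommandBuffers = &cmd_buf,
 *    };
 *    vkQueueSubmit(queue, 1, &submit, fence);
 */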
1896
1897 VkResult radv_QueueWaitIdle(
1898 VkQueue _queue)
1899 {
1900 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1901
1902 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1903 radv_queue_family_to_ring(queue->queue_family_index),
1904 queue->queue_idx);
1905 return VK_SUCCESS;
1906 }
1907
1908 VkResult radv_DeviceWaitIdle(
1909 VkDevice _device)
1910 {
1911 RADV_FROM_HANDLE(radv_device, device, _device);
1912
1913 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1914 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1915 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1916 }
1917 }
1918 return VK_SUCCESS;
1919 }
1920
1921 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1922 VkInstance instance,
1923 const char* pName)
1924 {
1925 return radv_lookup_entrypoint(pName);
1926 }
1927
1928 /* The loader wants us to expose a second GetInstanceProcAddr function
1929 * to work around certain LD_PRELOAD issues seen in apps.
1930 */
1931 PUBLIC
1932 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1933 VkInstance instance,
1934 const char* pName);
1935
1936 PUBLIC
1937 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1938 VkInstance instance,
1939 const char* pName)
1940 {
1941 return radv_GetInstanceProcAddr(instance, pName);
1942 }
1943
1944 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1945 VkDevice device,
1946 const char* pName)
1947 {
1948 return radv_lookup_entrypoint(pName);
1949 }
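
/* Illustrative only: how the loader or an application resolves an entrypoint
 * through the functions above. The handle argument is currently unused
 * because radv keeps a single flat entrypoint table. Given a filled-in
 * `sampler_info`, for example:
 *
 *    PFN_vkCreateSampler fn = (PFN_vkCreateSampler)
 *        vkGetDeviceProcAddr(device, "vkCreateSampler");
 *    if (fn)
 *        fn(device, &sampler_info, NULL, &sampler);
 */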
1950
1951 bool radv_get_memory_fd(struct radv_device *device,
1952 struct radv_device_memory *memory,
1953 int *pFD)
1954 {
1955 struct radeon_bo_metadata metadata;
1956
1957 if (memory->image) {
1958 radv_init_metadata(device, memory->image, &metadata);
1959 device->ws->buffer_set_metadata(memory->bo, &metadata);
1960 }
1961
1962 return device->ws->buffer_get_fd(device->ws, memory->bo,
1963 pFD);
1964 }
1965
1966 VkResult radv_AllocateMemory(
1967 VkDevice _device,
1968 const VkMemoryAllocateInfo* pAllocateInfo,
1969 const VkAllocationCallbacks* pAllocator,
1970 VkDeviceMemory* pMem)
1971 {
1972 RADV_FROM_HANDLE(radv_device, device, _device);
1973 struct radv_device_memory *mem;
1974 VkResult result;
1975 enum radeon_bo_domain domain;
1976 uint32_t flags = 0;
1977 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
1978 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1979
1980 if (pAllocateInfo->allocationSize == 0) {
1981 /* Apparently, this is allowed */
1982 *pMem = VK_NULL_HANDLE;
1983 return VK_SUCCESS;
1984 }
1985
1986 vk_foreach_struct(ext, pAllocateInfo->pNext) {
1987 switch (ext->sType) {
1988 case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
1989 dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
1990 break;
1991 default:
1992 break;
1993 }
1994 }
1995
1996 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1997 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1998 if (mem == NULL)
1999 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2000
2001 if (dedicate_info) {
2002 mem->image = radv_image_from_handle(dedicate_info->image);
2003 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2004 } else {
2005 mem->image = NULL;
2006 mem->buffer = NULL;
2007 }
2008
2009 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2010 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2011 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2012 domain = RADEON_DOMAIN_GTT;
2013 else
2014 domain = RADEON_DOMAIN_VRAM;
2015
2016 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2017 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2018 else
2019 flags |= RADEON_FLAG_CPU_ACCESS;
2020
2021 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2022 flags |= RADEON_FLAG_GTT_WC;
2023
2024 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
2025 domain, flags);
2026
2027 if (!mem->bo) {
2028 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2029 goto fail;
2030 }
2031 mem->type_index = pAllocateInfo->memoryTypeIndex;
2032
2033 *pMem = radv_device_memory_to_handle(mem);
2034
2035 return VK_SUCCESS;
2036
2037 fail:
2038 vk_free2(&device->alloc, pAllocator, mem);
2039
2040 return result;
2041 }
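
/* Illustrative only: a minimal allocation against the entrypoint above. The
 * memoryTypeIndex here is a placeholder; real code must pick an index from
 * vkGetPhysicalDeviceMemoryProperties():
 *
 *    VkMemoryAllocateInfo alloc_info = {
 *        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *        .allocationSize = 64 * 1024,
 *        .memoryTypeIndex = 0,
 *    };
 *    VkDeviceMemory mem;
 *    VkResult r = vkAllocateMemory(device, &alloc_info, NULL, &mem);
 */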
2042
2043 void radv_FreeMemory(
2044 VkDevice _device,
2045 VkDeviceMemory _mem,
2046 const VkAllocationCallbacks* pAllocator)
2047 {
2048 RADV_FROM_HANDLE(radv_device, device, _device);
2049 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2050
2051 if (mem == NULL)
2052 return;
2053
2054 device->ws->buffer_destroy(mem->bo);
2055 mem->bo = NULL;
2056
2057 vk_free2(&device->alloc, pAllocator, mem);
2058 }
2059
2060 VkResult radv_MapMemory(
2061 VkDevice _device,
2062 VkDeviceMemory _memory,
2063 VkDeviceSize offset,
2064 VkDeviceSize size,
2065 VkMemoryMapFlags flags,
2066 void** ppData)
2067 {
2068 RADV_FROM_HANDLE(radv_device, device, _device);
2069 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2070
2071 if (mem == NULL) {
2072 *ppData = NULL;
2073 return VK_SUCCESS;
2074 }
2075
2076 *ppData = device->ws->buffer_map(mem->bo);
2077 if (*ppData) {
2078 		*ppData = (char *)*ppData + offset;
2079 return VK_SUCCESS;
2080 }
2081
2082 return VK_ERROR_MEMORY_MAP_FAILED;
2083 }
2084
2085 void radv_UnmapMemory(
2086 VkDevice _device,
2087 VkDeviceMemory _memory)
2088 {
2089 RADV_FROM_HANDLE(radv_device, device, _device);
2090 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2091
2092 if (mem == NULL)
2093 return;
2094
2095 device->ws->buffer_unmap(mem->bo);
2096 }
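
/* Illustrative only: the map/write/unmap round trip served by the two
 * entrypoints above, assuming `mem` comes from a host-visible memory type:
 *
 *    void *ptr;
 *    if (vkMapMemory(device, mem, 0, VK_WHOLE_SIZE, 0, &ptr) == VK_SUCCESS) {
 *        memset(ptr, 0, 4096);
 *        vkUnmapMemory(device, mem);
 *    }
 */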
2097
2098 VkResult radv_FlushMappedMemoryRanges(
2099 VkDevice _device,
2100 uint32_t memoryRangeCount,
2101 const VkMappedMemoryRange* pMemoryRanges)
2102 {
2103 return VK_SUCCESS;
2104 }
2105
2106 VkResult radv_InvalidateMappedMemoryRanges(
2107 VkDevice _device,
2108 uint32_t memoryRangeCount,
2109 const VkMappedMemoryRange* pMemoryRanges)
2110 {
2111 return VK_SUCCESS;
2112 }
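
/* Both entrypoints above are intentionally no-ops: every host-visible memory
 * type radv exposes is also HOST_COHERENT, so there is never anything to
 * flush or invalidate. */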
2113
2114 void radv_GetBufferMemoryRequirements(
2115 VkDevice device,
2116 VkBuffer _buffer,
2117 VkMemoryRequirements* pMemoryRequirements)
2118 {
2119 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2120
2121 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2122
2123 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2124 pMemoryRequirements->alignment = 4096;
2125 else
2126 pMemoryRequirements->alignment = 16;
2127
2128 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2129 }
2130
2131 void radv_GetImageMemoryRequirements(
2132 VkDevice device,
2133 VkImage _image,
2134 VkMemoryRequirements* pMemoryRequirements)
2135 {
2136 RADV_FROM_HANDLE(radv_image, image, _image);
2137
2138 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2139
2140 pMemoryRequirements->size = image->size;
2141 pMemoryRequirements->alignment = image->alignment;
2142 }
2143
2144 void radv_GetImageSparseMemoryRequirements(
2145 VkDevice device,
2146 VkImage image,
2147 uint32_t* pSparseMemoryRequirementCount,
2148 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2149 {
2150 	/* Sparse images are not supported yet; report zero requirements
	 * rather than leaving the caller's count uninitialized. */
	*pSparseMemoryRequirementCount = 0;
	stub();
2151 }
2152
2153 void radv_GetDeviceMemoryCommitment(
2154 VkDevice device,
2155 VkDeviceMemory memory,
2156 VkDeviceSize* pCommittedMemoryInBytes)
2157 {
2158 *pCommittedMemoryInBytes = 0;
2159 }
2160
2161 VkResult radv_BindBufferMemory(
2162 VkDevice device,
2163 VkBuffer _buffer,
2164 VkDeviceMemory _memory,
2165 VkDeviceSize memoryOffset)
2166 {
2167 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2168 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2169
2170 if (mem) {
2171 buffer->bo = mem->bo;
2172 buffer->offset = memoryOffset;
2173 } else {
2174 buffer->bo = NULL;
2175 buffer->offset = 0;
2176 }
2177
2178 return VK_SUCCESS;
2179 }
2180
2181 VkResult radv_BindImageMemory(
2182 VkDevice device,
2183 VkImage _image,
2184 VkDeviceMemory _memory,
2185 VkDeviceSize memoryOffset)
2186 {
2187 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2188 RADV_FROM_HANDLE(radv_image, image, _image);
2189
2190 if (mem) {
2191 image->bo = mem->bo;
2192 image->offset = memoryOffset;
2193 } else {
2194 image->bo = NULL;
2195 image->offset = 0;
2196 }
2197
2198 return VK_SUCCESS;
2199 }
2200
2201
2202 static void
2203 radv_sparse_buffer_bind_memory(struct radv_device *device,
2204 const VkSparseBufferMemoryBindInfo *bind)
2205 {
2206 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2207
2208 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2209 struct radv_device_memory *mem = NULL;
2210
2211 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2212 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2213
2214 device->ws->buffer_virtual_bind(buffer->bo,
2215 bind->pBinds[i].resourceOffset,
2216 bind->pBinds[i].size,
2217 mem ? mem->bo : NULL,
2218 bind->pBinds[i].memoryOffset);
2219 }
2220 }
2221
2222 static void
2223 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2224 const VkSparseImageOpaqueMemoryBindInfo *bind)
2225 {
2226 RADV_FROM_HANDLE(radv_image, image, bind->image);
2227
2228 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2229 struct radv_device_memory *mem = NULL;
2230
2231 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2232 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2233
2234 device->ws->buffer_virtual_bind(image->bo,
2235 bind->pBinds[i].resourceOffset,
2236 bind->pBinds[i].size,
2237 mem ? mem->bo : NULL,
2238 bind->pBinds[i].memoryOffset);
2239 }
2240 }
2241
2242 VkResult radv_QueueBindSparse(
2243 VkQueue _queue,
2244 uint32_t bindInfoCount,
2245 const VkBindSparseInfo* pBindInfo,
2246 VkFence _fence)
2247 {
2248 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2249 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2250 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2251 bool fence_emitted = false;
2252
2253 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2254 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2255 radv_sparse_buffer_bind_memory(queue->device,
2256 pBindInfo[i].pBufferBinds + j);
2257 }
2258
2259 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2260 radv_sparse_image_opaque_bind_memory(queue->device,
2261 pBindInfo[i].pImageOpaqueBinds + j);
2262 }
2263
2264 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2265 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2266 &queue->device->empty_cs[queue->queue_family_index],
2267 1, NULL, NULL,
2268 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2269 pBindInfo[i].waitSemaphoreCount,
2270 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2271 pBindInfo[i].signalSemaphoreCount,
2272 false, base_fence);
2273 fence_emitted = true;
2274 if (fence)
2275 fence->submitted = true;
2276 }
2277 }
2278
2279 if (fence && !fence_emitted) {
2280 fence->signalled = true;
2281 }
2282
2283 return VK_SUCCESS;
2284 }
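
/* Illustrative only: binding a memory range into a sparse buffer through the
 * entrypoint above, assuming `buffer` was created with
 * VK_BUFFER_CREATE_SPARSE_BINDING_BIT, `mem` is a suitable allocation, and
 * `queue` supports sparse binding:
 *
 *    VkSparseMemoryBind bind = {
 *        .resourceOffset = 0,
 *        .size = 64 * 1024,
 *        .memory = mem,
 *    };
 *    VkSparseBufferMemoryBindInfo buffer_bind = {
 *        .buffer = buffer,
 *        .bindCount = 1,
 *        .pBinds = &bind,
 *    };
 *    VkBindSparseInfo info = {
 *        .sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,
 *        .bufferBindCount = 1,
 *        .pBufferBinds = &buffer_bind,
 *    };
 *    vkQueueBindSparse(queue, 1, &info, VK_NULL_HANDLE);
 */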
2285
2286 VkResult radv_CreateFence(
2287 VkDevice _device,
2288 const VkFenceCreateInfo* pCreateInfo,
2289 const VkAllocationCallbacks* pAllocator,
2290 VkFence* pFence)
2291 {
2292 RADV_FROM_HANDLE(radv_device, device, _device);
2293 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2294 sizeof(*fence), 8,
2295 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2296
2297 if (!fence)
2298 return VK_ERROR_OUT_OF_HOST_MEMORY;
2299
2300 memset(fence, 0, sizeof(*fence));
2301 fence->submitted = false;
2302 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2303 fence->fence = device->ws->create_fence();
2304 if (!fence->fence) {
2305 vk_free2(&device->alloc, pAllocator, fence);
2306 return VK_ERROR_OUT_OF_HOST_MEMORY;
2307 }
2308
2309 *pFence = radv_fence_to_handle(fence);
2310
2311 return VK_SUCCESS;
2312 }
2313
2314 void radv_DestroyFence(
2315 VkDevice _device,
2316 VkFence _fence,
2317 const VkAllocationCallbacks* pAllocator)
2318 {
2319 RADV_FROM_HANDLE(radv_device, device, _device);
2320 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2321
2322 if (!fence)
2323 return;
2324 device->ws->destroy_fence(fence->fence);
2325 vk_free2(&device->alloc, pAllocator, fence);
2326 }
2327
2328 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2329 {
2330 uint64_t current_time;
2331 struct timespec tv;
2332
2333 clock_gettime(CLOCK_MONOTONIC, &tv);
2334 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2335
2336 timeout = MIN2(UINT64_MAX - current_time, timeout);
2337
2338 return current_time + timeout;
2339 }
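
/* The MIN2 clamp above guards against overflow: with timeout == UINT64_MAX
 * (Vulkan's "wait forever"), current_time + timeout would wrap, so the
 * relative timeout is first reduced to UINT64_MAX - current_time. */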
2340
2341 VkResult radv_WaitForFences(
2342 VkDevice _device,
2343 uint32_t fenceCount,
2344 const VkFence* pFences,
2345 VkBool32 waitAll,
2346 uint64_t timeout)
2347 {
2348 RADV_FROM_HANDLE(radv_device, device, _device);
2349 timeout = radv_get_absolute_timeout(timeout);
2350
2351 if (!waitAll && fenceCount > 1) {
2352 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2353 }
2354
2355 for (uint32_t i = 0; i < fenceCount; ++i) {
2356 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2357 		bool signaled = false;
2358
2359 if (fence->signalled)
2360 continue;
2361
2362 if (!fence->submitted)
2363 return VK_TIMEOUT;
2364
2365 		signaled = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2366 		if (!signaled)
2367 return VK_TIMEOUT;
2368
2369 fence->signalled = true;
2370 }
2371
2372 return VK_SUCCESS;
2373 }
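
/* Note that the !waitAll case above conservatively waits on every fence, so
 * it can report VK_TIMEOUT where the spec would allow an early VK_SUCCESS.
 *
 * Illustrative only: typical application usage, assuming `fence` was handed
 * to an earlier vkQueueSubmit() call:
 *
 *    VkResult r = vkWaitForFences(device, 1, &fence, VK_TRUE,
 *                                 1000ull * 1000 * 1000);
 *    if (r == VK_SUCCESS)
 *        vkResetFences(device, 1, &fence);
 */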
2374
2375 VkResult radv_ResetFences(VkDevice device,
2376 uint32_t fenceCount,
2377 const VkFence *pFences)
2378 {
2379 for (unsigned i = 0; i < fenceCount; ++i) {
2380 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2381 fence->submitted = fence->signalled = false;
2382 }
2383
2384 return VK_SUCCESS;
2385 }
2386
2387 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2388 {
2389 RADV_FROM_HANDLE(radv_device, device, _device);
2390 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2391
2392 if (fence->signalled)
2393 return VK_SUCCESS;
2394 if (!fence->submitted)
2395 return VK_NOT_READY;
2396
2397 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2398 return VK_NOT_READY;
2399
2400 return VK_SUCCESS;
2401 }
2402
2403
2404 /* Queue semaphore functions */
2405
2406 VkResult radv_CreateSemaphore(
2407 VkDevice _device,
2408 const VkSemaphoreCreateInfo* pCreateInfo,
2409 const VkAllocationCallbacks* pAllocator,
2410 VkSemaphore* pSemaphore)
2411 {
2412 RADV_FROM_HANDLE(radv_device, device, _device);
2413 struct radeon_winsys_sem *sem;
2414
2415 sem = device->ws->create_sem(device->ws);
2416 if (!sem)
2417 return VK_ERROR_OUT_OF_HOST_MEMORY;
2418
2419 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2420 return VK_SUCCESS;
2421 }
2422
2423 void radv_DestroySemaphore(
2424 VkDevice _device,
2425 VkSemaphore _semaphore,
2426 const VkAllocationCallbacks* pAllocator)
2427 {
2428 RADV_FROM_HANDLE(radv_device, device, _device);
2429 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2430 if (!_semaphore)
2431 return;
2432
2433 device->ws->destroy_sem(sem);
2434 }
2435
2436 VkResult radv_CreateEvent(
2437 VkDevice _device,
2438 const VkEventCreateInfo* pCreateInfo,
2439 const VkAllocationCallbacks* pAllocator,
2440 VkEvent* pEvent)
2441 {
2442 RADV_FROM_HANDLE(radv_device, device, _device);
2443 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2444 sizeof(*event), 8,
2445 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2446
2447 if (!event)
2448 return VK_ERROR_OUT_OF_HOST_MEMORY;
2449
2450 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2451 RADEON_DOMAIN_GTT,
2452 RADEON_FLAG_CPU_ACCESS);
2453 if (!event->bo) {
2454 vk_free2(&device->alloc, pAllocator, event);
2455 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2456 }
2457
2458 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2459
2460 *pEvent = radv_event_to_handle(event);
2461
2462 return VK_SUCCESS;
2463 }
2464
2465 void radv_DestroyEvent(
2466 VkDevice _device,
2467 VkEvent _event,
2468 const VkAllocationCallbacks* pAllocator)
2469 {
2470 RADV_FROM_HANDLE(radv_device, device, _device);
2471 RADV_FROM_HANDLE(radv_event, event, _event);
2472
2473 if (!event)
2474 return;
2475 device->ws->buffer_destroy(event->bo);
2476 vk_free2(&device->alloc, pAllocator, event);
2477 }
2478
2479 VkResult radv_GetEventStatus(
2480 VkDevice _device,
2481 VkEvent _event)
2482 {
2483 RADV_FROM_HANDLE(radv_event, event, _event);
2484
2485 if (*event->map == 1)
2486 return VK_EVENT_SET;
2487 return VK_EVENT_RESET;
2488 }
2489
2490 VkResult radv_SetEvent(
2491 VkDevice _device,
2492 VkEvent _event)
2493 {
2494 RADV_FROM_HANDLE(radv_event, event, _event);
2495 *event->map = 1;
2496
2497 return VK_SUCCESS;
2498 }
2499
2500 VkResult radv_ResetEvent(
2501 VkDevice _device,
2502 VkEvent _event)
2503 {
2504 RADV_FROM_HANDLE(radv_event, event, _event);
2505 *event->map = 0;
2506
2507 return VK_SUCCESS;
2508 }
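
/* Illustrative only: host-side event usage served by the entrypoints above.
 * The event state lives in a CPU-mapped GTT buffer, so these calls reduce to
 * reads and writes of `event->map`:
 *
 *    VkEventCreateInfo info = { .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO };
 *    VkEvent event;
 *    vkCreateEvent(device, &info, NULL, &event);
 *    vkSetEvent(device, event);
 *    assert(vkGetEventStatus(device, event) == VK_EVENT_SET);
 */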
2509
2510 VkResult radv_CreateBuffer(
2511 VkDevice _device,
2512 const VkBufferCreateInfo* pCreateInfo,
2513 const VkAllocationCallbacks* pAllocator,
2514 VkBuffer* pBuffer)
2515 {
2516 RADV_FROM_HANDLE(radv_device, device, _device);
2517 struct radv_buffer *buffer;
2518
2519 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2520
2521 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2522 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2523 if (buffer == NULL)
2524 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2525
2526 buffer->size = pCreateInfo->size;
2527 buffer->usage = pCreateInfo->usage;
2528 buffer->bo = NULL;
2529 buffer->offset = 0;
2530 buffer->flags = pCreateInfo->flags;
2531
2532 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2533 buffer->bo = device->ws->buffer_create(device->ws,
2534 align64(buffer->size, 4096),
2535 4096, 0, RADEON_FLAG_VIRTUAL);
2536 if (!buffer->bo) {
2537 vk_free2(&device->alloc, pAllocator, buffer);
2538 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2539 }
2540 }
2541
2542 *pBuffer = radv_buffer_to_handle(buffer);
2543
2544 return VK_SUCCESS;
2545 }
2546
2547 void radv_DestroyBuffer(
2548 VkDevice _device,
2549 VkBuffer _buffer,
2550 const VkAllocationCallbacks* pAllocator)
2551 {
2552 RADV_FROM_HANDLE(radv_device, device, _device);
2553 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2554
2555 if (!buffer)
2556 return;
2557
2558 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2559 device->ws->buffer_destroy(buffer->bo);
2560
2561 vk_free2(&device->alloc, pAllocator, buffer);
2562 }
2563
2564 static inline unsigned
2565 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2566 {
2567 if (stencil)
2568 return image->surface.stencil_tiling_index[level];
2569 else
2570 return image->surface.tiling_index[level];
2571 }
2572
2573 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2574 {
2575 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2576 }
2577
2578 static void
2579 radv_initialise_color_surface(struct radv_device *device,
2580 struct radv_color_buffer_info *cb,
2581 struct radv_image_view *iview)
2582 {
2583 const struct vk_format_description *desc;
2584 unsigned ntype, format, swap, endian;
2585 unsigned blend_clamp = 0, blend_bypass = 0;
2586 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2587 uint64_t va;
2588 const struct radeon_surf *surf = &iview->image->surface;
2589 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2590
2591 desc = vk_format_description(iview->vk_format);
2592
2593 memset(cb, 0, sizeof(*cb));
2594
2595 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2596 va += level_info->offset;
2597 cb->cb_color_base = va >> 8;
2598
2599 /* CMASK variables */
2600 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2601 va += iview->image->cmask.offset;
2602 cb->cb_color_cmask = va >> 8;
2603 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2604
2605 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2606 va += iview->image->dcc_offset;
2607 cb->cb_dcc_base = va >> 8;
2608
2609 uint32_t max_slice = radv_surface_layer_count(iview);
2610 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2611 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2612
2613 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2614 pitch_tile_max = level_info->nblk_x / 8 - 1;
2615 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2616 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2617
2618 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2619 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2620
2621 /* Intensity is implemented as Red, so treat it that way. */
2622 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2623 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2624
2625 if (iview->image->samples > 1) {
2626 unsigned log_samples = util_logbase2(iview->image->samples);
2627
2628 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2629 S_028C74_NUM_FRAGMENTS(log_samples);
2630 }
2631
2632 if (iview->image->fmask.size) {
2633 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2634 if (device->physical_device->rad_info.chip_class >= CIK)
2635 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2636 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2637 cb->cb_color_fmask = va >> 8;
2638 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2639 } else {
2640 /* This must be set for fast clear to work without FMASK. */
2641 if (device->physical_device->rad_info.chip_class >= CIK)
2642 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2643 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2644 cb->cb_color_fmask = cb->cb_color_base;
2645 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2646 }
2647
2648 ntype = radv_translate_color_numformat(iview->vk_format,
2649 desc,
2650 vk_format_get_first_non_void_channel(iview->vk_format));
2651 format = radv_translate_colorformat(iview->vk_format);
2652 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2653 radv_finishme("Illegal color\n");
2654 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2655 endian = radv_colorformat_endian_swap(format);
2656
2657 /* blend clamp should be set for all NORM/SRGB types */
2658 if (ntype == V_028C70_NUMBER_UNORM ||
2659 ntype == V_028C70_NUMBER_SNORM ||
2660 ntype == V_028C70_NUMBER_SRGB)
2661 blend_clamp = 1;
2662
2663 	/* Set blend bypass according to the docs for SINT/UINT and the
2664 	 * 8_24/24_8/X24_8_32 COLOR variants. */
2665 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2666 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2667 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2668 blend_clamp = 0;
2669 blend_bypass = 1;
2670 }
2671 #if 0
2672 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2673 (format == V_028C70_COLOR_8 ||
2674 format == V_028C70_COLOR_8_8 ||
2675 format == V_028C70_COLOR_8_8_8_8))
2676 ->color_is_int8 = true;
2677 #endif
2678 cb->cb_color_info = S_028C70_FORMAT(format) |
2679 S_028C70_COMP_SWAP(swap) |
2680 S_028C70_BLEND_CLAMP(blend_clamp) |
2681 S_028C70_BLEND_BYPASS(blend_bypass) |
2682 S_028C70_SIMPLE_FLOAT(1) |
2683 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2684 ntype != V_028C70_NUMBER_SNORM &&
2685 ntype != V_028C70_NUMBER_SRGB &&
2686 format != V_028C70_COLOR_8_24 &&
2687 format != V_028C70_COLOR_24_8) |
2688 S_028C70_NUMBER_TYPE(ntype) |
2689 S_028C70_ENDIAN(endian);
2690 	if (iview->image->samples > 1 &&
2691 	    iview->image->fmask.size)
2692 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
2693
2694 if (iview->image->cmask.size &&
2695 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2696 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2697
2698 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2699 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2700
2701 if (device->physical_device->rad_info.chip_class >= VI) {
2702 unsigned max_uncompressed_block_size = 2;
2703 if (iview->image->samples > 1) {
2704 if (iview->image->surface.bpe == 1)
2705 max_uncompressed_block_size = 0;
2706 else if (iview->image->surface.bpe == 2)
2707 max_uncompressed_block_size = 1;
2708 }
2709
2710 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2711 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2712 }
2713
2714 /* This must be set for fast clear to work without FMASK. */
2715 if (!iview->image->fmask.size &&
2716 device->physical_device->rad_info.chip_class == SI) {
2717 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2718 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2719 }
2720 }
2721
2722 static void
2723 radv_initialise_ds_surface(struct radv_device *device,
2724 struct radv_ds_buffer_info *ds,
2725 struct radv_image_view *iview)
2726 {
2727 unsigned level = iview->base_mip;
2728 unsigned format;
2729 uint64_t va, s_offs, z_offs;
2730 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2731 memset(ds, 0, sizeof(*ds));
2732 switch (iview->vk_format) {
2733 case VK_FORMAT_D24_UNORM_S8_UINT:
2734 case VK_FORMAT_X8_D24_UNORM_PACK32:
2735 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2736 ds->offset_scale = 2.0f;
2737 break;
2738 case VK_FORMAT_D16_UNORM:
2739 case VK_FORMAT_D16_UNORM_S8_UINT:
2740 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2741 ds->offset_scale = 4.0f;
2742 break;
2743 case VK_FORMAT_D32_SFLOAT:
2744 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2745 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2746 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2747 ds->offset_scale = 1.0f;
2748 break;
2749 default:
2750 break;
2751 }
2752
2753 format = radv_translate_dbformat(iview->vk_format);
2754
2755 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2756 s_offs = z_offs = va;
2757 z_offs += iview->image->surface.level[level].offset;
2758 s_offs += iview->image->surface.stencil_level[level].offset;
2759
2760 uint32_t max_slice = radv_surface_layer_count(iview);
2761 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2762 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2763 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2764 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2765
2766 if (iview->image->samples > 1)
2767 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2768
2769 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2770 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2771 else
2772 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2773
2774 if (device->physical_device->rad_info.chip_class >= CIK) {
2775 struct radeon_info *info = &device->physical_device->rad_info;
2776 unsigned tiling_index = iview->image->surface.tiling_index[level];
2777 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2778 unsigned macro_index = iview->image->surface.macro_tile_index;
2779 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2780 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2781 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2782
2783 ds->db_depth_info |=
2784 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2785 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2786 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2787 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2788 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2789 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2790 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2791 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2792 } else {
2793 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2794 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2795 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2796 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2797 }
2798
2799 if (iview->image->surface.htile_size && !level) {
2800 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2801 S_028040_ALLOW_EXPCLEAR(1);
2802
2803 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2804 			/* Workaround: for a not-yet-understood reason, the
2805 			 * combination of MSAA, fast stencil clear and stencil
2806 			 * decompress messes with subsequent stencil buffer
2807 			 * uses. The problem was reproduced on Verde, Bonaire,
2808 * Tonga, and Carrizo.
2809 *
2810 * Disabling EXPCLEAR works around the problem.
2811 *
2812 * Check piglit's arb_texture_multisample-stencil-clear
2813 * test if you want to try changing this.
2814 */
2815 if (iview->image->samples <= 1)
2816 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2817 } else
2818 /* Use all of the htile_buffer for depth if there's no stencil. */
2819 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2820
2821 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2822 iview->image->htile_offset;
2823 ds->db_htile_data_base = va >> 8;
2824 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2825 } else {
2826 ds->db_htile_data_base = 0;
2827 ds->db_htile_surface = 0;
2828 }
2829
2830 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2831 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2832
2833 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2834 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2835 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2836 }
2837
2838 VkResult radv_CreateFramebuffer(
2839 VkDevice _device,
2840 const VkFramebufferCreateInfo* pCreateInfo,
2841 const VkAllocationCallbacks* pAllocator,
2842 VkFramebuffer* pFramebuffer)
2843 {
2844 RADV_FROM_HANDLE(radv_device, device, _device);
2845 struct radv_framebuffer *framebuffer;
2846
2847 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2848
2849 size_t size = sizeof(*framebuffer) +
2850 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2851 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2852 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2853 if (framebuffer == NULL)
2854 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2855
2856 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2857 framebuffer->width = pCreateInfo->width;
2858 framebuffer->height = pCreateInfo->height;
2859 framebuffer->layers = pCreateInfo->layers;
2860 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2861 VkImageView _iview = pCreateInfo->pAttachments[i];
2862 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2863 framebuffer->attachments[i].attachment = iview;
2864 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2865 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2866 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2867 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2868 }
2869 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2870 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2871 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2872 }
2873
2874 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2875 return VK_SUCCESS;
2876 }
2877
2878 void radv_DestroyFramebuffer(
2879 VkDevice _device,
2880 VkFramebuffer _fb,
2881 const VkAllocationCallbacks* pAllocator)
2882 {
2883 RADV_FROM_HANDLE(radv_device, device, _device);
2884 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2885
2886 if (!fb)
2887 return;
2888 vk_free2(&device->alloc, pAllocator, fb);
2889 }
2890
2891 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2892 {
2893 switch (address_mode) {
2894 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2895 return V_008F30_SQ_TEX_WRAP;
2896 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2897 return V_008F30_SQ_TEX_MIRROR;
2898 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2899 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2900 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2901 return V_008F30_SQ_TEX_CLAMP_BORDER;
2902 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2903 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2904 default:
2905 unreachable("illegal tex wrap mode");
2906 break;
2907 }
2908 }
2909
2910 static unsigned
2911 radv_tex_compare(VkCompareOp op)
2912 {
2913 switch (op) {
2914 case VK_COMPARE_OP_NEVER:
2915 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2916 case VK_COMPARE_OP_LESS:
2917 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2918 case VK_COMPARE_OP_EQUAL:
2919 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2920 case VK_COMPARE_OP_LESS_OR_EQUAL:
2921 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2922 case VK_COMPARE_OP_GREATER:
2923 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2924 case VK_COMPARE_OP_NOT_EQUAL:
2925 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2926 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2927 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2928 case VK_COMPARE_OP_ALWAYS:
2929 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2930 default:
2931 unreachable("illegal compare mode");
2932 break;
2933 }
2934 }
2935
2936 static unsigned
2937 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2938 {
2939 switch (filter) {
2940 case VK_FILTER_NEAREST:
2941 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2942 V_008F38_SQ_TEX_XY_FILTER_POINT);
2943 case VK_FILTER_LINEAR:
2944 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2945 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2946 case VK_FILTER_CUBIC_IMG:
2947 default:
2948 		fprintf(stderr, "illegal texture filter\n");
2949 return 0;
2950 }
2951 }
2952
2953 static unsigned
2954 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2955 {
2956 switch (mode) {
2957 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2958 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2959 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2960 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2961 default:
2962 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2963 }
2964 }
2965
2966 static unsigned
2967 radv_tex_bordercolor(VkBorderColor bcolor)
2968 {
2969 switch (bcolor) {
2970 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2971 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2972 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2973 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2974 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2975 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2976 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2977 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2978 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2979 default:
2980 break;
2981 }
2982 return 0;
2983 }
2984
2985 static unsigned
2986 radv_tex_aniso_filter(unsigned filter)
2987 {
2988 if (filter < 2)
2989 return 0;
2990 if (filter < 4)
2991 return 1;
2992 if (filter < 8)
2993 return 2;
2994 if (filter < 16)
2995 return 3;
2996 return 4;
2997 }
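
/* The helper above maps a max-anisotropy sample count to the log2-encoded
 * ratio the hardware expects: 1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-15 -> 3,
 * 16 and up -> 4. */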
2998
2999 static void
3000 radv_init_sampler(struct radv_device *device,
3001 struct radv_sampler *sampler,
3002 const VkSamplerCreateInfo *pCreateInfo)
3003 {
3004 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3005 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3006 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3007 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3008
3009 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3010 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3011 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3012 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3013 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3014 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3015 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3016 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3017 S_008F30_DISABLE_CUBE_WRAP(0) |
3018 S_008F30_COMPAT_MODE(is_vi));
3019 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3020 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3021 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3022 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3023 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3024 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3025 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3026 S_008F38_MIP_POINT_PRECLAMP(0) |
3027 S_008F38_DISABLE_LSB_CEIL(1) |
3028 S_008F38_FILTER_PREC_FIX(1) |
3029 S_008F38_ANISO_OVERRIDE(is_vi));
3030 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3031 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3032 }
3033
3034 VkResult radv_CreateSampler(
3035 VkDevice _device,
3036 const VkSamplerCreateInfo* pCreateInfo,
3037 const VkAllocationCallbacks* pAllocator,
3038 VkSampler* pSampler)
3039 {
3040 RADV_FROM_HANDLE(radv_device, device, _device);
3041 struct radv_sampler *sampler;
3042
3043 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3044
3045 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3046 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3047 if (!sampler)
3048 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3049
3050 radv_init_sampler(device, sampler, pCreateInfo);
3051 *pSampler = radv_sampler_to_handle(sampler);
3052
3053 return VK_SUCCESS;
3054 }
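
/* Illustrative only: creating a trilinear sampler through the entrypoint
 * above, with anisotropic filtering left disabled:
 *
 *    VkSamplerCreateInfo sampler_info = {
 *        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *        .magFilter = VK_FILTER_LINEAR,
 *        .minFilter = VK_FILTER_LINEAR,
 *        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
 *        .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *        .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *        .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *        .maxLod = VK_LOD_CLAMP_NONE,
 *    };
 *    VkSampler sampler;
 *    vkCreateSampler(device, &sampler_info, NULL, &sampler);
 */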
3055
3056 void radv_DestroySampler(
3057 VkDevice _device,
3058 VkSampler _sampler,
3059 const VkAllocationCallbacks* pAllocator)
3060 {
3061 RADV_FROM_HANDLE(radv_device, device, _device);
3062 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3063
3064 if (!sampler)
3065 return;
3066 vk_free2(&device->alloc, pAllocator, sampler);
3067 }
3068
3069
3070 /* vk_icd.h does not declare this function, so we declare it here to
3071 * suppress Wmissing-prototypes.
3072 */
3073 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3074 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3075
3076 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3077 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3078 {
3079 /* For the full details on loader interface versioning, see
3080 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3081 * What follows is a condensed summary, to help you navigate the large and
3082 * confusing official doc.
3083 *
3084 * - Loader interface v0 is incompatible with later versions. We don't
3085 * support it.
3086 *
3087 * - In loader interface v1:
3088 * - The first ICD entrypoint called by the loader is
3089 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3090 * entrypoint.
3091 * - The ICD must statically expose no other Vulkan symbol unless it is
3092 * linked with -Bsymbolic.
3093 * - Each dispatchable Vulkan handle created by the ICD must be
3094 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3095 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3096 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3097 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3098 * such loader-managed surfaces.
3099 *
3100 * - Loader interface v2 differs from v1 in:
3101 * - The first ICD entrypoint called by the loader is
3102 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3103 * statically expose this entrypoint.
3104 *
3105 * - Loader interface v3 differs from v2 in:
3106 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3107 	 *    vkDestroySurfaceKHR(), and all other APIs that use VkSurfaceKHR,
3108 * because the loader no longer does so.
3109 */
3110 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3111 return VK_SUCCESS;
3112 }