radv/amdgpu: Add some debug flags.
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "util/vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46
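/* The pipeline cache UUID packs two build timestamps plus the GPU family:
 * bytes 0-3 hold the Mesa build timestamp, bytes 4-7 the LLVM one, bytes
 * 8-9 the radeon_family, and the remainder is the literal "radv". Any
 * change to Mesa, LLVM or the target GPU therefore invalidates the cache.
 */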
47 static int
48 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
49 {
50 uint32_t mesa_timestamp, llvm_timestamp;
51 uint16_t f = family;
52 memset(uuid, 0, VK_UUID_SIZE);
53 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
54 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
55 return -1;
56
57 memcpy(uuid, &mesa_timestamp, 4);
58 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
59 memcpy((char*)uuid + 8, &f, 2);
60 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
61 return 0;
62 }
63
64 static const VkExtensionProperties instance_extensions[] = {
65 {
66 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
67 .specVersion = 25,
68 },
69 #ifdef VK_USE_PLATFORM_XCB_KHR
70 {
71 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
72 .specVersion = 6,
73 },
74 #endif
75 #ifdef VK_USE_PLATFORM_XLIB_KHR
76 {
77 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
78 .specVersion = 6,
79 },
80 #endif
81 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
82 {
83 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
84 .specVersion = 5,
85 },
86 #endif
87 };
88
89 static const VkExtensionProperties common_device_extensions[] = {
90 {
91 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
92 .specVersion = 1,
93 },
94 {
95 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
96 .specVersion = 1,
97 },
98 {
99 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
100 .specVersion = 68,
101 },
102 {
103 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
104 .specVersion = 1,
105 },
106 {
107 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
112 .specVersion = 1,
113 },
114 {
115 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
116 .specVersion = 1,
117 },
118 };
119
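/* Append num_ext entries to the given extension list. On allocation
 * failure the old ext_array stays valid and untouched, so callers may
 * keep using it; on success the array may have been moved by vk_realloc,
 * so the stored pointer is updated.
 */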
120 static VkResult
121 radv_extensions_register(struct radv_instance *instance,
122 struct radv_extensions *extensions,
123 const VkExtensionProperties *new_ext,
124 uint32_t num_ext)
125 {
126 size_t new_size;
127 VkExtensionProperties *new_ptr;
128
129 assert(new_ext && num_ext > 0);
130
131 if (!new_ext)
132 return VK_ERROR_INITIALIZATION_FAILED;
133
134 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
135 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
136 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
137
138 /* Old array continues to be valid, update nothing */
139 if (!new_ptr)
140 return VK_ERROR_OUT_OF_HOST_MEMORY;
141
142 memcpy(&new_ptr[extensions->num_ext], new_ext,
143 num_ext * sizeof(VkExtensionProperties));
144 extensions->ext_array = new_ptr;
145 extensions->num_ext += num_ext;
146
147 return VK_SUCCESS;
148 }
149
150 static void
151 radv_extensions_finish(struct radv_instance *instance,
152 struct radv_extensions *extensions)
153 {
154 assert(extensions);
155
156 if (!extensions)
157 radv_loge("Attemted to free invalid extension struct\n");
158
159 if (extensions->ext_array)
160 vk_free(&instance->alloc, extensions->ext_array);
161 }
162
163 static bool
164 is_extension_enabled(const VkExtensionProperties *extensions,
165 size_t num_ext,
166 const char *name)
167 {
168 assert(extensions && name);
169
170 for (uint32_t i = 0; i < num_ext; i++) {
171 if (strcmp(name, extensions[i].extensionName) == 0)
172 return true;
173 }
174
175 return false;
176 }
177
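/* Probe one DRM render node: open it, verify the kernel driver is
 * amdgpu, then bring up the winsys, WSI, the cache UUID and the common
 * device extension list. Any failure closes the fd and reports
 * VK_ERROR_INCOMPATIBLE_DRIVER (or a more specific error).
 */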
178 static VkResult
179 radv_physical_device_init(struct radv_physical_device *device,
180 struct radv_instance *instance,
181 const char *path)
182 {
183 VkResult result;
184 drmVersionPtr version;
185 int fd;
186
187 fd = open(path, O_RDWR | O_CLOEXEC);
188 if (fd < 0)
189 return VK_ERROR_INCOMPATIBLE_DRIVER;
190
191 version = drmGetVersion(fd);
192 if (!version) {
193 close(fd);
194 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
195 "failed to get version %s: %m", path);
196 }
197
198 if (strcmp(version->name, "amdgpu")) {
199 drmFreeVersion(version);
200 close(fd);
201 return VK_ERROR_INCOMPATIBLE_DRIVER;
202 }
203 drmFreeVersion(version);
204
205 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
206 device->instance = instance;
207 assert(strlen(path) < ARRAY_SIZE(device->path));
208 strncpy(device->path, path, ARRAY_SIZE(device->path));
209
210 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
211 if (!device->ws) {
212 result = VK_ERROR_INCOMPATIBLE_DRIVER;
213 goto fail;
214 }
215
216 device->local_fd = fd;
217 device->ws->query_info(device->ws, &device->rad_info);
218 result = radv_init_wsi(device);
219 if (result != VK_SUCCESS) {
220 device->ws->destroy(device->ws);
221 goto fail;
222 }
223
224 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
225 radv_finish_wsi(device);
226 device->ws->destroy(device->ws);
227 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
228 "cannot generate UUID");
229 goto fail;
230 }
231
232 result = radv_extensions_register(instance,
233 &device->extensions,
234 common_device_extensions,
235 ARRAY_SIZE(common_device_extensions));
236 if (result != VK_SUCCESS)
237 goto fail;
238
239 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
240 device->name = device->rad_info.name;
241
242 return VK_SUCCESS;
243
244 fail:
245 close(fd);
246 return result;
247 }
248
249 static void
250 radv_physical_device_finish(struct radv_physical_device *device)
251 {
252 radv_extensions_finish(device->instance, &device->extensions);
253 radv_finish_wsi(device);
254 device->ws->destroy(device->ws);
255 close(device->local_fd);
256 }
257
258
259 static void *
260 default_alloc_func(void *pUserData, size_t size, size_t align,
261 VkSystemAllocationScope allocationScope)
262 {
263 return malloc(size);
264 }
265
266 static void *
267 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
268 size_t align, VkSystemAllocationScope allocationScope)
269 {
270 return realloc(pOriginal, size);
271 }
272
273 static void
274 default_free_func(void *pUserData, void *pMemory)
275 {
276 free(pMemory);
277 }
278
279 static const VkAllocationCallbacks default_alloc = {
280 .pUserData = NULL,
281 .pfnAllocation = default_alloc_func,
282 .pfnReallocation = default_realloc_func,
283 .pfnFree = default_free_func,
284 };
285
286 static const struct debug_control radv_debug_options[] = {
287 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
288 {"nodcc", RADV_DEBUG_NO_DCC},
289 {"shaders", RADV_DEBUG_DUMP_SHADERS},
290 {"nocache", RADV_DEBUG_NO_CACHE},
291 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
292 {"nohiz", RADV_DEBUG_NO_HIZ},
293 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
294 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
295 {"allbos", RADV_DEBUG_ALL_BOS},
296 {"noibs", RADV_DEBUG_NO_IBS},
297 {NULL, 0}
298 };
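/* These names are matched against the contents of the RADV_DEBUG
 * environment variable by parse_debug_string(), so multiple flags can be
 * combined in one comma-separated value, e.g. (an illustrative
 * invocation, not part of this change):
 *
 *   RADV_DEBUG=allbos,noibs ./app
 *
 * "allbos" and "noibs" are the flags this patch introduces.
 */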
299
300 VkResult radv_CreateInstance(
301 const VkInstanceCreateInfo* pCreateInfo,
302 const VkAllocationCallbacks* pAllocator,
303 VkInstance* pInstance)
304 {
305 struct radv_instance *instance;
306
307 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
308
309 uint32_t client_version;
310 if (pCreateInfo->pApplicationInfo &&
311 pCreateInfo->pApplicationInfo->apiVersion != 0) {
312 client_version = pCreateInfo->pApplicationInfo->apiVersion;
313 } else {
314 client_version = VK_MAKE_VERSION(1, 0, 0);
315 }
316
317 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
318 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
319 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
320 "Client requested version %d.%d.%d",
321 VK_VERSION_MAJOR(client_version),
322 VK_VERSION_MINOR(client_version),
323 VK_VERSION_PATCH(client_version));
324 }
325
326 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
327 if (!is_extension_enabled(instance_extensions,
328 ARRAY_SIZE(instance_extensions),
329 pCreateInfo->ppEnabledExtensionNames[i]))
330 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
331 }
332
333 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
334 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
335 if (!instance)
336 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
337
338 memset(instance, 0, sizeof(*instance));
339
340 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
341
342 if (pAllocator)
343 instance->alloc = *pAllocator;
344 else
345 instance->alloc = default_alloc;
346
347 instance->apiVersion = client_version;
348 instance->physicalDeviceCount = -1;
349
350 _mesa_locale_init();
351
352 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
353
354 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
355 radv_debug_options);
356
357 *pInstance = radv_instance_to_handle(instance);
358
359 return VK_SUCCESS;
360 }
361
362 void radv_DestroyInstance(
363 VkInstance _instance,
364 const VkAllocationCallbacks* pAllocator)
365 {
366 RADV_FROM_HANDLE(radv_instance, instance, _instance);
367
368 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
369 radv_physical_device_finish(instance->physicalDevices + i);
370 }
371
372 VG(VALGRIND_DESTROY_MEMPOOL(instance));
373
374 _mesa_locale_fini();
375
376 vk_free(&instance->alloc, instance);
377 }
378
379 VkResult radv_EnumeratePhysicalDevices(
380 VkInstance _instance,
381 uint32_t* pPhysicalDeviceCount,
382 VkPhysicalDevice* pPhysicalDevices)
383 {
384 RADV_FROM_HANDLE(radv_instance, instance, _instance);
385 VkResult result;
386
387 if (instance->physicalDeviceCount < 0) {
388 char path[20];
389 instance->physicalDeviceCount = 0;
390 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
391 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
392 result = radv_physical_device_init(instance->physicalDevices +
393 instance->physicalDeviceCount,
394 instance, path);
395 if (result == VK_SUCCESS)
396 ++instance->physicalDeviceCount;
397 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
398 return result;
399 }
400 }
401
402 if (!pPhysicalDevices) {
403 *pPhysicalDeviceCount = instance->physicalDeviceCount;
404 } else {
405 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
406 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
407 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
408 }
409
410 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
411 : VK_SUCCESS;
412 }
413
414 void radv_GetPhysicalDeviceFeatures(
415 VkPhysicalDevice physicalDevice,
416 VkPhysicalDeviceFeatures* pFeatures)
417 {
418 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
419
420 memset(pFeatures, 0, sizeof(*pFeatures));
421
422 *pFeatures = (VkPhysicalDeviceFeatures) {
423 .robustBufferAccess = true,
424 .fullDrawIndexUint32 = true,
425 .imageCubeArray = true,
426 .independentBlend = true,
427 .geometryShader = true,
428 .tessellationShader = false,
429 .sampleRateShading = false,
430 .dualSrcBlend = true,
431 .logicOp = true,
432 .multiDrawIndirect = true,
433 .drawIndirectFirstInstance = true,
434 .depthClamp = true,
435 .depthBiasClamp = true,
436 .fillModeNonSolid = true,
437 .depthBounds = true,
438 .wideLines = true,
439 .largePoints = true,
440 .alphaToOne = true,
441 .multiViewport = true,
442 .samplerAnisotropy = true,
443 .textureCompressionETC2 = false,
444 .textureCompressionASTC_LDR = false,
445 .textureCompressionBC = true,
446 .occlusionQueryPrecise = true,
447 .pipelineStatisticsQuery = false,
448 .vertexPipelineStoresAndAtomics = true,
449 .fragmentStoresAndAtomics = true,
450 .shaderTessellationAndGeometryPointSize = true,
451 .shaderImageGatherExtended = true,
452 .shaderStorageImageExtendedFormats = true,
453 .shaderStorageImageMultisample = false,
454 .shaderUniformBufferArrayDynamicIndexing = true,
455 .shaderSampledImageArrayDynamicIndexing = true,
456 .shaderStorageBufferArrayDynamicIndexing = true,
457 .shaderStorageImageArrayDynamicIndexing = true,
458 .shaderStorageImageReadWithoutFormat = true,
459 .shaderStorageImageWriteWithoutFormat = true,
460 .shaderClipDistance = true,
461 .shaderCullDistance = true,
462 .shaderFloat64 = true,
463 .shaderInt64 = false,
464 .shaderInt16 = false,
466 .variableMultisampleRate = false,
467 .inheritedQueries = false,
468 };
469 }
470
471 void radv_GetPhysicalDeviceFeatures2KHR(
472 VkPhysicalDevice physicalDevice,
473 VkPhysicalDeviceFeatures2KHR *pFeatures)
474 {
475 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
476 }
477
478 void radv_GetPhysicalDeviceProperties(
479 VkPhysicalDevice physicalDevice,
480 VkPhysicalDeviceProperties* pProperties)
481 {
482 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
483 VkSampleCountFlags sample_counts = 0xf;
484 VkPhysicalDeviceLimits limits = {
485 .maxImageDimension1D = (1 << 14),
486 .maxImageDimension2D = (1 << 14),
487 .maxImageDimension3D = (1 << 11),
488 .maxImageDimensionCube = (1 << 14),
489 .maxImageArrayLayers = (1 << 11),
490 .maxTexelBufferElements = 128 * 1024 * 1024,
491 .maxUniformBufferRange = UINT32_MAX,
492 .maxStorageBufferRange = UINT32_MAX,
493 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
494 .maxMemoryAllocationCount = UINT32_MAX,
495 .maxSamplerAllocationCount = 64 * 1024,
496 .bufferImageGranularity = 64, /* A cache line */
497 .sparseAddressSpaceSize = 0,
498 .maxBoundDescriptorSets = MAX_SETS,
499 .maxPerStageDescriptorSamplers = 64,
500 .maxPerStageDescriptorUniformBuffers = 64,
501 .maxPerStageDescriptorStorageBuffers = 64,
502 .maxPerStageDescriptorSampledImages = 64,
503 .maxPerStageDescriptorStorageImages = 64,
504 .maxPerStageDescriptorInputAttachments = 64,
505 .maxPerStageResources = 128,
506 .maxDescriptorSetSamplers = 256,
507 .maxDescriptorSetUniformBuffers = 256,
508 .maxDescriptorSetUniformBuffersDynamic = 256,
509 .maxDescriptorSetStorageBuffers = 256,
510 .maxDescriptorSetStorageBuffersDynamic = 256,
511 .maxDescriptorSetSampledImages = 256,
512 .maxDescriptorSetStorageImages = 256,
513 .maxDescriptorSetInputAttachments = 256,
514 .maxVertexInputAttributes = 32,
515 .maxVertexInputBindings = 32,
516 .maxVertexInputAttributeOffset = 2047,
517 .maxVertexInputBindingStride = 2048,
518 .maxVertexOutputComponents = 128,
519 .maxTessellationGenerationLevel = 0,
520 .maxTessellationPatchSize = 0,
521 .maxTessellationControlPerVertexInputComponents = 0,
522 .maxTessellationControlPerVertexOutputComponents = 0,
523 .maxTessellationControlPerPatchOutputComponents = 0,
524 .maxTessellationControlTotalOutputComponents = 0,
525 .maxTessellationEvaluationInputComponents = 0,
526 .maxTessellationEvaluationOutputComponents = 0,
527 .maxGeometryShaderInvocations = 32,
528 .maxGeometryInputComponents = 64,
529 .maxGeometryOutputComponents = 128,
530 .maxGeometryOutputVertices = 256,
531 .maxGeometryTotalOutputComponents = 1024,
532 .maxFragmentInputComponents = 128,
533 .maxFragmentOutputAttachments = 8,
534 .maxFragmentDualSrcAttachments = 1,
535 .maxFragmentCombinedOutputResources = 8,
536 .maxComputeSharedMemorySize = 32768,
537 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
538 .maxComputeWorkGroupInvocations = 2048,
539 .maxComputeWorkGroupSize = {
540 2048,
541 2048,
542 2048
543 },
544 .subPixelPrecisionBits = 4 /* FIXME */,
545 .subTexelPrecisionBits = 4 /* FIXME */,
546 .mipmapPrecisionBits = 4 /* FIXME */,
547 .maxDrawIndexedIndexValue = UINT32_MAX,
548 .maxDrawIndirectCount = UINT32_MAX,
549 .maxSamplerLodBias = 16,
550 .maxSamplerAnisotropy = 16,
551 .maxViewports = MAX_VIEWPORTS,
552 .maxViewportDimensions = { (1 << 14), (1 << 14) },
553 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
554 .viewportSubPixelBits = 13, /* We take a float? */
555 .minMemoryMapAlignment = 4096, /* A page */
556 .minTexelBufferOffsetAlignment = 1,
557 .minUniformBufferOffsetAlignment = 4,
558 .minStorageBufferOffsetAlignment = 4,
559 .minTexelOffset = -32,
560 .maxTexelOffset = 31,
561 .minTexelGatherOffset = -32,
562 .maxTexelGatherOffset = 31,
563 .minInterpolationOffset = -2,
564 .maxInterpolationOffset = 2,
565 .subPixelInterpolationOffsetBits = 8,
566 .maxFramebufferWidth = (1 << 14),
567 .maxFramebufferHeight = (1 << 14),
568 .maxFramebufferLayers = (1 << 10),
569 .framebufferColorSampleCounts = sample_counts,
570 .framebufferDepthSampleCounts = sample_counts,
571 .framebufferStencilSampleCounts = sample_counts,
572 .framebufferNoAttachmentsSampleCounts = sample_counts,
573 .maxColorAttachments = MAX_RTS,
574 .sampledImageColorSampleCounts = sample_counts,
575 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
576 .sampledImageDepthSampleCounts = sample_counts,
577 .sampledImageStencilSampleCounts = sample_counts,
578 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
579 .maxSampleMaskWords = 1,
580 .timestampComputeAndGraphics = false,
581 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
582 .maxClipDistances = 8,
583 .maxCullDistances = 8,
584 .maxCombinedClipAndCullDistances = 8,
585 .discreteQueuePriorities = 1,
586 .pointSizeRange = { 0.125, 255.875 },
587 .lineWidthRange = { 0.0, 7.9921875 },
588 .pointSizeGranularity = (1.0 / 8.0),
589 .lineWidthGranularity = (1.0 / 128.0),
590 .strictLines = false, /* FINISHME */
591 .standardSampleLocations = true,
592 .optimalBufferCopyOffsetAlignment = 128,
593 .optimalBufferCopyRowPitchAlignment = 128,
594 .nonCoherentAtomSize = 64,
595 };
596
597 *pProperties = (VkPhysicalDeviceProperties) {
598 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
599 .driverVersion = 1,
600 .vendorID = 0x1002,
601 .deviceID = pdevice->rad_info.pci_id,
602 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
603 .limits = limits,
604 .sparseProperties = {0}, /* No sparse support yet. */
605 };
606
607 strcpy(pProperties->deviceName, pdevice->name);
608 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
609 }
610
611 void radv_GetPhysicalDeviceProperties2KHR(
612 VkPhysicalDevice physicalDevice,
613 VkPhysicalDeviceProperties2KHR *pProperties)
614 {
615 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
616 }
617
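/* Shared worker for the queue-family queries: with a NULL properties
 * array it only reports how many families exist (1, or 2 when a CIK+
 * part exposes compute rings and "nocompute" isn't set); otherwise it
 * fills at most *pCount entries and writes back the number produced.
 */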
618 static void radv_get_physical_device_queue_family_properties(
619 struct radv_physical_device* pdevice,
620 uint32_t* pCount,
621 VkQueueFamilyProperties** pQueueFamilyProperties)
622 {
623 int num_queue_families = 1;
624 int idx;
625 if (pdevice->rad_info.compute_rings > 0 &&
626 pdevice->rad_info.chip_class >= CIK &&
627 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
628 num_queue_families++;
629
630 if (pQueueFamilyProperties == NULL) {
631 *pCount = num_queue_families;
632 return;
633 }
634
635 if (!*pCount)
636 return;
637
638 idx = 0;
639 if (*pCount >= 1) {
640 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
641 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
642 VK_QUEUE_COMPUTE_BIT |
643 VK_QUEUE_TRANSFER_BIT,
644 .queueCount = 1,
645 .timestampValidBits = 64,
646 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
647 };
648 idx++;
649 }
650
651 if (pdevice->rad_info.compute_rings > 0 &&
652 pdevice->rad_info.chip_class >= CIK &&
653 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
654 if (*pCount > idx) {
655 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
656 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
657 .queueCount = pdevice->rad_info.compute_rings,
658 .timestampValidBits = 64,
659 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
660 };
661 idx++;
662 }
663 }
664 *pCount = idx;
665 }
666
667 void radv_GetPhysicalDeviceQueueFamilyProperties(
668 VkPhysicalDevice physicalDevice,
669 uint32_t* pCount,
670 VkQueueFamilyProperties* pQueueFamilyProperties)
671 {
672 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
673 if (!pQueueFamilyProperties) {
674 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
675 return;
676 }
677 VkQueueFamilyProperties *properties[] = {
678 pQueueFamilyProperties + 0,
679 pQueueFamilyProperties + 1,
680 pQueueFamilyProperties + 2,
681 };
682 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
683 assert(*pCount <= 3);
684 }
685
686 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
687 VkPhysicalDevice physicalDevice,
688 uint32_t* pCount,
689 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
690 {
691 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
692 if (!pQueueFamilyProperties) {
693 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
694 return;
695 }
696 VkQueueFamilyProperties *properties[] = {
697 &pQueueFamilyProperties[0].queueFamilyProperties,
698 &pQueueFamilyProperties[1].queueFamilyProperties,
699 &pQueueFamilyProperties[2].queueFamilyProperties,
700 };
701 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
702 assert(*pCount <= 3);
703 }
704
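/* Four memory types map onto three heaps: VRAM (device-local, not CPU
 * visible) and VRAM_CPU_ACCESS (the CPU-visible slice) both live in
 * video memory, while the write-combined and cached GTT types share the
 * GART heap.
 */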
705 void radv_GetPhysicalDeviceMemoryProperties(
706 VkPhysicalDevice physicalDevice,
707 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
708 {
709 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
710
711 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
712
713 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
714 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
715 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
716 .heapIndex = RADV_MEM_HEAP_VRAM,
717 };
718 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
719 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
720 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
721 .heapIndex = RADV_MEM_HEAP_GTT,
722 };
723 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
724 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
725 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
726 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
727 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
728 };
729 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
730 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
731 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
732 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
733 .heapIndex = RADV_MEM_HEAP_GTT,
734 };
735
736 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
737
738 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
739 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
740 .size = physical_device->rad_info.vram_size -
741 physical_device->rad_info.visible_vram_size,
742 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
743 };
744 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
745 .size = physical_device->rad_info.visible_vram_size,
746 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
747 };
748 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
749 .size = physical_device->rad_info.gart_size,
750 .flags = 0,
751 };
752 }
753
754 void radv_GetPhysicalDeviceMemoryProperties2KHR(
755 VkPhysicalDevice physicalDevice,
756 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
757 {
758 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
759 &pMemoryProperties->memoryProperties);
760 }
761
762 static int
763 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
764 int queue_family_index, int idx)
765 {
766 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
767 queue->device = device;
768 queue->queue_family_index = queue_family_index;
769 queue->queue_idx = idx;
770
771 queue->hw_ctx = device->ws->ctx_create(device->ws);
772 if (!queue->hw_ctx)
773 return VK_ERROR_OUT_OF_HOST_MEMORY;
774
775 return VK_SUCCESS;
776 }
777
778 static void
779 radv_queue_finish(struct radv_queue *queue)
780 {
781 if (queue->hw_ctx)
782 queue->device->ws->ctx_destroy(queue->hw_ctx);
783
784 if (queue->initial_preamble_cs)
785 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
786 if (queue->continue_preamble_cs)
787 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
788 if (queue->descriptor_bo)
789 queue->device->ws->buffer_destroy(queue->descriptor_bo);
790 if (queue->scratch_bo)
791 queue->device->ws->buffer_destroy(queue->scratch_bo);
792 if (queue->esgs_ring_bo)
793 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
794 if (queue->gsvs_ring_bo)
795 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
796 if (queue->compute_scratch_bo)
797 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
798 }
799
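/* Per-family GS ring table depth, mirroring the values radeonsi uses:
 * 16 entries for the small/APU parts listed first, 32 for the larger
 * dGPUs. (The grouping follows the switch below; the hardware rationale
 * isn't spelled out here.)
 */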
800 static void
801 radv_device_init_gs_info(struct radv_device *device)
802 {
803 switch (device->physical_device->rad_info.family) {
804 case CHIP_OLAND:
805 case CHIP_HAINAN:
806 case CHIP_KAVERI:
807 case CHIP_KABINI:
808 case CHIP_MULLINS:
809 case CHIP_ICELAND:
810 case CHIP_CARRIZO:
811 case CHIP_STONEY:
812 device->gs_table_depth = 16;
813 return;
814 case CHIP_TAHITI:
815 case CHIP_PITCAIRN:
816 case CHIP_VERDE:
817 case CHIP_BONAIRE:
818 case CHIP_HAWAII:
819 case CHIP_TONGA:
820 case CHIP_FIJI:
821 case CHIP_POLARIS10:
822 case CHIP_POLARIS11:
823 device->gs_table_depth = 32;
824 return;
825 default:
826 unreachable("unknown GPU");
827 }
828 }
829
830 VkResult radv_CreateDevice(
831 VkPhysicalDevice physicalDevice,
832 const VkDeviceCreateInfo* pCreateInfo,
833 const VkAllocationCallbacks* pAllocator,
834 VkDevice* pDevice)
835 {
836 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
837 VkResult result;
838 struct radv_device *device;
839
840 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
841 if (!is_extension_enabled(physical_device->extensions.ext_array,
842 physical_device->extensions.num_ext,
843 pCreateInfo->ppEnabledExtensionNames[i]))
844 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
845 }
846
847 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
848 sizeof(*device), 8,
849 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
850 if (!device)
851 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
852
853 memset(device, 0, sizeof(*device));
854
855 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
856 device->instance = physical_device->instance;
857 device->physical_device = physical_device;
858
859 device->debug_flags = device->instance->debug_flags;
860
861 device->ws = physical_device->ws;
862 if (pAllocator)
863 device->alloc = *pAllocator;
864 else
865 device->alloc = physical_device->instance->alloc;
866
867 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
868 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
869 uint32_t qfi = queue_create->queueFamilyIndex;
870
871 device->queues[qfi] = vk_alloc(&device->alloc,
872 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
873 if (!device->queues[qfi]) {
874 result = VK_ERROR_OUT_OF_HOST_MEMORY;
875 goto fail;
876 }
877
878 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
879
880 device->queue_count[qfi] = queue_create->queueCount;
881
882 for (unsigned q = 0; q < queue_create->queueCount; q++) {
883 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
884 if (result != VK_SUCCESS)
885 goto fail;
886 }
887 }
888
889 #if HAVE_LLVM < 0x0400
890 device->llvm_supports_spill = false;
891 #else
892 device->llvm_supports_spill = true;
893 #endif
894
895 /* The maximum number of scratch waves. Scratch space isn't divided
896 * evenly between CUs. The number is only a function of the number of CUs.
897 * We can decrease the constant to decrease the scratch buffer size.
898 *
899 * device->scratch_waves must be >= the maximum possible size of
900 * 1 threadgroup, so that the hw doesn't hang from being unable
901 * to start any.
902 *
903 * The recommended value is 4 per CU at most. Higher numbers don't
904 * bring much benefit, but they still occupy chip resources (think
905 * async compute). I've seen ~2% performance difference between 4 and 32.
906 */
907 uint32_t max_threads_per_block = 2048;
908 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
909 max_threads_per_block / 64);
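/* Worked example (illustrative numbers): a 36-CU Polaris10 gets
 * MAX2(32 * 36, 2048 / 64) = MAX2(1152, 32) = 1152 scratch waves. Since
 * 32 * num_CUs is already >= 32 for any real part, the CU term decides;
 * the 32-wave floor only guarantees one full 2048-thread threadgroup
 * can always start.
 */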
910
911 radv_device_init_gs_info(device);
912
913 result = radv_device_init_meta(device);
914 if (result != VK_SUCCESS)
915 goto fail;
916
917 radv_device_init_msaa(device);
918
919 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
920 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
921 switch (family) {
922 case RADV_QUEUE_GENERAL:
923 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
924 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
925 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
926 break;
927 case RADV_QUEUE_COMPUTE:
928 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
929 radeon_emit(device->empty_cs[family], 0);
930 break;
931 }
932 device->ws->cs_finalize(device->empty_cs[family]);
933
934 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
935 switch (family) {
936 case RADV_QUEUE_GENERAL:
937 case RADV_QUEUE_COMPUTE:
938 si_cs_emit_cache_flush(device->flush_cs[family],
939 device->physical_device->rad_info.chip_class,
940 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
941 RADV_CMD_FLAG_INV_ICACHE |
942 RADV_CMD_FLAG_INV_SMEM_L1 |
943 RADV_CMD_FLAG_INV_VMEM_L1 |
944 RADV_CMD_FLAG_INV_GLOBAL_L2);
945 break;
946 }
947 device->ws->cs_finalize(device->flush_cs[family]);
948 }
949
950 if (getenv("RADV_TRACE_FILE")) {
951 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
952 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
953 if (!device->trace_bo) {
954 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;
}
955
956 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
957 if (!device->trace_id_ptr) {
958 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;
}
959 }
960
961 if (device->physical_device->rad_info.chip_class >= CIK)
962 cik_create_gfx_config(device);
963
964 *pDevice = radv_device_to_handle(device);
965 return VK_SUCCESS;
966
967 fail:
968 if (device->trace_bo)
969 device->ws->buffer_destroy(device->trace_bo);
970
971 if (device->gfx_init)
972 device->ws->buffer_destroy(device->gfx_init);
973
974 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
975 for (unsigned q = 0; q < device->queue_count[i]; q++)
976 radv_queue_finish(&device->queues[i][q]);
977 if (device->queue_count[i])
978 vk_free(&device->alloc, device->queues[i]);
979 }
980
981 vk_free(&device->alloc, device);
982 return result;
983 }
984
985 void radv_DestroyDevice(
986 VkDevice _device,
987 const VkAllocationCallbacks* pAllocator)
988 {
989 RADV_FROM_HANDLE(radv_device, device, _device);
990
991 if (device->trace_bo)
992 device->ws->buffer_destroy(device->trace_bo);
993
994 if (device->gfx_init)
995 device->ws->buffer_destroy(device->gfx_init);
996
997 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
998 for (unsigned q = 0; q < device->queue_count[i]; q++)
999 radv_queue_finish(&device->queues[i][q]);
1000 if (device->queue_count[i])
1001 vk_free(&device->alloc, device->queues[i]);
1002 if (device->empty_cs[i])
1003 device->ws->cs_destroy(device->empty_cs[i]);
1004 if (device->flush_cs[i])
1005 device->ws->cs_destroy(device->flush_cs[i]);
1006 }
1007 radv_device_finish_meta(device);
1008
1009 vk_free(&device->alloc, device);
1010 }
1011
1012 VkResult radv_EnumerateInstanceExtensionProperties(
1013 const char* pLayerName,
1014 uint32_t* pPropertyCount,
1015 VkExtensionProperties* pProperties)
1016 {
1017 if (pProperties == NULL) {
1018 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1019 return VK_SUCCESS;
1020 }
1021
1022 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1023 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1024
1025 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1026 return VK_INCOMPLETE;
1027
1028 return VK_SUCCESS;
1029 }
1030
1031 VkResult radv_EnumerateDeviceExtensionProperties(
1032 VkPhysicalDevice physicalDevice,
1033 const char* pLayerName,
1034 uint32_t* pPropertyCount,
1035 VkExtensionProperties* pProperties)
1036 {
1037 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1038
1039 if (pProperties == NULL) {
1040 *pPropertyCount = pdevice->extensions.num_ext;
1041 return VK_SUCCESS;
1042 }
1043
1044 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1045 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1046
1047 if (*pPropertyCount < pdevice->extensions.num_ext)
1048 return VK_INCOMPLETE;
1049
1050 return VK_SUCCESS;
1051 }
1052
1053 VkResult radv_EnumerateInstanceLayerProperties(
1054 uint32_t* pPropertyCount,
1055 VkLayerProperties* pProperties)
1056 {
1057 if (pProperties == NULL) {
1058 *pPropertyCount = 0;
1059 return VK_SUCCESS;
1060 }
1061
1062 /* None supported at this time */
1063 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1064 }
1065
1066 VkResult radv_EnumerateDeviceLayerProperties(
1067 VkPhysicalDevice physicalDevice,
1068 uint32_t* pPropertyCount,
1069 VkLayerProperties* pProperties)
1070 {
1071 if (pProperties == NULL) {
1072 *pPropertyCount = 0;
1073 return VK_SUCCESS;
1074 }
1075
1076 /* None supported at this time */
1077 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1078 }
1079
1080 void radv_GetDeviceQueue(
1081 VkDevice _device,
1082 uint32_t queueFamilyIndex,
1083 uint32_t queueIndex,
1084 VkQueue* pQueue)
1085 {
1086 RADV_FROM_HANDLE(radv_device, device, _device);
1087
1088 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1089 }
1090
1091 static void radv_dump_trace(struct radv_device *device,
1092 struct radeon_winsys_cs *cs)
1093 {
1094 const char *filename = getenv("RADV_TRACE_FILE");
1095 FILE *f = fopen(filename, "w");
1096 if (!f) {
1097 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1098 return;
1099 }
1100
1101 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1102 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1103 fclose(f);
1104 }
1105
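/* Write the geometry-ring buffer descriptors into the queue's descriptor
 * BO, starting at dword 4 (dwords 0-1 hold the scratch rsrc written by
 * the caller, dwords 2-3 are padding). Four V#s of 4 dwords each follow:
 * the ES write entry and GS read entry for the ES->GS ring, then the VS
 * read entry and the GS write entry for the GS->VS ring; the shader
 * patches the stride and num_records of the last one.
 */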
1106 static void
1107 fill_geom_rings(struct radv_queue *queue,
1108 uint32_t *map,
1109 uint32_t esgs_ring_size,
1110 struct radeon_winsys_bo *esgs_ring_bo,
1111 uint32_t gsvs_ring_size,
1112 struct radeon_winsys_bo *gsvs_ring_bo)
1113 {
1114 uint64_t esgs_va = 0, gsvs_va = 0;
1115 uint32_t *desc = &map[4];
1116
1117 if (esgs_ring_bo)
1118 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1119 if (gsvs_ring_bo)
1120 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1121
1122 /* stride 0, num records - size, add tid, swizzle, elsize4,
1123 index stride 64 */
1124 desc[0] = esgs_va;
1125 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1126 S_008F04_STRIDE(0) |
1127 S_008F04_SWIZZLE_ENABLE(true);
1128 desc[2] = esgs_ring_size;
1129 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1130 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1131 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1132 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1133 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1134 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1135 S_008F0C_ELEMENT_SIZE(1) |
1136 S_008F0C_INDEX_STRIDE(3) |
1137 S_008F0C_ADD_TID_ENABLE(true);
1138
1139 desc += 4;
1140 /* GS entry for ES->GS ring */
1141 /* stride 0, num records - size, elsize0,
1142 index stride 0 */
1143 desc[0] = esgs_va;
1144 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1145 S_008F04_STRIDE(0) |
1146 S_008F04_SWIZZLE_ENABLE(false);
1147 desc[2] = esgs_ring_size;
1148 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1149 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1150 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1151 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1152 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1153 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1154 S_008F0C_ELEMENT_SIZE(0) |
1155 S_008F0C_INDEX_STRIDE(0) |
1156 S_008F0C_ADD_TID_ENABLE(false);
1157
1158 desc += 4;
1159 /* VS entry for GS->VS ring */
1160 /* stride 0, num records - size, elsize0,
1161 index stride 0 */
1162 desc[0] = gsvs_va;
1163 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1164 S_008F04_STRIDE(0) |
1165 S_008F04_SWIZZLE_ENABLE(false);
1166 desc[2] = gsvs_ring_size;
1167 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1168 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1169 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1170 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1171 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1172 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1173 S_008F0C_ELEMENT_SIZE(0) |
1174 S_008F0C_INDEX_STRIDE(0) |
1175 S_008F0C_ADD_TID_ENABLE(false);
1176 desc += 4;
1177
1178 /* stride gsvs_itemsize, num records 64
1179 elsize 4, index stride 16 */
1180 /* shader will patch stride and desc[2] */
1181 desc[0] = gsvs_va;
1182 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1183 S_008F04_STRIDE(0) |
1184 S_008F04_SWIZZLE_ENABLE(true);
1185 desc[2] = 0;
1186 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1187 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1188 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1189 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1190 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1191 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1192 S_008F0C_ELEMENT_SIZE(1) |
1193 S_008F0C_INDEX_STRIDE(1) |
1194 S_008F0C_ADD_TID_ENABLE(true);
1195 }
1196
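/* Build (or reuse) the per-queue preamble command streams. The cached
 * scratch and ring sizes only ever grow; if the existing BOs already
 * satisfy a request, the cached preambles are returned as-is. Otherwise
 * larger BOs (and, when the descriptors change, a new descriptor BO) are
 * allocated and two preambles are recorded: the initial one includes a
 * full cache flush, the "continue" one used between chained submissions
 * does not.
 */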
1197 static VkResult
1198 radv_get_preamble_cs(struct radv_queue *queue,
1199 uint32_t scratch_size,
1200 uint32_t compute_scratch_size,
1201 uint32_t esgs_ring_size,
1202 uint32_t gsvs_ring_size,
1203 struct radeon_winsys_cs **initial_preamble_cs,
1204 struct radeon_winsys_cs **continue_preamble_cs)
1205 {
1206 struct radeon_winsys_bo *scratch_bo = NULL;
1207 struct radeon_winsys_bo *descriptor_bo = NULL;
1208 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1209 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1210 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1211 struct radeon_winsys_cs *dest_cs[2] = {0};
1212
1213 if (scratch_size <= queue->scratch_size &&
1214 compute_scratch_size <= queue->compute_scratch_size &&
1215 esgs_ring_size <= queue->esgs_ring_size &&
1216 gsvs_ring_size <= queue->gsvs_ring_size &&
1217 queue->initial_preamble_cs) {
1218 *initial_preamble_cs = queue->initial_preamble_cs;
1219 *continue_preamble_cs = queue->continue_preamble_cs;
1220 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1221 *continue_preamble_cs = NULL;
1222 return VK_SUCCESS;
1223 }
1224
1225 if (scratch_size > queue->scratch_size) {
1226 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1227 scratch_size,
1228 4096,
1229 RADEON_DOMAIN_VRAM,
1230 RADEON_FLAG_NO_CPU_ACCESS);
1231 if (!scratch_bo)
1232 goto fail;
1233 } else
1234 scratch_bo = queue->scratch_bo;
1235
1236 if (compute_scratch_size > queue->compute_scratch_size) {
1237 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1238 compute_scratch_size,
1239 4096,
1240 RADEON_DOMAIN_VRAM,
1241 RADEON_FLAG_NO_CPU_ACCESS);
1242 if (!compute_scratch_bo)
1243 goto fail;
1244
1245 } else
1246 compute_scratch_bo = queue->compute_scratch_bo;
1247
1248 if (esgs_ring_size > queue->esgs_ring_size) {
1249 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1250 esgs_ring_size,
1251 4096,
1252 RADEON_DOMAIN_VRAM,
1253 RADEON_FLAG_NO_CPU_ACCESS);
1254 if (!esgs_ring_bo)
1255 goto fail;
1256 } else {
1257 esgs_ring_bo = queue->esgs_ring_bo;
1258 esgs_ring_size = queue->esgs_ring_size;
1259 }
1260
1261 if (gsvs_ring_size > queue->gsvs_ring_size) {
1262 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1263 gsvs_ring_size,
1264 4096,
1265 RADEON_DOMAIN_VRAM,
1266 RADEON_FLAG_NO_CPU_ACCESS);
1267 if (!gsvs_ring_bo)
1268 goto fail;
1269 } else {
1270 gsvs_ring_bo = queue->gsvs_ring_bo;
1271 gsvs_ring_size = queue->gsvs_ring_size;
1272 }
1273
1274 if (scratch_bo != queue->scratch_bo ||
1275 esgs_ring_bo != queue->esgs_ring_bo ||
1276 gsvs_ring_bo != queue->gsvs_ring_bo) {
1277 uint32_t size = 0;
1278 if (gsvs_ring_bo || esgs_ring_bo)
1279 size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
1280 else if (scratch_bo)
1281 size = 8; /* 2 dword */
1282
1283 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1284 size,
1285 4096,
1286 RADEON_DOMAIN_VRAM,
1287 RADEON_FLAG_CPU_ACCESS);
1288 if (!descriptor_bo)
1289 goto fail;
1290 } else
1291 descriptor_bo = queue->descriptor_bo;
1292
1293 for(int i = 0; i < 2; ++i) {
1294 struct radeon_winsys_cs *cs = NULL;
1295 cs = queue->device->ws->cs_create(queue->device->ws,
1296 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1297 if (!cs)
1298 goto fail;
1299
1300 dest_cs[i] = cs;
1301
1302 if (scratch_bo)
1303 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1304
1305 if (esgs_ring_bo)
1306 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1307
1308 if (gsvs_ring_bo)
1309 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1310
1311 if (descriptor_bo)
1312 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1313
1314 if (descriptor_bo != queue->descriptor_bo) {
1315 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
if (!map)
goto fail;
1316
1317 if (scratch_bo) {
1318 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1319 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1320 S_008F04_SWIZZLE_ENABLE(1);
1321 map[0] = scratch_va;
1322 map[1] = rsrc1;
1323 }
1324
1325 if (esgs_ring_bo || gsvs_ring_bo)
1326 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1327
1328 queue->device->ws->buffer_unmap(descriptor_bo);
1329 }
1330
1331 if (esgs_ring_bo || gsvs_ring_bo) {
1332 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1333 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1334 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1335 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1336
1337 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1338 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1339 radeon_emit(cs, esgs_ring_size >> 8);
1340 radeon_emit(cs, gsvs_ring_size >> 8);
1341 } else {
1342 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1343 radeon_emit(cs, esgs_ring_size >> 8);
1344 radeon_emit(cs, gsvs_ring_size >> 8);
1345 }
1346 }
1347
1348 if (descriptor_bo) {
1349 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1350 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1351 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1352 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1353 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1354 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1355
1356 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1357
1358 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1359 radeon_set_sh_reg_seq(cs, regs[i], 2);
1360 radeon_emit(cs, va);
1361 radeon_emit(cs, va >> 32);
1362 }
1363 }
1364
1365 if (compute_scratch_bo) {
1366 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1367 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1368 S_008F04_SWIZZLE_ENABLE(1);
1369
1370 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1371
1372 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1373 radeon_emit(cs, scratch_va);
1374 radeon_emit(cs, rsrc1);
1375 }
1376
1377 if (!i) {
1378 si_cs_emit_cache_flush(cs,
1379 queue->device->physical_device->rad_info.chip_class,
1380 queue->queue_family_index == RING_COMPUTE &&
1381 queue->device->physical_device->rad_info.chip_class >= CIK,
1382 RADV_CMD_FLAG_INV_ICACHE |
1383 RADV_CMD_FLAG_INV_SMEM_L1 |
1384 RADV_CMD_FLAG_INV_VMEM_L1 |
1385 RADV_CMD_FLAG_INV_GLOBAL_L2);
1386 }
1387
1388 if (!queue->device->ws->cs_finalize(cs))
1389 goto fail;
1390 }
1391
1392 if (queue->initial_preamble_cs)
1393 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1394
1395 if (queue->continue_preamble_cs)
1396 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1397
1398 queue->initial_preamble_cs = dest_cs[0];
1399 queue->continue_preamble_cs = dest_cs[1];
1400
1401 if (scratch_bo != queue->scratch_bo) {
1402 if (queue->scratch_bo)
1403 queue->device->ws->buffer_destroy(queue->scratch_bo);
1404 queue->scratch_bo = scratch_bo;
1405 queue->scratch_size = scratch_size;
1406 }
1407
1408 if (compute_scratch_bo != queue->compute_scratch_bo) {
1409 if (queue->compute_scratch_bo)
1410 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1411 queue->compute_scratch_bo = compute_scratch_bo;
1412 queue->compute_scratch_size = compute_scratch_size;
1413 }
1414
1415 if (esgs_ring_bo != queue->esgs_ring_bo) {
1416 if (queue->esgs_ring_bo)
1417 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1418 queue->esgs_ring_bo = esgs_ring_bo;
1419 queue->esgs_ring_size = esgs_ring_size;
1420 }
1421
1422 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1423 if (queue->gsvs_ring_bo)
1424 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1425 queue->gsvs_ring_bo = gsvs_ring_bo;
1426 queue->gsvs_ring_size = gsvs_ring_size;
1427 }
1428
1429 if (descriptor_bo != queue->descriptor_bo) {
1430 if (queue->descriptor_bo)
1431 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1432
1433 queue->descriptor_bo = descriptor_bo;
1434 }
1435
1436 *initial_preamble_cs = queue->initial_preamble_cs;
1437 *continue_preamble_cs = queue->continue_preamble_cs;
1438 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1439 *continue_preamble_cs = NULL;
1440 return VK_SUCCESS;
1441 fail:
1442 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1443 if (dest_cs[i])
1444 queue->device->ws->cs_destroy(dest_cs[i]);
1445 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1446 queue->device->ws->buffer_destroy(descriptor_bo);
1447 if (scratch_bo && scratch_bo != queue->scratch_bo)
1448 queue->device->ws->buffer_destroy(scratch_bo);
1449 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1450 queue->device->ws->buffer_destroy(compute_scratch_bo);
1451 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1452 queue->device->ws->buffer_destroy(esgs_ring_bo);
1453 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1454 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1455 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1456 }
1457
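/* Submission happens in chunks of at most max_cs_submission command
 * streams (forced to 1 when RADV_TRACE_FILE is set, so a hang can be
 * pinned to a single CS). Wait semaphores apply only to the first chunk
 * and signal semaphores to the last; flush_cs is prepended to the first
 * submission, and scratch/ring sizes are computed up front so an
 * allocation failure can't leave a submit half-executed.
 */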
1458 VkResult radv_QueueSubmit(
1459 VkQueue _queue,
1460 uint32_t submitCount,
1461 const VkSubmitInfo* pSubmits,
1462 VkFence _fence)
1463 {
1464 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1465 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1466 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1467 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1468 int ret;
1469 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1470 uint32_t scratch_size = 0;
1471 uint32_t compute_scratch_size = 0;
1472 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1473 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1474 VkResult result;
1475 bool fence_emitted = false;
1476
1477 /* Do this first so failing to allocate scratch buffers can't result in
1478 * partially executed submissions. */
1479 for (uint32_t i = 0; i < submitCount; i++) {
1480 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1481 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1482 pSubmits[i].pCommandBuffers[j]);
1483
1484 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1485 compute_scratch_size = MAX2(compute_scratch_size,
1486 cmd_buffer->compute_scratch_size_needed);
1487 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1488 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1489 }
1490 }
1491
1492 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1493 esgs_ring_size, gsvs_ring_size,
1494 &initial_preamble_cs, &continue_preamble_cs);
1495 if (result != VK_SUCCESS)
1496 return result;
1497
1498 for (uint32_t i = 0; i < submitCount; i++) {
1499 struct radeon_winsys_cs **cs_array;
1500 bool has_flush = !i; /* only the first submission gets the flush CS */
1501 bool can_patch = !has_flush;
1502 uint32_t advance;
1503
1504 if (!pSubmits[i].commandBufferCount) {
1505 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1506 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1507 &queue->device->empty_cs[queue->queue_family_index],
1508 1, NULL, NULL,
1509 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1510 pSubmits[i].waitSemaphoreCount,
1511 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1512 pSubmits[i].signalSemaphoreCount,
1513 false, base_fence);
1514 if (ret) {
1515 radv_loge("failed to submit CS %d\n", i);
1516 abort();
1517 }
1518 fence_emitted = true;
1519 }
1520 continue;
1521 }
1522
1523 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1524 (pSubmits[i].commandBufferCount + has_flush));
if (!cs_array)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1525
1526 if(has_flush)
1527 cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1528
1529 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1530 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1531 pSubmits[i].pCommandBuffers[j]);
1532 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1533
1534 cs_array[j + has_flush] = cmd_buffer->cs;
1535 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1536 can_patch = false;
1537 }
1538
1539 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
1540 advance = MIN2(max_cs_submission,
1541 pSubmits[i].commandBufferCount + has_flush - j);
1542 bool b = j == 0;
1543 bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;
1544
1545 if (queue->device->trace_bo)
1546 *queue->device->trace_id_ptr = 0;
1547
1548 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1549 advance, initial_preamble_cs, continue_preamble_cs,
1550 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1551 b ? pSubmits[i].waitSemaphoreCount : 0,
1552 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1553 e ? pSubmits[i].signalSemaphoreCount : 0,
1554 can_patch, base_fence);
1555
1556 if (ret) {
1557 radv_loge("failed to submit CS %d\n", i);
1558 abort();
1559 }
1560 fence_emitted = true;
1561 if (queue->device->trace_bo) {
1562 bool success = queue->device->ws->ctx_wait_idle(
1563 queue->hw_ctx,
1564 radv_queue_family_to_ring(
1565 queue->queue_family_index),
1566 queue->queue_idx);
1567
1568 if (!success) { /* Hang */
1569 radv_dump_trace(queue->device, cs_array[j]);
1570 abort();
1571 }
1572 }
1573 }
1574 free(cs_array);
1575 }
1576
1577 if (fence) {
1578 if (!fence_emitted)
1579 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1580 &queue->device->empty_cs[queue->queue_family_index],
1581 1, NULL, NULL, NULL, 0, NULL, 0,
1582 false, base_fence);
1583
1584 fence->submitted = true;
1585 }
1586
1587 return VK_SUCCESS;
1588 }
1589
1590 VkResult radv_QueueWaitIdle(
1591 VkQueue _queue)
1592 {
1593 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1594
1595 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1596 radv_queue_family_to_ring(queue->queue_family_index),
1597 queue->queue_idx);
1598 return VK_SUCCESS;
1599 }
1600
1601 VkResult radv_DeviceWaitIdle(
1602 VkDevice _device)
1603 {
1604 RADV_FROM_HANDLE(radv_device, device, _device);
1605
1606 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1607 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1608 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1609 }
1610 }
1611 return VK_SUCCESS;
1612 }
1613
1614 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1615 VkInstance instance,
1616 const char* pName)
1617 {
1618 return radv_lookup_entrypoint(pName);
1619 }
1620
1621 /* The loader wants us to expose a second GetInstanceProcAddr function
1622 * to work around certain LD_PRELOAD issues seen in apps.
1623 */
1624 PUBLIC
1625 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1626 VkInstance instance,
1627 const char* pName);
1628
1629 PUBLIC
1630 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1631 VkInstance instance,
1632 const char* pName)
1633 {
1634 return radv_GetInstanceProcAddr(instance, pName);
1635 }
1636
1637 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1638 VkDevice device,
1639 const char* pName)
1640 {
1641 return radv_lookup_entrypoint(pName);
1642 }
1643
1644 bool radv_get_memory_fd(struct radv_device *device,
1645 struct radv_device_memory *memory,
1646 int *pFD)
1647 {
1648 struct radeon_bo_metadata metadata;
1649
1650 if (memory->image) {
1651 radv_init_metadata(device, memory->image, &metadata);
1652 device->ws->buffer_set_metadata(memory->bo, &metadata);
1653 }
1654
1655 return device->ws->buffer_get_fd(device->ws, memory->bo,
1656 pFD);
1657 }
1658
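/* The memoryTypeIndex picks domain and flags: both GTT types allocate in
 * GART (the write-combined one additionally sets RADEON_FLAG_GTT_WC),
 * pure VRAM is marked NO_CPU_ACCESS, and everything else gets
 * CPU_ACCESS. Sizes are rounded up to 4 KiB and BOs are 64 KiB aligned.
 */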
1659 VkResult radv_AllocateMemory(
1660 VkDevice _device,
1661 const VkMemoryAllocateInfo* pAllocateInfo,
1662 const VkAllocationCallbacks* pAllocator,
1663 VkDeviceMemory* pMem)
1664 {
1665 RADV_FROM_HANDLE(radv_device, device, _device);
1666 struct radv_device_memory *mem;
1667 VkResult result;
1668 enum radeon_bo_domain domain;
1669 uint32_t flags = 0;
1670 const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
1671 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1672
1673 if (pAllocateInfo->allocationSize == 0) {
1674 /* Apparently, this is allowed */
1675 *pMem = VK_NULL_HANDLE;
1676 return VK_SUCCESS;
1677 }
1678
1679 vk_foreach_struct(ext, pAllocateInfo->pNext) {
1680 switch (ext->sType) {
1681 case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
1682 dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
1683 break;
1684 default:
1685 break;
1686 }
1687 }
1688
1689 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1690 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1691 if (mem == NULL)
1692 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1693
1694 if (dedicate_info) {
1695 mem->image = radv_image_from_handle(dedicate_info->image);
1696 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
1697 } else {
1698 mem->image = NULL;
1699 mem->buffer = NULL;
1700 }
1701
1702 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1703 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1704 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1705 domain = RADEON_DOMAIN_GTT;
1706 else
1707 domain = RADEON_DOMAIN_VRAM;
1708
1709 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1710 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1711 else
1712 flags |= RADEON_FLAG_CPU_ACCESS;
1713
1714 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1715 flags |= RADEON_FLAG_GTT_WC;
1716
1717 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1718 domain, flags);
1719
1720 if (!mem->bo) {
1721 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1722 goto fail;
1723 }
1724 mem->type_index = pAllocateInfo->memoryTypeIndex;
1725
1726 *pMem = radv_device_memory_to_handle(mem);
1727
1728 return VK_SUCCESS;
1729
1730 fail:
1731 vk_free2(&device->alloc, pAllocator, mem);
1732
1733 return result;
1734 }
1735
1736 void radv_FreeMemory(
1737 VkDevice _device,
1738 VkDeviceMemory _mem,
1739 const VkAllocationCallbacks* pAllocator)
1740 {
1741 RADV_FROM_HANDLE(radv_device, device, _device);
1742 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1743
1744 if (mem == NULL)
1745 return;
1746
1747 device->ws->buffer_destroy(mem->bo);
1748 mem->bo = NULL;
1749
1750 vk_free2(&device->alloc, pAllocator, mem);
1751 }
1752
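/* Map the entire BO and offset the returned pointer; the size parameter
 * is ignored since the whole allocation is always mapped. */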
1753 VkResult radv_MapMemory(
1754 VkDevice _device,
1755 VkDeviceMemory _memory,
1756 VkDeviceSize offset,
1757 VkDeviceSize size,
1758 VkMemoryMapFlags flags,
1759 void** ppData)
1760 {
1761 RADV_FROM_HANDLE(radv_device, device, _device);
1762 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1763
1764 if (mem == NULL) {
1765 *ppData = NULL;
1766 return VK_SUCCESS;
1767 }
1768
1769 *ppData = device->ws->buffer_map(mem->bo);
1770 if (*ppData) {
1771 *ppData = (uint8_t *)*ppData + offset; /* apply the byte offset without void* arithmetic */
1772 return VK_SUCCESS;
1773 }
1774
1775 return VK_ERROR_MEMORY_MAP_FAILED;
1776 }
1777
1778 void radv_UnmapMemory(
1779 VkDevice _device,
1780 VkDeviceMemory _memory)
1781 {
1782 RADV_FROM_HANDLE(radv_device, device, _device);
1783 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1784
1785 if (mem == NULL)
1786 return;
1787
1788 device->ws->buffer_unmap(mem->bo);
1789 }
1790
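/* Flush/invalidate are no-ops: every host-visible memory type radv
 * advertises is also host-coherent, so no cache maintenance is needed. */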
1791 VkResult radv_FlushMappedMemoryRanges(
1792 VkDevice _device,
1793 uint32_t memoryRangeCount,
1794 const VkMappedMemoryRange* pMemoryRanges)
1795 {
1796 return VK_SUCCESS;
1797 }
1798
1799 VkResult radv_InvalidateMappedMemoryRanges(
1800 VkDevice _device,
1801 uint32_t memoryRangeCount,
1802 const VkMappedMemoryRange* pMemoryRanges)
1803 {
1804 return VK_SUCCESS;
1805 }
1806
1807 void radv_GetBufferMemoryRequirements(
1808 VkDevice device,
1809 VkBuffer _buffer,
1810 VkMemoryRequirements* pMemoryRequirements)
1811 {
1812 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1813
1814 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1815
1816 pMemoryRequirements->size = buffer->size;
1817 pMemoryRequirements->alignment = 16;
1818 }
1819
1820 void radv_GetImageMemoryRequirements(
1821 VkDevice device,
1822 VkImage _image,
1823 VkMemoryRequirements* pMemoryRequirements)
1824 {
1825 RADV_FROM_HANDLE(radv_image, image, _image);
1826
1827 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1828
1829 pMemoryRequirements->size = image->size;
1830 pMemoryRequirements->alignment = image->alignment;
1831 }
1832
1833 void radv_GetImageSparseMemoryRequirements(
1834 VkDevice device,
1835 VkImage image,
1836 uint32_t* pSparseMemoryRequirementCount,
1837 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1838 {
1839 stub();
1840 }
1841
1842 void radv_GetDeviceMemoryCommitment(
1843 VkDevice device,
1844 VkDeviceMemory memory,
1845 VkDeviceSize* pCommittedMemoryInBytes)
1846 {
1847 *pCommittedMemoryInBytes = 0;
1848 }
1849
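/* Binding is pure CPU-side bookkeeping: the buffer just records which BO
 * and offset back it, for later use when descriptors are built. */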
1850 VkResult radv_BindBufferMemory(
1851 VkDevice device,
1852 VkBuffer _buffer,
1853 VkDeviceMemory _memory,
1854 VkDeviceSize memoryOffset)
1855 {
1856 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1857 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1858
1859 if (mem) {
1860 buffer->bo = mem->bo;
1861 buffer->offset = memoryOffset;
1862 } else {
1863 buffer->bo = NULL;
1864 buffer->offset = 0;
1865 }
1866
1867 return VK_SUCCESS;
1868 }
1869
1870 VkResult radv_BindImageMemory(
1871 VkDevice device,
1872 VkImage _image,
1873 VkDeviceMemory _memory,
1874 VkDeviceSize memoryOffset)
1875 {
1876 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1877 RADV_FROM_HANDLE(radv_image, image, _image);
1878
1879 if (mem) {
1880 image->bo = mem->bo;
1881 image->offset = memoryOffset;
1882 } else {
1883 image->bo = NULL;
1884 image->offset = 0;
1885 }
1886
1887 return VK_SUCCESS;
1888 }
1889
1890 VkResult radv_QueueBindSparse(
1891 VkQueue queue,
1892 uint32_t bindInfoCount,
1893 const VkBindSparseInfo* pBindInfo,
1894 VkFence fence)
1895 {
1896 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1897 }
1898
1899 VkResult radv_CreateFence(
1900 VkDevice _device,
1901 const VkFenceCreateInfo* pCreateInfo,
1902 const VkAllocationCallbacks* pAllocator,
1903 VkFence* pFence)
1904 {
1905 RADV_FROM_HANDLE(radv_device, device, _device);
1906 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1907 sizeof(*fence), 8,
1908 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1909
1910 if (!fence)
1911 return VK_ERROR_OUT_OF_HOST_MEMORY;
1912
1913 memset(fence, 0, sizeof(*fence));
1914 fence->submitted = false;
1915 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1916 fence->fence = device->ws->create_fence();
1917 if (!fence->fence) {
1918 vk_free2(&device->alloc, pAllocator, fence);
1919 return VK_ERROR_OUT_OF_HOST_MEMORY;
1920 }
1921
1922 *pFence = radv_fence_to_handle(fence);
1923
1924 return VK_SUCCESS;
1925 }
1926
1927 void radv_DestroyFence(
1928 VkDevice _device,
1929 VkFence _fence,
1930 const VkAllocationCallbacks* pAllocator)
1931 {
1932 RADV_FROM_HANDLE(radv_device, device, _device);
1933 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1934
1935 if (!fence)
1936 return;
1937 device->ws->destroy_fence(fence->fence);
1938 vk_free2(&device->alloc, pAllocator, fence);
1939 }
1940
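/* Turn a relative timeout into an absolute CLOCK_MONOTONIC deadline,
 * clamped so the addition cannot overflow UINT64_MAX. */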
1941 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1942 {
1943 uint64_t current_time;
1944 struct timespec tv;
1945
1946 clock_gettime(CLOCK_MONOTONIC, &tv);
1947 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1948
1949 timeout = MIN2(UINT64_MAX - current_time, timeout);
1950
1951 return current_time + timeout;
1952 }
1953
1954 VkResult radv_WaitForFences(
1955 VkDevice _device,
1956 uint32_t fenceCount,
1957 const VkFence* pFences,
1958 VkBool32 waitAll,
1959 uint64_t timeout)
1960 {
1961 RADV_FROM_HANDLE(radv_device, device, _device);
1962 timeout = radv_get_absolute_timeout(timeout);
1963
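/* TODO: with waitAll == false we still wait for every fence below, which
 * can block longer than the spec strictly requires. */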
1964 if (!waitAll && fenceCount > 1) {
1965 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1966 }
1967
1968 for (uint32_t i = 0; i < fenceCount; ++i) {
1969 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1970 bool expired = false;
1971
1972 if (fence->signalled)
1973 continue;
1974
1975 if (!fence->submitted)
1976 return VK_TIMEOUT;
1977
1978 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1979 if (!expired)
1980 return VK_TIMEOUT;
1981
1982 fence->signalled = true;
1983 }
1984
1985 return VK_SUCCESS;
1986 }
1987
1988 VkResult radv_ResetFences(VkDevice device,
1989 uint32_t fenceCount,
1990 const VkFence *pFences)
1991 {
1992 for (unsigned i = 0; i < fenceCount; ++i) {
1993 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1994 fence->submitted = fence->signalled = false;
1995 }
1996
1997 return VK_SUCCESS;
1998 }
1999
2000 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2001 {
2002 RADV_FROM_HANDLE(radv_device, device, _device);
2003 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2004
2005 if (fence->signalled)
2006 return VK_SUCCESS;
2007 if (!fence->submitted)
2008 return VK_NOT_READY;
2009
2010 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2011 return VK_NOT_READY;
2012
2013 return VK_SUCCESS;
2014 }
2015
2016
2017 /* Queue semaphore functions */
2018
2019 VkResult radv_CreateSemaphore(
2020 VkDevice _device,
2021 const VkSemaphoreCreateInfo* pCreateInfo,
2022 const VkAllocationCallbacks* pAllocator,
2023 VkSemaphore* pSemaphore)
2024 {
2025 RADV_FROM_HANDLE(radv_device, device, _device);
2026 struct radeon_winsys_sem *sem;
2027
2028 sem = device->ws->create_sem(device->ws);
2029 if (!sem)
2030 return VK_ERROR_OUT_OF_HOST_MEMORY;
2031
2032 *pSemaphore = (VkSemaphore)sem;
2033 return VK_SUCCESS;
2034 }
2035
2036 void radv_DestroySemaphore(
2037 VkDevice _device,
2038 VkSemaphore _semaphore,
2039 const VkAllocationCallbacks* pAllocator)
2040 {
2041 RADV_FROM_HANDLE(radv_device, device, _device);
2042 struct radeon_winsys_sem *sem;
2043 if (!_semaphore)
2044 return;
2045
2046 sem = (struct radeon_winsys_sem *)_semaphore;
2047 device->ws->destroy_sem(sem);
2048 }
2049
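/* Events are backed by an 8-byte GTT buffer that both the CPU and the GPU
 * can write: 1 in the mapped word means set, 0 means reset. */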
2050 VkResult radv_CreateEvent(
2051 VkDevice _device,
2052 const VkEventCreateInfo* pCreateInfo,
2053 const VkAllocationCallbacks* pAllocator,
2054 VkEvent* pEvent)
2055 {
2056 RADV_FROM_HANDLE(radv_device, device, _device);
2057 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2058 sizeof(*event), 8,
2059 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2060
2061 if (!event)
2062 return VK_ERROR_OUT_OF_HOST_MEMORY;
2063
2064 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2065 RADEON_DOMAIN_GTT,
2066 RADEON_FLAG_CPU_ACCESS);
2067 if (!event->bo) {
2068 vk_free2(&device->alloc, pAllocator, event);
2069 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2070 }
2071
2072 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2073
2074 *pEvent = radv_event_to_handle(event);
2075
2076 return VK_SUCCESS;
2077 }
2078
2079 void radv_DestroyEvent(
2080 VkDevice _device,
2081 VkEvent _event,
2082 const VkAllocationCallbacks* pAllocator)
2083 {
2084 RADV_FROM_HANDLE(radv_device, device, _device);
2085 RADV_FROM_HANDLE(radv_event, event, _event);
2086
2087 if (!event)
2088 return;
2089 device->ws->buffer_destroy(event->bo);
2090 vk_free2(&device->alloc, pAllocator, event);
2091 }
2092
2093 VkResult radv_GetEventStatus(
2094 VkDevice _device,
2095 VkEvent _event)
2096 {
2097 RADV_FROM_HANDLE(radv_event, event, _event);
2098
2099 if (*event->map == 1)
2100 return VK_EVENT_SET;
2101 return VK_EVENT_RESET;
2102 }
2103
2104 VkResult radv_SetEvent(
2105 VkDevice _device,
2106 VkEvent _event)
2107 {
2108 RADV_FROM_HANDLE(radv_event, event, _event);
2109 *event->map = 1;
2110
2111 return VK_SUCCESS;
2112 }
2113
2114 VkResult radv_ResetEvent(
2115 VkDevice _device,
2116 VkEvent _event)
2117 {
2118 RADV_FROM_HANDLE(radv_event, event, _event);
2119 *event->map = 0;
2120
2121 return VK_SUCCESS;
2122 }
2123
2124 VkResult radv_CreateBuffer(
2125 VkDevice _device,
2126 const VkBufferCreateInfo* pCreateInfo,
2127 const VkAllocationCallbacks* pAllocator,
2128 VkBuffer* pBuffer)
2129 {
2130 RADV_FROM_HANDLE(radv_device, device, _device);
2131 struct radv_buffer *buffer;
2132
2133 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2134
2135 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2136 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2137 if (buffer == NULL)
2138 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2139
2140 buffer->size = pCreateInfo->size;
2141 buffer->usage = pCreateInfo->usage;
2142 buffer->bo = NULL;
2143 buffer->offset = 0;
2144
2145 *pBuffer = radv_buffer_to_handle(buffer);
2146
2147 return VK_SUCCESS;
2148 }
2149
2150 void radv_DestroyBuffer(
2151 VkDevice _device,
2152 VkBuffer _buffer,
2153 const VkAllocationCallbacks* pAllocator)
2154 {
2155 RADV_FROM_HANDLE(radv_device, device, _device);
2156 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2157
2158 if (!buffer)
2159 return;
2160
2161 vk_free2(&device->alloc, pAllocator, buffer);
2162 }
2163
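/* Return the hardware tile mode array index for a mip level, using the
 * separate stencil tiling table when requested. */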
2164 static inline unsigned
2165 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2166 {
2167 if (stencil)
2168 return image->surface.stencil_tiling_index[level];
2169 else
2170 return image->surface.tiling_index[level];
2171 }
2172
2173 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2174 {
2175 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2176 }
2177
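/* Fill the CB_* register state used to render to this image view. All
 * base addresses (color, CMASK, FMASK, DCC) are programmed in units of
 * 256 bytes, hence the >> 8 shifts. */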
2178 static void
2179 radv_initialise_color_surface(struct radv_device *device,
2180 struct radv_color_buffer_info *cb,
2181 struct radv_image_view *iview)
2182 {
2183 const struct vk_format_description *desc;
2184 unsigned ntype, format, swap, endian;
2185 unsigned blend_clamp = 0, blend_bypass = 0;
2186 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2187 uint64_t va;
2188 const struct radeon_surf *surf = &iview->image->surface;
2189 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2190
2191 desc = vk_format_description(iview->vk_format);
2192
2193 memset(cb, 0, sizeof(*cb));
2194
2195 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2196 va += level_info->offset;
2197 cb->cb_color_base = va >> 8;
2198
2199 /* CMASK variables */
2200 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2201 va += iview->image->cmask.offset;
2202 cb->cb_color_cmask = va >> 8;
2203 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2204
2205 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2206 va += iview->image->dcc_offset;
2207 cb->cb_dcc_base = va >> 8;
2208
2209 uint32_t max_slice = radv_surface_layer_count(iview);
2210 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2211 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2212
2213 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2214 pitch_tile_max = level_info->nblk_x / 8 - 1;
2215 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2216 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2217
2218 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2219 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2220
2221 /* Intensity is implemented as Red, so treat it that way. */
2222 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2223 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2224
2225 if (iview->image->samples > 1) {
2226 unsigned log_samples = util_logbase2(iview->image->samples);
2227
2228 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2229 S_028C74_NUM_FRAGMENTS(log_samples);
2230 }
2231
2232 if (iview->image->fmask.size) {
2233 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2234 if (device->physical_device->rad_info.chip_class >= CIK)
2235 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2236 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2237 cb->cb_color_fmask = va >> 8;
2238 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2239 } else {
2240 /* This must be set for fast clear to work without FMASK. */
2241 if (device->physical_device->rad_info.chip_class >= CIK)
2242 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2243 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2244 cb->cb_color_fmask = cb->cb_color_base;
2245 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2246 }
2247
2248 ntype = radv_translate_color_numformat(iview->vk_format,
2249 desc,
2250 vk_format_get_first_non_void_channel(iview->vk_format));
2251 format = radv_translate_colorformat(iview->vk_format);
2252 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2253 radv_finishme("Illegal color\n");
2254 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2255 endian = radv_colorformat_endian_swap(format);
2256
2257 /* blend clamp should be set for all NORM/SRGB types */
2258 if (ntype == V_028C70_NUMBER_UNORM ||
2259 ntype == V_028C70_NUMBER_SNORM ||
2260 ntype == V_028C70_NUMBER_SRGB)
2261 blend_clamp = 1;
2262
2263 /* set blend bypass according to docs if SINT/UINT or
2264 * 8/24 COLOR variants */
2265 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2266 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2267 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2268 blend_clamp = 0;
2269 blend_bypass = 1;
2270 }
2271 #if 0
2272 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2273 (format == V_028C70_COLOR_8 ||
2274 format == V_028C70_COLOR_8_8 ||
2275 format == V_028C70_COLOR_8_8_8_8))
2276 ->color_is_int8 = true;
2277 #endif
2278 cb->cb_color_info = S_028C70_FORMAT(format) |
2279 S_028C70_COMP_SWAP(swap) |
2280 S_028C70_BLEND_CLAMP(blend_clamp) |
2281 S_028C70_BLEND_BYPASS(blend_bypass) |
2282 S_028C70_SIMPLE_FLOAT(1) |
2283 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2284 ntype != V_028C70_NUMBER_SNORM &&
2285 ntype != V_028C70_NUMBER_SRGB &&
2286 format != V_028C70_COLOR_8_24 &&
2287 format != V_028C70_COLOR_24_8) |
2288 S_028C70_NUMBER_TYPE(ntype) |
2289 S_028C70_ENDIAN(endian);
2290 if (iview->image->samples > 1 &&
2291 iview->image->fmask.size)
2292 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2293
2294 if (iview->image->cmask.size &&
2295 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2296 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2297
2298 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2299 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2300
2301 if (device->physical_device->rad_info.chip_class >= VI) {
2302 unsigned max_uncompressed_block_size = 2;
2303 if (iview->image->samples > 1) {
2304 if (iview->image->surface.bpe == 1)
2305 max_uncompressed_block_size = 0;
2306 else if (iview->image->surface.bpe == 2)
2307 max_uncompressed_block_size = 1;
2308 }
2309
2310 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2311 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2312 }
2313
2314 /* This must be set for fast clear to work without FMASK. */
2315 if (!iview->image->fmask.size &&
2316 device->physical_device->rad_info.chip_class == SI) {
2317 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2318 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2319 }
2320 }
2321
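/* Fill the DB_* register state for a depth/stencil view: the polygon
 * offset scale matching the format's depth precision, the Z/stencil base
 * addresses, tiling parameters, and HTILE setup for mip level 0. */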
2322 static void
2323 radv_initialise_ds_surface(struct radv_device *device,
2324 struct radv_ds_buffer_info *ds,
2325 struct radv_image_view *iview)
2326 {
2327 unsigned level = iview->base_mip;
2328 unsigned format;
2329 uint64_t va, s_offs, z_offs;
2330 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2331 memset(ds, 0, sizeof(*ds));
2332 switch (iview->vk_format) {
2333 case VK_FORMAT_D24_UNORM_S8_UINT:
2334 case VK_FORMAT_X8_D24_UNORM_PACK32:
2335 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2336 ds->offset_scale = 2.0f;
2337 break;
2338 case VK_FORMAT_D16_UNORM:
2339 case VK_FORMAT_D16_UNORM_S8_UINT:
2340 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2341 ds->offset_scale = 4.0f;
2342 break;
2343 case VK_FORMAT_D32_SFLOAT:
2344 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2345 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2346 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2347 ds->offset_scale = 1.0f;
2348 break;
2349 default:
2350 break;
2351 }
2352
2353 format = radv_translate_dbformat(iview->vk_format);
2354 if (format == V_028040_Z_INVALID) {
2355 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2356 }
2357
2358 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2359 s_offs = z_offs = va;
2360 z_offs += iview->image->surface.level[level].offset;
2361 s_offs += iview->image->surface.stencil_level[level].offset;
2362
2363 uint32_t max_slice = radv_surface_layer_count(iview);
2364 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2365 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2366 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2367 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2368
2369 if (iview->image->samples > 1)
2370 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2371
2372 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2373 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2374 else
2375 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2376
2377 if (device->physical_device->rad_info.chip_class >= CIK) {
2378 struct radeon_info *info = &device->physical_device->rad_info;
2379 unsigned tiling_index = iview->image->surface.tiling_index[level];
2380 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2381 unsigned macro_index = iview->image->surface.macro_tile_index;
2382 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2383 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2384 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2385
2386 ds->db_depth_info |=
2387 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2388 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2389 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2390 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2391 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2392 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2393 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2394 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2395 } else {
2396 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2397 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2398 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2399 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2400 }
2401
2402 if (iview->image->htile.size && !level) {
2403 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2404 S_028040_ALLOW_EXPCLEAR(1);
2405
2406 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2407 /* Workaround: For a not yet understood reason, the
2408 * combination of MSAA, fast stencil clear and stencil
2409 * decompress messes with subsequent stencil buffer
2410 * uses. Problem was reproduced on Verde, Bonaire,
2411 * Tonga, and Carrizo.
2412 *
2413 * Disabling EXPCLEAR works around the problem.
2414 *
2415 * Check piglit's arb_texture_multisample-stencil-clear
2416 * test if you want to try changing this.
2417 */
2418 if (iview->image->samples <= 1)
2419 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2420 } else
2421 /* Use all of the htile_buffer for depth if there's no stencil. */
2422 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2423
2424 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2425 iview->image->htile.offset;
2426 ds->db_htile_data_base = va >> 8;
2427 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2428 } else {
2429 ds->db_htile_data_base = 0;
2430 ds->db_htile_surface = 0;
2431 }
2432
2433 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2434 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2435
2436 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2437 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2438 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2439 }
2440
2441 VkResult radv_CreateFramebuffer(
2442 VkDevice _device,
2443 const VkFramebufferCreateInfo* pCreateInfo,
2444 const VkAllocationCallbacks* pAllocator,
2445 VkFramebuffer* pFramebuffer)
2446 {
2447 RADV_FROM_HANDLE(radv_device, device, _device);
2448 struct radv_framebuffer *framebuffer;
2449
2450 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2451
2452 size_t size = sizeof(*framebuffer) +
2453 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2454 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2455 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2456 if (framebuffer == NULL)
2457 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2458
2459 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2460 framebuffer->width = pCreateInfo->width;
2461 framebuffer->height = pCreateInfo->height;
2462 framebuffer->layers = pCreateInfo->layers;
2463 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2464 VkImageView _iview = pCreateInfo->pAttachments[i];
2465 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2466 framebuffer->attachments[i].attachment = iview;
2467 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2468 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2469 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2470 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2471 }
2472 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2473 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2474 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2475 }
2476
2477 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2478 return VK_SUCCESS;
2479 }
2480
2481 void radv_DestroyFramebuffer(
2482 VkDevice _device,
2483 VkFramebuffer _fb,
2484 const VkAllocationCallbacks* pAllocator)
2485 {
2486 RADV_FROM_HANDLE(radv_device, device, _device);
2487 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2488
2489 if (!fb)
2490 return;
2491 vk_free2(&device->alloc, pAllocator, fb);
2492 }
2493
2494 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2495 {
2496 switch (address_mode) {
2497 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2498 return V_008F30_SQ_TEX_WRAP;
2499 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2500 return V_008F30_SQ_TEX_MIRROR;
2501 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2502 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2503 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2504 return V_008F30_SQ_TEX_CLAMP_BORDER;
2505 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2506 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2507 default:
2508 unreachable("illegal tex wrap mode");
2509 break;
2510 }
2511 }
2512
2513 static unsigned
2514 radv_tex_compare(VkCompareOp op)
2515 {
2516 switch (op) {
2517 case VK_COMPARE_OP_NEVER:
2518 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2519 case VK_COMPARE_OP_LESS:
2520 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2521 case VK_COMPARE_OP_EQUAL:
2522 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2523 case VK_COMPARE_OP_LESS_OR_EQUAL:
2524 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2525 case VK_COMPARE_OP_GREATER:
2526 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2527 case VK_COMPARE_OP_NOT_EQUAL:
2528 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2529 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2530 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2531 case VK_COMPARE_OP_ALWAYS:
2532 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2533 default:
2534 unreachable("illegal compare mode");
2535 break;
2536 }
2537 }
2538
2539 static unsigned
2540 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2541 {
2542 switch (filter) {
2543 case VK_FILTER_NEAREST:
2544 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2545 V_008F38_SQ_TEX_XY_FILTER_POINT);
2546 case VK_FILTER_LINEAR:
2547 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2548 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2549 case VK_FILTER_CUBIC_IMG:
2550 default:
2551 fprintf(stderr, "illegal texture filter\n");
2552 return 0;
2553 }
2554 }
2555
2556 static unsigned
2557 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2558 {
2559 switch (mode) {
2560 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2561 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2562 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2563 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2564 default:
2565 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2566 }
2567 }
2568
2569 static unsigned
2570 radv_tex_bordercolor(VkBorderColor bcolor)
2571 {
2572 switch (bcolor) {
2573 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2574 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2575 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2576 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2577 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2578 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2579 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2580 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2581 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2582 default:
2583 break;
2584 }
2585 return 0;
2586 }
2587
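/* Map a maxAnisotropy value onto the hardware's 0-4 ratio encoding,
 * roughly log2: 1x -> 0, 2-3x -> 1, 4-7x -> 2, 8-15x -> 3, 16x -> 4. */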
2588 static unsigned
2589 radv_tex_aniso_filter(unsigned filter)
2590 {
2591 if (filter < 2)
2592 return 0;
2593 if (filter < 4)
2594 return 1;
2595 if (filter < 8)
2596 return 2;
2597 if (filter < 16)
2598 return 3;
2599 return 4;
2600 }
2601
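/* Translate a VkSamplerCreateInfo into the four SQ_IMG_SAMP dwords that
 * shaders sample with; LOD and LOD-bias values are converted to fixed
 * point with 8 fractional bits. */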
2602 static void
2603 radv_init_sampler(struct radv_device *device,
2604 struct radv_sampler *sampler,
2605 const VkSamplerCreateInfo *pCreateInfo)
2606 {
2607 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2608 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2609 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2610 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2611
2612 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2613 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2614 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2615 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2616 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2617 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2618 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2619 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2620 S_008F30_DISABLE_CUBE_WRAP(0) |
2621 S_008F30_COMPAT_MODE(is_vi));
2622 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2623 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2624 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2625 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2626 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2627 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2628 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2629 S_008F38_MIP_POINT_PRECLAMP(1) |
2630 S_008F38_DISABLE_LSB_CEIL(1) |
2631 S_008F38_FILTER_PREC_FIX(1) |
2632 S_008F38_ANISO_OVERRIDE(is_vi));
2633 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2634 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2635 }
2636
2637 VkResult radv_CreateSampler(
2638 VkDevice _device,
2639 const VkSamplerCreateInfo* pCreateInfo,
2640 const VkAllocationCallbacks* pAllocator,
2641 VkSampler* pSampler)
2642 {
2643 RADV_FROM_HANDLE(radv_device, device, _device);
2644 struct radv_sampler *sampler;
2645
2646 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2647
2648 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2649 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2650 if (!sampler)
2651 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2652
2653 radv_init_sampler(device, sampler, pCreateInfo);
2654 *pSampler = radv_sampler_to_handle(sampler);
2655
2656 return VK_SUCCESS;
2657 }
2658
2659 void radv_DestroySampler(
2660 VkDevice _device,
2661 VkSampler _sampler,
2662 const VkAllocationCallbacks* pAllocator)
2663 {
2664 RADV_FROM_HANDLE(radv_device, device, _device);
2665 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2666
2667 if (!sampler)
2668 return;
2669 vk_free2(&device->alloc, pAllocator, sampler);
2670 }
2671
2672
2673 /* vk_icd.h does not declare this function, so we declare it here to
2674 * suppress Wmissing-prototypes.
2675 */
2676 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2677 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2678
2679 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2680 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2681 {
2682 /* For the full details on loader interface versioning, see
2683 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2684 * What follows is a condensed summary, to help you navigate the large and
2685 * confusing official doc.
2686 *
2687 * - Loader interface v0 is incompatible with later versions. We don't
2688 * support it.
2689 *
2690 * - In loader interface v1:
2691 * - The first ICD entrypoint called by the loader is
2692 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2693 * entrypoint.
2694 * - The ICD must statically expose no other Vulkan symbol unless it is
2695 * linked with -Bsymbolic.
2696 * - Each dispatchable Vulkan handle created by the ICD must be
2697 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2698 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2699 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2700 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2701 * such loader-managed surfaces.
2702 *
2703 * - Loader interface v2 differs from v1 in:
2704 * - The first ICD entrypoint called by the loader is
2705 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2706 * statically expose this entrypoint.
2707 *
2708 * - Loader interface v3 differs from v2 in:
2709 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2710 * vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
2711 * because the loader no longer does so.
2712 */
2713 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2714 return VK_SUCCESS;
2715 }