radv: Set driver version to mesa version
src/amd/vulkan/radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "util/vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

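/*
 * The cache UUID is what applications key their pipeline caches on (it is
 * copied into pipelineCacheUUID below).  Its layout, as written by the
 * memcpy()s in this function, is:
 *
 *   bytes  0-3    Mesa build timestamp
 *   bytes  4-7    LLVM build timestamp
 *   bytes  8-9    radeon_family id
 *   bytes 10-15   the string "radv" (NUL padded)
 *
 * so a cache is invalidated whenever the compiler stack or target GPU
 * changes.
 */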
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
        uint32_t mesa_timestamp, llvm_timestamp;
        uint16_t f = family;
        memset(uuid, 0, VK_UUID_SIZE);
        if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
            !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
                return -1;

        memcpy(uuid, &mesa_timestamp, 4);
        memcpy((char*)uuid + 4, &llvm_timestamp, 4);
        memcpy((char*)uuid + 8, &f, 2);
        snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
        return 0;
}

static const VkExtensionProperties instance_extensions[] = {
        {
                .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
                .specVersion = 25,
        },
#ifdef VK_USE_PLATFORM_XCB_KHR
        {
                .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
                .specVersion = 6,
        },
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
        {
                .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
                .specVersion = 6,
        },
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
        {
                .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
                .specVersion = 5,
        },
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
        {
                .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
                .specVersion = 68,
        },
        {
                .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
                .specVersion = 1,
        },
};

static VkResult
radv_extensions_register(struct radv_instance *instance,
                         struct radv_extensions *extensions,
                         const VkExtensionProperties *new_ext,
                         uint32_t num_ext)
{
        size_t new_size;
        VkExtensionProperties *new_ptr;

        assert(new_ext && num_ext > 0);

        if (!new_ext)
                return VK_ERROR_INITIALIZATION_FAILED;

        new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
        new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
                             new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

        /* Old array continues to be valid, update nothing */
        if (!new_ptr)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        memcpy(&new_ptr[extensions->num_ext], new_ext,
               num_ext * sizeof(VkExtensionProperties));
        extensions->ext_array = new_ptr;
        extensions->num_ext += num_ext;

        return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
                       struct radv_extensions *extensions)
{
        assert(extensions);

        if (!extensions)
                radv_loge("Attempted to free invalid extension struct\n");

        if (extensions->ext_array)
                vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
                     size_t num_ext,
                     const char *name)
{
        assert(extensions && name);

        for (uint32_t i = 0; i < num_ext; i++) {
                if (strcmp(name, extensions[i].extensionName) == 0)
                        return true;
        }

        return false;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          const char *path)
{
        VkResult result;
        drmVersionPtr version;
        int fd;

        fd = open(path, O_RDWR | O_CLOEXEC);
        if (fd < 0)
                return VK_ERROR_INCOMPATIBLE_DRIVER;

        version = drmGetVersion(fd);
        if (!version) {
                close(fd);
                return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "failed to get version %s: %m", path);
        }

        if (strcmp(version->name, "amdgpu")) {
                drmFreeVersion(version);
                close(fd);
                return VK_ERROR_INCOMPATIBLE_DRIVER;
        }
        drmFreeVersion(version);

        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = instance;
        assert(strlen(path) < ARRAY_SIZE(device->path));
        strncpy(device->path, path, ARRAY_SIZE(device->path));

        device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
        if (!device->ws) {
                result = VK_ERROR_INCOMPATIBLE_DRIVER;
                goto fail;
        }

        device->local_fd = fd;
        device->ws->query_info(device->ws, &device->rad_info);
        result = radv_init_wsi(device);
        if (result != VK_SUCCESS) {
                device->ws->destroy(device->ws);
                goto fail;
        }

        if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
                radv_finish_wsi(device);
                device->ws->destroy(device->ws);
                result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                                   "cannot generate UUID");
                goto fail;
        }

        result = radv_extensions_register(instance,
                                          &device->extensions,
                                          common_device_extensions,
                                          ARRAY_SIZE(common_device_extensions));
        if (result != VK_SUCCESS)
                goto fail;

        fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
        device->name = device->rad_info.name;

        return VK_SUCCESS;

fail:
        close(fd);
        return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
        radv_extensions_finish(device->instance, &device->extensions);
        radv_finish_wsi(device);
        device->ws->destroy(device->ws);
        close(device->local_fd);
}


static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
        return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
        return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
        free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
        .pUserData = NULL,
        .pfnAllocation = default_alloc_func,
        .pfnReallocation = default_realloc_func,
        .pfnFree = default_free_func,
};

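/* Flag names understood in the RADV_DEBUG environment variable, which
 * parse_debug_string() reads as a comma-separated list (see
 * radv_CreateInstance below). */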
static const struct debug_control radv_debug_options[] = {
        {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
        {"nodcc", RADV_DEBUG_NO_DCC},
        {"shaders", RADV_DEBUG_DUMP_SHADERS},
        {"nocache", RADV_DEBUG_NO_CACHE},
        {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
        {"nohiz", RADV_DEBUG_NO_HIZ},
        {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
        {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
        {"allbos", RADV_DEBUG_ALL_BOS},
        {"noibs", RADV_DEBUG_NO_IBS},
        {NULL, 0}
};

VkResult radv_CreateInstance(
        const VkInstanceCreateInfo* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkInstance* pInstance)
{
        struct radv_instance *instance;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

        uint32_t client_version;
        if (pCreateInfo->pApplicationInfo &&
            pCreateInfo->pApplicationInfo->apiVersion != 0) {
                client_version = pCreateInfo->pApplicationInfo->apiVersion;
        } else {
                client_version = VK_MAKE_VERSION(1, 0, 0);
        }

        if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
            client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
                return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "Client requested version %d.%d.%d",
                                 VK_VERSION_MAJOR(client_version),
                                 VK_VERSION_MINOR(client_version),
                                 VK_VERSION_PATCH(client_version));
        }

        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                if (!is_extension_enabled(instance_extensions,
                                          ARRAY_SIZE(instance_extensions),
                                          pCreateInfo->ppEnabledExtensionNames[i]))
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
        }

        instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                             VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
        if (!instance)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        memset(instance, 0, sizeof(*instance));

        instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

        if (pAllocator)
                instance->alloc = *pAllocator;
        else
                instance->alloc = default_alloc;

        instance->apiVersion = client_version;
        instance->physicalDeviceCount = -1;

        _mesa_locale_init();

        VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

        instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                                   radv_debug_options);

        *pInstance = radv_instance_to_handle(instance);

        return VK_SUCCESS;
}

void radv_DestroyInstance(
        VkInstance _instance,
        const VkAllocationCallbacks* pAllocator)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);

        if (!instance)
                return;

        for (int i = 0; i < instance->physicalDeviceCount; ++i) {
                radv_physical_device_finish(instance->physicalDevices + i);
        }

        VG(VALGRIND_DESTROY_MEMPOOL(instance));

        _mesa_locale_fini();

        vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
        VkInstance _instance,
        uint32_t* pPhysicalDeviceCount,
        VkPhysicalDevice* pPhysicalDevices)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        VkResult result;

        if (instance->physicalDeviceCount < 0) {
                char path[20];
                instance->physicalDeviceCount = 0;
                for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
                        snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
                        result = radv_physical_device_init(instance->physicalDevices +
                                                           instance->physicalDeviceCount,
                                                           instance, path);
                        if (result == VK_SUCCESS)
                                ++instance->physicalDeviceCount;
                        else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
                                return result;
                }
        }

        if (!pPhysicalDevices) {
                *pPhysicalDeviceCount = instance->physicalDeviceCount;
        } else {
                *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
                for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
                        pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
        }

        return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
                                                                     : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceFeatures* pFeatures)
{
        // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

        memset(pFeatures, 0, sizeof(*pFeatures));

        *pFeatures = (VkPhysicalDeviceFeatures) {
                .robustBufferAccess = true,
                .fullDrawIndexUint32 = true,
                .imageCubeArray = true,
                .independentBlend = true,
                .geometryShader = true,
                .tessellationShader = false,
                .sampleRateShading = false,
                .dualSrcBlend = true,
                .logicOp = true,
                .multiDrawIndirect = true,
                .drawIndirectFirstInstance = true,
                .depthClamp = true,
                .depthBiasClamp = true,
                .fillModeNonSolid = true,
                .depthBounds = true,
                .wideLines = true,
                .largePoints = true,
                .alphaToOne = true,
                .multiViewport = true,
                .samplerAnisotropy = true,
                .textureCompressionETC2 = false,
                .textureCompressionASTC_LDR = false,
                .textureCompressionBC = true,
                .occlusionQueryPrecise = true,
                .pipelineStatisticsQuery = false,
                .vertexPipelineStoresAndAtomics = true,
                .fragmentStoresAndAtomics = true,
                .shaderTessellationAndGeometryPointSize = true,
                .shaderImageGatherExtended = true,
                .shaderStorageImageExtendedFormats = true,
                .shaderStorageImageMultisample = false,
                .shaderUniformBufferArrayDynamicIndexing = true,
                .shaderSampledImageArrayDynamicIndexing = true,
                .shaderStorageBufferArrayDynamicIndexing = true,
                .shaderStorageImageArrayDynamicIndexing = true,
                .shaderStorageImageReadWithoutFormat = true,
                .shaderStorageImageWriteWithoutFormat = true,
                .shaderClipDistance = true,
                .shaderCullDistance = true,
                .shaderFloat64 = true,
                .shaderInt64 = false,
                .shaderInt16 = false,
                .variableMultisampleRate = false,
                .inheritedQueries = false,
        };
}

void radv_GetPhysicalDeviceFeatures2KHR(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceFeatures2KHR *pFeatures)
{
        return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

static uint32_t radv_get_driver_version(void)
{
        const char *minor_string = strchr(VERSION, '.');
        const char *patch_string = minor_string ? strchr(minor_string + 1, '.') : NULL;
        int major = atoi(VERSION);
        int minor = minor_string ? atoi(minor_string + 1) : 0;
        int patch = patch_string ? atoi(patch_string + 1) : 0;
        if (strstr(VERSION, "devel")) {
                if (patch == 0) {
                        patch = 99;
                        if (minor == 0) {
                                minor = 99;
                                --major;
                        } else
                                --minor;
                } else
                        --patch;
        }
        uint32_t version = VK_MAKE_VERSION(major, minor, patch);
        return version;
}
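
/* Example: VERSION "17.1.0-devel" parses as 17.1.0; because it is a devel
 * build, the result is stepped back one notch (patch 0 becomes minor - 1,
 * patch 99) and reported as 17.0.99, so that the eventual 17.1.0 release
 * always compares as newer. */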

void radv_GetPhysicalDeviceProperties(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceProperties* pProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        VkSampleCountFlags sample_counts = 0xf;
        VkPhysicalDeviceLimits limits = {
                .maxImageDimension1D = (1 << 14),
                .maxImageDimension2D = (1 << 14),
                .maxImageDimension3D = (1 << 11),
                .maxImageDimensionCube = (1 << 14),
                .maxImageArrayLayers = (1 << 11),
                .maxTexelBufferElements = 128 * 1024 * 1024,
                .maxUniformBufferRange = UINT32_MAX,
                .maxStorageBufferRange = UINT32_MAX,
                .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
                .maxMemoryAllocationCount = UINT32_MAX,
                .maxSamplerAllocationCount = 64 * 1024,
                .bufferImageGranularity = 64, /* A cache line */
                .sparseAddressSpaceSize = 0,
                .maxBoundDescriptorSets = MAX_SETS,
                .maxPerStageDescriptorSamplers = 64,
                .maxPerStageDescriptorUniformBuffers = 64,
                .maxPerStageDescriptorStorageBuffers = 64,
                .maxPerStageDescriptorSampledImages = 64,
                .maxPerStageDescriptorStorageImages = 64,
                .maxPerStageDescriptorInputAttachments = 64,
                .maxPerStageResources = 128,
                .maxDescriptorSetSamplers = 256,
                .maxDescriptorSetUniformBuffers = 256,
                .maxDescriptorSetUniformBuffersDynamic = 256,
                .maxDescriptorSetStorageBuffers = 256,
                .maxDescriptorSetStorageBuffersDynamic = 256,
                .maxDescriptorSetSampledImages = 256,
                .maxDescriptorSetStorageImages = 256,
                .maxDescriptorSetInputAttachments = 256,
                .maxVertexInputAttributes = 32,
                .maxVertexInputBindings = 32,
                .maxVertexInputAttributeOffset = 2047,
                .maxVertexInputBindingStride = 2048,
                .maxVertexOutputComponents = 128,
                .maxTessellationGenerationLevel = 0,
                .maxTessellationPatchSize = 0,
                .maxTessellationControlPerVertexInputComponents = 0,
                .maxTessellationControlPerVertexOutputComponents = 0,
                .maxTessellationControlPerPatchOutputComponents = 0,
                .maxTessellationControlTotalOutputComponents = 0,
                .maxTessellationEvaluationInputComponents = 0,
                .maxTessellationEvaluationOutputComponents = 0,
                .maxGeometryShaderInvocations = 32,
                .maxGeometryInputComponents = 64,
                .maxGeometryOutputComponents = 128,
                .maxGeometryOutputVertices = 256,
                .maxGeometryTotalOutputComponents = 1024,
                .maxFragmentInputComponents = 128,
                .maxFragmentOutputAttachments = 8,
                .maxFragmentDualSrcAttachments = 1,
                .maxFragmentCombinedOutputResources = 8,
                .maxComputeSharedMemorySize = 32768,
                .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
                .maxComputeWorkGroupInvocations = 2048,
                .maxComputeWorkGroupSize = {
                        2048,
                        2048,
                        2048
                },
                .subPixelPrecisionBits = 4 /* FIXME */,
                .subTexelPrecisionBits = 4 /* FIXME */,
                .mipmapPrecisionBits = 4 /* FIXME */,
                .maxDrawIndexedIndexValue = UINT32_MAX,
                .maxDrawIndirectCount = UINT32_MAX,
                .maxSamplerLodBias = 16,
                .maxSamplerAnisotropy = 16,
                .maxViewports = MAX_VIEWPORTS,
                .maxViewportDimensions = { (1 << 14), (1 << 14) },
                .viewportBoundsRange = { INT16_MIN, INT16_MAX },
                .viewportSubPixelBits = 13, /* We take a float? */
                .minMemoryMapAlignment = 4096, /* A page */
                .minTexelBufferOffsetAlignment = 1,
                .minUniformBufferOffsetAlignment = 4,
                .minStorageBufferOffsetAlignment = 4,
                .minTexelOffset = -32,
                .maxTexelOffset = 31,
                .minTexelGatherOffset = -32,
                .maxTexelGatherOffset = 31,
                .minInterpolationOffset = -2,
                .maxInterpolationOffset = 2,
                .subPixelInterpolationOffsetBits = 8,
                .maxFramebufferWidth = (1 << 14),
                .maxFramebufferHeight = (1 << 14),
                .maxFramebufferLayers = (1 << 10),
                .framebufferColorSampleCounts = sample_counts,
                .framebufferDepthSampleCounts = sample_counts,
                .framebufferStencilSampleCounts = sample_counts,
                .framebufferNoAttachmentsSampleCounts = sample_counts,
                .maxColorAttachments = MAX_RTS,
                .sampledImageColorSampleCounts = sample_counts,
                .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
                .sampledImageDepthSampleCounts = sample_counts,
                .sampledImageStencilSampleCounts = sample_counts,
                .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
                .maxSampleMaskWords = 1,
                .timestampComputeAndGraphics = false,
                .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
                .maxClipDistances = 8,
                .maxCullDistances = 8,
                .maxCombinedClipAndCullDistances = 8,
                .discreteQueuePriorities = 1,
                .pointSizeRange = { 0.125, 255.875 },
                .lineWidthRange = { 0.0, 7.9921875 },
                .pointSizeGranularity = (1.0 / 8.0),
                .lineWidthGranularity = (1.0 / 128.0),
                .strictLines = false, /* FINISHME */
                .standardSampleLocations = true,
                .optimalBufferCopyOffsetAlignment = 128,
                .optimalBufferCopyRowPitchAlignment = 128,
                .nonCoherentAtomSize = 64,
        };

        *pProperties = (VkPhysicalDeviceProperties) {
                .apiVersion = VK_MAKE_VERSION(1, 0, 42),
                .driverVersion = radv_get_driver_version(),
                .vendorID = 0x1002,
                .deviceID = pdevice->rad_info.pci_id,
                .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
                .limits = limits,
                .sparseProperties = {0}, /* Sparse resources are not supported yet. */
        };

        strcpy(pProperties->deviceName, pdevice->name);
        memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceProperties2KHR *pProperties)
{
        return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
}

static void radv_get_physical_device_queue_family_properties(
        struct radv_physical_device* pdevice,
        uint32_t* pCount,
        VkQueueFamilyProperties** pQueueFamilyProperties)
{
        int num_queue_families = 1;
        int idx;
        if (pdevice->rad_info.compute_rings > 0 &&
            pdevice->rad_info.chip_class >= CIK &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
                num_queue_families++;

        if (pQueueFamilyProperties == NULL) {
                *pCount = num_queue_families;
                return;
        }

        if (!*pCount)
                return;

        idx = 0;
        if (*pCount >= 1) {
                *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                        .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                                      VK_QUEUE_COMPUTE_BIT |
                                      VK_QUEUE_TRANSFER_BIT,
                        .queueCount = 1,
                        .timestampValidBits = 64,
                        .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
                };
                idx++;
        }

        if (pdevice->rad_info.compute_rings > 0 &&
            pdevice->rad_info.chip_class >= CIK &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
                if (*pCount > idx) {
                        *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                                .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
                                .queueCount = pdevice->rad_info.compute_rings,
                                .timestampValidBits = 64,
                                .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
                        };
                        idx++;
                }
        }
        *pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
        VkPhysicalDevice physicalDevice,
        uint32_t* pCount,
        VkQueueFamilyProperties* pQueueFamilyProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        if (!pQueueFamilyProperties) {
                radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
                return;
        }
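        /* The helper currently fills at most two families (GFX and, on CIK+
         * parts with compute rings, a separate compute family); three slots
         * match the assert below and let this entry point and the 2KHR
         * variant share the same fill helper. */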
        VkQueueFamilyProperties *properties[] = {
                pQueueFamilyProperties + 0,
                pQueueFamilyProperties + 1,
                pQueueFamilyProperties + 2,
        };
        radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
        assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
        VkPhysicalDevice physicalDevice,
        uint32_t* pCount,
        VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        if (!pQueueFamilyProperties) {
                radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
                return;
        }
        VkQueueFamilyProperties *properties[] = {
                &pQueueFamilyProperties[0].queueFamilyProperties,
                &pQueueFamilyProperties[1].queueFamilyProperties,
                &pQueueFamilyProperties[2].queueFamilyProperties,
        };
        radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
        assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

        STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

        pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                .heapIndex = RADV_MEM_HEAP_VRAM,
        };
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                .heapIndex = RADV_MEM_HEAP_GTT,
        };
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
        };
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                .heapIndex = RADV_MEM_HEAP_GTT,
        };

        STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

        pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
        pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
                .size = physical_device->rad_info.vram_size -
                        physical_device->rad_info.visible_vram_size,
                .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
        };
        pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
                .size = physical_device->rad_info.visible_vram_size,
                .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
        };
        pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
                .size = physical_device->rad_info.gart_size,
                .flags = 0,
        };
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
        VkPhysicalDevice physicalDevice,
        VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
        return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                                      &pMemoryProperties->memoryProperties);
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                int queue_family_index, int idx)
{
        queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        queue->device = device;
        queue->queue_family_index = queue_family_index;
        queue->queue_idx = idx;

        queue->hw_ctx = device->ws->ctx_create(device->ws);
        if (!queue->hw_ctx)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
        if (queue->hw_ctx)
                queue->device->ws->ctx_destroy(queue->hw_ctx);

        if (queue->initial_preamble_cs)
                queue->device->ws->cs_destroy(queue->initial_preamble_cs);
        if (queue->continue_preamble_cs)
                queue->device->ws->cs_destroy(queue->continue_preamble_cs);
        if (queue->descriptor_bo)
                queue->device->ws->buffer_destroy(queue->descriptor_bo);
        if (queue->scratch_bo)
                queue->device->ws->buffer_destroy(queue->scratch_bo);
        if (queue->esgs_ring_bo)
                queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
        if (queue->gsvs_ring_bo)
                queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
        if (queue->compute_scratch_bo)
                queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
        switch (device->physical_device->rad_info.family) {
        case CHIP_OLAND:
        case CHIP_HAINAN:
        case CHIP_KAVERI:
        case CHIP_KABINI:
        case CHIP_MULLINS:
        case CHIP_ICELAND:
        case CHIP_CARRIZO:
        case CHIP_STONEY:
                device->gs_table_depth = 16;
                return;
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
        case CHIP_VERDE:
        case CHIP_BONAIRE:
        case CHIP_HAWAII:
        case CHIP_TONGA:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
                device->gs_table_depth = 32;
                return;
        default:
                unreachable("unknown GPU");
        }
}

VkResult radv_CreateDevice(
        VkPhysicalDevice physicalDevice,
        const VkDeviceCreateInfo* pCreateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkDevice* pDevice)
{
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
        VkResult result;
        struct radv_device *device;

        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                if (!is_extension_enabled(physical_device->extensions.ext_array,
                                          physical_device->extensions.num_ext,
                                          pCreateInfo->ppEnabledExtensionNames[i]))
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
        }

        device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
                           sizeof(*device), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
        if (!device)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        memset(device, 0, sizeof(*device));

        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = physical_device->instance;
        device->physical_device = physical_device;

        device->debug_flags = device->instance->debug_flags;

        device->ws = physical_device->ws;
        if (pAllocator)
                device->alloc = *pAllocator;
        else
                device->alloc = physical_device->instance->alloc;

        for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
                const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
                uint32_t qfi = queue_create->queueFamilyIndex;

                device->queues[qfi] = vk_alloc(&device->alloc,
                                               queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
                if (!device->queues[qfi]) {
                        result = VK_ERROR_OUT_OF_HOST_MEMORY;
                        goto fail;
                }

                memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

                device->queue_count[qfi] = queue_create->queueCount;

                for (unsigned q = 0; q < queue_create->queueCount; q++) {
                        result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
                        if (result != VK_SUCCESS)
                                goto fail;
                }
        }

#if HAVE_LLVM < 0x0400
        device->llvm_supports_spill = false;
#else
        device->llvm_supports_spill = true;
#endif

        /* The maximum number of scratch waves. Scratch space isn't divided
         * evenly between CUs. The number is only a function of the number of CUs.
         * We can decrease the constant to decrease the scratch buffer size.
         *
         * device->scratch_waves must be >= the maximum possible size of
         * 1 threadgroup, so that the hw doesn't hang from being unable
         * to start any.
         *
         * The recommended value is 4 per CU at most. Higher numbers don't
         * bring much benefit, but they still occupy chip resources (think
         * async compute). I've seen ~2% performance difference between 4 and 32.
         */
        uint32_t max_threads_per_block = 2048;
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
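        /* Worked example: a 16-CU part gets
         * MAX2(32 * 16, 2048 / 64) = 512 scratch waves, so the CU term
         * dominates on any realistic GPU; the second term only guarantees
         * one full 2048-thread (32-wave) threadgroup can always start. */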

        radv_device_init_gs_info(device);

        result = radv_device_init_meta(device);
        if (result != VK_SUCCESS)
                goto fail;

        radv_device_init_msaa(device);

        for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
                device->empty_cs[family] = device->ws->cs_create(device->ws, family);
                switch (family) {
                case RADV_QUEUE_GENERAL:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
                        radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
                        radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
                        break;
                case RADV_QUEUE_COMPUTE:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
                        radeon_emit(device->empty_cs[family], 0);
                        break;
                }
                device->ws->cs_finalize(device->empty_cs[family]);

                device->flush_cs[family] = device->ws->cs_create(device->ws, family);
                switch (family) {
                case RADV_QUEUE_GENERAL:
                case RADV_QUEUE_COMPUTE:
                        si_cs_emit_cache_flush(device->flush_cs[family],
                                               device->physical_device->rad_info.chip_class,
                                               family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
                                               RADV_CMD_FLAG_INV_ICACHE |
                                               RADV_CMD_FLAG_INV_SMEM_L1 |
                                               RADV_CMD_FLAG_INV_VMEM_L1 |
                                               RADV_CMD_FLAG_INV_GLOBAL_L2);
                        break;
                }
                device->ws->cs_finalize(device->flush_cs[family]);
        }

        if (getenv("RADV_TRACE_FILE")) {
                device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
                                                             RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
                if (!device->trace_bo) {
                        result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
                        goto fail;
                }

                device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
                if (!device->trace_id_ptr) {
                        result = VK_ERROR_OUT_OF_HOST_MEMORY;
                        goto fail;
                }
        }

        if (device->physical_device->rad_info.chip_class >= CIK)
                cik_create_gfx_config(device);

        *pDevice = radv_device_to_handle(device);
        return VK_SUCCESS;

fail:
        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);

        if (device->gfx_init)
                device->ws->buffer_destroy(device->gfx_init);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
        }

        vk_free(&device->alloc, device);
        return result;
}

void radv_DestroyDevice(
        VkDevice _device,
        const VkAllocationCallbacks* pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        if (!device)
                return;

        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);

        if (device->gfx_init)
                device->ws->buffer_destroy(device->gfx_init);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
                if (device->empty_cs[i])
                        device->ws->cs_destroy(device->empty_cs[i]);
                if (device->flush_cs[i])
                        device->ws->cs_destroy(device->flush_cs[i]);
        }
        radv_device_finish_meta(device);

        vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
        const char* pLayerName,
        uint32_t* pPropertyCount,
        VkExtensionProperties* pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = ARRAY_SIZE(instance_extensions);
                return VK_SUCCESS;
        }

        *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
        typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

        if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
                return VK_INCOMPLETE;

        return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
        VkPhysicalDevice physicalDevice,
        const char* pLayerName,
        uint32_t* pPropertyCount,
        VkExtensionProperties* pProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

        if (pProperties == NULL) {
                *pPropertyCount = pdevice->extensions.num_ext;
                return VK_SUCCESS;
        }

        *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
        typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

        if (*pPropertyCount < pdevice->extensions.num_ext)
                return VK_INCOMPLETE;

        return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
        uint32_t* pPropertyCount,
        VkLayerProperties* pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = 0;
                return VK_SUCCESS;
        }

        /* None supported at this time */
        return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
        VkPhysicalDevice physicalDevice,
        uint32_t* pPropertyCount,
        VkLayerProperties* pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = 0;
                return VK_SUCCESS;
        }

        /* None supported at this time */
        return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
        VkDevice _device,
        uint32_t queueFamilyIndex,
        uint32_t queueIndex,
        VkQueue* pQueue)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

static void radv_dump_trace(struct radv_device *device,
                            struct radeon_winsys_cs *cs)
{
        const char *filename = getenv("RADV_TRACE_FILE");
        FILE *f = fopen(filename, "w");
        if (!f) {
                fprintf(stderr, "Failed to write trace dump to %s\n", filename);
                return;
        }

        fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
        device->ws->cs_dump(cs, f, *device->trace_id_ptr);
        fclose(f);
}

static void
fill_geom_rings(struct radv_queue *queue,
                uint32_t *map,
                uint32_t esgs_ring_size,
                struct radeon_winsys_bo *esgs_ring_bo,
                uint32_t gsvs_ring_size,
                struct radeon_winsys_bo *gsvs_ring_bo)
{
        uint64_t esgs_va = 0, gsvs_va = 0;
        uint32_t *desc = &map[4];
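        /* map[0..3] are reserved for the graphics scratch descriptor (its
         * first two dwords are written by the caller in radv_get_preamble_cs);
         * each ring entry below is one 4-dword buffer resource descriptor. */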

        if (esgs_ring_bo)
                esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
        if (gsvs_ring_bo)
                gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);

        /* stride 0, num records - size, add tid, swizzle, elsize4,
           index stride 64 */
        desc[0] = esgs_va;
        desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
                  S_008F04_STRIDE(0) |
                  S_008F04_SWIZZLE_ENABLE(true);
        desc[2] = esgs_ring_size;
        desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
                  S_008F0C_ELEMENT_SIZE(1) |
                  S_008F0C_INDEX_STRIDE(3) |
                  S_008F0C_ADD_TID_ENABLE(true);

        desc += 4;
        /* GS entry for ES->GS ring */
        /* stride 0, num records - size, elsize0,
           index stride 0 */
        desc[0] = esgs_va;
        desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
                  S_008F04_STRIDE(0) |
                  S_008F04_SWIZZLE_ENABLE(false);
        desc[2] = esgs_ring_size;
        desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
                  S_008F0C_ELEMENT_SIZE(0) |
                  S_008F0C_INDEX_STRIDE(0) |
                  S_008F0C_ADD_TID_ENABLE(false);

        desc += 4;
        /* VS entry for GS->VS ring */
        /* stride 0, num records - size, elsize0,
           index stride 0 */
        desc[0] = gsvs_va;
        desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
                  S_008F04_STRIDE(0) |
                  S_008F04_SWIZZLE_ENABLE(false);
        desc[2] = gsvs_ring_size;
        desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
                  S_008F0C_ELEMENT_SIZE(0) |
                  S_008F0C_INDEX_STRIDE(0) |
                  S_008F0C_ADD_TID_ENABLE(false);
        desc += 4;

        /* stride gsvs_itemsize, num records 64
           elsize 4, index stride 16 */
        /* shader will patch stride and desc[2] */
        desc[0] = gsvs_va;
        desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
                  S_008F04_STRIDE(0) |
                  S_008F04_SWIZZLE_ENABLE(true);
        desc[2] = 0;
        desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
                  S_008F0C_ELEMENT_SIZE(1) |
                  S_008F0C_INDEX_STRIDE(1) |
                  S_008F0C_ADD_TID_ENABLE(true);
}

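/*
 * Build (or reuse) the per-queue preamble command streams.  dest_cs[0] is
 * the initial preamble, which programs scratch and the ES->GS/GS->VS rings
 * and ends with a full cache flush; dest_cs[1] is the continue preamble,
 * identical except for the flush, used for the later chunks when a
 * submission is split (see radv_QueueSubmit).  The cached buffers only ever
 * grow: if every requested size already fits, the existing preambles are
 * returned as-is.
 */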
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
                     uint32_t compute_scratch_size,
                     uint32_t esgs_ring_size,
                     uint32_t gsvs_ring_size,
                     struct radeon_winsys_cs **initial_preamble_cs,
                     struct radeon_winsys_cs **continue_preamble_cs)
{
        struct radeon_winsys_bo *scratch_bo = NULL;
        struct radeon_winsys_bo *descriptor_bo = NULL;
        struct radeon_winsys_bo *compute_scratch_bo = NULL;
        struct radeon_winsys_bo *esgs_ring_bo = NULL;
        struct radeon_winsys_bo *gsvs_ring_bo = NULL;
        struct radeon_winsys_cs *dest_cs[2] = {0};

        if (scratch_size <= queue->scratch_size &&
            compute_scratch_size <= queue->compute_scratch_size &&
            esgs_ring_size <= queue->esgs_ring_size &&
            gsvs_ring_size <= queue->gsvs_ring_size &&
            queue->initial_preamble_cs) {
                *initial_preamble_cs = queue->initial_preamble_cs;
                *continue_preamble_cs = queue->continue_preamble_cs;
                if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
                        *continue_preamble_cs = NULL;
                return VK_SUCCESS;
        }

        if (scratch_size > queue->scratch_size) {
                scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                              scratch_size,
                                                              4096,
                                                              RADEON_DOMAIN_VRAM,
                                                              RADEON_FLAG_NO_CPU_ACCESS);
                if (!scratch_bo)
                        goto fail;
        } else
                scratch_bo = queue->scratch_bo;

        if (compute_scratch_size > queue->compute_scratch_size) {
                compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                      compute_scratch_size,
                                                                      4096,
                                                                      RADEON_DOMAIN_VRAM,
                                                                      RADEON_FLAG_NO_CPU_ACCESS);
                if (!compute_scratch_bo)
                        goto fail;

        } else
                compute_scratch_bo = queue->compute_scratch_bo;

        if (esgs_ring_size > queue->esgs_ring_size) {
                esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                esgs_ring_size,
                                                                4096,
                                                                RADEON_DOMAIN_VRAM,
                                                                RADEON_FLAG_NO_CPU_ACCESS);
                if (!esgs_ring_bo)
                        goto fail;
        } else {
                esgs_ring_bo = queue->esgs_ring_bo;
                esgs_ring_size = queue->esgs_ring_size;
        }

        if (gsvs_ring_size > queue->gsvs_ring_size) {
                gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                gsvs_ring_size,
                                                                4096,
                                                                RADEON_DOMAIN_VRAM,
                                                                RADEON_FLAG_NO_CPU_ACCESS);
                if (!gsvs_ring_bo)
                        goto fail;
        } else {
                gsvs_ring_bo = queue->gsvs_ring_bo;
                gsvs_ring_size = queue->gsvs_ring_size;
        }

        if (scratch_bo != queue->scratch_bo ||
            esgs_ring_bo != queue->esgs_ring_bo ||
            gsvs_ring_bo != queue->gsvs_ring_bo) {
                uint32_t size = 0;
                if (gsvs_ring_bo || esgs_ring_bo)
                        size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
                else if (scratch_bo)
                        size = 8; /* 2 dword */

                descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                 size,
                                                                 4096,
                                                                 RADEON_DOMAIN_VRAM,
                                                                 RADEON_FLAG_CPU_ACCESS);
                if (!descriptor_bo)
                        goto fail;
        } else
                descriptor_bo = queue->descriptor_bo;

        for (int i = 0; i < 2; ++i) {
                struct radeon_winsys_cs *cs = NULL;
                cs = queue->device->ws->cs_create(queue->device->ws,
                                                  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
                if (!cs)
                        goto fail;

                dest_cs[i] = cs;

                if (scratch_bo)
                        queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

                if (esgs_ring_bo)
                        queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

                if (gsvs_ring_bo)
                        queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

                if (descriptor_bo)
                        queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

                if (descriptor_bo != queue->descriptor_bo) {
                        uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

                        if (scratch_bo) {
                                uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
                                uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                                 S_008F04_SWIZZLE_ENABLE(1);
                                map[0] = scratch_va;
                                map[1] = rsrc1;
                        }

                        if (esgs_ring_bo || gsvs_ring_bo)
                                fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);

                        queue->device->ws->buffer_unmap(descriptor_bo);
                }

                if (esgs_ring_bo || gsvs_ring_bo) {
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
                        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
                        radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));

                        if (queue->device->physical_device->rad_info.chip_class >= CIK) {
                                radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
                                radeon_emit(cs, esgs_ring_size >> 8);
                                radeon_emit(cs, gsvs_ring_size >> 8);
                        } else {
                                radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
                                radeon_emit(cs, esgs_ring_size >> 8);
                                radeon_emit(cs, gsvs_ring_size >> 8);
                        }
                }

                if (descriptor_bo) {
                        uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                           R_00B130_SPI_SHADER_USER_DATA_VS_0,
                                           R_00B230_SPI_SHADER_USER_DATA_GS_0,
                                           R_00B330_SPI_SHADER_USER_DATA_ES_0,
                                           R_00B430_SPI_SHADER_USER_DATA_HS_0,
                                           R_00B530_SPI_SHADER_USER_DATA_LS_0};

                        uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

                        for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
                                radeon_set_sh_reg_seq(cs, regs[i], 2);
                                radeon_emit(cs, va);
                                radeon_emit(cs, va >> 32);
                        }
                }

                if (compute_scratch_bo) {
                        uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
                        uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                         S_008F04_SWIZZLE_ENABLE(1);

                        queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

                        radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
                        radeon_emit(cs, scratch_va);
                        radeon_emit(cs, rsrc1);
                }

                if (!i) {
                        si_cs_emit_cache_flush(cs,
                                               queue->device->physical_device->rad_info.chip_class,
                                               queue->queue_family_index == RING_COMPUTE &&
                                               queue->device->physical_device->rad_info.chip_class >= CIK,
                                               RADV_CMD_FLAG_INV_ICACHE |
                                               RADV_CMD_FLAG_INV_SMEM_L1 |
                                               RADV_CMD_FLAG_INV_VMEM_L1 |
                                               RADV_CMD_FLAG_INV_GLOBAL_L2);
                }

                if (!queue->device->ws->cs_finalize(cs))
                        goto fail;
        }

        if (queue->initial_preamble_cs)
                queue->device->ws->cs_destroy(queue->initial_preamble_cs);

        if (queue->continue_preamble_cs)
                queue->device->ws->cs_destroy(queue->continue_preamble_cs);

        queue->initial_preamble_cs = dest_cs[0];
        queue->continue_preamble_cs = dest_cs[1];

        if (scratch_bo != queue->scratch_bo) {
                if (queue->scratch_bo)
                        queue->device->ws->buffer_destroy(queue->scratch_bo);
                queue->scratch_bo = scratch_bo;
                queue->scratch_size = scratch_size;
        }

        if (compute_scratch_bo != queue->compute_scratch_bo) {
                if (queue->compute_scratch_bo)
                        queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
                queue->compute_scratch_bo = compute_scratch_bo;
                queue->compute_scratch_size = compute_scratch_size;
        }

        if (esgs_ring_bo != queue->esgs_ring_bo) {
                if (queue->esgs_ring_bo)
                        queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
                queue->esgs_ring_bo = esgs_ring_bo;
                queue->esgs_ring_size = esgs_ring_size;
        }

        if (gsvs_ring_bo != queue->gsvs_ring_bo) {
                if (queue->gsvs_ring_bo)
                        queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
                queue->gsvs_ring_bo = gsvs_ring_bo;
                queue->gsvs_ring_size = gsvs_ring_size;
        }

        if (descriptor_bo != queue->descriptor_bo) {
                if (queue->descriptor_bo)
                        queue->device->ws->buffer_destroy(queue->descriptor_bo);

                queue->descriptor_bo = descriptor_bo;
        }

        *initial_preamble_cs = queue->initial_preamble_cs;
        *continue_preamble_cs = queue->continue_preamble_cs;
        if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
                *continue_preamble_cs = NULL;
        return VK_SUCCESS;
fail:
        for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
                if (dest_cs[i])
                        queue->device->ws->cs_destroy(dest_cs[i]);
        if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
                queue->device->ws->buffer_destroy(descriptor_bo);
        if (scratch_bo && scratch_bo != queue->scratch_bo)
                queue->device->ws->buffer_destroy(scratch_bo);
        if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
                queue->device->ws->buffer_destroy(compute_scratch_bo);
        if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
                queue->device->ws->buffer_destroy(esgs_ring_bo);
        if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
                queue->device->ws->buffer_destroy(gsvs_ring_bo);
        return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

VkResult radv_QueueSubmit(
        VkQueue _queue,
        uint32_t submitCount,
        const VkSubmitInfo* pSubmits,
        VkFence _fence)
{
        RADV_FROM_HANDLE(radv_queue, queue, _queue);
        RADV_FROM_HANDLE(radv_fence, fence, _fence);
        struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
        struct radeon_winsys_ctx *ctx = queue->hw_ctx;
        int ret;
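        /* When a trace buffer is active, submit only one IB at a time so a
         * GPU hang can be attributed to a single command stream (see the
         * ctx_wait_idle/radv_dump_trace calls below). */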
        uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
        uint32_t scratch_size = 0;
        uint32_t compute_scratch_size = 0;
        uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
        struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
        VkResult result;
        bool fence_emitted = false;

        /* Do this first so failing to allocate scratch buffers can't result in
         * partially executed submissions. */
        for (uint32_t i = 0; i < submitCount; i++) {
                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);

                        scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
                        compute_scratch_size = MAX2(compute_scratch_size,
                                                    cmd_buffer->compute_scratch_size_needed);
                        esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
                        gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
                }
        }

        result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
                                      esgs_ring_size, gsvs_ring_size,
                                      &initial_preamble_cs, &continue_preamble_cs);
        if (result != VK_SUCCESS)
                return result;

        for (uint32_t i = 0; i < submitCount; i++) {
                struct radeon_winsys_cs **cs_array;
                bool has_flush = !i; /* flush caches before the first submission */
                bool can_patch = !has_flush;
                uint32_t advance;

                if (!pSubmits[i].commandBufferCount) {
                        if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
                                ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                                   &queue->device->empty_cs[queue->queue_family_index],
                                                                   1, NULL, NULL,
                                                                   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                                                   pSubmits[i].waitSemaphoreCount,
                                                                   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
                                                                   pSubmits[i].signalSemaphoreCount,
                                                                   false, base_fence);
                                if (ret) {
                                        radv_loge("failed to submit CS %d\n", i);
                                        abort();
                                }
                                fence_emitted = true;
                        }
                        continue;
                }

                cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
                                  (pSubmits[i].commandBufferCount + has_flush));

                if (has_flush)
                        cs_array[0] = queue->device->flush_cs[queue->queue_family_index];

                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);
                        assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

                        cs_array[j + has_flush] = cmd_buffer->cs;
                        if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                                can_patch = false;
                }

                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
                        advance = MIN2(max_cs_submission,
                                       pSubmits[i].commandBufferCount + has_flush - j);
                        bool b = j == 0;
                        bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;

                        if (queue->device->trace_bo)
                                *queue->device->trace_id_ptr = 0;

                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
                                                           advance, initial_preamble_cs, continue_preamble_cs,
                                                           (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                                           b ? pSubmits[i].waitSemaphoreCount : 0,
                                                           (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
                                                           e ? pSubmits[i].signalSemaphoreCount : 0,
                                                           can_patch, base_fence);

                        if (ret) {
                                radv_loge("failed to submit CS %d\n", i);
                                abort();
                        }
                        fence_emitted = true;
                        if (queue->device->trace_bo) {
                                bool success = queue->device->ws->ctx_wait_idle(
                                                        queue->hw_ctx,
                                                        radv_queue_family_to_ring(
                                                                queue->queue_family_index),
                                                        queue->queue_idx);

                                if (!success) { /* Hang */
                                        radv_dump_trace(queue->device, cs_array[j]);
                                        abort();
                                }
                        }
                }
                free(cs_array);
        }

        if (fence) {
                if (!fence_emitted)
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                           &queue->device->empty_cs[queue->queue_family_index],
                                                           1, NULL, NULL, NULL, 0, NULL, 0,
                                                           false, base_fence);

                fence->submitted = true;
        }

        return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
        VkQueue _queue)
{
        RADV_FROM_HANDLE(radv_queue, queue, _queue);

        queue->device->ws->ctx_wait_idle(queue->hw_ctx,
                                         radv_queue_family_to_ring(queue->queue_family_index),
                                         queue->queue_idx);
        return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
        VkDevice _device)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++) {
                        radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
                }
        }
        return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
        VkInstance instance,
        const char* pName)
{
        return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
        VkInstance instance,
        const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
        VkInstance instance,
        const char* pName)
{
        return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
        VkDevice device,
        const char* pName)
{
        return radv_lookup_entrypoint(pName);
}

bool radv_get_memory_fd(struct radv_device *device,
                        struct radv_device_memory *memory,
                        int *pFD)
{
        struct radeon_bo_metadata metadata;

        if (memory->image) {
                radv_init_metadata(device, memory->image, &metadata);
                device->ws->buffer_set_metadata(memory->bo, &metadata);
        }

        return device->ws->buffer_get_fd(device->ws, memory->bo,
                                         pFD);
}

VkResult radv_AllocateMemory(
        VkDevice _device,
        const VkMemoryAllocateInfo* pAllocateInfo,
        const VkAllocationCallbacks* pAllocator,
        VkDeviceMemory* pMem)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_device_memory *mem;
        VkResult result;
        enum radeon_bo_domain domain;
        uint32_t flags = 0;
        const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
        assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

        if (pAllocateInfo->allocationSize == 0) {
                /* Apparently, this is allowed */
                *pMem = VK_NULL_HANDLE;
                return VK_SUCCESS;
        }

        vk_foreach_struct(ext, pAllocateInfo->pNext) {
                switch (ext->sType) {
                case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
                        dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
                        break;
                default:
                        break;
                }
        }

        mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (mem == NULL)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        if (dedicate_info) {
                mem->image = radv_image_from_handle(dedicate_info->image);
                mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
        } else {
                mem->image = NULL;
                mem->buffer = NULL;
        }

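        /* Map the memory type index onto an amdgpu domain and BO flags
         * (mirroring radv_GetPhysicalDeviceMemoryProperties above): the GTT
         * types live in the GTT domain, everything else in VRAM; plain VRAM
         * is CPU-invisible, all other types are CPU-mappable, and
         * GTT_WRITE_COMBINE additionally requests write-combined pages. */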
1729 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1730 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1731 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1732 domain = RADEON_DOMAIN_GTT;
1733 else
1734 domain = RADEON_DOMAIN_VRAM;
1735
1736 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1737 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1738 else
1739 flags |= RADEON_FLAG_CPU_ACCESS;
1740
1741 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1742 flags |= RADEON_FLAG_GTT_WC;
1743
1744 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1745 domain, flags);
1746
1747 if (!mem->bo) {
1748 result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1749 goto fail;
1750 }
1751 mem->type_index = pAllocateInfo->memoryTypeIndex;
1752
1753 *pMem = radv_device_memory_to_handle(mem);
1754
1755 return VK_SUCCESS;
1756
1757 fail:
1758 vk_free2(&device->alloc, pAllocator, mem);
1759
1760 return result;
1761 }
1762
1763 void radv_FreeMemory(
1764 VkDevice _device,
1765 VkDeviceMemory _mem,
1766 const VkAllocationCallbacks* pAllocator)
1767 {
1768 RADV_FROM_HANDLE(radv_device, device, _device);
1769 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1770
1771 if (mem == NULL)
1772 return;
1773
1774 device->ws->buffer_destroy(mem->bo);
1775 mem->bo = NULL;
1776
1777 vk_free2(&device->alloc, pAllocator, mem);
1778 }
1779
1780 VkResult radv_MapMemory(
1781 VkDevice _device,
1782 VkDeviceMemory _memory,
1783 VkDeviceSize offset,
1784 VkDeviceSize size,
1785 VkMemoryMapFlags flags,
1786 void** ppData)
1787 {
1788 RADV_FROM_HANDLE(radv_device, device, _device);
1789 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1790
1791 if (mem == NULL) {
1792 *ppData = NULL;
1793 return VK_SUCCESS;
1794 }
1795
1796 *ppData = device->ws->buffer_map(mem->bo);
1797 if (*ppData) {
1798 *ppData += offset;
1799 return VK_SUCCESS;
1800 }
1801
1802 return VK_ERROR_MEMORY_MAP_FAILED;
1803 }
1804
1805 void radv_UnmapMemory(
1806 VkDevice _device,
1807 VkDeviceMemory _memory)
1808 {
1809 RADV_FROM_HANDLE(radv_device, device, _device);
1810 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1811
1812 if (mem == NULL)
1813 return;
1814
1815 device->ws->buffer_unmap(mem->bo);
1816 }
1817
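/* All host-visible memory types radv advertises are also host-coherent,
 * so flushes and invalidations can be no-ops. */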
1818 VkResult radv_FlushMappedMemoryRanges(
1819 VkDevice _device,
1820 uint32_t memoryRangeCount,
1821 const VkMappedMemoryRange* pMemoryRanges)
1822 {
1823 return VK_SUCCESS;
1824 }
1825
1826 VkResult radv_InvalidateMappedMemoryRanges(
1827 VkDevice _device,
1828 uint32_t memoryRangeCount,
1829 const VkMappedMemoryRange* pMemoryRanges)
1830 {
1831 return VK_SUCCESS;
1832 }
1833
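/* Buffers may be placed in any of our memory types, and a fixed
 * 16-byte alignment is used for all of them. */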
1834 void radv_GetBufferMemoryRequirements(
1835 VkDevice device,
1836 VkBuffer _buffer,
1837 VkMemoryRequirements* pMemoryRequirements)
1838 {
1839 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1840
1841 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1842
1843 pMemoryRequirements->size = buffer->size;
1844 pMemoryRequirements->alignment = 16;
1845 }
1846
1847 void radv_GetImageMemoryRequirements(
1848 VkDevice device,
1849 VkImage _image,
1850 VkMemoryRequirements* pMemoryRequirements)
1851 {
1852 RADV_FROM_HANDLE(radv_image, image, _image);
1853
1854 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1855
1856 pMemoryRequirements->size = image->size;
1857 pMemoryRequirements->alignment = image->alignment;
1858 }
1859
1860 void radv_GetImageSparseMemoryRequirements(
1861 VkDevice device,
1862 VkImage image,
1863 uint32_t* pSparseMemoryRequirementCount,
1864 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1865 {
1866 stub();
1867 }
1868
1869 void radv_GetDeviceMemoryCommitment(
1870 VkDevice device,
1871 VkDeviceMemory memory,
1872 VkDeviceSize* pCommittedMemoryInBytes)
1873 {
1874 *pCommittedMemoryInBytes = 0;
1875 }
1876
1877 VkResult radv_BindBufferMemory(
1878 VkDevice device,
1879 VkBuffer _buffer,
1880 VkDeviceMemory _memory,
1881 VkDeviceSize memoryOffset)
1882 {
1883 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1884 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1885
1886 if (mem) {
1887 buffer->bo = mem->bo;
1888 buffer->offset = memoryOffset;
1889 } else {
1890 buffer->bo = NULL;
1891 buffer->offset = 0;
1892 }
1893
1894 return VK_SUCCESS;
1895 }
1896
1897 VkResult radv_BindImageMemory(
1898 VkDevice device,
1899 VkImage _image,
1900 VkDeviceMemory _memory,
1901 VkDeviceSize memoryOffset)
1902 {
1903 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1904 RADV_FROM_HANDLE(radv_image, image, _image);
1905
1906 if (mem) {
1907 image->bo = mem->bo;
1908 image->offset = memoryOffset;
1909 } else {
1910 image->bo = NULL;
1911 image->offset = 0;
1912 }
1913
1914 return VK_SUCCESS;
1915 }
1916
1917 VkResult radv_QueueBindSparse(
1918 VkQueue queue,
1919 uint32_t bindInfoCount,
1920 const VkBindSparseInfo* pBindInfo,
1921 VkFence fence)
1922 {
1923 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1924 }
1925
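/* A fence pairs a winsys fence with CPU-side bookkeeping: 'submitted'
 * lets waits on never-submitted fences time out, and 'signalled' caches
 * the result of earlier waits. */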
1926 VkResult radv_CreateFence(
1927 VkDevice _device,
1928 const VkFenceCreateInfo* pCreateInfo,
1929 const VkAllocationCallbacks* pAllocator,
1930 VkFence* pFence)
1931 {
1932 RADV_FROM_HANDLE(radv_device, device, _device);
1933 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1934 sizeof(*fence), 8,
1935 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1936
1937 if (!fence)
1938 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1939 
1940 memset(fence, 0, sizeof(*fence));
1941 fence->submitted = false;
1942 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1943 fence->fence = device->ws->create_fence();
1944 if (!fence->fence) {
1945 vk_free2(&device->alloc, pAllocator, fence);
1946 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1947 }
1948
1949 *pFence = radv_fence_to_handle(fence);
1950
1951 return VK_SUCCESS;
1952 }
1953
1954 void radv_DestroyFence(
1955 VkDevice _device,
1956 VkFence _fence,
1957 const VkAllocationCallbacks* pAllocator)
1958 {
1959 RADV_FROM_HANDLE(radv_device, device, _device);
1960 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1961
1962 if (!fence)
1963 return;
1964 device->ws->destroy_fence(fence->fence);
1965 vk_free2(&device->alloc, pAllocator, fence);
1966 }
1967
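/* Convert a relative timeout into an absolute CLOCK_MONOTONIC deadline,
 * clamping so that the addition cannot overflow UINT64_MAX. */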
1968 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1969 {
1970 uint64_t current_time;
1971 struct timespec tv;
1972
1973 clock_gettime(CLOCK_MONOTONIC, &tv);
1974 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1975
1976 timeout = MIN2(UINT64_MAX - current_time, timeout);
1977
1978 return current_time + timeout;
1979 }
1980
1981 VkResult radv_WaitForFences(
1982 VkDevice _device,
1983 uint32_t fenceCount,
1984 const VkFence* pFences,
1985 VkBool32 waitAll,
1986 uint64_t timeout)
1987 {
1988 RADV_FROM_HANDLE(radv_device, device, _device);
1989 timeout = radv_get_absolute_timeout(timeout);
1990
1991 if (!waitAll && fenceCount > 1) {
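/* Not implemented: fall through and use wait-all semantics instead. */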
1992 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1993 }
1994
1995 for (uint32_t i = 0; i < fenceCount; ++i) {
1996 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1997 bool expired = false;
1998
1999 if (fence->signalled)
2000 continue;
2001
2002 if (!fence->submitted)
2003 return VK_TIMEOUT;
2004
2005 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2006 if (!expired)
2007 return VK_TIMEOUT;
2008
2009 fence->signalled = true;
2010 }
2011
2012 return VK_SUCCESS;
2013 }
2014
2015 VkResult radv_ResetFences(VkDevice device,
2016 uint32_t fenceCount,
2017 const VkFence *pFences)
2018 {
2019 for (unsigned i = 0; i < fenceCount; ++i) {
2020 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2021 fence->submitted = fence->signalled = false;
2022 }
2023
2024 return VK_SUCCESS;
2025 }
2026
2027 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2028 {
2029 RADV_FROM_HANDLE(radv_device, device, _device);
2030 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2031
2032 if (fence->signalled)
2033 return VK_SUCCESS;
2034 if (!fence->submitted)
2035 return VK_NOT_READY;
2036
2037 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2038 return VK_NOT_READY;
2039
2040 return VK_SUCCESS;
2041 }
2042
2043
2044 // Queue semaphore functions
2045
2046 VkResult radv_CreateSemaphore(
2047 VkDevice _device,
2048 const VkSemaphoreCreateInfo* pCreateInfo,
2049 const VkAllocationCallbacks* pAllocator,
2050 VkSemaphore* pSemaphore)
2051 {
2052 RADV_FROM_HANDLE(radv_device, device, _device);
2053 struct radeon_winsys_sem *sem;
2054
2055 sem = device->ws->create_sem(device->ws);
2056 if (!sem)
2057 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2058
2059 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2060 return VK_SUCCESS;
2061 }
2062
2063 void radv_DestroySemaphore(
2064 VkDevice _device,
2065 VkSemaphore _semaphore,
2066 const VkAllocationCallbacks* pAllocator)
2067 {
2068 RADV_FROM_HANDLE(radv_device, device, _device);
2069 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2070 if (!_semaphore)
2071 return;
2072
2073 device->ws->destroy_sem(sem);
2074 }
2075
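/* Events are backed by a single CPU-visible 64-bit word in GTT: 1 means
 * set, 0 means reset. The host writes it through this mapping and the
 * GPU through the command stream. */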
2076 VkResult radv_CreateEvent(
2077 VkDevice _device,
2078 const VkEventCreateInfo* pCreateInfo,
2079 const VkAllocationCallbacks* pAllocator,
2080 VkEvent* pEvent)
2081 {
2082 RADV_FROM_HANDLE(radv_device, device, _device);
2083 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2084 sizeof(*event), 8,
2085 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2086
2087 if (!event)
2088 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2089 
2090 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2091 RADEON_DOMAIN_GTT,
2092 RADEON_FLAG_CPU_ACCESS);
2093 if (!event->bo) {
2094 vk_free2(&device->alloc, pAllocator, event);
2095 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2096 }
2097
2098 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2099
2100 *pEvent = radv_event_to_handle(event);
2101
2102 return VK_SUCCESS;
2103 }
2104
2105 void radv_DestroyEvent(
2106 VkDevice _device,
2107 VkEvent _event,
2108 const VkAllocationCallbacks* pAllocator)
2109 {
2110 RADV_FROM_HANDLE(radv_device, device, _device);
2111 RADV_FROM_HANDLE(radv_event, event, _event);
2112
2113 if (!event)
2114 return;
2115 device->ws->buffer_destroy(event->bo);
2116 vk_free2(&device->alloc, pAllocator, event);
2117 }
2118
2119 VkResult radv_GetEventStatus(
2120 VkDevice _device,
2121 VkEvent _event)
2122 {
2123 RADV_FROM_HANDLE(radv_event, event, _event);
2124
2125 if (*event->map == 1)
2126 return VK_EVENT_SET;
2127 return VK_EVENT_RESET;
2128 }
2129
2130 VkResult radv_SetEvent(
2131 VkDevice _device,
2132 VkEvent _event)
2133 {
2134 RADV_FROM_HANDLE(radv_event, event, _event);
2135 *event->map = 1;
2136
2137 return VK_SUCCESS;
2138 }
2139
2140 VkResult radv_ResetEvent(
2141 VkDevice _device,
2142 VkEvent _event)
2143 {
2144 RADV_FROM_HANDLE(radv_event, event, _event);
2145 *event->map = 0;
2146
2147 return VK_SUCCESS;
2148 }
2149
2150 VkResult radv_CreateBuffer(
2151 VkDevice _device,
2152 const VkBufferCreateInfo* pCreateInfo,
2153 const VkAllocationCallbacks* pAllocator,
2154 VkBuffer* pBuffer)
2155 {
2156 RADV_FROM_HANDLE(radv_device, device, _device);
2157 struct radv_buffer *buffer;
2158
2159 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2160
2161 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2162 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2163 if (buffer == NULL)
2164 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2165
2166 buffer->size = pCreateInfo->size;
2167 buffer->usage = pCreateInfo->usage;
2168 buffer->bo = NULL;
2169 buffer->offset = 0;
2170
2171 *pBuffer = radv_buffer_to_handle(buffer);
2172
2173 return VK_SUCCESS;
2174 }
2175
2176 void radv_DestroyBuffer(
2177 VkDevice _device,
2178 VkBuffer _buffer,
2179 const VkAllocationCallbacks* pAllocator)
2180 {
2181 RADV_FROM_HANDLE(radv_device, device, _device);
2182 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2183
2184 if (!buffer)
2185 return;
2186
2187 vk_free2(&device->alloc, pAllocator, buffer);
2188 }
2189
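/* Return the tiling index for the given mip level, selecting the
 * stencil surface's table when the stencil variant is requested. */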
2190 static inline unsigned
2191 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2192 {
2193 if (stencil)
2194 return image->surface.stencil_tiling_index[level];
2195 else
2196 return image->surface.tiling_index[level];
2197 }
2198
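/* For 3D image views the layer count is the depth of the view's extent. */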
2199 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2200 {
2201 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2202 }
2203
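/* Fill the CB_COLOR* register state for a color attachment view:
 * base/CMASK/FMASK/DCC addresses, tiling and the format word. */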
2204 static void
2205 radv_initialise_color_surface(struct radv_device *device,
2206 struct radv_color_buffer_info *cb,
2207 struct radv_image_view *iview)
2208 {
2209 const struct vk_format_description *desc;
2210 unsigned ntype, format, swap, endian;
2211 unsigned blend_clamp = 0, blend_bypass = 0;
2212 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2213 uint64_t va;
2214 const struct radeon_surf *surf = &iview->image->surface;
2215 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2216
2217 desc = vk_format_description(iview->vk_format);
2218
2219 memset(cb, 0, sizeof(*cb));
2220
2221 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2222 va += level_info->offset;
2223 cb->cb_color_base = va >> 8;
2224
2225 /* CMASK variables */
2226 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2227 va += iview->image->cmask.offset;
2228 cb->cb_color_cmask = va >> 8;
2229 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2230
2231 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2232 va += iview->image->dcc_offset;
2233 cb->cb_dcc_base = va >> 8;
2234
2235 uint32_t max_slice = radv_surface_layer_count(iview);
2236 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2237 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2238
2239 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2240 pitch_tile_max = level_info->nblk_x / 8 - 1;
2241 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2242 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2243
2244 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2245 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2246
2247 /* Intensity is implemented as Red, so treat it that way. */
2248 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2249 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2250
2251 if (iview->image->samples > 1) {
2252 unsigned log_samples = util_logbase2(iview->image->samples);
2253
2254 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2255 S_028C74_NUM_FRAGMENTS(log_samples);
2256 }
2257
2258 if (iview->image->fmask.size) {
2259 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2260 if (device->physical_device->rad_info.chip_class >= CIK)
2261 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2262 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2263 cb->cb_color_fmask = va >> 8;
2264 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2265 } else {
2266 /* This must be set for fast clear to work without FMASK. */
2267 if (device->physical_device->rad_info.chip_class >= CIK)
2268 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2269 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2270 cb->cb_color_fmask = cb->cb_color_base;
2271 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2272 }
2273
2274 ntype = radv_translate_color_numformat(iview->vk_format,
2275 desc,
2276 vk_format_get_first_non_void_channel(iview->vk_format));
2277 format = radv_translate_colorformat(iview->vk_format);
2278 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2279 radv_finishme("Illegal color");
2280 swap = radv_translate_colorswap(iview->vk_format, false);
2281 endian = radv_colorformat_endian_swap(format);
2282
2283 /* blend clamp should be set for all NORM/SRGB types */
2284 if (ntype == V_028C70_NUMBER_UNORM ||
2285 ntype == V_028C70_NUMBER_SNORM ||
2286 ntype == V_028C70_NUMBER_SRGB)
2287 blend_clamp = 1;
2288
2289 /* set blend bypass according to docs if SINT/UINT or
2290 8/24 COLOR variants */
2291 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2292 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2293 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2294 blend_clamp = 0;
2295 blend_bypass = 1;
2296 }
2297 #if 0
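/* Disabled sketch: detection of 8-bit integer color formats that would
 * feed the blend state; the assignment target does not exist yet. */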
2298 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2299 (format == V_028C70_COLOR_8 ||
2300 format == V_028C70_COLOR_8_8 ||
2301 format == V_028C70_COLOR_8_8_8_8))
2302 ->color_is_int8 = true;
2303 #endif
2304 cb->cb_color_info = S_028C70_FORMAT(format) |
2305 S_028C70_COMP_SWAP(swap) |
2306 S_028C70_BLEND_CLAMP(blend_clamp) |
2307 S_028C70_BLEND_BYPASS(blend_bypass) |
2308 S_028C70_SIMPLE_FLOAT(1) |
2309 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2310 ntype != V_028C70_NUMBER_SNORM &&
2311 ntype != V_028C70_NUMBER_SRGB &&
2312 format != V_028C70_COLOR_8_24 &&
2313 format != V_028C70_COLOR_24_8) |
2314 S_028C70_NUMBER_TYPE(ntype) |
2315 S_028C70_ENDIAN(endian);
2316 if (iview->image->samples > 1 && iview->image->fmask.size)
2317 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2319
2320 if (iview->image->cmask.size &&
2321 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2322 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2323
2324 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2325 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2326
2327 if (device->physical_device->rad_info.chip_class >= VI) {
2328 unsigned max_uncompressed_block_size = 2;
2329 if (iview->image->samples > 1) {
2330 if (iview->image->surface.bpe == 1)
2331 max_uncompressed_block_size = 0;
2332 else if (iview->image->surface.bpe == 2)
2333 max_uncompressed_block_size = 1;
2334 }
2335
2336 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2337 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2338 }
2339
2340 /* This must be set for fast clear to work without FMASK. */
2341 if (!iview->image->fmask.size &&
2342 device->physical_device->rad_info.chip_class == SI) {
2343 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2344 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2345 }
2346 }
2347
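/* Fill the DB_* register state for a depth/stencil attachment view. */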
2348 static void
2349 radv_initialise_ds_surface(struct radv_device *device,
2350 struct radv_ds_buffer_info *ds,
2351 struct radv_image_view *iview)
2352 {
2353 unsigned level = iview->base_mip;
2354 unsigned format;
2355 uint64_t va, s_offs, z_offs;
2356 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2357 memset(ds, 0, sizeof(*ds));
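/* The polygon-offset format control and offset scale follow the
 * precision of the depth format. */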
2358 switch (iview->vk_format) {
2359 case VK_FORMAT_D24_UNORM_S8_UINT:
2360 case VK_FORMAT_X8_D24_UNORM_PACK32:
2361 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2362 ds->offset_scale = 2.0f;
2363 break;
2364 case VK_FORMAT_D16_UNORM:
2365 case VK_FORMAT_D16_UNORM_S8_UINT:
2366 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2367 ds->offset_scale = 4.0f;
2368 break;
2369 case VK_FORMAT_D32_SFLOAT:
2370 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2371 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2372 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2373 ds->offset_scale = 1.0f;
2374 break;
2375 default:
2376 break;
2377 }
2378
2379 format = radv_translate_dbformat(iview->vk_format);
2380 if (format == V_028040_Z_INVALID) {
2381 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2382 }
2383
2384 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2385 s_offs = z_offs = va;
2386 z_offs += iview->image->surface.level[level].offset;
2387 s_offs += iview->image->surface.stencil_level[level].offset;
2388
2389 uint32_t max_slice = radv_surface_layer_count(iview);
2390 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2391 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2392 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2393 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2394
2395 if (iview->image->samples > 1)
2396 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2397
2398 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2399 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2400 else
2401 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2402
2403 if (device->physical_device->rad_info.chip_class >= CIK) {
2404 struct radeon_info *info = &device->physical_device->rad_info;
2405 unsigned tiling_index = iview->image->surface.tiling_index[level];
2406 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2407 unsigned macro_index = iview->image->surface.macro_tile_index;
2408 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2409 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2410 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2411
2412 ds->db_depth_info |=
2413 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2414 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2415 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2416 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2417 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2418 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2419 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2420 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2421 } else {
2422 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2423 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2424 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2425 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2426 }
2427
2428 if (iview->image->surface.htile_size && !level) {
2429 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2430 S_028040_ALLOW_EXPCLEAR(1);
2431
2432 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2433 /* Workaround: For a not yet understood reason, the
2434 * combination of MSAA, fast stencil clear and stencil
2435 * decompress messes with subsequent stencil buffer
2436 * uses. The problem was reproduced on Verde, Bonaire,
2437 * Tonga, and Carrizo.
2438 *
2439 * Disabling EXPCLEAR works around the problem.
2440 *
2441 * Check piglit's arb_texture_multisample-stencil-clear
2442 * test if you want to try changing this.
2443 */
2444 if (iview->image->samples <= 1)
2445 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2446 } else
2447 /* Use all of the htile_buffer for depth if there's no stencil. */
2448 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2449
2450 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2451 iview->image->htile_offset;
2452 ds->db_htile_data_base = va >> 8;
2453 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2454 } else {
2455 ds->db_htile_data_base = 0;
2456 ds->db_htile_surface = 0;
2457 }
2458
2459 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2460 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2461
2462 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2463 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2464 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2465 }
2466
2467 VkResult radv_CreateFramebuffer(
2468 VkDevice _device,
2469 const VkFramebufferCreateInfo* pCreateInfo,
2470 const VkAllocationCallbacks* pAllocator,
2471 VkFramebuffer* pFramebuffer)
2472 {
2473 RADV_FROM_HANDLE(radv_device, device, _device);
2474 struct radv_framebuffer *framebuffer;
2475
2476 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2477
2478 size_t size = sizeof(*framebuffer) +
2479 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2480 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2481 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2482 if (framebuffer == NULL)
2483 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2484
2485 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2486 framebuffer->width = pCreateInfo->width;
2487 framebuffer->height = pCreateInfo->height;
2488 framebuffer->layers = pCreateInfo->layers;
2489 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2490 VkImageView _iview = pCreateInfo->pAttachments[i];
2491 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2492 framebuffer->attachments[i].attachment = iview;
2493 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2494 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2495 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2496 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2497 }
2498 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2499 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2500 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2501 }
2502
2503 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2504 return VK_SUCCESS;
2505 }
2506
2507 void radv_DestroyFramebuffer(
2508 VkDevice _device,
2509 VkFramebuffer _fb,
2510 const VkAllocationCallbacks* pAllocator)
2511 {
2512 RADV_FROM_HANDLE(radv_device, device, _device);
2513 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2514
2515 if (!fb)
2516 return;
2517 vk_free2(&device->alloc, pAllocator, fb);
2518 }
2519
2520 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2521 {
2522 switch (address_mode) {
2523 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2524 return V_008F30_SQ_TEX_WRAP;
2525 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2526 return V_008F30_SQ_TEX_MIRROR;
2527 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2528 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2529 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2530 return V_008F30_SQ_TEX_CLAMP_BORDER;
2531 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2532 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2533 default:
2534 unreachable("illegal tex wrap mode");
2535 break;
2536 }
2537 }
2538
2539 static unsigned
2540 radv_tex_compare(VkCompareOp op)
2541 {
2542 switch (op) {
2543 case VK_COMPARE_OP_NEVER:
2544 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2545 case VK_COMPARE_OP_LESS:
2546 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2547 case VK_COMPARE_OP_EQUAL:
2548 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2549 case VK_COMPARE_OP_LESS_OR_EQUAL:
2550 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2551 case VK_COMPARE_OP_GREATER:
2552 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2553 case VK_COMPARE_OP_NOT_EQUAL:
2554 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2555 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2556 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2557 case VK_COMPARE_OP_ALWAYS:
2558 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2559 default:
2560 unreachable("illegal compare mode");
2561 break;
2562 }
2563 }
2564
2565 static unsigned
2566 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2567 {
2568 switch (filter) {
2569 case VK_FILTER_NEAREST:
2570 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2571 V_008F38_SQ_TEX_XY_FILTER_POINT);
2572 case VK_FILTER_LINEAR:
2573 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2574 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2575 case VK_FILTER_CUBIC_IMG:
2576 default:
2577 fprintf(stderr, "illegal texture filter\n");
2578 return 0;
2579 }
2580 }
2581
2582 static unsigned
2583 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2584 {
2585 switch (mode) {
2586 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2587 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2588 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2589 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2590 default:
2591 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2592 }
2593 }
2594
2595 static unsigned
2596 radv_tex_bordercolor(VkBorderColor bcolor)
2597 {
2598 switch (bcolor) {
2599 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2600 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2601 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2602 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2603 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2604 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2605 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2606 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2607 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2608 default:
2609 break;
2610 }
2611 return 0;
2612 }
2613
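/* Encode a 1x-16x maximum-anisotropy value as the log2 ratio the
 * hardware expects (0 = 1x, ..., 4 = 16x). */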
2614 static unsigned
2615 radv_tex_aniso_filter(unsigned filter)
2616 {
2617 if (filter < 2)
2618 return 0;
2619 if (filter < 4)
2620 return 1;
2621 if (filter < 8)
2622 return 2;
2623 if (filter < 16)
2624 return 3;
2625 return 4;
2626 }
2627
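/* Pack the sampler into the four SQ_IMG_SAMP dwords the shader consumes:
 * wrap/compare modes in word 0, the LOD range in word 1, the filters in
 * word 2 and the border color in word 3. */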
2628 static void
2629 radv_init_sampler(struct radv_device *device,
2630 struct radv_sampler *sampler,
2631 const VkSamplerCreateInfo *pCreateInfo)
2632 {
2633 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2634 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2635 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2636 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2637
2638 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2639 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2640 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2641 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2642 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2643 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2644 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2645 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2646 S_008F30_DISABLE_CUBE_WRAP(0) |
2647 S_008F30_COMPAT_MODE(is_vi));
2648 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2649 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2650 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2651 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2652 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2653 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2654 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2655 S_008F38_MIP_POINT_PRECLAMP(0) |
2656 S_008F38_DISABLE_LSB_CEIL(1) |
2657 S_008F38_FILTER_PREC_FIX(1) |
2658 S_008F38_ANISO_OVERRIDE(is_vi));
2659 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2660 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2661 }
2662
2663 VkResult radv_CreateSampler(
2664 VkDevice _device,
2665 const VkSamplerCreateInfo* pCreateInfo,
2666 const VkAllocationCallbacks* pAllocator,
2667 VkSampler* pSampler)
2668 {
2669 RADV_FROM_HANDLE(radv_device, device, _device);
2670 struct radv_sampler *sampler;
2671
2672 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2673
2674 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2675 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2676 if (!sampler)
2677 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2678
2679 radv_init_sampler(device, sampler, pCreateInfo);
2680 *pSampler = radv_sampler_to_handle(sampler);
2681
2682 return VK_SUCCESS;
2683 }
2684
2685 void radv_DestroySampler(
2686 VkDevice _device,
2687 VkSampler _sampler,
2688 const VkAllocationCallbacks* pAllocator)
2689 {
2690 RADV_FROM_HANDLE(radv_device, device, _device);
2691 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2692
2693 if (!sampler)
2694 return;
2695 vk_free2(&device->alloc, pAllocator, sampler);
2696 }
2697
2698
2699 /* vk_icd.h does not declare this function, so we declare it here to
2700 * suppress Wmissing-prototypes.
2701 */
2702 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2703 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2704
2705 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2706 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2707 {
2708 /* For the full details on loader interface versioning, see
2709 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2710 * What follows is a condensed summary, to help you navigate the large and
2711 * confusing official doc.
2712 *
2713 * - Loader interface v0 is incompatible with later versions. We don't
2714 * support it.
2715 *
2716 * - In loader interface v1:
2717 * - The first ICD entrypoint called by the loader is
2718 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2719 * entrypoint.
2720 * - The ICD must statically expose no other Vulkan symbol unless it is
2721 * linked with -Bsymbolic.
2722 * - Each dispatchable Vulkan handle created by the ICD must be
2723 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2724 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2725 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2726 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2727 * such loader-managed surfaces.
2728 *
2729 * - Loader interface v2 differs from v1 in:
2730 * - The first ICD entrypoint called by the loader is
2731 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2732 * statically expose this entrypoint.
2733 *
2734 * - Loader interface v3 differs from v2 in:
2735 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2736 * vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
2737 * because the loader no longer does so.
2738 */
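/* radv implements everything up to v3, so clamp whatever the loader
 * offers; e.g. a loader offering v4 gets v3 back, a v1 loader keeps v1. */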
2739 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2740 return VK_SUCCESS;
2741 }