/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "util/vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}
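
/*
 * The resulting 16-byte cache UUID is packed as follows (a summary of the
 * memcpy/snprintf sequence above, not an ABI guarantee):
 *
 *   bytes  0-3   Mesa build timestamp
 *   bytes  4-7   LLVM build timestamp
 *   bytes  8-9   radeon_family as a uint16_t
 *   bytes 10-15  the string "radv", zero-padded
 */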

static const VkExtensionProperties instance_extensions[] = {
	{
		.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
		.specVersion = 25,
	},
#ifdef VK_USE_PLATFORM_XCB_KHR
	{
		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
	{
		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
	{
		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
		.specVersion = 5,
	},
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
	{
		.extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
		.specVersion = 68,
	},
	{
		.extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
		.specVersion = 1,
	},
};

static VkResult
radv_extensions_register(struct radv_instance *instance,
			 struct radv_extensions *extensions,
			 const VkExtensionProperties *new_ext,
			 uint32_t num_ext)
{
	size_t new_size;
	VkExtensionProperties *new_ptr;

	assert(new_ext && num_ext > 0);

	if (!new_ext)
		return VK_ERROR_INITIALIZATION_FAILED;

	new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
	new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
			     new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

	/* Old array continues to be valid, update nothing */
	if (!new_ptr)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memcpy(&new_ptr[extensions->num_ext], new_ext,
	       num_ext * sizeof(VkExtensionProperties));
	extensions->ext_array = new_ptr;
	extensions->num_ext += num_ext;

	return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
		       struct radv_extensions *extensions)
{
	assert(extensions);

	if (!extensions)
		radv_loge("Attempted to free invalid extension struct\n");

	if (extensions->ext_array)
		vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
		     size_t num_ext,
		     const char *name)
{
	assert(extensions && name);

	for (uint32_t i = 0; i < num_ext; i++) {
		if (strcmp(name, extensions[i].extensionName) == 0)
			return true;
	}

	return false;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  const char *path)
{
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return VK_ERROR_INCOMPATIBLE_DRIVER;

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}

	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
		radv_finish_wsi(device);
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	result = radv_extensions_register(instance,
					  &device->extensions,
					  common_device_extensions,
					  ARRAY_SIZE(common_device_extensions));
	if (result != VK_SUCCESS)
		goto fail;

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
	device->name = device->rad_info.name;

	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_extensions_finish(device->instance, &device->extensions);
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	close(device->local_fd);
}


static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};
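
/*
 * Note that these defaults fall back to plain malloc()/realloc() and ignore
 * the requested alignment; that is sufficient for the 8-byte alignment the
 * driver passes to vk_alloc2() and friends below.
 */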

static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{NULL, 0}
};
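
/*
 * These flags are parsed out of the RADV_DEBUG environment variable in
 * radv_CreateInstance() below, e.g.:
 *
 *   RADV_DEBUG=nocache,shaders ./app
 */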

VkResult radv_CreateInstance(
	VkInstance _instance,
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(instance_extensions,
					  ARRAY_SIZE(instance_extensions),
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			     VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(instance, 0, sizeof(*instance));

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		char path[20];
		instance->physicalDeviceCount = 0;
		for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
			snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance, path);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				return result;
		}
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	// RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = false,
		.sampleRateShading = false,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = false,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = true,
		.shaderStorageImageWriteWithoutFormat = true,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = true,
		.shaderInt64 = false,
		.shaderInt16 = false,
		.sparseBinding = false,
		.variableMultisampleRate = false,
		.inheritedQueries = false,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2KHR *pFeatures)
{
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;
	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0,
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = 64,
		.maxPerStageDescriptorUniformBuffers = 64,
		.maxPerStageDescriptorStorageBuffers = 64,
		.maxPerStageDescriptorSampledImages = 64,
		.maxPerStageDescriptorStorageImages = 64,
		.maxPerStageDescriptorInputAttachments = 64,
		.maxPerStageResources = 128,
		.maxDescriptorSetSamplers = 256,
		.maxDescriptorSetUniformBuffers = 256,
		.maxDescriptorSetUniformBuffersDynamic = 256,
		.maxDescriptorSetStorageBuffers = 256,
		.maxDescriptorSetStorageBuffersDynamic = 256,
		.maxDescriptorSetSampledImages = 256,
		.maxDescriptorSetStorageImages = 256,
		.maxDescriptorSetInputAttachments = 256,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 0,
		.maxTessellationPatchSize = 0,
		.maxTessellationControlPerVertexInputComponents = 0,
		.maxTessellationControlPerVertexOutputComponents = 0,
		.maxTessellationControlPerPatchOutputComponents = 0,
		.maxTessellationControlTotalOutputComponents = 0,
		.maxTessellationEvaluationInputComponents = 0,
		.maxTessellationEvaluationOutputComponents = 0,
		.maxGeometryShaderInvocations = 32,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = false,
		.timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = VK_MAKE_VERSION(1, 0, 5),
		.driverVersion = 1,
		.vendorID = 0x1002,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
		.limits = limits,
		.sparseProperties = {0}, /* Sparse resources are not yet supported. */
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties2KHR *pProperties)
{
	return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
}

static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device* pdevice,
	uint32_t* pCount,
	VkQueueFamilyProperties** pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
				.queueCount = pdevice->rad_info.compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties* pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}
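
/*
 * A minimal sketch of the standard two-call enumeration pattern an
 * application uses against this entry point (hypothetical caller code,
 * not part of the driver):
 *
 *   uint32_t count = 0;
 *   vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, NULL);
 *   VkQueueFamilyProperties props[3];
 *   if (count > 3)
 *           count = 3;
 *   vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, props);
 */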

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
				 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
				 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};

	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
		.size = physical_device->rad_info.vram_size -
			physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
		.size = physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
		.size = physical_device->rad_info.gart_size,
		.flags = 0,
	};
}
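
/*
 * A sketch of how an application typically picks one of the four memory
 * types exposed above (hypothetical caller code, following the pattern
 * the Vulkan spec recommends):
 *
 *   static int32_t
 *   find_mem_type(const VkPhysicalDeviceMemoryProperties *props,
 *                 uint32_t type_bits, VkMemoryPropertyFlags required)
 *   {
 *           for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
 *                   if ((type_bits & (1u << i)) &&
 *                       (props->memoryTypes[i].propertyFlags & required) == required)
 *                           return (int32_t)i;
 *           }
 *           return -1;
 *   }
 */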

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
	return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
						      &pMemoryProperties->memoryProperties);
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		int queue_family_index, int idx)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;

	queue->hw_ctx = device->ws->ctx_create(device->ws);
	if (!queue->hw_ctx)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice physicalDevice,
	const VkDeviceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDevice* pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(physical_device->extensions.ext_array,
					  physical_device->extensions.num_ext,
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
			   sizeof(*device), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(device, 0, sizeof(*device));

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->debug_flags = device->instance->debug_flags;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * device->scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);
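
	/*
	 * Worked example (a hypothetical 16-CU part, not a real configuration):
	 * 32 waves/CU * 16 CUs = 512 scratch waves, well above the
	 * 2048 / 64 = 32 waves needed for one maximal threadgroup.
	 */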

	radv_device_init_gs_info(device);

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);

		device->flush_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
		case RADV_QUEUE_COMPUTE:
			si_cs_emit_cache_flush(device->flush_cs[family],
					       device->physical_device->rad_info.chip_class,
					       family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
					       RADV_CMD_FLAG_INV_ICACHE |
					       RADV_CMD_FLAG_INV_SMEM_L1 |
					       RADV_CMD_FLAG_INV_VMEM_L1 |
					       RADV_CMD_FLAG_INV_GLOBAL_L2);
			break;
		}
		device->ws->cs_finalize(device->flush_cs[family]);
	}
	if (getenv("RADV_TRACE_FILE")) {
		device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
							     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
		if (!device->trace_bo) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}

		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
		if (!device->trace_id_ptr) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
	}

	if (device->physical_device->rad_info.chip_class >= CIK)
		cik_create_gfx_config(device);

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice _device,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (!device)
		return;

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
		if (device->empty_cs[i])
			device->ws->cs_destroy(device->empty_cs[i]);
		if (device->flush_cs[i])
			device->ws->cs_destroy(device->flush_cs[i]);
	}
	radv_device_finish_meta(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
	const char* pLayerName,
	uint32_t* pPropertyCount,
	VkExtensionProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = ARRAY_SIZE(instance_extensions);
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
	typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

	if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
	VkPhysicalDevice physicalDevice,
	const char* pLayerName,
	uint32_t* pPropertyCount,
	VkExtensionProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pProperties == NULL) {
		*pPropertyCount = pdevice->extensions.num_ext;
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
	typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

	if (*pPropertyCount < pdevice->extensions.num_ext)
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice _device,
	uint32_t queueFamilyIndex,
	uint32_t queueIndex,
	VkQueue* pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

static void radv_dump_trace(struct radv_device *device,
			    struct radeon_winsys_cs *cs)
{
	const char *filename = getenv("RADV_TRACE_FILE");
	FILE *f = fopen(filename, "w");
	if (!f) {
		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
		return;
	}

	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
	device->ws->cs_dump(cs, f, *device->trace_id_ptr);
	fclose(f);
}
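
/*
 * The trace mechanism, as used in this file: *trace_id_ptr is cleared
 * before each submission (see radv_QueueSubmit() below), and command
 * buffers are expected to write an incrementing marker into the trace BO
 * as they execute, so after a hang the last value written shows how far
 * the IB got.
 */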

static void
fill_geom_rings(struct radv_queue *queue,
		uint32_t *map,
		uint32_t esgs_ring_size,
		struct radeon_winsys_bo *esgs_ring_bo,
		uint32_t gsvs_ring_size,
		struct radeon_winsys_bo *gsvs_ring_bo)
{
	uint64_t esgs_va = 0, gsvs_va = 0;
	uint32_t *desc = &map[4];

	if (esgs_ring_bo)
		esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
	if (gsvs_ring_bo)
		gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	   index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(3) |
		  S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	   elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(1) |
		  S_008F0C_ADD_TID_ENABLE(true);
}
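
/*
 * Each 4-dword group written above is a buffer resource descriptor (a
 * "V#") as consumed by the SI-family shader hardware: dword 0 holds the
 * low 32 bits of the base address; dword 1 the high address bits, stride
 * and swizzle enable; dword 2 the number of records; dword 3 the
 * destination swizzles, number/data format, element size, index stride
 * and ADD_TID enable.
 */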

static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
		     uint32_t scratch_size,
		     uint32_t compute_scratch_size,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_cs **initial_preamble_cs,
		     struct radeon_winsys_cs **continue_preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_cs *dest_cs[2] = {0};

	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size &&
	    queue->initial_preamble_cs) {
		*initial_preamble_cs = queue->initial_preamble_cs;
		*continue_preamble_cs = queue->continue_preamble_cs;
		if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
			*continue_preamble_cs = NULL;
		return VK_SUCCESS;
	}

	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
							      scratch_size,
							      4096,
							      RADEON_DOMAIN_VRAM,
							      RADEON_FLAG_NO_CPU_ACCESS);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
								      compute_scratch_size,
								      4096,
								      RADEON_DOMAIN_VRAM,
								      RADEON_FLAG_NO_CPU_ACCESS);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								RADEON_FLAG_NO_CPU_ACCESS);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								RADEON_FLAG_NO_CPU_ACCESS);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo)
			size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
		else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
								 size,
								 4096,
								 RADEON_DOMAIN_VRAM,
								 RADEON_FLAG_CPU_ACCESS);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

	for (int i = 0; i < 2; ++i) {
		struct radeon_winsys_cs *cs = NULL;
		cs = queue->device->ws->cs_create(queue->device->ws,
						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
		if (!cs)
			goto fail;

		dest_cs[i] = cs;

		if (scratch_bo)
			queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

		if (esgs_ring_bo)
			queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

		if (gsvs_ring_bo)
			queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

		if (descriptor_bo)
			queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

		if (descriptor_bo != queue->descriptor_bo) {
			uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

			if (scratch_bo) {
				uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
				uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
						 S_008F04_SWIZZLE_ENABLE(1);
				map[0] = scratch_va;
				map[1] = rsrc1;
			}

			if (esgs_ring_bo || gsvs_ring_bo)
				fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);

			queue->device->ws->buffer_unmap(descriptor_bo);
		}

		if (esgs_ring_bo || gsvs_ring_bo) {
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));

			if (queue->device->physical_device->rad_info.chip_class >= CIK) {
				radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
				radeon_emit(cs, esgs_ring_size >> 8);
				radeon_emit(cs, gsvs_ring_size >> 8);
			} else {
				radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
				radeon_emit(cs, esgs_ring_size >> 8);
				radeon_emit(cs, gsvs_ring_size >> 8);
			}
		}

		if (descriptor_bo) {
			uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
					   R_00B130_SPI_SHADER_USER_DATA_VS_0,
					   R_00B230_SPI_SHADER_USER_DATA_GS_0,
					   R_00B330_SPI_SHADER_USER_DATA_ES_0,
					   R_00B430_SPI_SHADER_USER_DATA_HS_0,
					   R_00B530_SPI_SHADER_USER_DATA_LS_0};

			uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

			for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
				radeon_set_sh_reg_seq(cs, regs[i], 2);
				radeon_emit(cs, va);
				radeon_emit(cs, va >> 32);
			}
		}

		if (compute_scratch_bo) {
			uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
					 S_008F04_SWIZZLE_ENABLE(1);

			queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

			radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
			radeon_emit(cs, scratch_va);
			radeon_emit(cs, rsrc1);
		}

		if (!i) {
			si_cs_emit_cache_flush(cs,
					       queue->device->physical_device->rad_info.chip_class,
					       queue->queue_family_index == RING_COMPUTE &&
					       queue->device->physical_device->rad_info.chip_class >= CIK,
					       RADV_CMD_FLAG_INV_ICACHE |
					       RADV_CMD_FLAG_INV_SMEM_L1 |
					       RADV_CMD_FLAG_INV_VMEM_L1 |
					       RADV_CMD_FLAG_INV_GLOBAL_L2);
		}

		if (!queue->device->ws->cs_finalize(cs))
			goto fail;
	}

	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);

	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);

	queue->initial_preamble_cs = dest_cs[0];
	queue->continue_preamble_cs = dest_cs[1];

	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	*initial_preamble_cs = queue->initial_preamble_cs;
	*continue_preamble_cs = queue->continue_preamble_cs;
	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
		*continue_preamble_cs = NULL;
	return VK_SUCCESS;
fail:
	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
		if (dest_cs[i])
			queue->device->ws->cs_destroy(dest_cs[i]);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

VkResult radv_QueueSubmit(
	VkQueue _queue,
	uint32_t submitCount,
	const VkSubmitInfo* pSubmits,
	VkFence _fence)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);
	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
	int ret;
	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
	uint32_t scratch_size = 0;
	uint32_t compute_scratch_size = 0;
	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
	struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
	VkResult result;
	bool fence_emitted = false;

	/* Do this first so failing to allocate scratch buffers can't result in
	 * partially executed submissions. */
	for (uint32_t i = 0; i < submitCount; i++) {
		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);

			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
			compute_scratch_size = MAX2(compute_scratch_size,
						    cmd_buffer->compute_scratch_size_needed);
			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
		}
	}

	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
				      esgs_ring_size, gsvs_ring_size,
				      &initial_preamble_cs, &continue_preamble_cs);
	if (result != VK_SUCCESS)
		return result;

	for (uint32_t i = 0; i < submitCount; i++) {
		struct radeon_winsys_cs **cs_array;
		bool has_flush = !i;
		bool can_patch = !has_flush;
		uint32_t advance;

		if (!pSubmits[i].commandBufferCount) {
			if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
				ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
								   &queue->device->empty_cs[queue->queue_family_index],
								   1, NULL, NULL,
								   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
								   pSubmits[i].waitSemaphoreCount,
								   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
								   pSubmits[i].signalSemaphoreCount,
								   false, base_fence);
				if (ret) {
					radv_loge("failed to submit CS %d\n", i);
					abort();
				}
				fence_emitted = true;
			}
			continue;
		}

		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
				  (pSubmits[i].commandBufferCount + has_flush));

		if (has_flush)
			cs_array[0] = queue->device->flush_cs[queue->queue_family_index];

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);
			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

			cs_array[j + has_flush] = cmd_buffer->cs;
			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
				can_patch = false;
		}

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
			advance = MIN2(max_cs_submission,
				       pSubmits[i].commandBufferCount + has_flush - j);
			bool b = j == 0;
			bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;

			if (queue->device->trace_bo)
				*queue->device->trace_id_ptr = 0;

			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
							   advance, initial_preamble_cs, continue_preamble_cs,
							   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
							   b ? pSubmits[i].waitSemaphoreCount : 0,
							   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
							   e ? pSubmits[i].signalSemaphoreCount : 0,
							   can_patch, base_fence);

			if (ret) {
				radv_loge("failed to submit CS %d\n", i);
				abort();
			}
			fence_emitted = true;
			if (queue->device->trace_bo) {
				bool success = queue->device->ws->ctx_wait_idle(
							queue->hw_ctx,
							radv_queue_family_to_ring(
								queue->queue_family_index),
							queue->queue_idx);

				if (!success) { /* Hang */
					radv_dump_trace(queue->device, cs_array[j]);
					abort();
				}
			}
		}
		free(cs_array);
	}

	if (fence) {
		if (!fence_emitted)
			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
							   &queue->device->empty_cs[queue->queue_family_index],
							   1, NULL, NULL, NULL, 0, NULL, 0,
							   false, base_fence);

		fence->submitted = true;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
	VkQueue _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
					 radv_queue_family_to_ring(queue->queue_family_index),
					 queue->queue_idx);
	return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
	VkDevice _device)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++) {
			radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
		}
	}
	return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice device,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

bool radv_get_memory_fd(struct radv_device *device,
			struct radv_device_memory *memory,
			int *pFD)
{
	struct radeon_bo_metadata metadata;

	if (memory->image) {
		radv_init_metadata(device, memory->image, &metadata);
		device->ws->buffer_set_metadata(memory->bo, &metadata);
	}

	return device->ws->buffer_get_fd(device->ws, memory->bo,
					 pFD);
}

VkResult radv_AllocateMemory(
	VkDevice _device,
	const VkMemoryAllocateInfo* pAllocateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDeviceMemory* pMem)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;
	const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	vk_foreach_struct(ext, pAllocateInfo->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
			dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
			break;
		default:
			break;
		}
	}

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (dedicate_info) {
		mem->image = radv_image_from_handle(dedicate_info->image);
		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
	} else {
		mem->image = NULL;
		mem->buffer = NULL;
	}

	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
		domain = RADEON_DOMAIN_GTT;
	else
		domain = RADEON_DOMAIN_VRAM;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
		flags |= RADEON_FLAG_NO_CPU_ACCESS;
	else
		flags |= RADEON_FLAG_CPU_ACCESS;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
		flags |= RADEON_FLAG_GTT_WC;

	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
					    domain, flags);

	if (!mem->bo) {
		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
		goto fail;
	}
	mem->type_index = pAllocateInfo->memoryTypeIndex;

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}
1741
1742 void radv_FreeMemory(
1743 VkDevice _device,
1744 VkDeviceMemory _mem,
1745 const VkAllocationCallbacks* pAllocator)
1746 {
1747 RADV_FROM_HANDLE(radv_device, device, _device);
1748 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1749
1750 if (mem == NULL)
1751 return;
1752
1753 device->ws->buffer_destroy(mem->bo);
1754 mem->bo = NULL;
1755
1756 vk_free2(&device->alloc, pAllocator, mem);
1757 }
1758
1759 VkResult radv_MapMemory(
1760 VkDevice _device,
1761 VkDeviceMemory _memory,
1762 VkDeviceSize offset,
1763 VkDeviceSize size,
1764 VkMemoryMapFlags flags,
1765 void** ppData)
1766 {
1767 RADV_FROM_HANDLE(radv_device, device, _device);
1768 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1769
1770 if (mem == NULL) {
1771 *ppData = NULL;
1772 return VK_SUCCESS;
1773 }
1774
1775 *ppData = device->ws->buffer_map(mem->bo);
1776 if (*ppData) {
1777 *ppData += offset;
1778 return VK_SUCCESS;
1779 }
1780
1781 return VK_ERROR_MEMORY_MAP_FAILED;
1782 }
1783
1784 void radv_UnmapMemory(
1785 VkDevice _device,
1786 VkDeviceMemory _memory)
1787 {
1788 RADV_FROM_HANDLE(radv_device, device, _device);
1789 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1790
1791 if (mem == NULL)
1792 return;
1793
1794 device->ws->buffer_unmap(mem->bo);
1795 }
1796
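/* The host-visible memory types radv exposes are also host-coherent, so
 * there is nothing to do for explicit flushes or invalidations.
 */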
VkResult radv_FlushMappedMemoryRanges(
	VkDevice                                    _device,
	uint32_t                                    memoryRangeCount,
	const VkMappedMemoryRange*                  pMemoryRanges)
{
	return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice                                    _device,
	uint32_t                                    memoryRangeCount,
	const VkMappedMemoryRange*                  pMemoryRanges)
{
	return VK_SUCCESS;
}

void radv_GetBufferMemoryRequirements(
	VkDevice                                    device,
	VkBuffer                                    _buffer,
	VkMemoryRequirements*                       pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = buffer->size;
	pMemoryRequirements->alignment = 16;
}

void radv_GetImageMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     _image,
	VkMemoryRequirements*                       pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = image->size;
	pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageSparseMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     image,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
	stub();
}

void radv_GetDeviceMemoryCommitment(
	VkDevice                                    device,
	VkDeviceMemory                              memory,
	VkDeviceSize*                               pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory(
	VkDevice                                    device,
	VkBuffer                                    _buffer,
	VkDeviceMemory                              _memory,
	VkDeviceSize                                memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (mem) {
		buffer->bo = mem->bo;
		buffer->offset = memoryOffset;
	} else {
		buffer->bo = NULL;
		buffer->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
	VkDevice                                    device,
	VkImage                                     _image,
	VkDeviceMemory                              _memory,
	VkDeviceSize                                memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (mem) {
		image->bo = mem->bo;
		image->offset = memoryOffset;
	} else {
		image->bo = NULL;
		image->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueBindSparse(
	VkQueue                                     queue,
	uint32_t                                    bindInfoCount,
	const VkBindSparseInfo*                     pBindInfo,
	VkFence                                     fence)
{
	stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

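/* Fences wrap a winsys fence plus submitted/signalled flags, so that waits
 * on fences that were created signalled or never submitted can be resolved
 * on the CPU without consulting the kernel.
 */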
VkResult radv_CreateFence(
	VkDevice                                    _device,
	const VkFenceCreateInfo*                    pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFence*                                    pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(fence, 0, sizeof(*fence));
	fence->submitted = false;
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->fence = device->ws->create_fence();
	if (!fence->fence) {
		vk_free2(&device->alloc, pAllocator, fence);
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

void radv_DestroyFence(
	VkDevice                                    _device,
	VkFence                                     _fence,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;
	device->ws->destroy_fence(fence->fence);
	vk_free2(&device->alloc, pAllocator, fence);
}

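/* Convert a relative timeout into an absolute CLOCK_MONOTONIC deadline,
 * clamping so that the addition cannot overflow UINT64_MAX.
 */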
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time;
	struct timespec tv;

	clock_gettime(CLOCK_MONOTONIC, &tv);
	current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}

VkResult radv_WaitForFences(
	VkDevice                                    _device,
	uint32_t                                    fenceCount,
	const VkFence*                              pFences,
	VkBool32                                    waitAll,
	uint64_t                                    timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	timeout = radv_get_absolute_timeout(timeout);

	if (!waitAll && fenceCount > 1) {
		fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
	}

	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		if (fence->signalled)
			continue;

		if (!fence->submitted)
			return VK_TIMEOUT;

		expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
		if (!expired)
			return VK_TIMEOUT;

		fence->signalled = true;
	}

	return VK_SUCCESS;
}

VkResult radv_ResetFences(VkDevice device,
			  uint32_t fenceCount,
			  const VkFence *pFences)
{
	for (unsigned i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		fence->submitted = fence->signalled = false;
	}

	return VK_SUCCESS;
}

VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (fence->signalled)
		return VK_SUCCESS;
	if (!fence->submitted)
		return VK_NOT_READY;

	if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
		return VK_NOT_READY;

	return VK_SUCCESS;
}


// Queue semaphore functions

VkResult radv_CreateSemaphore(
	VkDevice                                    _device,
	const VkSemaphoreCreateInfo*                pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSemaphore*                                pSemaphore)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_winsys_sem *sem;

	sem = device->ws->create_sem(device->ws);
	if (!sem)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	*pSemaphore = radeon_winsys_sem_to_handle(sem);
	return VK_SUCCESS;
}

void radv_DestroySemaphore(
	VkDevice                                    _device,
	VkSemaphore                                 _semaphore,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
	if (!_semaphore)
		return;

	device->ws->destroy_sem(sem);
}

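/* Events are backed by an 8-byte GTT buffer that both CPU and GPU can
 * access: set/reset write a word through the persistent mapping, and
 * status checks poll it.
 */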
VkResult radv_CreateEvent(
	VkDevice                                    _device,
	const VkEventCreateInfo*                    pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkEvent*                                    pEvent)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*event), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!event)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	event->bo = device->ws->buffer_create(device->ws, 8, 8,
					      RADEON_DOMAIN_GTT,
					      RADEON_FLAG_CPU_ACCESS);
	if (!event->bo) {
		vk_free2(&device->alloc, pAllocator, event);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}

	event->map = (uint64_t*)device->ws->buffer_map(event->bo);

	*pEvent = radv_event_to_handle(event);

	return VK_SUCCESS;
}

void radv_DestroyEvent(
	VkDevice                                    _device,
	VkEvent                                     _event,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (!event)
		return;
	device->ws->buffer_destroy(event->bo);
	vk_free2(&device->alloc, pAllocator, event);
}

VkResult radv_GetEventStatus(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (*event->map == 1)
		return VK_EVENT_SET;
	return VK_EVENT_RESET;
}

VkResult radv_SetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 1;

	return VK_SUCCESS;
}

VkResult radv_ResetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 0;

	return VK_SUCCESS;
}

VkResult radv_CreateBuffer(
	VkDevice                                    _device,
	const VkBufferCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkBuffer*                                   pBuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer *buffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

	buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (buffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	buffer->size = pCreateInfo->size;
	buffer->usage = pCreateInfo->usage;
	buffer->bo = NULL;
	buffer->offset = 0;

	*pBuffer = radv_buffer_to_handle(buffer);

	return VK_SUCCESS;
}

void radv_DestroyBuffer(
	VkDevice                                    _device,
	VkBuffer                                    _buffer,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (!buffer)
		return;

	vk_free2(&device->alloc, pAllocator, buffer);
}

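/* Pick the hardware tiling index for a mip level, using the separate
 * stencil tiling table when requested.
 */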
static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
	if (stencil)
		return image->surface.stencil_tiling_index[level];
	else
		return image->surface.tiling_index[level];
}

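/* For 3D views, the "layer" count is the depth of the view's extent. */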
static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
{
	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
}

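/* Fill the CB_* register values for a color attachment: base/CMASK/DCC
 * addresses, tiling, FMASK state, and the packed CB_COLOR_INFO format,
 * swap, blend and compression bits.
 */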
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
	uint64_t va;
	const struct radeon_surf *surf = &iview->image->surface;
	const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += level_info->offset;
	cb->cb_color_base = va >> 8;

	/* CMASK variables */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;
	cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;

	uint32_t max_slice = radv_surface_layer_count(iview);
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);

	cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
	pitch_tile_max = level_info->nblk_x / 8 - 1;
	slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
	tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);

	cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
	cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
		S_028C74_TILE_MODE_INDEX(tile_mode_index);

	if (iview->image->samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (iview->image->fmask.size) {
		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		if (device->physical_device->rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		if (device->physical_device->rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
		cb->cb_color_fmask = cb->cb_color_base;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	 * 8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	if (iview->image->samples > 1 && iview->image->fmask.size)
		cb->cb_color_info |= S_028C70_COMPRESSION(1);

	if (iview->image->cmask.size &&
	    !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (iview->image->surface.dcc_size && level_info->dcc_enabled)
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	if (device->physical_device->rad_info.chip_class >= VI) {
		unsigned max_uncompressed_block_size = 2;
		if (iview->image->samples > 1) {
			if (iview->image->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (iview->image->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
			S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!iview->image->fmask.size &&
	    device->physical_device->rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(iview->image->surface.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}
}

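/* Fill the DB_* register values for a depth/stencil attachment: format,
 * polygon-offset scaling, tiling (per chip class), HTILE setup and the
 * depth/stencil base addresses.
 */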
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format;
	uint64_t va, s_offs, z_offs;
	const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
	memset(ds, 0, sizeof(*ds));
	switch (iview->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->vk_format);
	if (format == V_028040_Z_INVALID) {
		fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
	}

	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;
	z_offs += iview->image->surface.level[level].offset;
	s_offs += iview->image->surface.stencil_level[level].offset;

	uint32_t max_slice = radv_surface_layer_count(iview);
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
	ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
	ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);

	if (iview->image->samples > 1)
		ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));

	if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (device->physical_device->rad_info.chip_class >= CIK) {
		struct radeon_info *info = &device->physical_device->rad_info;
		unsigned tiling_index = iview->image->surface.tiling_index[level];
		unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
		unsigned macro_index = iview->image->surface.macro_tile_index;
		unsigned tile_mode = info->si_tile_mode_array[tiling_index];
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		ds->db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
		ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(iview->image, level, true);
		ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	if (iview->image->surface.htile_size && !level) {
		ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			S_028040_ALLOW_EXPCLEAR(1);

		if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (iview->image->samples <= 1)
				ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else
			/* Use all of the htile_buffer for depth if there's no stencil. */
			ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
			iview->image->htile_offset;
		ds->db_htile_data_base = va >> 8;
		ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		ds->db_htile_data_base = 0;
		ds->db_htile_surface = 0;
	}

	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;

	ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
		S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
	ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
}

VkResult radv_CreateFramebuffer(
	VkDevice                                    _device,
	const VkFramebufferCreateInfo*              pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFramebuffer*                              pFramebuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_framebuffer *framebuffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

	size_t size = sizeof(*framebuffer) +
		sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
	framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
				VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (framebuffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	framebuffer->attachment_count = pCreateInfo->attachmentCount;
	framebuffer->width = pCreateInfo->width;
	framebuffer->height = pCreateInfo->height;
	framebuffer->layers = pCreateInfo->layers;
	for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
		VkImageView _iview = pCreateInfo->pAttachments[i];
		struct radv_image_view *iview = radv_image_view_from_handle(_iview);
		framebuffer->attachments[i].attachment = iview;
		if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
			radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
		} else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
		}
		framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
		framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
		framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
	}

	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
	return VK_SUCCESS;
}

void radv_DestroyFramebuffer(
	VkDevice                                    _device,
	VkFramebuffer                               _fb,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

	if (!fb)
		return;
	vk_free2(&device->alloc, pAllocator, fb);
}

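/* Translation helpers from Vulkan sampler enums to the SQ_TEX_* hardware
 * encodings used in the sampler state words.
 */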
static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	default:
		unreachable("illegal tex wrap mode");
		break;
	}
}

static unsigned
radv_tex_compare(VkCompareOp op)
{
	switch (op) {
	case VK_COMPARE_OP_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case VK_COMPARE_OP_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case VK_COMPARE_OP_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case VK_COMPARE_OP_LESS_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case VK_COMPARE_OP_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case VK_COMPARE_OP_NOT_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case VK_COMPARE_OP_GREATER_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case VK_COMPARE_OP_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		unreachable("illegal compare mode");
		break;
	}
}

static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
			V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter\n");
		return 0;
	}
}

static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
	switch (mode) {
	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}

static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
	switch (bcolor) {
	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	default:
		break;
	}
	return 0;
}

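/* Map a maxAnisotropy value to the hardware's log2 ratio encoding:
 * 1x..16x maps to 0..4.
 */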
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
		return 3;
	return 4;
}

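/* Pack the VkSamplerCreateInfo state into the four hardware sampler dwords
 * (S_008F30 through S_008F3C): wrap modes, anisotropy, LOD clamps/bias,
 * filters and border color.
 */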
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
		(uint32_t) pCreateInfo->maxAnisotropy : 0;
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);

	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi));
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(1) |
			     S_008F38_DISABLE_LSB_CEIL(1) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}

VkResult radv_CreateSampler(
	VkDevice                                    _device,
	const VkSamplerCreateInfo*                  pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSampler*                                  pSampler)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_sampler *sampler;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

	sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sampler)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_init_sampler(device, sampler, pCreateInfo);
	*pSampler = radv_sampler_to_handle(sampler);

	return VK_SUCCESS;
}

void radv_DestroySampler(
	VkDevice                                    _device,
	VkSampler                                   _sampler,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

	if (!sampler)
		return;
	vk_free2(&device->alloc, pAllocator, sampler);
}


/* vk_icd.h does not declare this function, so we declare it here to
 * suppress -Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	 * What follows is a condensed summary, to help you navigate the large and
	 * confusing official doc.
	 *
	 *    - Loader interface v0 is incompatible with later versions. We don't
	 *      support it.
	 *
	 *    - In loader interface v1:
	 *        - The first ICD entrypoint called by the loader is
	 *          vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	 *          entrypoint.
	 *        - The ICD must statically expose no other Vulkan symbol unless it is
	 *          linked with -Bsymbolic.
	 *        - Each dispatchable Vulkan handle created by the ICD must be
	 *          a pointer to a struct whose first member is VK_LOADER_DATA. The
	 *          ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	 *        - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	 *          vkDestroySurfaceKHR(). The ICD must be capable of working with
	 *          such loader-managed surfaces.
	 *
	 *    - Loader interface v2 differs from v1 in:
	 *        - The first ICD entrypoint called by the loader is
	 *          vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	 *          statically expose this entrypoint.
	 *
	 *    - Loader interface v3 differs from v2 in:
	 *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
	 *          because the loader no longer does so.
	 */
	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
	return VK_SUCCESS;
}