radv: add support for NV_dedicated_allocation
src/amd/vulkan/radv_device.c (mesa.git)
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "util/vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

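/* The pipeline cache UUID below is assembled from build timestamps and the
 * GPU family: bytes 0-3 hold the Mesa build timestamp, bytes 4-7 the LLVM
 * build timestamp, bytes 8-9 the chip family, and the remainder the string
 * "radv", so a driver or compiler update invalidates cached shaders.
 */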
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
    uint32_t mesa_timestamp, llvm_timestamp;
    uint16_t f = family;
    memset(uuid, 0, VK_UUID_SIZE);
    if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
        !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
        return -1;

    memcpy(uuid, &mesa_timestamp, 4);
    memcpy((char*)uuid + 4, &llvm_timestamp, 4);
    memcpy((char*)uuid + 8, &f, 2);
    snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
    return 0;
}

static const VkExtensionProperties instance_extensions[] = {
    {
        .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
        .specVersion = 25,
    },
#ifdef VK_USE_PLATFORM_XCB_KHR
    {
        .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
        .specVersion = 6,
    },
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
    {
        .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
        .specVersion = 6,
    },
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
    {
        .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
        .specVersion = 5,
    },
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
    {
        .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
        .specVersion = 68,
    },
    {
        .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
        .specVersion = 1,
    },
    {
        .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
        .specVersion = 1,
    },
};

static VkResult
radv_extensions_register(struct radv_instance *instance,
                         struct radv_extensions *extensions,
                         const VkExtensionProperties *new_ext,
                         uint32_t num_ext)
{
    size_t new_size;
    VkExtensionProperties *new_ptr;

    assert(new_ext && num_ext > 0);

    if (!new_ext)
        return VK_ERROR_INITIALIZATION_FAILED;

    new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
    new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
                         new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

    /* Old array continues to be valid, update nothing */
    if (!new_ptr)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    memcpy(&new_ptr[extensions->num_ext], new_ext,
           num_ext * sizeof(VkExtensionProperties));
    extensions->ext_array = new_ptr;
    extensions->num_ext += num_ext;

    return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
                       struct radv_extensions *extensions)
{
    assert(extensions);

    if (!extensions)
        radv_loge("Attempted to free invalid extension struct\n");

    if (extensions->ext_array)
        vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
                     size_t num_ext,
                     const char *name)
{
    assert(extensions && name);

    for (uint32_t i = 0; i < num_ext; i++) {
        if (strcmp(name, extensions[i].extensionName) == 0)
            return true;
    }

    return false;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          const char *path)
{
    VkResult result;
    drmVersionPtr version;
    int fd;

    fd = open(path, O_RDWR | O_CLOEXEC);
    if (fd < 0)
        return VK_ERROR_INCOMPATIBLE_DRIVER;

    version = drmGetVersion(fd);
    if (!version) {
        close(fd);
        return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                         "failed to get version %s: %m", path);
    }

    if (strcmp(version->name, "amdgpu")) {
        drmFreeVersion(version);
        close(fd);
        return VK_ERROR_INCOMPATIBLE_DRIVER;
    }
    drmFreeVersion(version);

    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    device->instance = instance;
    assert(strlen(path) < ARRAY_SIZE(device->path));
    strncpy(device->path, path, ARRAY_SIZE(device->path));

    device->ws = radv_amdgpu_winsys_create(fd);
    if (!device->ws) {
        result = VK_ERROR_INCOMPATIBLE_DRIVER;
        goto fail;
    }

    device->local_fd = fd;
    device->ws->query_info(device->ws, &device->rad_info);
    result = radv_init_wsi(device);
    if (result != VK_SUCCESS) {
        device->ws->destroy(device->ws);
        goto fail;
    }

    if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
        radv_finish_wsi(device);
        device->ws->destroy(device->ws);
        result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                           "cannot generate UUID");
        goto fail;
    }

    result = radv_extensions_register(instance,
                                      &device->extensions,
                                      common_device_extensions,
                                      ARRAY_SIZE(common_device_extensions));
    if (result != VK_SUCCESS)
        goto fail;

    fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
    device->name = device->rad_info.name;

    return VK_SUCCESS;

fail:
    close(fd);
    return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
    radv_extensions_finish(device->instance, &device->extensions);
    radv_finish_wsi(device);
    device->ws->destroy(device->ws);
    close(device->local_fd);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
    return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
    return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
    free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
    .pUserData = NULL,
    .pfnAllocation = default_alloc_func,
    .pfnReallocation = default_realloc_func,
    .pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
    {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
    {"nodcc", RADV_DEBUG_NO_DCC},
    {"shaders", RADV_DEBUG_DUMP_SHADERS},
    {"nocache", RADV_DEBUG_NO_CACHE},
    {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
    {"nohiz", RADV_DEBUG_NO_HIZ},
    {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
    {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
    {NULL, 0}
};

VkResult radv_CreateInstance(
    const VkInstanceCreateInfo* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkInstance* pInstance)
{
    struct radv_instance *instance;

    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

    uint32_t client_version;
    if (pCreateInfo->pApplicationInfo &&
        pCreateInfo->pApplicationInfo->apiVersion != 0) {
        client_version = pCreateInfo->pApplicationInfo->apiVersion;
    } else {
        client_version = VK_MAKE_VERSION(1, 0, 0);
    }

    if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
        client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
        return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Client requested version %d.%d.%d",
                         VK_VERSION_MAJOR(client_version),
                         VK_VERSION_MINOR(client_version),
                         VK_VERSION_PATCH(client_version));
    }

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        if (!is_extension_enabled(instance_extensions,
                                  ARRAY_SIZE(instance_extensions),
                                  pCreateInfo->ppEnabledExtensionNames[i]))
            return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
    }

    instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    if (!instance)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    memset(instance, 0, sizeof(*instance));

    instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

    if (pAllocator)
        instance->alloc = *pAllocator;
    else
        instance->alloc = default_alloc;

    instance->apiVersion = client_version;
    instance->physicalDeviceCount = -1;

    _mesa_locale_init();

    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

    instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                               radv_debug_options);

    *pInstance = radv_instance_to_handle(instance);

    return VK_SUCCESS;
}

void radv_DestroyInstance(
    VkInstance _instance,
    const VkAllocationCallbacks* pAllocator)
{
    RADV_FROM_HANDLE(radv_instance, instance, _instance);

    for (int i = 0; i < instance->physicalDeviceCount; ++i) {
        radv_physical_device_finish(instance->physicalDevices + i);
    }

    VG(VALGRIND_DESTROY_MEMPOOL(instance));

    _mesa_locale_fini();

    vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
    VkInstance _instance,
    uint32_t* pPhysicalDeviceCount,
    VkPhysicalDevice* pPhysicalDevices)
{
    RADV_FROM_HANDLE(radv_instance, instance, _instance);
    VkResult result;

    if (instance->physicalDeviceCount < 0) {
        char path[20];
        instance->physicalDeviceCount = 0;
        for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
            snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
            result = radv_physical_device_init(instance->physicalDevices +
                                               instance->physicalDeviceCount,
                                               instance, path);
            if (result == VK_SUCCESS)
                ++instance->physicalDeviceCount;
            else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
                return result;
        }
    }

    if (!pPhysicalDevices) {
        *pPhysicalDeviceCount = instance->physicalDeviceCount;
    } else {
        *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
        for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
            pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
    }

    return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
                                                                 : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceFeatures* pFeatures)
{
    // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

    memset(pFeatures, 0, sizeof(*pFeatures));

    *pFeatures = (VkPhysicalDeviceFeatures) {
        .robustBufferAccess = true,
        .fullDrawIndexUint32 = true,
        .imageCubeArray = true,
        .independentBlend = true,
        .geometryShader = true,
        .tessellationShader = false,
        .sampleRateShading = false,
        .dualSrcBlend = true,
        .logicOp = true,
        .multiDrawIndirect = true,
        .drawIndirectFirstInstance = true,
        .depthClamp = true,
        .depthBiasClamp = true,
        .fillModeNonSolid = true,
        .depthBounds = true,
        .wideLines = true,
        .largePoints = true,
        .alphaToOne = true,
        .multiViewport = true,
        .samplerAnisotropy = true,
        .textureCompressionETC2 = false,
        .textureCompressionASTC_LDR = false,
        .textureCompressionBC = true,
        .occlusionQueryPrecise = true,
        .pipelineStatisticsQuery = false,
        .vertexPipelineStoresAndAtomics = true,
        .fragmentStoresAndAtomics = true,
        .shaderTessellationAndGeometryPointSize = true,
        .shaderImageGatherExtended = true,
        .shaderStorageImageExtendedFormats = true,
        .shaderStorageImageMultisample = false,
        .shaderUniformBufferArrayDynamicIndexing = true,
        .shaderSampledImageArrayDynamicIndexing = true,
        .shaderStorageBufferArrayDynamicIndexing = true,
        .shaderStorageImageArrayDynamicIndexing = true,
        .shaderStorageImageReadWithoutFormat = true,
        .shaderStorageImageWriteWithoutFormat = true,
        .shaderClipDistance = true,
        .shaderCullDistance = true,
        .shaderFloat64 = true,
        .shaderInt64 = false,
        .shaderInt16 = false,
        .variableMultisampleRate = false,
        .inheritedQueries = false,
    };
}

void radv_GetPhysicalDeviceFeatures2KHR(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceFeatures2KHR *pFeatures)
{
    return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceProperties* pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    VkSampleCountFlags sample_counts = 0xf;
    VkPhysicalDeviceLimits limits = {
        .maxImageDimension1D = (1 << 14),
        .maxImageDimension2D = (1 << 14),
        .maxImageDimension3D = (1 << 11),
        .maxImageDimensionCube = (1 << 14),
        .maxImageArrayLayers = (1 << 11),
        .maxTexelBufferElements = 128 * 1024 * 1024,
        .maxUniformBufferRange = UINT32_MAX,
        .maxStorageBufferRange = UINT32_MAX,
        .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
        .maxMemoryAllocationCount = UINT32_MAX,
        .maxSamplerAllocationCount = 64 * 1024,
        .bufferImageGranularity = 64, /* A cache line */
        .sparseAddressSpaceSize = 0,
        .maxBoundDescriptorSets = MAX_SETS,
        .maxPerStageDescriptorSamplers = 64,
        .maxPerStageDescriptorUniformBuffers = 64,
        .maxPerStageDescriptorStorageBuffers = 64,
        .maxPerStageDescriptorSampledImages = 64,
        .maxPerStageDescriptorStorageImages = 64,
        .maxPerStageDescriptorInputAttachments = 64,
        .maxPerStageResources = 128,
        .maxDescriptorSetSamplers = 256,
        .maxDescriptorSetUniformBuffers = 256,
        .maxDescriptorSetUniformBuffersDynamic = 256,
        .maxDescriptorSetStorageBuffers = 256,
        .maxDescriptorSetStorageBuffersDynamic = 256,
        .maxDescriptorSetSampledImages = 256,
        .maxDescriptorSetStorageImages = 256,
        .maxDescriptorSetInputAttachments = 256,
        .maxVertexInputAttributes = 32,
        .maxVertexInputBindings = 32,
        .maxVertexInputAttributeOffset = 2047,
        .maxVertexInputBindingStride = 2048,
        .maxVertexOutputComponents = 128,
        .maxTessellationGenerationLevel = 0,
        .maxTessellationPatchSize = 0,
        .maxTessellationControlPerVertexInputComponents = 0,
        .maxTessellationControlPerVertexOutputComponents = 0,
        .maxTessellationControlPerPatchOutputComponents = 0,
        .maxTessellationControlTotalOutputComponents = 0,
        .maxTessellationEvaluationInputComponents = 0,
        .maxTessellationEvaluationOutputComponents = 0,
        .maxGeometryShaderInvocations = 32,
        .maxGeometryInputComponents = 64,
        .maxGeometryOutputComponents = 128,
        .maxGeometryOutputVertices = 256,
        .maxGeometryTotalOutputComponents = 1024,
        .maxFragmentInputComponents = 128,
        .maxFragmentOutputAttachments = 8,
        .maxFragmentDualSrcAttachments = 1,
        .maxFragmentCombinedOutputResources = 8,
        .maxComputeSharedMemorySize = 32768,
        .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
        .maxComputeWorkGroupInvocations = 2048,
        .maxComputeWorkGroupSize = {
            2048,
            2048,
            2048
        },
        .subPixelPrecisionBits = 4 /* FIXME */,
        .subTexelPrecisionBits = 4 /* FIXME */,
        .mipmapPrecisionBits = 4 /* FIXME */,
        .maxDrawIndexedIndexValue = UINT32_MAX,
        .maxDrawIndirectCount = UINT32_MAX,
        .maxSamplerLodBias = 16,
        .maxSamplerAnisotropy = 16,
        .maxViewports = MAX_VIEWPORTS,
        .maxViewportDimensions = { (1 << 14), (1 << 14) },
        .viewportBoundsRange = { INT16_MIN, INT16_MAX },
        .viewportSubPixelBits = 13, /* We take a float? */
        .minMemoryMapAlignment = 4096, /* A page */
        .minTexelBufferOffsetAlignment = 1,
        .minUniformBufferOffsetAlignment = 4,
        .minStorageBufferOffsetAlignment = 4,
        .minTexelOffset = -32,
        .maxTexelOffset = 31,
        .minTexelGatherOffset = -32,
        .maxTexelGatherOffset = 31,
        .minInterpolationOffset = -2,
        .maxInterpolationOffset = 2,
        .subPixelInterpolationOffsetBits = 8,
        .maxFramebufferWidth = (1 << 14),
        .maxFramebufferHeight = (1 << 14),
        .maxFramebufferLayers = (1 << 10),
        .framebufferColorSampleCounts = sample_counts,
        .framebufferDepthSampleCounts = sample_counts,
        .framebufferStencilSampleCounts = sample_counts,
        .framebufferNoAttachmentsSampleCounts = sample_counts,
        .maxColorAttachments = MAX_RTS,
        .sampledImageColorSampleCounts = sample_counts,
        .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
        .sampledImageDepthSampleCounts = sample_counts,
        .sampledImageStencilSampleCounts = sample_counts,
        .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
        .maxSampleMaskWords = 1,
        .timestampComputeAndGraphics = false,
        .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
        .maxClipDistances = 8,
        .maxCullDistances = 8,
        .maxCombinedClipAndCullDistances = 8,
        .discreteQueuePriorities = 1,
        .pointSizeRange = { 0.125, 255.875 },
        .lineWidthRange = { 0.0, 7.9921875 },
        .pointSizeGranularity = (1.0 / 8.0),
        .lineWidthGranularity = (1.0 / 128.0),
        .strictLines = false, /* FINISHME */
        .standardSampleLocations = true,
        .optimalBufferCopyOffsetAlignment = 128,
        .optimalBufferCopyRowPitchAlignment = 128,
        .nonCoherentAtomSize = 64,
    };

    *pProperties = (VkPhysicalDeviceProperties) {
        .apiVersion = VK_MAKE_VERSION(1, 0, 5),
        .driverVersion = 1,
        .vendorID = 0x1002,
        .deviceID = pdevice->rad_info.pci_id,
        .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
        .limits = limits,
        .sparseProperties = {0}, /* radv doesn't support sparse resources yet */
    };

    strcpy(pProperties->deviceName, pdevice->name);
    memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceProperties2KHR *pProperties)
{
    return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
}

static void radv_get_physical_device_queue_family_properties(
    struct radv_physical_device* pdevice,
    uint32_t* pCount,
    VkQueueFamilyProperties** pQueueFamilyProperties)
{
    int num_queue_families = 1;
    int idx;
    if (pdevice->rad_info.compute_rings > 0 &&
        pdevice->rad_info.chip_class >= CIK &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
        num_queue_families++;

    if (pQueueFamilyProperties == NULL) {
        *pCount = num_queue_families;
        return;
    }

    if (!*pCount)
        return;

    idx = 0;
    if (*pCount >= 1) {
        *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = 1,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
        };
        idx++;
    }

    if (pdevice->rad_info.compute_rings > 0 &&
        pdevice->rad_info.chip_class >= CIK &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
        if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
                .queueCount = pdevice->rad_info.compute_rings,
                .timestampValidBits = 64,
                .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
            };
            idx++;
        }
    }
    *pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
    VkPhysicalDevice physicalDevice,
    uint32_t* pCount,
    VkQueueFamilyProperties* pQueueFamilyProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    if (!pQueueFamilyProperties) {
        radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
        return;
    }
    VkQueueFamilyProperties *properties[] = {
        pQueueFamilyProperties + 0,
        pQueueFamilyProperties + 1,
        pQueueFamilyProperties + 2,
    };
    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
    assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
    VkPhysicalDevice physicalDevice,
    uint32_t* pCount,
    VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    if (!pQueueFamilyProperties) {
        radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
        return;
    }
    VkQueueFamilyProperties *properties[] = {
        &pQueueFamilyProperties[0].queueFamilyProperties,
        &pQueueFamilyProperties[1].queueFamilyProperties,
        &pQueueFamilyProperties[2].queueFamilyProperties,
    };
    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
    assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

    STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

    pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
        .heapIndex = RADV_MEM_HEAP_VRAM,
    };
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = RADV_MEM_HEAP_GTT,
    };
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
    };
    pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
        .heapIndex = RADV_MEM_HEAP_GTT,
    };

    STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

    pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
    pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
        .size = physical_device->rad_info.vram_size -
                physical_device->rad_info.visible_vram_size,
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
    pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
        .size = physical_device->rad_info.visible_vram_size,
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
    pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
        .size = physical_device->rad_info.gart_size,
        .flags = 0,
    };
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
    return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                                  &pMemoryProperties->memoryProperties);
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                int queue_family_index, int idx)
{
    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    queue->device = device;
    queue->queue_family_index = queue_family_index;
    queue->queue_idx = idx;

    queue->hw_ctx = device->ws->ctx_create(device->ws);
    if (!queue->hw_ctx)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

    return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
    if (queue->hw_ctx)
        queue->device->ws->ctx_destroy(queue->hw_ctx);

    if (queue->initial_preamble_cs)
        queue->device->ws->cs_destroy(queue->initial_preamble_cs);
    if (queue->continue_preamble_cs)
        queue->device->ws->cs_destroy(queue->continue_preamble_cs);
    if (queue->descriptor_bo)
        queue->device->ws->buffer_destroy(queue->descriptor_bo);
    if (queue->scratch_bo)
        queue->device->ws->buffer_destroy(queue->scratch_bo);
    if (queue->esgs_ring_bo)
        queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
    if (queue->gsvs_ring_bo)
        queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
    if (queue->compute_scratch_bo)
        queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
    switch (device->physical_device->rad_info.family) {
    case CHIP_OLAND:
    case CHIP_HAINAN:
    case CHIP_KAVERI:
    case CHIP_KABINI:
    case CHIP_MULLINS:
    case CHIP_ICELAND:
    case CHIP_CARRIZO:
    case CHIP_STONEY:
        device->gs_table_depth = 16;
        return;
    case CHIP_TAHITI:
    case CHIP_PITCAIRN:
    case CHIP_VERDE:
    case CHIP_BONAIRE:
    case CHIP_HAWAII:
    case CHIP_TONGA:
    case CHIP_FIJI:
    case CHIP_POLARIS10:
    case CHIP_POLARIS11:
        device->gs_table_depth = 32;
        return;
    default:
        unreachable("unknown GPU");
    }
}

VkResult radv_CreateDevice(
    VkPhysicalDevice physicalDevice,
    const VkDeviceCreateInfo* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkDevice* pDevice)
{
    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
    VkResult result;
    struct radv_device *device;

    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
        if (!is_extension_enabled(physical_device->extensions.ext_array,
                                  physical_device->extensions.num_ext,
                                  pCreateInfo->ppEnabledExtensionNames[i]))
            return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
    }

    device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
                       sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
    if (!device)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    memset(device, 0, sizeof(*device));

    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    device->instance = physical_device->instance;
    device->physical_device = physical_device;

    device->debug_flags = device->instance->debug_flags;

    device->ws = physical_device->ws;
    if (pAllocator)
        device->alloc = *pAllocator;
    else
        device->alloc = physical_device->instance->alloc;

    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
        const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
        uint32_t qfi = queue_create->queueFamilyIndex;

        device->queues[qfi] = vk_alloc(&device->alloc,
                                       queue_create->queueCount * sizeof(struct radv_queue),
                                       8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
        if (!device->queues[qfi]) {
            result = VK_ERROR_OUT_OF_HOST_MEMORY;
            goto fail;
        }

        memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

        device->queue_count[qfi] = queue_create->queueCount;

        for (unsigned q = 0; q < queue_create->queueCount; q++) {
            result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
            if (result != VK_SUCCESS)
                goto fail;
        }
    }

#if HAVE_LLVM < 0x0400
    device->llvm_supports_spill = false;
#else
    device->llvm_supports_spill = true;
#endif

    /* The maximum number of scratch waves. Scratch space isn't divided
     * evenly between CUs. The number is only a function of the number of CUs.
     * We can decrease the constant to decrease the scratch buffer size.
     *
     * sctx->scratch_waves must be >= the maximum possible size of
     * 1 threadgroup, so that the hw doesn't hang from being unable
     * to start any.
     *
     * The recommended value is 4 per CU at most. Higher numbers don't
     * bring much benefit, but they still occupy chip resources (think
     * async compute). I've seen ~2% performance difference between 4 and 32.
     */
    uint32_t max_threads_per_block = 2048;
    device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                 max_threads_per_block / 64);
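    /* Worked example (hypothetical 36-CU part, not a value queried here):
     * MAX2(32 * 36, 2048 / 64) = MAX2(1152, 32) = 1152 scratch waves, i.e.
     * the CU term dominates whenever there is more than one CU. */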

    radv_device_init_gs_info(device);

    result = radv_device_init_meta(device);
    if (result != VK_SUCCESS)
        goto fail;

    radv_device_init_msaa(device);

    for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
        device->empty_cs[family] = device->ws->cs_create(device->ws, family);
        switch (family) {
        case RADV_QUEUE_GENERAL:
            radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
            radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
            radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
            break;
        case RADV_QUEUE_COMPUTE:
            radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
            radeon_emit(device->empty_cs[family], 0);
            break;
        }
        device->ws->cs_finalize(device->empty_cs[family]);

        device->flush_cs[family] = device->ws->cs_create(device->ws, family);
        switch (family) {
        case RADV_QUEUE_GENERAL:
        case RADV_QUEUE_COMPUTE:
            si_cs_emit_cache_flush(device->flush_cs[family],
                                   device->physical_device->rad_info.chip_class,
                                   family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
                                   RADV_CMD_FLAG_INV_ICACHE |
                                   RADV_CMD_FLAG_INV_SMEM_L1 |
                                   RADV_CMD_FLAG_INV_VMEM_L1 |
                                   RADV_CMD_FLAG_INV_GLOBAL_L2);
            break;
        }
        device->ws->cs_finalize(device->flush_cs[family]);
    }
    if (getenv("RADV_TRACE_FILE")) {
        device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
                                                     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
        if (!device->trace_bo) {
            result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
            goto fail;
        }

        device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
        if (!device->trace_id_ptr) {
            result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
            goto fail;
        }
    }

    if (device->physical_device->rad_info.chip_class >= CIK)
        cik_create_gfx_config(device);

    *pDevice = radv_device_to_handle(device);
    return VK_SUCCESS;

fail:
    if (device->trace_bo)
        device->ws->buffer_destroy(device->trace_bo);

    if (device->gfx_init)
        device->ws->buffer_destroy(device->gfx_init);

    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
        for (unsigned q = 0; q < device->queue_count[i]; q++)
            radv_queue_finish(&device->queues[i][q]);
        if (device->queue_count[i])
            vk_free(&device->alloc, device->queues[i]);
    }

    vk_free(&device->alloc, device);
    return result;
}

void radv_DestroyDevice(
    VkDevice _device,
    const VkAllocationCallbacks* pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    if (device->trace_bo)
        device->ws->buffer_destroy(device->trace_bo);

    if (device->gfx_init)
        device->ws->buffer_destroy(device->gfx_init);

    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
        for (unsigned q = 0; q < device->queue_count[i]; q++)
            radv_queue_finish(&device->queues[i][q]);
        if (device->queue_count[i])
            vk_free(&device->alloc, device->queues[i]);
        if (device->empty_cs[i])
            device->ws->cs_destroy(device->empty_cs[i]);
        if (device->flush_cs[i])
            device->ws->cs_destroy(device->flush_cs[i]);
    }
    radv_device_finish_meta(device);

    vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
    const char* pLayerName,
    uint32_t* pPropertyCount,
    VkExtensionProperties* pProperties)
{
    if (pProperties == NULL) {
        *pPropertyCount = ARRAY_SIZE(instance_extensions);
        return VK_SUCCESS;
    }

    *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
    typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

    if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
        return VK_INCOMPLETE;

    return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
    VkPhysicalDevice physicalDevice,
    const char* pLayerName,
    uint32_t* pPropertyCount,
    VkExtensionProperties* pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

    if (pProperties == NULL) {
        *pPropertyCount = pdevice->extensions.num_ext;
        return VK_SUCCESS;
    }

    *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
    typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

    if (*pPropertyCount < pdevice->extensions.num_ext)
        return VK_INCOMPLETE;

    return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
    uint32_t* pPropertyCount,
    VkLayerProperties* pProperties)
{
    if (pProperties == NULL) {
        *pPropertyCount = 0;
        return VK_SUCCESS;
    }

    /* None supported at this time */
    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
    VkPhysicalDevice physicalDevice,
    uint32_t* pPropertyCount,
    VkLayerProperties* pProperties)
{
    if (pProperties == NULL) {
        *pPropertyCount = 0;
        return VK_SUCCESS;
    }

    /* None supported at this time */
    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
    VkDevice _device,
    uint32_t queueFamilyIndex,
    uint32_t queueIndex,
    VkQueue* pQueue)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

static void radv_dump_trace(struct radv_device *device,
                            struct radeon_winsys_cs *cs)
{
    const char *filename = getenv("RADV_TRACE_FILE");
    FILE *f = fopen(filename, "w");
    if (!f) {
        fprintf(stderr, "Failed to write trace dump to %s\n", filename);
        return;
    }

    fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
    device->ws->cs_dump(cs, f, *device->trace_id_ptr);
    fclose(f);
}

static void
fill_geom_rings(struct radv_queue *queue,
                uint32_t *map,
                uint32_t esgs_ring_size,
                struct radeon_winsys_bo *esgs_ring_bo,
                uint32_t gsvs_ring_size,
                struct radeon_winsys_bo *gsvs_ring_bo)
{
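    /* Layout sketch (derived from the writes below): map[4..19] holds four
     * 4-dword buffer descriptors - the ES write view of the ES->GS ring
     * (swizzled), the GS read view of that same ring, the VS read view of
     * the GS->VS ring, and the GS write view of the GS->VS ring, whose
     * stride and record count the shader patches at runtime. */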
    uint64_t esgs_va = 0, gsvs_va = 0;
    uint32_t *desc = &map[4];

    if (esgs_ring_bo)
        esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
    if (gsvs_ring_bo)
        gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);

    /* stride 0, num records - size, add tid, swizzle, elsize4,
       index stride 64 */
    desc[0] = esgs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(true);
    desc[2] = esgs_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(1) |
              S_008F0C_INDEX_STRIDE(3) |
              S_008F0C_ADD_TID_ENABLE(true);

    desc += 4;
    /* GS entry for ES->GS ring */
    /* stride 0, num records - size, elsize0,
       index stride 0 */
    desc[0] = esgs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(false);
    desc[2] = esgs_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(0) |
              S_008F0C_INDEX_STRIDE(0) |
              S_008F0C_ADD_TID_ENABLE(false);

    desc += 4;
    /* VS entry for GS->VS ring */
    /* stride 0, num records - size, elsize0,
       index stride 0 */
    desc[0] = gsvs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(false);
    desc[2] = gsvs_ring_size;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(0) |
              S_008F0C_INDEX_STRIDE(0) |
              S_008F0C_ADD_TID_ENABLE(false);
    desc += 4;

    /* stride gsvs_itemsize, num records 64
       elsize 4, index stride 16 */
    /* shader will patch stride and desc[2] */
    desc[0] = gsvs_va;
    desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
              S_008F04_STRIDE(0) |
              S_008F04_SWIZZLE_ENABLE(true);
    desc[2] = 0;
    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
              S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
              S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
              S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
              S_008F0C_ELEMENT_SIZE(1) |
              S_008F0C_INDEX_STRIDE(1) |
              S_008F0C_ADD_TID_ENABLE(true);
}

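/* Builds (or reuses) the per-queue preamble command streams that bind the
 * scratch buffers and geometry rings. Sizes only ever grow; when the cached
 * buffers already satisfy the request, the existing preambles are returned
 * unchanged, so command buffers recorded against smaller rings stay valid.
 */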
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
                     uint32_t compute_scratch_size,
                     uint32_t esgs_ring_size,
                     uint32_t gsvs_ring_size,
                     struct radeon_winsys_cs **initial_preamble_cs,
                     struct radeon_winsys_cs **continue_preamble_cs)
{
    struct radeon_winsys_bo *scratch_bo = NULL;
    struct radeon_winsys_bo *descriptor_bo = NULL;
    struct radeon_winsys_bo *compute_scratch_bo = NULL;
    struct radeon_winsys_bo *esgs_ring_bo = NULL;
    struct radeon_winsys_bo *gsvs_ring_bo = NULL;
    struct radeon_winsys_cs *dest_cs[2] = {0};

    if (scratch_size <= queue->scratch_size &&
        compute_scratch_size <= queue->compute_scratch_size &&
        esgs_ring_size <= queue->esgs_ring_size &&
        gsvs_ring_size <= queue->gsvs_ring_size &&
        queue->initial_preamble_cs) {
        *initial_preamble_cs = queue->initial_preamble_cs;
        *continue_preamble_cs = queue->continue_preamble_cs;
        if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
            *continue_preamble_cs = NULL;
        return VK_SUCCESS;
    }

    if (scratch_size > queue->scratch_size) {
        scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                      scratch_size,
                                                      4096,
                                                      RADEON_DOMAIN_VRAM,
                                                      RADEON_FLAG_NO_CPU_ACCESS);
        if (!scratch_bo)
            goto fail;
    } else
        scratch_bo = queue->scratch_bo;

    if (compute_scratch_size > queue->compute_scratch_size) {
        compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                              compute_scratch_size,
                                                              4096,
                                                              RADEON_DOMAIN_VRAM,
                                                              RADEON_FLAG_NO_CPU_ACCESS);
        if (!compute_scratch_bo)
            goto fail;

    } else
        compute_scratch_bo = queue->compute_scratch_bo;

    if (esgs_ring_size > queue->esgs_ring_size) {
        esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                        esgs_ring_size,
                                                        4096,
                                                        RADEON_DOMAIN_VRAM,
                                                        RADEON_FLAG_NO_CPU_ACCESS);
        if (!esgs_ring_bo)
            goto fail;
    } else {
        esgs_ring_bo = queue->esgs_ring_bo;
        esgs_ring_size = queue->esgs_ring_size;
    }

    if (gsvs_ring_size > queue->gsvs_ring_size) {
        gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                        gsvs_ring_size,
                                                        4096,
                                                        RADEON_DOMAIN_VRAM,
                                                        RADEON_FLAG_NO_CPU_ACCESS);
        if (!gsvs_ring_bo)
            goto fail;
    } else {
        gsvs_ring_bo = queue->gsvs_ring_bo;
        gsvs_ring_size = queue->gsvs_ring_size;
    }

    if (scratch_bo != queue->scratch_bo ||
        esgs_ring_bo != queue->esgs_ring_bo ||
        gsvs_ring_bo != queue->gsvs_ring_bo) {
        uint32_t size = 0;
        if (gsvs_ring_bo || esgs_ring_bo)
            size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
        else if (scratch_bo)
            size = 8; /* 2 dword */

        descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                         size,
                                                         4096,
                                                         RADEON_DOMAIN_VRAM,
                                                         RADEON_FLAG_CPU_ACCESS);
        if (!descriptor_bo)
            goto fail;
    } else
        descriptor_bo = queue->descriptor_bo;

    for (int i = 0; i < 2; ++i) {
        struct radeon_winsys_cs *cs = NULL;
        cs = queue->device->ws->cs_create(queue->device->ws,
                                          queue->queue_family_index ? RING_COMPUTE : RING_GFX);
        if (!cs)
            goto fail;

        dest_cs[i] = cs;

        if (scratch_bo)
            queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

        if (esgs_ring_bo)
            queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

        if (gsvs_ring_bo)
            queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

        if (descriptor_bo)
            queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

        if (descriptor_bo != queue->descriptor_bo) {
            uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

            if (scratch_bo) {
                uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
                uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                 S_008F04_SWIZZLE_ENABLE(1);
                map[0] = scratch_va;
                map[1] = rsrc1;
            }

            if (esgs_ring_bo || gsvs_ring_bo)
                fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);

            queue->device->ws->buffer_unmap(descriptor_bo);
        }

        if (esgs_ring_bo || gsvs_ring_bo) {
            radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
            radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
            radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
            radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));

            if (queue->device->physical_device->rad_info.chip_class >= CIK) {
                radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
                radeon_emit(cs, esgs_ring_size >> 8);
                radeon_emit(cs, gsvs_ring_size >> 8);
            } else {
                radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
                radeon_emit(cs, esgs_ring_size >> 8);
                radeon_emit(cs, gsvs_ring_size >> 8);
            }
        }

        if (descriptor_bo) {
            uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                               R_00B130_SPI_SHADER_USER_DATA_VS_0,
                               R_00B230_SPI_SHADER_USER_DATA_GS_0,
                               R_00B330_SPI_SHADER_USER_DATA_ES_0,
                               R_00B430_SPI_SHADER_USER_DATA_HS_0,
                               R_00B530_SPI_SHADER_USER_DATA_LS_0};

            uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

            for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
                radeon_set_sh_reg_seq(cs, regs[i], 2);
                radeon_emit(cs, va);
                radeon_emit(cs, va >> 32);
            }
        }

        if (compute_scratch_bo) {
            uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
            uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                             S_008F04_SWIZZLE_ENABLE(1);

            queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

            radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
            radeon_emit(cs, scratch_va);
            radeon_emit(cs, rsrc1);
        }

        if (!i) {
            si_cs_emit_cache_flush(cs,
                                   queue->device->physical_device->rad_info.chip_class,
                                   queue->queue_family_index == RING_COMPUTE &&
                                   queue->device->physical_device->rad_info.chip_class >= CIK,
                                   RADV_CMD_FLAG_INV_ICACHE |
                                   RADV_CMD_FLAG_INV_SMEM_L1 |
                                   RADV_CMD_FLAG_INV_VMEM_L1 |
                                   RADV_CMD_FLAG_INV_GLOBAL_L2);
        }

        if (!queue->device->ws->cs_finalize(cs))
            goto fail;
    }

    if (queue->initial_preamble_cs)
        queue->device->ws->cs_destroy(queue->initial_preamble_cs);

    if (queue->continue_preamble_cs)
        queue->device->ws->cs_destroy(queue->continue_preamble_cs);

    queue->initial_preamble_cs = dest_cs[0];
    queue->continue_preamble_cs = dest_cs[1];

    if (scratch_bo != queue->scratch_bo) {
        if (queue->scratch_bo)
            queue->device->ws->buffer_destroy(queue->scratch_bo);
        queue->scratch_bo = scratch_bo;
        queue->scratch_size = scratch_size;
    }

    if (compute_scratch_bo != queue->compute_scratch_bo) {
        if (queue->compute_scratch_bo)
            queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
        queue->compute_scratch_bo = compute_scratch_bo;
        queue->compute_scratch_size = compute_scratch_size;
    }

    if (esgs_ring_bo != queue->esgs_ring_bo) {
        if (queue->esgs_ring_bo)
            queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
        queue->esgs_ring_bo = esgs_ring_bo;
        queue->esgs_ring_size = esgs_ring_size;
    }

    if (gsvs_ring_bo != queue->gsvs_ring_bo) {
        if (queue->gsvs_ring_bo)
            queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
        queue->gsvs_ring_bo = gsvs_ring_bo;
        queue->gsvs_ring_size = gsvs_ring_size;
    }

    if (descriptor_bo != queue->descriptor_bo) {
        if (queue->descriptor_bo)
            queue->device->ws->buffer_destroy(queue->descriptor_bo);

        queue->descriptor_bo = descriptor_bo;
    }

    *initial_preamble_cs = queue->initial_preamble_cs;
    *continue_preamble_cs = queue->continue_preamble_cs;
    if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
        *continue_preamble_cs = NULL;
    return VK_SUCCESS;
fail:
    for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
        if (dest_cs[i])
            queue->device->ws->cs_destroy(dest_cs[i]);
    if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
        queue->device->ws->buffer_destroy(descriptor_bo);
    if (scratch_bo && scratch_bo != queue->scratch_bo)
        queue->device->ws->buffer_destroy(scratch_bo);
    if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
        queue->device->ws->buffer_destroy(compute_scratch_bo);
    if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
        queue->device->ws->buffer_destroy(esgs_ring_bo);
    if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
        queue->device->ws->buffer_destroy(gsvs_ring_bo);
    return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

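/* With RADV_TRACE_FILE set, submissions are chunked to a single command
 * stream at a time (max_cs_submission == 1) and the context is idled after
 * each chunk, so a GPU hang can be attributed to one CS and dumped. */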
VkResult radv_QueueSubmit(
    VkQueue _queue,
    uint32_t submitCount,
    const VkSubmitInfo* pSubmits,
    VkFence _fence)
{
    RADV_FROM_HANDLE(radv_queue, queue, _queue);
    RADV_FROM_HANDLE(radv_fence, fence, _fence);
    struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
    int ret;
    uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
    uint32_t scratch_size = 0;
    uint32_t compute_scratch_size = 0;
    uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
    struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
    VkResult result;
    bool fence_emitted = false;

    /* Do this first so failing to allocate scratch buffers can't result in
     * partially executed submissions. */
    for (uint32_t i = 0; i < submitCount; i++) {
        for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
            RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                             pSubmits[i].pCommandBuffers[j]);

            scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
            compute_scratch_size = MAX2(compute_scratch_size,
                                        cmd_buffer->compute_scratch_size_needed);
            esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
            gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
        }
    }

    result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
                                  esgs_ring_size, gsvs_ring_size,
                                  &initial_preamble_cs, &continue_preamble_cs);
    if (result != VK_SUCCESS)
        return result;

    for (uint32_t i = 0; i < submitCount; i++) {
        struct radeon_winsys_cs **cs_array;
        bool has_flush = !i; /* flush caches ahead of the first submission */
        bool can_patch = !has_flush;
        uint32_t advance;

        if (!pSubmits[i].commandBufferCount) {
            if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
                ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                   &queue->device->empty_cs[queue->queue_family_index],
                                                   1, NULL, NULL,
                                                   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                                   pSubmits[i].waitSemaphoreCount,
                                                   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
                                                   pSubmits[i].signalSemaphoreCount,
                                                   false, base_fence);
                if (ret) {
                    radv_loge("failed to submit CS %d\n", i);
                    abort();
                }
                fence_emitted = true;
            }
            continue;
        }

        cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
                          (pSubmits[i].commandBufferCount + has_flush));

        if (has_flush)
            cs_array[0] = queue->device->flush_cs[queue->queue_family_index];

        for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
            RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                             pSubmits[i].pCommandBuffers[j]);
            assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

            cs_array[j + has_flush] = cmd_buffer->cs;
            if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                can_patch = false;
        }

        for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
            advance = MIN2(max_cs_submission,
                           pSubmits[i].commandBufferCount + has_flush - j);
            bool b = j == 0;
            bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;

            if (queue->device->trace_bo)
                *queue->device->trace_id_ptr = 0;

            ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
                                               advance, initial_preamble_cs, continue_preamble_cs,
                                               (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                               b ? pSubmits[i].waitSemaphoreCount : 0,
                                               (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
                                               e ? pSubmits[i].signalSemaphoreCount : 0,
                                               can_patch, base_fence);

            if (ret) {
                radv_loge("failed to submit CS %d\n", i);
                abort();
            }
            fence_emitted = true;
            if (queue->device->trace_bo) {
                bool success = queue->device->ws->ctx_wait_idle(
                    queue->hw_ctx,
                    radv_queue_family_to_ring(
                        queue->queue_family_index),
                    queue->queue_idx);

                if (!success) { /* Hang */
                    radv_dump_trace(queue->device, cs_array[j]);
                    abort();
                }
            }
        }
        free(cs_array);
    }

    if (fence) {
        if (!fence_emitted)
            ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                               &queue->device->empty_cs[queue->queue_family_index],
                                               1, NULL, NULL, NULL, 0, NULL, 0,
                                               false, base_fence);

        fence->submitted = true;
    }

    return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
    VkQueue _queue)
{
    RADV_FROM_HANDLE(radv_queue, queue, _queue);

    queue->device->ws->ctx_wait_idle(queue->hw_ctx,
                                     radv_queue_family_to_ring(queue->queue_family_index),
                                     queue->queue_idx);
    return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
    VkDevice _device)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
        for (unsigned q = 0; q < device->queue_count[i]; q++) {
            radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
        }
    }
    return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
    VkInstance instance,
    const char* pName)
{
    return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance instance,
    const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance instance,
    const char* pName)
{
    return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
    VkDevice device,
    const char* pName)
{
    return radv_lookup_entrypoint(pName);
}

bool radv_get_memory_fd(struct radv_device *device,
                        struct radv_device_memory *memory,
                        int *pFD)
{
    struct radeon_bo_metadata metadata;

    if (memory->image) {
        radv_init_metadata(device, memory->image, &metadata);
        device->ws->buffer_set_metadata(memory->bo, &metadata);
    }

    return device->ws->buffer_get_fd(device->ws, memory->bo,
                                     pFD);
}

VkResult radv_AllocateMemory(
    VkDevice _device,
    const VkMemoryAllocateInfo* pAllocateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkDeviceMemory* pMem)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_device_memory *mem;
    VkResult result;
    enum radeon_bo_domain domain;
    uint32_t flags = 0;
    const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

    if (pAllocateInfo->allocationSize == 0) {
        /* Apparently, this is allowed */
        *pMem = VK_NULL_HANDLE;
        return VK_SUCCESS;
    }

    vk_foreach_struct(ext, pAllocateInfo->pNext) {
        switch (ext->sType) {
        case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
            dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
            break;
        default:
            break;
        }
    }

    mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (mem == NULL)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    if (dedicate_info) {
        mem->image = radv_image_from_handle(dedicate_info->image);
        mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
    } else {
        mem->image = NULL;
        mem->buffer = NULL;
    }
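    /* Remembering the dedicated image/buffer lets radv_get_memory_fd()
     * write tiling metadata for mem->image onto the BO before export. */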

    uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
    if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
        pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
        domain = RADEON_DOMAIN_GTT;
    else
        domain = RADEON_DOMAIN_VRAM;

    if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
        flags |= RADEON_FLAG_NO_CPU_ACCESS;
    else
        flags |= RADEON_FLAG_CPU_ACCESS;

    if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
        flags |= RADEON_FLAG_GTT_WC;

    mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
                                        domain, flags);

    if (!mem->bo) {
        result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
        goto fail;
    }
    mem->type_index = pAllocateInfo->memoryTypeIndex;

    *pMem = radv_device_memory_to_handle(mem);

    return VK_SUCCESS;

fail:
    vk_free2(&device->alloc, pAllocator, mem);

    return result;
}
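
/* A minimal sketch of how an application reaches the dedicate_info path
 * above (illustrative only; "image", "mem_req" and "mem_type_index" are the
 * caller's names, not part of this driver). The image must itself have been
 * created with VkDedicatedAllocationImageCreateInfoNV::dedicatedAllocation
 * set to VK_TRUE:
 *
 *     VkDedicatedAllocationMemoryAllocateInfoNV dedicated = {
 *         .sType = VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV,
 *         .image = image,
 *         .buffer = VK_NULL_HANDLE,
 *     };
 *     VkMemoryAllocateInfo alloc_info = {
 *         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *         .pNext = &dedicated,
 *         .allocationSize = mem_req.size,
 *         .memoryTypeIndex = mem_type_index,
 *     };
 *     vkAllocateMemory(device, &alloc_info, NULL, &mem);
 */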

void radv_FreeMemory(
    VkDevice _device,
    VkDeviceMemory _mem,
    const VkAllocationCallbacks* pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

    if (mem == NULL)
        return;

    device->ws->buffer_destroy(mem->bo);
    mem->bo = NULL;

    vk_free2(&device->alloc, pAllocator, mem);
}

1751 VkResult radv_MapMemory(
1752 VkDevice _device,
1753 VkDeviceMemory _memory,
1754 VkDeviceSize offset,
1755 VkDeviceSize size,
1756 VkMemoryMapFlags flags,
1757 void** ppData)
1758 {
1759 RADV_FROM_HANDLE(radv_device, device, _device);
1760 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1761
1762 if (mem == NULL) {
1763 *ppData = NULL;
1764 return VK_SUCCESS;
1765 }
1766
1767 *ppData = device->ws->buffer_map(mem->bo);
1768 if (*ppData) {
1769 *ppData += offset;
1770 return VK_SUCCESS;
1771 }
1772
1773 return VK_ERROR_MEMORY_MAP_FAILED;
1774 }
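/* Example (illustrative): the usual client-side map/write/unmap pattern,
 * assuming `mem` is a host-visible allocation and `data`/`size` are the
 * caller's upload:
 *
 *    void *ptr;
 *    if (vkMapMemory(device, mem, 0, VK_WHOLE_SIZE, 0, &ptr) == VK_SUCCESS) {
 *        memcpy(ptr, data, size);
 *        vkUnmapMemory(device, mem);
 *    }
 */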
1775
1776 void radv_UnmapMemory(
1777 VkDevice _device,
1778 VkDeviceMemory _memory)
1779 {
1780 RADV_FROM_HANDLE(radv_device, device, _device);
1781 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1782
1783 if (mem == NULL)
1784 return;
1785
1786 device->ws->buffer_unmap(mem->bo);
1787 }
1788
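/* Every host-visible memory type radv exposes is also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops.
 */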
1789 VkResult radv_FlushMappedMemoryRanges(
1790 VkDevice _device,
1791 uint32_t memoryRangeCount,
1792 const VkMappedMemoryRange* pMemoryRanges)
1793 {
1794 return VK_SUCCESS;
1795 }
1796
1797 VkResult radv_InvalidateMappedMemoryRanges(
1798 VkDevice _device,
1799 uint32_t memoryRangeCount,
1800 const VkMappedMemoryRange* pMemoryRanges)
1801 {
1802 return VK_SUCCESS;
1803 }
1804
1805 void radv_GetBufferMemoryRequirements(
1806 VkDevice device,
1807 VkBuffer _buffer,
1808 VkMemoryRequirements* pMemoryRequirements)
1809 {
1810 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1811
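/* Buffers may be placed in any of the exposed memory types. */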
1812 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1813
1814 pMemoryRequirements->size = buffer->size;
1815 pMemoryRequirements->alignment = 16;
1816 }
1817
1818 void radv_GetImageMemoryRequirements(
1819 VkDevice device,
1820 VkImage _image,
1821 VkMemoryRequirements* pMemoryRequirements)
1822 {
1823 RADV_FROM_HANDLE(radv_image, image, _image);
1824
1825 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1826
1827 pMemoryRequirements->size = image->size;
1828 pMemoryRequirements->alignment = image->alignment;
1829 }
1830
1831 void radv_GetImageSparseMemoryRequirements(
1832 VkDevice device,
1833 VkImage image,
1834 uint32_t* pSparseMemoryRequirementCount,
1835 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1836 {
1837 stub();
1838 }
1839
1840 void radv_GetDeviceMemoryCommitment(
1841 VkDevice device,
1842 VkDeviceMemory memory,
1843 VkDeviceSize* pCommittedMemoryInBytes)
1844 {
1845 *pCommittedMemoryInBytes = 0;
1846 }
1847
1848 VkResult radv_BindBufferMemory(
1849 VkDevice device,
1850 VkBuffer _buffer,
1851 VkDeviceMemory _memory,
1852 VkDeviceSize memoryOffset)
1853 {
1854 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1855 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1856
1857 if (mem) {
1858 buffer->bo = mem->bo;
1859 buffer->offset = memoryOffset;
1860 } else {
1861 buffer->bo = NULL;
1862 buffer->offset = 0;
1863 }
1864
1865 return VK_SUCCESS;
1866 }
1867
1868 VkResult radv_BindImageMemory(
1869 VkDevice device,
1870 VkImage _image,
1871 VkDeviceMemory _memory,
1872 VkDeviceSize memoryOffset)
1873 {
1874 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1875 RADV_FROM_HANDLE(radv_image, image, _image);
1876
1877 if (mem) {
1878 image->bo = mem->bo;
1879 image->offset = memoryOffset;
1880 } else {
1881 image->bo = NULL;
1882 image->offset = 0;
1883 }
1884
1885 return VK_SUCCESS;
1886 }
1887
1888 VkResult radv_QueueBindSparse(
1889 VkQueue queue,
1890 uint32_t bindInfoCount,
1891 const VkBindSparseInfo* pBindInfo,
1892 VkFence fence)
1893 {
1894 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1895 }
1896
1897 VkResult radv_CreateFence(
1898 VkDevice _device,
1899 const VkFenceCreateInfo* pCreateInfo,
1900 const VkAllocationCallbacks* pAllocator,
1901 VkFence* pFence)
1902 {
1903 RADV_FROM_HANDLE(radv_device, device, _device);
1904 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1905 sizeof(*fence), 8,
1906 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1907
1908 if (!fence)
1909 return VK_ERROR_OUT_OF_HOST_MEMORY;
1910
1911 memset(fence, 0, sizeof(*fence));
1912 fence->submitted = false;
1913 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1914 fence->fence = device->ws->create_fence();
1915 if (!fence->fence) {
1916 vk_free2(&device->alloc, pAllocator, fence);
1917 return VK_ERROR_OUT_OF_HOST_MEMORY;
1918 }
1919
1920 *pFence = radv_fence_to_handle(fence);
1921
1922 return VK_SUCCESS;
1923 }
1924
1925 void radv_DestroyFence(
1926 VkDevice _device,
1927 VkFence _fence,
1928 const VkAllocationCallbacks* pAllocator)
1929 {
1930 RADV_FROM_HANDLE(radv_device, device, _device);
1931 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1932
1933 if (!fence)
1934 return;
1935 device->ws->destroy_fence(fence->fence);
1936 vk_free2(&device->alloc, pAllocator, fence);
1937 }
1938
1939 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1940 {
1941 uint64_t current_time;
1942 struct timespec tv;
1943
1944 clock_gettime(CLOCK_MONOTONIC, &tv);
1945 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1946
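/* Clamp the relative timeout so that current_time + timeout cannot
 * overflow; UINT64_MAX therefore behaves as an infinite wait.
 */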
1947 timeout = MIN2(UINT64_MAX - current_time, timeout);
1948
1949 return current_time + timeout;
1950 }
1951
1952 VkResult radv_WaitForFences(
1953 VkDevice _device,
1954 uint32_t fenceCount,
1955 const VkFence* pFences,
1956 VkBool32 waitAll,
1957 uint64_t timeout)
1958 {
1959 RADV_FROM_HANDLE(radv_device, device, _device);
1960 timeout = radv_get_absolute_timeout(timeout);
1961
1962 if (!waitAll && fenceCount > 1) {
1963 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1964 }
1965
1966 for (uint32_t i = 0; i < fenceCount; ++i) {
1967 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1968 bool expired = false;
1969
1970 if (fence->signalled)
1971 continue;
1972
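/* An unsubmitted fence cannot signal from here; returning VK_TIMEOUT
 * immediately is a known simplification rather than waiting for a
 * submission from another thread.
 */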
1973 if (!fence->submitted)
1974 return VK_TIMEOUT;
1975
1976 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1977 if (!expired)
1978 return VK_TIMEOUT;
1979
1980 fence->signalled = true;
1981 }
1982
1983 return VK_SUCCESS;
1984 }
1985
1986 VkResult radv_ResetFences(VkDevice device,
1987 uint32_t fenceCount,
1988 const VkFence *pFences)
1989 {
1990 for (unsigned i = 0; i < fenceCount; ++i) {
1991 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1992 fence->submitted = fence->signalled = false;
1993 }
1994
1995 return VK_SUCCESS;
1996 }
1997
1998 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1999 {
2000 RADV_FROM_HANDLE(radv_device, device, _device);
2001 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2002
2003 if (fence->signalled)
2004 return VK_SUCCESS;
2005 if (!fence->submitted)
2006 return VK_NOT_READY;
2007
2008 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2009 return VK_NOT_READY;
2010
2011 return VK_SUCCESS;
2012 }
2013
2014
2015 /* Queue semaphore functions */
2016
2017 VkResult radv_CreateSemaphore(
2018 VkDevice _device,
2019 const VkSemaphoreCreateInfo* pCreateInfo,
2020 const VkAllocationCallbacks* pAllocator,
2021 VkSemaphore* pSemaphore)
2022 {
2023 RADV_FROM_HANDLE(radv_device, device, _device);
2024 struct radeon_winsys_sem *sem;
2025
2026 sem = device->ws->create_sem(device->ws);
2027 if (!sem)
2028 return VK_ERROR_OUT_OF_HOST_MEMORY;
2029
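/* The semaphore is a bare winsys object; expose the pointer directly as
 * the non-dispatchable handle instead of wrapping it in a radv struct.
 */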
2030 *pSemaphore = (VkSemaphore)sem;
2031 return VK_SUCCESS;
2032 }
2033
2034 void radv_DestroySemaphore(
2035 VkDevice _device,
2036 VkSemaphore _semaphore,
2037 const VkAllocationCallbacks* pAllocator)
2038 {
2039 RADV_FROM_HANDLE(radv_device, device, _device);
2040 struct radeon_winsys_sem *sem;
2041 if (!_semaphore)
2042 return;
2043
2044 sem = (struct radeon_winsys_sem *)_semaphore;
2045 device->ws->destroy_sem(sem);
2046 }
2047
2048 VkResult radv_CreateEvent(
2049 VkDevice _device,
2050 const VkEventCreateInfo* pCreateInfo,
2051 const VkAllocationCallbacks* pAllocator,
2052 VkEvent* pEvent)
2053 {
2054 RADV_FROM_HANDLE(radv_device, device, _device);
2055 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2056 sizeof(*event), 8,
2057 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2058
2059 if (!event)
2060 return VK_ERROR_OUT_OF_HOST_MEMORY;
2061
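/* Back the event with a tiny CPU-visible GTT buffer: the host reads and
 * writes the 64-bit status word through this mapping, while the GPU
 * updates it from the command stream.
 */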
2062 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2063 RADEON_DOMAIN_GTT,
2064 RADEON_FLAG_CPU_ACCESS);
2065 if (!event->bo) {
2066 vk_free2(&device->alloc, pAllocator, event);
2067 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2068 }
2069
2070 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2071
2072 *pEvent = radv_event_to_handle(event);
2073
2074 return VK_SUCCESS;
2075 }
2076
2077 void radv_DestroyEvent(
2078 VkDevice _device,
2079 VkEvent _event,
2080 const VkAllocationCallbacks* pAllocator)
2081 {
2082 RADV_FROM_HANDLE(radv_device, device, _device);
2083 RADV_FROM_HANDLE(radv_event, event, _event);
2084
2085 if (!event)
2086 return;
2087 device->ws->buffer_destroy(event->bo);
2088 vk_free2(&device->alloc, pAllocator, event);
2089 }
2090
2091 VkResult radv_GetEventStatus(
2092 VkDevice _device,
2093 VkEvent _event)
2094 {
2095 RADV_FROM_HANDLE(radv_event, event, _event);
2096
2097 if (*event->map == 1)
2098 return VK_EVENT_SET;
2099 return VK_EVENT_RESET;
2100 }
2101
2102 VkResult radv_SetEvent(
2103 VkDevice _device,
2104 VkEvent _event)
2105 {
2106 RADV_FROM_HANDLE(radv_event, event, _event);
2107 *event->map = 1;
2108
2109 return VK_SUCCESS;
2110 }
2111
2112 VkResult radv_ResetEvent(
2113 VkDevice _device,
2114 VkEvent _event)
2115 {
2116 RADV_FROM_HANDLE(radv_event, event, _event);
2117 *event->map = 0;
2118
2119 return VK_SUCCESS;
2120 }
2121
2122 VkResult radv_CreateBuffer(
2123 VkDevice _device,
2124 const VkBufferCreateInfo* pCreateInfo,
2125 const VkAllocationCallbacks* pAllocator,
2126 VkBuffer* pBuffer)
2127 {
2128 RADV_FROM_HANDLE(radv_device, device, _device);
2129 struct radv_buffer *buffer;
2130
2131 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2132
2133 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2134 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2135 if (buffer == NULL)
2136 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2137
2138 buffer->size = pCreateInfo->size;
2139 buffer->usage = pCreateInfo->usage;
2140 buffer->bo = NULL;
2141 buffer->offset = 0;
2142
2143 *pBuffer = radv_buffer_to_handle(buffer);
2144
2145 return VK_SUCCESS;
2146 }
2147
2148 void radv_DestroyBuffer(
2149 VkDevice _device,
2150 VkBuffer _buffer,
2151 const VkAllocationCallbacks* pAllocator)
2152 {
2153 RADV_FROM_HANDLE(radv_device, device, _device);
2154 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2155
2156 if (!buffer)
2157 return;
2158
2159 vk_free2(&device->alloc, pAllocator, buffer);
2160 }
2161
2162 static inline unsigned
2163 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2164 {
2165 if (stencil)
2166 return image->surface.stencil_tiling_index[level];
2167 else
2168 return image->surface.tiling_index[level];
2169 }
2170
2171 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2172 {
2173 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2174 }
2175
2176 static void
2177 radv_initialise_color_surface(struct radv_device *device,
2178 struct radv_color_buffer_info *cb,
2179 struct radv_image_view *iview)
2180 {
2181 const struct vk_format_description *desc;
2182 unsigned ntype, format, swap, endian;
2183 unsigned blend_clamp = 0, blend_bypass = 0;
2184 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2185 uint64_t va;
2186 const struct radeon_surf *surf = &iview->image->surface;
2187 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2188
2189 desc = vk_format_description(iview->vk_format);
2190
2191 memset(cb, 0, sizeof(*cb));
2192
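/* Surface addresses are programmed in units of 256 bytes (hence va >> 8). */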
2193 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2194 va += level_info->offset;
2195 cb->cb_color_base = va >> 8;
2196
2197 /* CMASK variables */
2198 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2199 va += iview->image->cmask.offset;
2200 cb->cb_color_cmask = va >> 8;
2201 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2202
2203 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2204 va += iview->image->dcc_offset;
2205 cb->cb_dcc_base = va >> 8;
2206
2207 uint32_t max_slice = radv_surface_layer_count(iview);
2208 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2209 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2210
2211 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2212 pitch_tile_max = level_info->nblk_x / 8 - 1;
2213 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2214 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2215
2216 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2217 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2218
2219 /* Intensity is implemented as Red, so treat it that way. */
2220 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2221 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2222
2223 if (iview->image->samples > 1) {
2224 unsigned log_samples = util_logbase2(iview->image->samples);
2225
2226 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2227 S_028C74_NUM_FRAGMENTS(log_samples);
2228 }
2229
2230 if (iview->image->fmask.size) {
2231 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2232 if (device->physical_device->rad_info.chip_class >= CIK)
2233 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2234 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2235 cb->cb_color_fmask = va >> 8;
2236 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2237 } else {
2238 /* This must be set for fast clear to work without FMASK. */
2239 if (device->physical_device->rad_info.chip_class >= CIK)
2240 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2241 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2242 cb->cb_color_fmask = cb->cb_color_base;
2243 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2244 }
2245
2246 ntype = radv_translate_color_numformat(iview->vk_format,
2247 desc,
2248 vk_format_get_first_non_void_channel(iview->vk_format));
2249 format = radv_translate_colorformat(iview->vk_format);
2250 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2251 radv_finishme("Illegal color\n");
2252 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2253 endian = radv_colorformat_endian_swap(format);
2254
2255 /* blend clamp should be set for all NORM/SRGB types */
2256 if (ntype == V_028C70_NUMBER_UNORM ||
2257 ntype == V_028C70_NUMBER_SNORM ||
2258 ntype == V_028C70_NUMBER_SRGB)
2259 blend_clamp = 1;
2260
2261 /* set blend bypass according to docs if SINT/UINT or
2262 * 8/24 COLOR variants */
2263 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2264 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2265 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2266 blend_clamp = 0;
2267 blend_bypass = 1;
2268 }
2269 #if 0
2270 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2271 (format == V_028C70_COLOR_8 ||
2272 format == V_028C70_COLOR_8_8 ||
2273 format == V_028C70_COLOR_8_8_8_8))
2274 ->color_is_int8 = true;
2275 #endif
2276 cb->cb_color_info = S_028C70_FORMAT(format) |
2277 S_028C70_COMP_SWAP(swap) |
2278 S_028C70_BLEND_CLAMP(blend_clamp) |
2279 S_028C70_BLEND_BYPASS(blend_bypass) |
2280 S_028C70_SIMPLE_FLOAT(1) |
2281 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2282 ntype != V_028C70_NUMBER_SNORM &&
2283 ntype != V_028C70_NUMBER_SRGB &&
2284 format != V_028C70_COLOR_8_24 &&
2285 format != V_028C70_COLOR_24_8) |
2286 S_028C70_NUMBER_TYPE(ntype) |
2287 S_028C70_ENDIAN(endian);
2288 if (iview->image->samples > 1 && iview->image->fmask.size)
2289 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2291
2292 if (iview->image->cmask.size &&
2293 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2294 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2295
2296 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2297 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2298
2299 if (device->physical_device->rad_info.chip_class >= VI) {
2300 unsigned max_uncompressed_block_size = 2;
2301 if (iview->image->samples > 1) {
2302 if (iview->image->surface.bpe == 1)
2303 max_uncompressed_block_size = 0;
2304 else if (iview->image->surface.bpe == 2)
2305 max_uncompressed_block_size = 1;
2306 }
2307
2308 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2309 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2310 }
2311
2312 /* This must be set for fast clear to work without FMASK. */
2313 if (!iview->image->fmask.size &&
2314 device->physical_device->rad_info.chip_class == SI) {
2315 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2316 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2317 }
2318 }
2319
2320 static void
2321 radv_initialise_ds_surface(struct radv_device *device,
2322 struct radv_ds_buffer_info *ds,
2323 struct radv_image_view *iview)
2324 {
2325 unsigned level = iview->base_mip;
2326 unsigned format;
2327 uint64_t va, s_offs, z_offs;
2328 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2329 memset(ds, 0, sizeof(*ds));
2330 switch (iview->vk_format) {
2331 case VK_FORMAT_D24_UNORM_S8_UINT:
2332 case VK_FORMAT_X8_D24_UNORM_PACK32:
2333 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2334 ds->offset_scale = 2.0f;
2335 break;
2336 case VK_FORMAT_D16_UNORM:
2337 case VK_FORMAT_D16_UNORM_S8_UINT:
2338 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2339 ds->offset_scale = 4.0f;
2340 break;
2341 case VK_FORMAT_D32_SFLOAT:
2342 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2343 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2344 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2345 ds->offset_scale = 1.0f;
2346 break;
2347 default:
2348 break;
2349 }
2350
2351 format = radv_translate_dbformat(iview->vk_format);
2352 if (format == V_028040_Z_INVALID) {
2353 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2354 }
2355
2356 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2357 s_offs = z_offs = va;
2358 z_offs += iview->image->surface.level[level].offset;
2359 s_offs += iview->image->surface.stencil_level[level].offset;
2360
2361 uint32_t max_slice = radv_surface_layer_count(iview);
2362 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2363 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2364 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2365 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2366
2367 if (iview->image->samples > 1)
2368 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2369
2370 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2371 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2372 else
2373 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2374
2375 if (device->physical_device->rad_info.chip_class >= CIK) {
2376 struct radeon_info *info = &device->physical_device->rad_info;
2377 unsigned tiling_index = iview->image->surface.tiling_index[level];
2378 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2379 unsigned macro_index = iview->image->surface.macro_tile_index;
2380 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2381 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2382 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2383
2384 ds->db_depth_info |=
2385 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2386 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2387 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2388 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2389 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2390 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2391 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2392 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2393 } else {
2394 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2395 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2396 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2397 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2398 }
2399
2400 if (iview->image->htile.size && !level) {
2401 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2402 S_028040_ALLOW_EXPCLEAR(1);
2403
2404 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2405 /* Workaround: For a not yet understood reason, the
2406 * combination of MSAA, fast stencil clear and stencil
2407 * decompress messes with subsequent stencil buffer
2408 * uses. Problem was reproduced on Verde, Bonaire,
2409 * Tonga, and Carrizo.
2410 *
2411 * Disabling EXPCLEAR works around the problem.
2412 *
2413 * Check piglit's arb_texture_multisample-stencil-clear
2414 * test if you want to try changing this.
2415 */
2416 if (iview->image->samples <= 1)
2417 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2418 } else
2419 /* Use all of the htile_buffer for depth if there's no stencil. */
2420 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2421
2422 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2423 iview->image->htile.offset;
2424 ds->db_htile_data_base = va >> 8;
2425 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2426 } else {
2427 ds->db_htile_data_base = 0;
2428 ds->db_htile_surface = 0;
2429 }
2430
2431 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2432 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2433
2434 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2435 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2436 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2437 }
2438
2439 VkResult radv_CreateFramebuffer(
2440 VkDevice _device,
2441 const VkFramebufferCreateInfo* pCreateInfo,
2442 const VkAllocationCallbacks* pAllocator,
2443 VkFramebuffer* pFramebuffer)
2444 {
2445 RADV_FROM_HANDLE(radv_device, device, _device);
2446 struct radv_framebuffer *framebuffer;
2447
2448 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2449
2450 size_t size = sizeof(*framebuffer) +
2451 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2452 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2453 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2454 if (framebuffer == NULL)
2455 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2456
2457 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2458 framebuffer->width = pCreateInfo->width;
2459 framebuffer->height = pCreateInfo->height;
2460 framebuffer->layers = pCreateInfo->layers;
2461 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2462 VkImageView _iview = pCreateInfo->pAttachments[i];
2463 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2464 framebuffer->attachments[i].attachment = iview;
2465 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2466 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2467 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2468 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2469 }
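/* Clamp the framebuffer dimensions to what the attachments can cover. */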
2470 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2471 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2472 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2473 }
2474
2475 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2476 return VK_SUCCESS;
2477 }
2478
2479 void radv_DestroyFramebuffer(
2480 VkDevice _device,
2481 VkFramebuffer _fb,
2482 const VkAllocationCallbacks* pAllocator)
2483 {
2484 RADV_FROM_HANDLE(radv_device, device, _device);
2485 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2486
2487 if (!fb)
2488 return;
2489 vk_free2(&device->alloc, pAllocator, fb);
2490 }
2491
2492 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2493 {
2494 switch (address_mode) {
2495 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2496 return V_008F30_SQ_TEX_WRAP;
2497 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2498 return V_008F30_SQ_TEX_MIRROR;
2499 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2500 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2501 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2502 return V_008F30_SQ_TEX_CLAMP_BORDER;
2503 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2504 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2505 default:
2506 unreachable("illegal tex wrap mode");
2507 break;
2508 }
2509 }
2510
2511 static unsigned
2512 radv_tex_compare(VkCompareOp op)
2513 {
2514 switch (op) {
2515 case VK_COMPARE_OP_NEVER:
2516 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2517 case VK_COMPARE_OP_LESS:
2518 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2519 case VK_COMPARE_OP_EQUAL:
2520 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2521 case VK_COMPARE_OP_LESS_OR_EQUAL:
2522 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2523 case VK_COMPARE_OP_GREATER:
2524 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2525 case VK_COMPARE_OP_NOT_EQUAL:
2526 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2527 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2528 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2529 case VK_COMPARE_OP_ALWAYS:
2530 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2531 default:
2532 unreachable("illegal compare mode");
2533 break;
2534 }
2535 }
2536
2537 static unsigned
2538 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2539 {
2540 switch (filter) {
2541 case VK_FILTER_NEAREST:
2542 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2543 V_008F38_SQ_TEX_XY_FILTER_POINT);
2544 case VK_FILTER_LINEAR:
2545 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2546 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2547 case VK_FILTER_CUBIC_IMG:
2548 default:
2549 fprintf(stderr, "illegal texture filter\n");
2550 return 0;
2551 }
2552 }
2553
2554 static unsigned
2555 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2556 {
2557 switch (mode) {
2558 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2559 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2560 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2561 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2562 default:
2563 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2564 }
2565 }
2566
2567 static unsigned
2568 radv_tex_bordercolor(VkBorderColor bcolor)
2569 {
2570 switch (bcolor) {
2571 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2572 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2573 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2574 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2575 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2576 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2577 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2578 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2579 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2580 default:
2581 break;
2582 }
2583 return 0;
2584 }
2585
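/* Map a maxAnisotropy sample count onto the hardware's log2-encoded ratio:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4.
 */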
2586 static unsigned
2587 radv_tex_aniso_filter(unsigned filter)
2588 {
2589 if (filter < 2)
2590 return 0;
2591 if (filter < 4)
2592 return 1;
2593 if (filter < 8)
2594 return 2;
2595 if (filter < 16)
2596 return 3;
2597 return 4;
2598 }
2599
2600 static void
2601 radv_init_sampler(struct radv_device *device,
2602 struct radv_sampler *sampler,
2603 const VkSamplerCreateInfo *pCreateInfo)
2604 {
2605 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2606 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2607 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2608 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2609
2610 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2611 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2612 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2613 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2614 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2615 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2616 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2617 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2618 S_008F30_DISABLE_CUBE_WRAP(0) |
2619 S_008F30_COMPAT_MODE(is_vi));
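/* LOD values are programmed in fixed point with 8 fractional bits, hence
 * the clamps to the hardware's representable ranges below.
 */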
2620 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2621 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2622 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2623 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2624 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2625 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2626 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2627 S_008F38_MIP_POINT_PRECLAMP(1) |
2628 S_008F38_DISABLE_LSB_CEIL(1) |
2629 S_008F38_FILTER_PREC_FIX(1) |
2630 S_008F38_ANISO_OVERRIDE(is_vi));
2631 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2632 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2633 }
2634
2635 VkResult radv_CreateSampler(
2636 VkDevice _device,
2637 const VkSamplerCreateInfo* pCreateInfo,
2638 const VkAllocationCallbacks* pAllocator,
2639 VkSampler* pSampler)
2640 {
2641 RADV_FROM_HANDLE(radv_device, device, _device);
2642 struct radv_sampler *sampler;
2643
2644 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2645
2646 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2647 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2648 if (!sampler)
2649 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2650
2651 radv_init_sampler(device, sampler, pCreateInfo);
2652 *pSampler = radv_sampler_to_handle(sampler);
2653
2654 return VK_SUCCESS;
2655 }
2656
2657 void radv_DestroySampler(
2658 VkDevice _device,
2659 VkSampler _sampler,
2660 const VkAllocationCallbacks* pAllocator)
2661 {
2662 RADV_FROM_HANDLE(radv_device, device, _device);
2663 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2664
2665 if (!sampler)
2666 return;
2667 vk_free2(&device->alloc, pAllocator, sampler);
2668 }
2669
2670
2671 /* vk_icd.h does not declare this function, so we declare it here to
2672 * suppress -Wmissing-prototypes.
2673 */
2674 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2675 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2676
2677 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2678 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2679 {
2680 /* For the full details on loader interface versioning, see
2681 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2682 * What follows is a condensed summary, to help you navigate the large and
2683 * confusing official doc.
2684 *
2685 * - Loader interface v0 is incompatible with later versions. We don't
2686 * support it.
2687 *
2688 * - In loader interface v1:
2689 * - The first ICD entrypoint called by the loader is
2690 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2691 * entrypoint.
2692 * - The ICD must statically expose no other Vulkan symbol unless it is
2693 * linked with -Bsymbolic.
2694 * - Each dispatchable Vulkan handle created by the ICD must be
2695 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2696 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2697 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2698 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2699 * such loader-managed surfaces.
2700 *
2701 * - Loader interface v2 differs from v1 in:
2702 * - The first ICD entrypoint called by the loader is
2703 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2704 * statically expose this entrypoint.
2705 *
2706 * - Loader interface v3 differs from v2 in:
2707 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2708 * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
2709 * because the loader no longer does so.
2710 */
2711 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2712 return VK_SUCCESS;
2713 }
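/* Example (illustrative): a loader that itself supports interface v4 would
 * negotiate as follows and end up on v3, the highest version this ICD
 * implements:
 *
 *    uint32_t version = 4;
 *    vk_icdNegotiateLoaderICDInterfaceVersion(&version);
 *    // version == 3 afterwards
 */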