radv: remove unused radv_dispatch_table dtable
[mesa.git] src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <dlfcn.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include "radv_private.h"
35 #include "radv_cs.h"
36 #include "util/strtod.h"
37
38 #include <xf86drm.h>
39 #include <amdgpu.h>
40 #include <amdgpu_drm.h>
41 #include "amdgpu_id.h"
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "util/debug.h"
47
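/* Build-time stamping helper: dladdr() locates the shared object that
 * contains `ptr`, and stat() on that file yields its modification time.
 * The timestamps of the radv and LLVM binaries are folded into the
 * pipeline cache UUID below, so stale caches are invalidated whenever
 * either library is rebuilt.
 */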
48 static int
49 radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
50 {
51 Dl_info info;
52 struct stat st;
53 if (!dladdr(ptr, &info) || !info.dli_fname) {
54 return -1;
55 }
56 if (stat(info.dli_fname, &st)) {
57 return -1;
58 }
59 *timestamp = st.st_mtim.tv_sec;
60 return 0;
61 }
62
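/* The 16-byte pipeline cache UUID is assembled as:
 *   bytes  0-3   mesa (radv) binary mtime, truncated to 32 bits
 *   bytes  4-7   LLVM library mtime, truncated to 32 bits
 *   bytes  8-9   radeon_family as a uint16
 *   bytes 10-14  the string "radv" plus its NUL terminator
 * (remaining bytes stay zero from the memset above).
 */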
63 static int
64 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
65 {
66 uint32_t mesa_timestamp, llvm_timestamp;
67 uint16_t f = family;
68 memset(uuid, 0, VK_UUID_SIZE);
69 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
70 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
71 return -1;
72
73 memcpy(uuid, &mesa_timestamp, 4);
74 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
75 memcpy((char*)uuid + 8, &f, 2);
76 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
77 return 0;
78 }
79
80 static const VkExtensionProperties instance_extensions[] = {
81 {
82 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
83 .specVersion = 25,
84 },
85 #ifdef VK_USE_PLATFORM_XCB_KHR
86 {
87 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
88 .specVersion = 6,
89 },
90 #endif
91 #ifdef VK_USE_PLATFORM_XLIB_KHR
92 {
93 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
94 .specVersion = 6,
95 },
96 #endif
97 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
98 {
99 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
100 .specVersion = 5,
101 },
102 #endif
103 };
104
105 static const VkExtensionProperties common_device_extensions[] = {
106 {
107 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
108 .specVersion = 1,
109 },
110 {
111 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
112 .specVersion = 1,
113 },
114 {
115 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
116 .specVersion = 68,
117 },
118 {
119 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
120 .specVersion = 1,
121 },
122 {
123 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
124 .specVersion = 1,
125 },
126 {
127 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
128 .specVersion = 1,
129 },
130 };
131
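/* Appends num_ext entries to extensions->ext_array, growing the array
 * with vk_realloc. On allocation failure the previously registered
 * array is left untouched, so it remains safe to use.
 */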
132 static VkResult
133 radv_extensions_register(struct radv_instance *instance,
134 struct radv_extensions *extensions,
135 const VkExtensionProperties *new_ext,
136 uint32_t num_ext)
137 {
138 size_t new_size;
139 VkExtensionProperties *new_ptr;
140
141 assert(new_ext && num_ext > 0);
142
143 if (!new_ext)
144 return VK_ERROR_INITIALIZATION_FAILED;
145
146 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
147 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
148 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
149
150 /* Old array continues to be valid, update nothing */
151 if (!new_ptr)
152 return VK_ERROR_OUT_OF_HOST_MEMORY;
153
154 memcpy(&new_ptr[extensions->num_ext], new_ext,
155 num_ext * sizeof(VkExtensionProperties));
156 extensions->ext_array = new_ptr;
157 extensions->num_ext += num_ext;
158
159 return VK_SUCCESS;
160 }
161
162 static void
163 radv_extensions_finish(struct radv_instance *instance,
164 struct radv_extensions *extensions)
165 {
166 assert(extensions);
167
168 if (!extensions)
169 radv_loge("Attemted to free invalid extension struct\n");
170
171 if (extensions->ext_array)
172 vk_free(&instance->alloc, extensions->ext_array);
173 }
174
175 static bool
176 is_extension_enabled(const VkExtensionProperties *extensions,
177 size_t num_ext,
178 const char *name)
179 {
180 assert(extensions && name);
181
182 for (uint32_t i = 0; i < num_ext; i++) {
183 if (strcmp(name, extensions[i].extensionName) == 0)
184 return true;
185 }
186
187 return false;
188 }
189
190 static VkResult
191 radv_physical_device_init(struct radv_physical_device *device,
192 struct radv_instance *instance,
193 const char *path)
194 {
195 VkResult result;
196 drmVersionPtr version;
197 int fd;
198
199 fd = open(path, O_RDWR | O_CLOEXEC);
200 if (fd < 0)
201 return VK_ERROR_INCOMPATIBLE_DRIVER;
202
203 version = drmGetVersion(fd);
204 if (!version) {
205 close(fd);
206 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
207 "failed to get version %s: %m", path);
208 }
209
210 if (strcmp(version->name, "amdgpu")) {
211 drmFreeVersion(version);
212 close(fd);
213 return VK_ERROR_INCOMPATIBLE_DRIVER;
214 }
215 drmFreeVersion(version);
216
217 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
218 device->instance = instance;
219 assert(strlen(path) < ARRAY_SIZE(device->path));
220 strncpy(device->path, path, ARRAY_SIZE(device->path));
221
222 device->ws = radv_amdgpu_winsys_create(fd);
223 if (!device->ws) {
224 result = VK_ERROR_INCOMPATIBLE_DRIVER;
225 goto fail;
226 }
227
228 device->local_fd = fd;
229 device->ws->query_info(device->ws, &device->rad_info);
230 result = radv_init_wsi(device);
231 if (result != VK_SUCCESS) {
232 device->ws->destroy(device->ws);
233 goto fail;
234 }
235
236 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
237 radv_finish_wsi(device);
238 device->ws->destroy(device->ws);
239 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
240 "cannot generate UUID");
241 goto fail;
242 }
243
244 result = radv_extensions_register(instance,
245 &device->extensions,
246 common_device_extensions,
247 ARRAY_SIZE(common_device_extensions));
248 if (result != VK_SUCCESS)
249 goto fail;
250
251 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
252 device->name = device->rad_info.name;
253
254 return VK_SUCCESS;
255
256 fail:
257 close(fd);
258 return result;
259 }
260
261 static void
262 radv_physical_device_finish(struct radv_physical_device *device)
263 {
264 radv_extensions_finish(device->instance, &device->extensions);
265 radv_finish_wsi(device);
266 device->ws->destroy(device->ws);
267 close(device->local_fd);
268 }
269
270
271 static void *
272 default_alloc_func(void *pUserData, size_t size, size_t align,
273 VkSystemAllocationScope allocationScope)
274 {
275 return malloc(size);
276 }
277
278 static void *
279 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
280 size_t align, VkSystemAllocationScope allocationScope)
281 {
282 return realloc(pOriginal, size);
283 }
284
285 static void
286 default_free_func(void *pUserData, void *pMemory)
287 {
288 free(pMemory);
289 }
290
291 static const VkAllocationCallbacks default_alloc = {
292 .pUserData = NULL,
293 .pfnAllocation = default_alloc_func,
294 .pfnReallocation = default_realloc_func,
295 .pfnFree = default_free_func,
296 };
297
298 static const struct debug_control radv_debug_options[] = {
299 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
300 {"nodcc", RADV_DEBUG_NO_DCC},
301 {"shaders", RADV_DEBUG_DUMP_SHADERS},
302 {"nocache", RADV_DEBUG_NO_CACHE},
303 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
304 {"nohiz", RADV_DEBUG_NO_HIZ},
305 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
306 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
307 {NULL, 0}
308 };
309
310 VkResult radv_CreateInstance(
311 const VkInstanceCreateInfo* pCreateInfo,
312 const VkAllocationCallbacks* pAllocator,
313 VkInstance* pInstance)
314 {
315 struct radv_instance *instance;
316
317 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
318
319 uint32_t client_version;
320 if (pCreateInfo->pApplicationInfo &&
321 pCreateInfo->pApplicationInfo->apiVersion != 0) {
322 client_version = pCreateInfo->pApplicationInfo->apiVersion;
323 } else {
324 client_version = VK_MAKE_VERSION(1, 0, 0);
325 }
326
327 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
328 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
329 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
330 "Client requested version %d.%d.%d",
331 VK_VERSION_MAJOR(client_version),
332 VK_VERSION_MINOR(client_version),
333 VK_VERSION_PATCH(client_version));
334 }
335
336 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
337 if (!is_extension_enabled(instance_extensions,
338 ARRAY_SIZE(instance_extensions),
339 pCreateInfo->ppEnabledExtensionNames[i]))
340 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
341 }
342
343 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
344 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
345 if (!instance)
346 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
347
348 memset(instance, 0, sizeof(*instance));
349
350 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
351
352 if (pAllocator)
353 instance->alloc = *pAllocator;
354 else
355 instance->alloc = default_alloc;
356
357 instance->apiVersion = client_version;
358 instance->physicalDeviceCount = -1;
359
360 _mesa_locale_init();
361
362 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
363
364 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
365 radv_debug_options);
366
367 *pInstance = radv_instance_to_handle(instance);
368
369 return VK_SUCCESS;
370 }
371
372 void radv_DestroyInstance(
373 VkInstance _instance,
374 const VkAllocationCallbacks* pAllocator)
375 {
376 RADV_FROM_HANDLE(radv_instance, instance, _instance);
377
378 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
379 radv_physical_device_finish(instance->physicalDevices + i);
380 }
381
382 VG(VALGRIND_DESTROY_MEMPOOL(instance));
383
384 _mesa_locale_fini();
385
386 vk_free(&instance->alloc, instance);
387 }
388
389 VkResult radv_EnumeratePhysicalDevices(
390 VkInstance _instance,
391 uint32_t* pPhysicalDeviceCount,
392 VkPhysicalDevice* pPhysicalDevices)
393 {
394 RADV_FROM_HANDLE(radv_instance, instance, _instance);
395 VkResult result;
396
397 if (instance->physicalDeviceCount < 0) {
398 char path[20];
399 instance->physicalDeviceCount = 0;
400 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
401 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
402 result = radv_physical_device_init(instance->physicalDevices +
403 instance->physicalDeviceCount,
404 instance, path);
405 if (result == VK_SUCCESS)
406 ++instance->physicalDeviceCount;
407 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
408 return result;
409 }
410 }
411
412 if (!pPhysicalDevices) {
413 *pPhysicalDeviceCount = instance->physicalDeviceCount;
414 } else {
415 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
416 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
417 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
418 }
419
420 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
421 : VK_SUCCESS;
422 }
423
424 void radv_GetPhysicalDeviceFeatures(
425 VkPhysicalDevice physicalDevice,
426 VkPhysicalDeviceFeatures* pFeatures)
427 {
428 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
429
430 memset(pFeatures, 0, sizeof(*pFeatures));
431
432 *pFeatures = (VkPhysicalDeviceFeatures) {
433 .robustBufferAccess = true,
434 .fullDrawIndexUint32 = true,
435 .imageCubeArray = true,
436 .independentBlend = true,
437 .geometryShader = true,
438 .tessellationShader = false,
439 .sampleRateShading = false,
440 .dualSrcBlend = true,
441 .logicOp = true,
442 .multiDrawIndirect = true,
443 .drawIndirectFirstInstance = true,
444 .depthClamp = true,
445 .depthBiasClamp = true,
446 .fillModeNonSolid = true,
447 .depthBounds = true,
448 .wideLines = true,
449 .largePoints = true,
450 .alphaToOne = true,
451 .multiViewport = true,
452 .samplerAnisotropy = true,
453 .textureCompressionETC2 = false,
454 .textureCompressionASTC_LDR = false,
455 .textureCompressionBC = true,
456 .occlusionQueryPrecise = true,
457 .pipelineStatisticsQuery = false,
458 .vertexPipelineStoresAndAtomics = true,
459 .fragmentStoresAndAtomics = true,
460 .shaderTessellationAndGeometryPointSize = true,
461 .shaderImageGatherExtended = true,
462 .shaderStorageImageExtendedFormats = true,
463 .shaderStorageImageMultisample = false,
464 .shaderUniformBufferArrayDynamicIndexing = true,
465 .shaderSampledImageArrayDynamicIndexing = true,
466 .shaderStorageBufferArrayDynamicIndexing = true,
467 .shaderStorageImageArrayDynamicIndexing = true,
468 .shaderStorageImageReadWithoutFormat = true,
469 .shaderStorageImageWriteWithoutFormat = true,
470 .shaderClipDistance = true,
471 .shaderCullDistance = true,
472 .shaderFloat64 = true,
473 .shaderInt64 = false,
474 .shaderInt16 = false,
476 .variableMultisampleRate = false,
477 .inheritedQueries = false,
478 };
479 }
480
481 void radv_GetPhysicalDeviceFeatures2KHR(
482 VkPhysicalDevice physicalDevice,
483 VkPhysicalDeviceFeatures2KHR *pFeatures)
484 {
485 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
486 }
487
488 void radv_GetPhysicalDeviceProperties(
489 VkPhysicalDevice physicalDevice,
490 VkPhysicalDeviceProperties* pProperties)
491 {
492 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
493 VkSampleCountFlags sample_counts = 0xf;
494 VkPhysicalDeviceLimits limits = {
495 .maxImageDimension1D = (1 << 14),
496 .maxImageDimension2D = (1 << 14),
497 .maxImageDimension3D = (1 << 11),
498 .maxImageDimensionCube = (1 << 14),
499 .maxImageArrayLayers = (1 << 11),
500 .maxTexelBufferElements = 128 * 1024 * 1024,
501 .maxUniformBufferRange = UINT32_MAX,
502 .maxStorageBufferRange = UINT32_MAX,
503 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
504 .maxMemoryAllocationCount = UINT32_MAX,
505 .maxSamplerAllocationCount = 64 * 1024,
506 .bufferImageGranularity = 64, /* A cache line */
507 .sparseAddressSpaceSize = 0,
508 .maxBoundDescriptorSets = MAX_SETS,
509 .maxPerStageDescriptorSamplers = 64,
510 .maxPerStageDescriptorUniformBuffers = 64,
511 .maxPerStageDescriptorStorageBuffers = 64,
512 .maxPerStageDescriptorSampledImages = 64,
513 .maxPerStageDescriptorStorageImages = 64,
514 .maxPerStageDescriptorInputAttachments = 64,
515 .maxPerStageResources = 128,
516 .maxDescriptorSetSamplers = 256,
517 .maxDescriptorSetUniformBuffers = 256,
518 .maxDescriptorSetUniformBuffersDynamic = 256,
519 .maxDescriptorSetStorageBuffers = 256,
520 .maxDescriptorSetStorageBuffersDynamic = 256,
521 .maxDescriptorSetSampledImages = 256,
522 .maxDescriptorSetStorageImages = 256,
523 .maxDescriptorSetInputAttachments = 256,
524 .maxVertexInputAttributes = 32,
525 .maxVertexInputBindings = 32,
526 .maxVertexInputAttributeOffset = 2047,
527 .maxVertexInputBindingStride = 2048,
528 .maxVertexOutputComponents = 128,
529 .maxTessellationGenerationLevel = 0,
530 .maxTessellationPatchSize = 0,
531 .maxTessellationControlPerVertexInputComponents = 0,
532 .maxTessellationControlPerVertexOutputComponents = 0,
533 .maxTessellationControlPerPatchOutputComponents = 0,
534 .maxTessellationControlTotalOutputComponents = 0,
535 .maxTessellationEvaluationInputComponents = 0,
536 .maxTessellationEvaluationOutputComponents = 0,
537 .maxGeometryShaderInvocations = 32,
538 .maxGeometryInputComponents = 64,
539 .maxGeometryOutputComponents = 128,
540 .maxGeometryOutputVertices = 256,
541 .maxGeometryTotalOutputComponents = 1024,
542 .maxFragmentInputComponents = 128,
543 .maxFragmentOutputAttachments = 8,
544 .maxFragmentDualSrcAttachments = 1,
545 .maxFragmentCombinedOutputResources = 8,
546 .maxComputeSharedMemorySize = 32768,
547 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
548 .maxComputeWorkGroupInvocations = 2048,
549 .maxComputeWorkGroupSize = {
550 2048,
551 2048,
552 2048
553 },
554 .subPixelPrecisionBits = 4 /* FIXME */,
555 .subTexelPrecisionBits = 4 /* FIXME */,
556 .mipmapPrecisionBits = 4 /* FIXME */,
557 .maxDrawIndexedIndexValue = UINT32_MAX,
558 .maxDrawIndirectCount = UINT32_MAX,
559 .maxSamplerLodBias = 16,
560 .maxSamplerAnisotropy = 16,
561 .maxViewports = MAX_VIEWPORTS,
562 .maxViewportDimensions = { (1 << 14), (1 << 14) },
563 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
564 .viewportSubPixelBits = 13, /* We take a float? */
565 .minMemoryMapAlignment = 4096, /* A page */
566 .minTexelBufferOffsetAlignment = 1,
567 .minUniformBufferOffsetAlignment = 4,
568 .minStorageBufferOffsetAlignment = 4,
569 .minTexelOffset = -32,
570 .maxTexelOffset = 31,
571 .minTexelGatherOffset = -32,
572 .maxTexelGatherOffset = 31,
573 .minInterpolationOffset = -2,
574 .maxInterpolationOffset = 2,
575 .subPixelInterpolationOffsetBits = 8,
576 .maxFramebufferWidth = (1 << 14),
577 .maxFramebufferHeight = (1 << 14),
578 .maxFramebufferLayers = (1 << 10),
579 .framebufferColorSampleCounts = sample_counts,
580 .framebufferDepthSampleCounts = sample_counts,
581 .framebufferStencilSampleCounts = sample_counts,
582 .framebufferNoAttachmentsSampleCounts = sample_counts,
583 .maxColorAttachments = MAX_RTS,
584 .sampledImageColorSampleCounts = sample_counts,
585 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
586 .sampledImageDepthSampleCounts = sample_counts,
587 .sampledImageStencilSampleCounts = sample_counts,
588 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
589 .maxSampleMaskWords = 1,
590 .timestampComputeAndGraphics = false,
591 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
592 .maxClipDistances = 8,
593 .maxCullDistances = 8,
594 .maxCombinedClipAndCullDistances = 8,
595 .discreteQueuePriorities = 1,
596 .pointSizeRange = { 0.125, 255.875 },
597 .lineWidthRange = { 0.0, 7.9921875 },
598 .pointSizeGranularity = (1.0 / 8.0),
599 .lineWidthGranularity = (1.0 / 128.0),
600 .strictLines = false, /* FINISHME */
601 .standardSampleLocations = true,
602 .optimalBufferCopyOffsetAlignment = 128,
603 .optimalBufferCopyRowPitchAlignment = 128,
604 .nonCoherentAtomSize = 64,
605 };
606
607 *pProperties = (VkPhysicalDeviceProperties) {
608 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
609 .driverVersion = 1,
610 .vendorID = 0x1002,
611 .deviceID = pdevice->rad_info.pci_id,
612 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
613 .limits = limits,
614 .sparseProperties = {0}, /* radv doesn't support sparse resources yet. */
615 };
616
617 strcpy(pProperties->deviceName, pdevice->name);
618 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
619 }
620
621 void radv_GetPhysicalDeviceProperties2KHR(
622 VkPhysicalDevice physicalDevice,
623 VkPhysicalDeviceProperties2KHR *pProperties)
624 {
625 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
626 }
627
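/* Shared worker for the queue family queries, following the standard
 * Vulkan two-call idiom: with pQueueFamilyProperties == NULL only the
 * family count is written; otherwise at most *pCount entries are
 * filled in and *pCount is updated to the number actually written.
 * Family 0 is always the graphics ring; a second, compute-only family
 * is exposed on CIK+ parts with compute rings unless it is disabled
 * via RADV_DEBUG=nocompute.
 */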
628 static void radv_get_physical_device_queue_family_properties(
629 struct radv_physical_device* pdevice,
630 uint32_t* pCount,
631 VkQueueFamilyProperties** pQueueFamilyProperties)
632 {
633 int num_queue_families = 1;
634 int idx;
635 if (pdevice->rad_info.compute_rings > 0 &&
636 pdevice->rad_info.chip_class >= CIK &&
637 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
638 num_queue_families++;
639
640 if (pQueueFamilyProperties == NULL) {
641 *pCount = num_queue_families;
642 return;
643 }
644
645 if (!*pCount)
646 return;
647
648 idx = 0;
649 if (*pCount >= 1) {
650 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
651 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
652 VK_QUEUE_COMPUTE_BIT |
653 VK_QUEUE_TRANSFER_BIT,
654 .queueCount = 1,
655 .timestampValidBits = 64,
656 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
657 };
658 idx++;
659 }
660
661 if (pdevice->rad_info.compute_rings > 0 &&
662 pdevice->rad_info.chip_class >= CIK &&
663 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
664 if (*pCount > idx) {
665 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
666 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
667 .queueCount = pdevice->rad_info.compute_rings,
668 .timestampValidBits = 64,
669 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
670 };
671 idx++;
672 }
673 }
674 *pCount = idx;
675 }
676
677 void radv_GetPhysicalDeviceQueueFamilyProperties(
678 VkPhysicalDevice physicalDevice,
679 uint32_t* pCount,
680 VkQueueFamilyProperties* pQueueFamilyProperties)
681 {
682 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
683 if (!pQueueFamilyProperties) {
684 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
685 return;
686 }
687 VkQueueFamilyProperties *properties[] = {
688 pQueueFamilyProperties + 0,
689 pQueueFamilyProperties + 1,
690 pQueueFamilyProperties + 2,
691 };
692 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
693 assert(*pCount <= 3);
694 }
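/* A minimal application-side sketch of the two-call idiom served by the
 * entry points above; illustration only, kept out of the build. The
 * helper name `first_matching_family` is hypothetical.
 */
#if 0
static uint32_t
first_matching_family(VkPhysicalDevice pdev, VkQueueFlags wanted)
{
	uint32_t count = 0;
	vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, NULL);
	if (count > 3)
		count = 3;

	VkQueueFamilyProperties props[3];
	vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, props);

	for (uint32_t i = 0; i < count; i++) {
		if ((props[i].queueFlags & wanted) == wanted)
			return i;
	}
	return UINT32_MAX;
}
#endif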
695
696 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
697 VkPhysicalDevice physicalDevice,
698 uint32_t* pCount,
699 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
700 {
701 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
702 if (!pQueueFamilyProperties) {
703 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
704 return;
705 }
706 VkQueueFamilyProperties *properties[] = {
707 &pQueueFamilyProperties[0].queueFamilyProperties,
708 &pQueueFamilyProperties[1].queueFamilyProperties,
709 &pQueueFamilyProperties[2].queueFamilyProperties,
710 };
711 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
712 assert(*pCount <= 3);
713 }
714
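/* radv advertises three heaps: the CPU-invisible part of VRAM, the
 * CPU-visible slice of VRAM, and GTT (system memory), with four memory
 * types layered on top of them: device-local, host-visible
 * write-combined, host-visible device-local, and host-cached.
 */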
715 void radv_GetPhysicalDeviceMemoryProperties(
716 VkPhysicalDevice physicalDevice,
717 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
718 {
719 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
720
721 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
722
723 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
724 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
725 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
726 .heapIndex = RADV_MEM_HEAP_VRAM,
727 };
728 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
729 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
730 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
731 .heapIndex = RADV_MEM_HEAP_GTT,
732 };
733 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
734 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
735 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
736 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
737 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
738 };
739 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
740 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
741 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
742 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
743 .heapIndex = RADV_MEM_HEAP_GTT,
744 };
745
746 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
747
748 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
749 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
750 .size = physical_device->rad_info.vram_size -
751 physical_device->rad_info.visible_vram_size,
752 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
753 };
754 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
755 .size = physical_device->rad_info.visible_vram_size,
756 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
757 };
758 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
759 .size = physical_device->rad_info.gart_size,
760 .flags = 0,
761 };
762 }
763
764 void radv_GetPhysicalDeviceMemoryProperties2KHR(
765 VkPhysicalDevice physicalDevice,
766 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
767 {
768 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
769 &pMemoryProperties->memoryProperties);
770 }
771
772 static int
773 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
774 int queue_family_index, int idx)
775 {
776 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
777 queue->device = device;
778 queue->queue_family_index = queue_family_index;
779 queue->queue_idx = idx;
780
781 queue->hw_ctx = device->ws->ctx_create(device->ws);
782 if (!queue->hw_ctx)
783 return VK_ERROR_OUT_OF_HOST_MEMORY;
784
785 return VK_SUCCESS;
786 }
787
788 static void
789 radv_queue_finish(struct radv_queue *queue)
790 {
791 if (queue->hw_ctx)
792 queue->device->ws->ctx_destroy(queue->hw_ctx);
793
794 if (queue->initial_preamble_cs)
795 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
796 if (queue->continue_preamble_cs)
797 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
798 if (queue->descriptor_bo)
799 queue->device->ws->buffer_destroy(queue->descriptor_bo);
800 if (queue->scratch_bo)
801 queue->device->ws->buffer_destroy(queue->scratch_bo);
802 if (queue->esgs_ring_bo)
803 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
804 if (queue->gsvs_ring_bo)
805 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
806 if (queue->compute_scratch_bo)
807 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
808 }
809
810 static void
811 radv_device_init_gs_info(struct radv_device *device)
812 {
813 switch (device->physical_device->rad_info.family) {
814 case CHIP_OLAND:
815 case CHIP_HAINAN:
816 case CHIP_KAVERI:
817 case CHIP_KABINI:
818 case CHIP_MULLINS:
819 case CHIP_ICELAND:
820 case CHIP_CARRIZO:
821 case CHIP_STONEY:
822 device->gs_table_depth = 16;
823 return;
824 case CHIP_TAHITI:
825 case CHIP_PITCAIRN:
826 case CHIP_VERDE:
827 case CHIP_BONAIRE:
828 case CHIP_HAWAII:
829 case CHIP_TONGA:
830 case CHIP_FIJI:
831 case CHIP_POLARIS10:
832 case CHIP_POLARIS11:
833 device->gs_table_depth = 32;
834 return;
835 default:
836 unreachable("unknown GPU");
837 }
838 }
839
840 VkResult radv_CreateDevice(
841 VkPhysicalDevice physicalDevice,
842 const VkDeviceCreateInfo* pCreateInfo,
843 const VkAllocationCallbacks* pAllocator,
844 VkDevice* pDevice)
845 {
846 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
847 VkResult result;
848 struct radv_device *device;
849
850 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
851 if (!is_extension_enabled(physical_device->extensions.ext_array,
852 physical_device->extensions.num_ext,
853 pCreateInfo->ppEnabledExtensionNames[i]))
854 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
855 }
856
857 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
858 sizeof(*device), 8,
859 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
860 if (!device)
861 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
862
863 memset(device, 0, sizeof(*device));
864
865 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
866 device->instance = physical_device->instance;
867 device->physical_device = physical_device;
868
869 device->debug_flags = device->instance->debug_flags;
870
871 device->ws = physical_device->ws;
872 if (pAllocator)
873 device->alloc = *pAllocator;
874 else
875 device->alloc = physical_device->instance->alloc;
876
877 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
878 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
879 uint32_t qfi = queue_create->queueFamilyIndex;
880
881 device->queues[qfi] = vk_alloc(&device->alloc,
882 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
883 if (!device->queues[qfi]) {
884 result = VK_ERROR_OUT_OF_HOST_MEMORY;
885 goto fail;
886 }
887
888 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
889
890 device->queue_count[qfi] = queue_create->queueCount;
891
892 for (unsigned q = 0; q < queue_create->queueCount; q++) {
893 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
894 if (result != VK_SUCCESS)
895 goto fail;
896 }
897 }
898
899 #if HAVE_LLVM < 0x0400
900 device->llvm_supports_spill = false;
901 #else
902 device->llvm_supports_spill = true;
903 #endif
904
905 /* The maximum number of scratch waves. Scratch space isn't divided
906 * evenly between CUs. The number is only a function of the number of CUs.
907 * We can decrease the constant to decrease the scratch buffer size.
908 *
909 * sctx->scratch_waves must be >= the maximum posible size of
910 * 1 threadgroup, so that the hw doesn't hang from being unable
911 * to start any.
912 *
913 * The recommended value is 4 per CU at most. Higher numbers don't
914 * bring much benefit, but they still occupy chip resources (think
915 * async compute). I've seen ~2% performance difference between 4 and 32.
916 */
917 uint32_t max_threads_per_block = 2048;
918 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
919 max_threads_per_block / 64);
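/* Worked example with the numbers above: a 36-CU part gets
 * MAX2(32 * 36, 2048 / 64) = 1152 scratch waves, i.e. the per-CU term
 * dominates on anything but very small configurations.
 */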
920
921 radv_device_init_gs_info(device);
922
923 result = radv_device_init_meta(device);
924 if (result != VK_SUCCESS)
925 goto fail;
926
927 radv_device_init_msaa(device);
928
929 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
930 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
931 switch (family) {
932 case RADV_QUEUE_GENERAL:
933 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
934 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
935 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
936 break;
937 case RADV_QUEUE_COMPUTE:
938 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
939 radeon_emit(device->empty_cs[family], 0);
940 break;
941 }
942 device->ws->cs_finalize(device->empty_cs[family]);
943
944 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
945 switch (family) {
946 case RADV_QUEUE_GENERAL:
947 case RADV_QUEUE_COMPUTE:
948 si_cs_emit_cache_flush(device->flush_cs[family],
949 device->physical_device->rad_info.chip_class,
950 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
951 RADV_CMD_FLAG_INV_ICACHE |
952 RADV_CMD_FLAG_INV_SMEM_L1 |
953 RADV_CMD_FLAG_INV_VMEM_L1 |
954 RADV_CMD_FLAG_INV_GLOBAL_L2);
955 break;
956 }
957 device->ws->cs_finalize(device->flush_cs[family]);
958 }
959
960 if (getenv("RADV_TRACE_FILE")) {
961 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
962 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
963 if (!device->trace_bo)
964 goto fail;
965
966 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
967 if (!device->trace_id_ptr)
968 goto fail;
969 }
970
971 if (device->physical_device->rad_info.chip_class >= CIK)
972 cik_create_gfx_config(device);
973
974 *pDevice = radv_device_to_handle(device);
975 return VK_SUCCESS;
976
977 fail:
978 if (device->trace_bo)
979 device->ws->buffer_destroy(device->trace_bo);
980
981 if (device->gfx_init)
982 device->ws->buffer_destroy(device->gfx_init);
983
984 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
985 for (unsigned q = 0; q < device->queue_count[i]; q++)
986 radv_queue_finish(&device->queues[i][q]);
987 if (device->queue_count[i])
988 vk_free(&device->alloc, device->queues[i]);
989 }
990
991 vk_free(&device->alloc, device);
992 return result;
993 }
994
995 void radv_DestroyDevice(
996 VkDevice _device,
997 const VkAllocationCallbacks* pAllocator)
998 {
999 RADV_FROM_HANDLE(radv_device, device, _device);
1000
1001 if (device->trace_bo)
1002 device->ws->buffer_destroy(device->trace_bo);
1003
1004 if (device->gfx_init)
1005 device->ws->buffer_destroy(device->gfx_init);
1006
1007 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1008 for (unsigned q = 0; q < device->queue_count[i]; q++)
1009 radv_queue_finish(&device->queues[i][q]);
1010 if (device->queue_count[i])
1011 vk_free(&device->alloc, device->queues[i]);
1012 if (device->empty_cs[i])
1013 device->ws->cs_destroy(device->empty_cs[i]);
1014 if (device->flush_cs[i])
1015 device->ws->cs_destroy(device->flush_cs[i]);
1016 }
1017 radv_device_finish_meta(device);
1018
1019 vk_free(&device->alloc, device);
1020 }
1021
1022 VkResult radv_EnumerateInstanceExtensionProperties(
1023 const char* pLayerName,
1024 uint32_t* pPropertyCount,
1025 VkExtensionProperties* pProperties)
1026 {
1027 if (pProperties == NULL) {
1028 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1029 return VK_SUCCESS;
1030 }
1031
1032 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1033 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1034
1035 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1036 return VK_INCOMPLETE;
1037
1038 return VK_SUCCESS;
1039 }
1040
1041 VkResult radv_EnumerateDeviceExtensionProperties(
1042 VkPhysicalDevice physicalDevice,
1043 const char* pLayerName,
1044 uint32_t* pPropertyCount,
1045 VkExtensionProperties* pProperties)
1046 {
1047 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1048
1049 if (pProperties == NULL) {
1050 *pPropertyCount = pdevice->extensions.num_ext;
1051 return VK_SUCCESS;
1052 }
1053
1054 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1055 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1056
1057 if (*pPropertyCount < pdevice->extensions.num_ext)
1058 return VK_INCOMPLETE;
1059
1060 return VK_SUCCESS;
1061 }
1062
1063 VkResult radv_EnumerateInstanceLayerProperties(
1064 uint32_t* pPropertyCount,
1065 VkLayerProperties* pProperties)
1066 {
1067 if (pProperties == NULL) {
1068 *pPropertyCount = 0;
1069 return VK_SUCCESS;
1070 }
1071
1072 /* None supported at this time */
1073 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1074 }
1075
1076 VkResult radv_EnumerateDeviceLayerProperties(
1077 VkPhysicalDevice physicalDevice,
1078 uint32_t* pPropertyCount,
1079 VkLayerProperties* pProperties)
1080 {
1081 if (pProperties == NULL) {
1082 *pPropertyCount = 0;
1083 return VK_SUCCESS;
1084 }
1085
1086 /* None supported at this time */
1087 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1088 }
1089
1090 void radv_GetDeviceQueue(
1091 VkDevice _device,
1092 uint32_t queueFamilyIndex,
1093 uint32_t queueIndex,
1094 VkQueue* pQueue)
1095 {
1096 RADV_FROM_HANDLE(radv_device, device, _device);
1097
1098 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1099 }
1100
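/* Hang diagnostics: writes the last trace ID that made it to memory,
 * plus an annotated dump of the offending command stream, to the file
 * named by RADV_TRACE_FILE.
 */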
1101 static void radv_dump_trace(struct radv_device *device,
1102 struct radeon_winsys_cs *cs)
1103 {
1104 const char *filename = getenv("RADV_TRACE_FILE");
1105 FILE *f = fopen(filename, "w");
1106 if (!f) {
1107 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1108 return;
1109 }
1110
1111 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1112 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1113 fclose(f);
1114 }
1115
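/* Fills the four 16-byte buffer resource descriptors the geometry-shader
 * rings need into the queue's descriptor BO, starting at map[4]
 * (map[0..1] hold the graphics scratch rsrc written by the caller and
 * map[2..3] are padding): an ES view and a GS view of the ESGS ring,
 * followed by two VS/GS views of the GSVS ring.
 */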
1116 static void
1117 fill_geom_rings(struct radv_queue *queue,
1118 uint32_t *map,
1119 uint32_t esgs_ring_size,
1120 struct radeon_winsys_bo *esgs_ring_bo,
1121 uint32_t gsvs_ring_size,
1122 struct radeon_winsys_bo *gsvs_ring_bo)
1123 {
1124 uint64_t esgs_va = 0, gsvs_va = 0;
1125 uint32_t *desc = &map[4];
1126
1127 if (esgs_ring_bo)
1128 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1129 if (gsvs_ring_bo)
1130 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1131
1132 /* stride 0, num records - size, add tid, swizzle, elsize4,
1133 index stride 64 */
1134 desc[0] = esgs_va;
1135 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1136 S_008F04_STRIDE(0) |
1137 S_008F04_SWIZZLE_ENABLE(true);
1138 desc[2] = esgs_ring_size;
1139 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1140 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1141 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1142 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1143 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1144 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1145 S_008F0C_ELEMENT_SIZE(1) |
1146 S_008F0C_INDEX_STRIDE(3) |
1147 S_008F0C_ADD_TID_ENABLE(true);
1148
1149 desc += 4;
1150 /* GS entry for ES->GS ring */
1151 /* stride 0, num records - size, elsize0,
1152 index stride 0 */
1153 desc[0] = esgs_va;
1154 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1155 S_008F04_STRIDE(0) |
1156 S_008F04_SWIZZLE_ENABLE(false);
1157 desc[2] = esgs_ring_size;
1158 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1159 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1160 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1161 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1162 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1163 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1164 S_008F0C_ELEMENT_SIZE(0) |
1165 S_008F0C_INDEX_STRIDE(0) |
1166 S_008F0C_ADD_TID_ENABLE(false);
1167
1168 desc += 4;
1169 /* VS entry for GS->VS ring */
1170 /* stride 0, num records - size, elsize0,
1171 index stride 0 */
1172 desc[0] = gsvs_va;
1173 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1174 S_008F04_STRIDE(0) |
1175 S_008F04_SWIZZLE_ENABLE(false);
1176 desc[2] = gsvs_ring_size;
1177 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1178 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1179 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1180 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1181 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1182 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1183 S_008F0C_ELEMENT_SIZE(0) |
1184 S_008F0C_INDEX_STRIDE(0) |
1185 S_008F0C_ADD_TID_ENABLE(false);
1186 desc += 4;
1187
1188 /* stride gsvs_itemsize, num records 64
1189 elsize 4, index stride 16 */
1190 /* shader will patch stride and desc[2] */
1191 desc[0] = gsvs_va;
1192 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1193 S_008F04_STRIDE(0) |
1194 S_008F04_SWIZZLE_ENABLE(true);
1195 desc[2] = 0;
1196 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1197 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1198 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1199 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1200 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1201 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1202 S_008F0C_ELEMENT_SIZE(1) |
1203 S_008F0C_INDEX_STRIDE(1) |
1204 S_008F0C_ADD_TID_ENABLE(true);
1205 }
1206
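/* (Re)builds the per-queue preamble command streams. Grows the
 * graphics/compute scratch buffers and the ESGS/GSVS rings to the
 * requested sizes (never shrinking them), re-emits the ring registers
 * and user-data pointers, and produces two CSes: an initial preamble
 * that also flushes caches, and a continue preamble used when a
 * submission is split into several chunks.
 */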
1207 static VkResult
1208 radv_get_preamble_cs(struct radv_queue *queue,
1209 uint32_t scratch_size,
1210 uint32_t compute_scratch_size,
1211 uint32_t esgs_ring_size,
1212 uint32_t gsvs_ring_size,
1213 struct radeon_winsys_cs **initial_preamble_cs,
1214 struct radeon_winsys_cs **continue_preamble_cs)
1215 {
1216 struct radeon_winsys_bo *scratch_bo = NULL;
1217 struct radeon_winsys_bo *descriptor_bo = NULL;
1218 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1219 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1220 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1221 struct radeon_winsys_cs *dest_cs[2] = {0};
1222
1223 if (scratch_size <= queue->scratch_size &&
1224 compute_scratch_size <= queue->compute_scratch_size &&
1225 esgs_ring_size <= queue->esgs_ring_size &&
1226 gsvs_ring_size <= queue->gsvs_ring_size &&
1227 queue->initial_preamble_cs) {
1228 *initial_preamble_cs = queue->initial_preamble_cs;
1229 *continue_preamble_cs = queue->continue_preamble_cs;
1230 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1231 *continue_preamble_cs = NULL;
1232 return VK_SUCCESS;
1233 }
1234
1235 if (scratch_size > queue->scratch_size) {
1236 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1237 scratch_size,
1238 4096,
1239 RADEON_DOMAIN_VRAM,
1240 RADEON_FLAG_NO_CPU_ACCESS);
1241 if (!scratch_bo)
1242 goto fail;
1243 } else
1244 scratch_bo = queue->scratch_bo;
1245
1246 if (compute_scratch_size > queue->compute_scratch_size) {
1247 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1248 compute_scratch_size,
1249 4096,
1250 RADEON_DOMAIN_VRAM,
1251 RADEON_FLAG_NO_CPU_ACCESS);
1252 if (!compute_scratch_bo)
1253 goto fail;
1254
1255 } else
1256 compute_scratch_bo = queue->compute_scratch_bo;
1257
1258 if (esgs_ring_size > queue->esgs_ring_size) {
1259 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1260 esgs_ring_size,
1261 4096,
1262 RADEON_DOMAIN_VRAM,
1263 RADEON_FLAG_NO_CPU_ACCESS);
1264 if (!esgs_ring_bo)
1265 goto fail;
1266 } else {
1267 esgs_ring_bo = queue->esgs_ring_bo;
1268 esgs_ring_size = queue->esgs_ring_size;
1269 }
1270
1271 if (gsvs_ring_size > queue->gsvs_ring_size) {
1272 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1273 gsvs_ring_size,
1274 4096,
1275 RADEON_DOMAIN_VRAM,
1276 RADEON_FLAG_NO_CPU_ACCESS);
1277 if (!gsvs_ring_bo)
1278 goto fail;
1279 } else {
1280 gsvs_ring_bo = queue->gsvs_ring_bo;
1281 gsvs_ring_size = queue->gsvs_ring_size;
1282 }
1283
1284 if (scratch_bo != queue->scratch_bo ||
1285 esgs_ring_bo != queue->esgs_ring_bo ||
1286 gsvs_ring_bo != queue->gsvs_ring_bo) {
1287 uint32_t size = 0;
1288 if (gsvs_ring_bo || esgs_ring_bo)
1289 size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
1290 else if (scratch_bo)
1291 size = 8; /* 2 dword */
1292
1293 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1294 size,
1295 4096,
1296 RADEON_DOMAIN_VRAM,
1297 RADEON_FLAG_CPU_ACCESS);
1298 if (!descriptor_bo)
1299 goto fail;
1300 } else
1301 descriptor_bo = queue->descriptor_bo;
1302
1303 for(int i = 0; i < 2; ++i) {
1304 struct radeon_winsys_cs *cs = NULL;
1305 cs = queue->device->ws->cs_create(queue->device->ws,
1306 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1307 if (!cs)
1308 goto fail;
1309
1310 dest_cs[i] = cs;
1311
1312 if (scratch_bo)
1313 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1314
1315 if (esgs_ring_bo)
1316 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1317
1318 if (gsvs_ring_bo)
1319 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1320
1321 if (descriptor_bo)
1322 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1323
1324 if (descriptor_bo != queue->descriptor_bo) {
1325 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1326
1327 if (scratch_bo) {
1328 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1329 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1330 S_008F04_SWIZZLE_ENABLE(1);
1331 map[0] = scratch_va;
1332 map[1] = rsrc1;
1333 }
1334
1335 if (esgs_ring_bo || gsvs_ring_bo)
1336 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1337
1338 queue->device->ws->buffer_unmap(descriptor_bo);
1339 }
1340
1341 if (esgs_ring_bo || gsvs_ring_bo) {
1342 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1343 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1344 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1345 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1346
1347 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1348 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1349 radeon_emit(cs, esgs_ring_size >> 8);
1350 radeon_emit(cs, gsvs_ring_size >> 8);
1351 } else {
1352 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1353 radeon_emit(cs, esgs_ring_size >> 8);
1354 radeon_emit(cs, gsvs_ring_size >> 8);
1355 }
1356 }
1357
1358 if (descriptor_bo) {
1359 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1360 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1361 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1362 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1363 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1364 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1365
1366 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1367
1368 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1369 radeon_set_sh_reg_seq(cs, regs[i], 2);
1370 radeon_emit(cs, va);
1371 radeon_emit(cs, va >> 32);
1372 }
1373 }
1374
1375 if (compute_scratch_bo) {
1376 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1377 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1378 S_008F04_SWIZZLE_ENABLE(1);
1379
1380 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1381
1382 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1383 radeon_emit(cs, scratch_va);
1384 radeon_emit(cs, rsrc1);
1385 }
1386
1387 if (!i) {
1388 si_cs_emit_cache_flush(cs,
1389 queue->device->physical_device->rad_info.chip_class,
1390 queue->queue_family_index == RING_COMPUTE &&
1391 queue->device->physical_device->rad_info.chip_class >= CIK,
1392 RADV_CMD_FLAG_INV_ICACHE |
1393 RADV_CMD_FLAG_INV_SMEM_L1 |
1394 RADV_CMD_FLAG_INV_VMEM_L1 |
1395 RADV_CMD_FLAG_INV_GLOBAL_L2);
1396 }
1397
1398 if (!queue->device->ws->cs_finalize(cs))
1399 goto fail;
1400 }
1401
1402 if (queue->initial_preamble_cs)
1403 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1404
1405 if (queue->continue_preamble_cs)
1406 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1407
1408 queue->initial_preamble_cs = dest_cs[0];
1409 queue->continue_preamble_cs = dest_cs[1];
1410
1411 if (scratch_bo != queue->scratch_bo) {
1412 if (queue->scratch_bo)
1413 queue->device->ws->buffer_destroy(queue->scratch_bo);
1414 queue->scratch_bo = scratch_bo;
1415 queue->scratch_size = scratch_size;
1416 }
1417
1418 if (compute_scratch_bo != queue->compute_scratch_bo) {
1419 if (queue->compute_scratch_bo)
1420 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1421 queue->compute_scratch_bo = compute_scratch_bo;
1422 queue->compute_scratch_size = compute_scratch_size;
1423 }
1424
1425 if (esgs_ring_bo != queue->esgs_ring_bo) {
1426 if (queue->esgs_ring_bo)
1427 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1428 queue->esgs_ring_bo = esgs_ring_bo;
1429 queue->esgs_ring_size = esgs_ring_size;
1430 }
1431
1432 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1433 if (queue->gsvs_ring_bo)
1434 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1435 queue->gsvs_ring_bo = gsvs_ring_bo;
1436 queue->gsvs_ring_size = gsvs_ring_size;
1437 }
1438
1439 if (descriptor_bo != queue->descriptor_bo) {
1440 if (queue->descriptor_bo)
1441 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1442
1443 queue->descriptor_bo = descriptor_bo;
1444 }
1445
1446 *initial_preamble_cs = queue->initial_preamble_cs;
1447 *continue_preamble_cs = queue->continue_preamble_cs;
1448 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1449 *continue_preamble_cs = NULL;
1450 return VK_SUCCESS;
1451 fail:
1452 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1453 if (dest_cs[i])
1454 queue->device->ws->cs_destroy(dest_cs[i]);
1455 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1456 queue->device->ws->buffer_destroy(descriptor_bo);
1457 if (scratch_bo && scratch_bo != queue->scratch_bo)
1458 queue->device->ws->buffer_destroy(scratch_bo);
1459 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1460 queue->device->ws->buffer_destroy(compute_scratch_bo);
1461 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1462 queue->device->ws->buffer_destroy(esgs_ring_bo);
1463 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1464 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1465 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1466 }
1467
1468 VkResult radv_QueueSubmit(
1469 VkQueue _queue,
1470 uint32_t submitCount,
1471 const VkSubmitInfo* pSubmits,
1472 VkFence _fence)
1473 {
1474 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1475 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1476 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1477 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1478 int ret;
1479 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
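/* In trace mode (RADV_TRACE_FILE) CSes are submitted one at a time so
 * that a hang can be pinned to a single command buffer. */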
1480 uint32_t scratch_size = 0;
1481 uint32_t compute_scratch_size = 0;
1482 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1483 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1484 VkResult result;
1485 bool fence_emitted = false;
1486
1487 /* Do this first so failing to allocate scratch buffers can't result in
1488 * partially executed submissions. */
1489 for (uint32_t i = 0; i < submitCount; i++) {
1490 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1491 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1492 pSubmits[i].pCommandBuffers[j]);
1493
1494 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1495 compute_scratch_size = MAX2(compute_scratch_size,
1496 cmd_buffer->compute_scratch_size_needed);
1497 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1498 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1499 }
1500 }
1501
1502 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1503 esgs_ring_size, gsvs_ring_size,
1504 &initial_preamble_cs, &continue_preamble_cs);
1505 if (result != VK_SUCCESS)
1506 return result;
1507
1508 for (uint32_t i = 0; i < submitCount; i++) {
1509 struct radeon_winsys_cs **cs_array;
1510 bool has_flush = !i; /* emit the cache-flush CS before the first submission only */
1511 bool can_patch = !has_flush;
1512 uint32_t advance;
1513
1514 if (!pSubmits[i].commandBufferCount) {
1515 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1516 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1517 &queue->device->empty_cs[queue->queue_family_index],
1518 1, NULL, NULL,
1519 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1520 pSubmits[i].waitSemaphoreCount,
1521 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1522 pSubmits[i].signalSemaphoreCount,
1523 false, base_fence);
1524 if (ret) {
1525 radv_loge("failed to submit CS %d\n", i);
1526 abort();
1527 }
1528 fence_emitted = true;
1529 }
1530 continue;
1531 }
1532
cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
(pSubmits[i].commandBufferCount + has_flush));
if (!cs_array)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

if (has_flush)
cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1538
1539 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1540 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1541 pSubmits[i].pCommandBuffers[j]);
1542 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1543
1544 cs_array[j + has_flush] = cmd_buffer->cs;
1545 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1546 can_patch = false;
1547 }
1548
1549 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
1550 advance = MIN2(max_cs_submission,
1551 pSubmits[i].commandBufferCount + has_flush - j);
1552 bool b = j == 0;
1553 bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;
1554
1555 if (queue->device->trace_bo)
1556 *queue->device->trace_id_ptr = 0;
1557
1558 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1559 advance, initial_preamble_cs, continue_preamble_cs,
1560 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1561 b ? pSubmits[i].waitSemaphoreCount : 0,
1562 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1563 e ? pSubmits[i].signalSemaphoreCount : 0,
1564 can_patch, base_fence);
1565
1566 if (ret) {
1567 radv_loge("failed to submit CS %d\n", i);
1568 abort();
1569 }
1570 fence_emitted = true;
1571 if (queue->device->trace_bo) {
1572 bool success = queue->device->ws->ctx_wait_idle(
1573 queue->hw_ctx,
1574 radv_queue_family_to_ring(
1575 queue->queue_family_index),
1576 queue->queue_idx);
1577
1578 if (!success) { /* Hang */
1579 radv_dump_trace(queue->device, cs_array[j]);
1580 abort();
1581 }
1582 }
1583 }
1584 free(cs_array);
1585 }
1586
1587 if (fence) {
1588 if (!fence_emitted)
1589 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1590 &queue->device->empty_cs[queue->queue_family_index],
1591 1, NULL, NULL, NULL, 0, NULL, 0,
1592 false, base_fence);
1593
1594 fence->submitted = true;
1595 }
1596
1597 return VK_SUCCESS;
1598 }
1599
1600 VkResult radv_QueueWaitIdle(
1601 VkQueue _queue)
1602 {
1603 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1604
1605 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1606 radv_queue_family_to_ring(queue->queue_family_index),
1607 queue->queue_idx);
1608 return VK_SUCCESS;
1609 }
1610
1611 VkResult radv_DeviceWaitIdle(
1612 VkDevice _device)
1613 {
1614 RADV_FROM_HANDLE(radv_device, device, _device);
1615
1616 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1617 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1618 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1619 }
1620 }
1621 return VK_SUCCESS;
1622 }
1623
1624 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1625 VkInstance instance,
1626 const char* pName)
1627 {
1628 return radv_lookup_entrypoint(pName);
1629 }
1630
1631 /* The loader wants us to expose a second GetInstanceProcAddr function
1632 * to work around certain LD_PRELOAD issues seen in apps.
1633 */
1634 PUBLIC
1635 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1636 VkInstance instance,
1637 const char* pName);
1638
1639 PUBLIC
1640 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1641 VkInstance instance,
1642 const char* pName)
1643 {
1644 return radv_GetInstanceProcAddr(instance, pName);
1645 }
1646
1647 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1648 VkDevice device,
1649 const char* pName)
1650 {
1651 return radv_lookup_entrypoint(pName);
1652 }
1653
1654 VkResult radv_AllocateMemory(
1655 VkDevice _device,
1656 const VkMemoryAllocateInfo* pAllocateInfo,
1657 const VkAllocationCallbacks* pAllocator,
1658 VkDeviceMemory* pMem)
1659 {
1660 RADV_FROM_HANDLE(radv_device, device, _device);
1661 struct radv_device_memory *mem;
1662 VkResult result;
1663 enum radeon_bo_domain domain;
1664 uint32_t flags = 0;
1665 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1666
1667 if (pAllocateInfo->allocationSize == 0) {
1668 /* Apparently, this is allowed */
1669 *pMem = VK_NULL_HANDLE;
1670 return VK_SUCCESS;
1671 }
1672
1673 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1674 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1675 if (mem == NULL)
1676 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1677
1678 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1679 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1680 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1681 domain = RADEON_DOMAIN_GTT;
1682 else
1683 domain = RADEON_DOMAIN_VRAM;
1684
1685 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1686 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1687 else
1688 flags |= RADEON_FLAG_CPU_ACCESS;
1689
1690 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1691 flags |= RADEON_FLAG_GTT_WC;
1692
1693 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1694 domain, flags);
1695
1696 if (!mem->bo) {
1697 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1698 goto fail;
1699 }
1700 mem->type_index = pAllocateInfo->memoryTypeIndex;
1701
1702 *pMem = radv_device_memory_to_handle(mem);
1703
1704 return VK_SUCCESS;
1705
1706 fail:
1707 vk_free2(&device->alloc, pAllocator, mem);
1708
1709 return result;
1710 }
1711
1712 void radv_FreeMemory(
1713 VkDevice _device,
1714 VkDeviceMemory _mem,
1715 const VkAllocationCallbacks* pAllocator)
1716 {
1717 RADV_FROM_HANDLE(radv_device, device, _device);
1718 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1719
1720 if (mem == NULL)
1721 return;
1722
1723 device->ws->buffer_destroy(mem->bo);
1724 mem->bo = NULL;
1725
1726 vk_free2(&device->alloc, pAllocator, mem);
1727 }
1728
1729 VkResult radv_MapMemory(
1730 VkDevice _device,
1731 VkDeviceMemory _memory,
1732 VkDeviceSize offset,
1733 VkDeviceSize size,
1734 VkMemoryMapFlags flags,
1735 void** ppData)
1736 {
1737 RADV_FROM_HANDLE(radv_device, device, _device);
1738 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1739
1740 if (mem == NULL) {
1741 *ppData = NULL;
1742 return VK_SUCCESS;
1743 }
1744
1745 *ppData = device->ws->buffer_map(mem->bo);
1746 if (*ppData) {
1747 *ppData += offset;
1748 return VK_SUCCESS;
1749 }
1750
1751 return VK_ERROR_MEMORY_MAP_FAILED;
1752 }
1753
1754 void radv_UnmapMemory(
1755 VkDevice _device,
1756 VkDeviceMemory _memory)
1757 {
1758 RADV_FROM_HANDLE(radv_device, device, _device);
1759 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1760
1761 if (mem == NULL)
1762 return;
1763
1764 device->ws->buffer_unmap(mem->bo);
1765 }
1766
1767 VkResult radv_FlushMappedMemoryRanges(
1768 VkDevice _device,
1769 uint32_t memoryRangeCount,
1770 const VkMappedMemoryRange* pMemoryRanges)
1771 {
1772 return VK_SUCCESS;
1773 }
1774
1775 VkResult radv_InvalidateMappedMemoryRanges(
1776 VkDevice _device,
1777 uint32_t memoryRangeCount,
1778 const VkMappedMemoryRange* pMemoryRanges)
1779 {
1780 return VK_SUCCESS;
1781 }
1782
1783 void radv_GetBufferMemoryRequirements(
1784 VkDevice device,
1785 VkBuffer _buffer,
1786 VkMemoryRequirements* pMemoryRequirements)
1787 {
1788 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1789
1790 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1791
1792 pMemoryRequirements->size = buffer->size;
1793 pMemoryRequirements->alignment = 16;
1794 }
1795
1796 void radv_GetImageMemoryRequirements(
1797 VkDevice device,
1798 VkImage _image,
1799 VkMemoryRequirements* pMemoryRequirements)
1800 {
1801 RADV_FROM_HANDLE(radv_image, image, _image);
1802
1803 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1804
1805 pMemoryRequirements->size = image->size;
1806 pMemoryRequirements->alignment = image->alignment;
1807 }
1808
1809 void radv_GetImageSparseMemoryRequirements(
1810 VkDevice device,
1811 VkImage image,
1812 uint32_t* pSparseMemoryRequirementCount,
1813 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1814 {
1815 stub();
1816 }
1817
1818 void radv_GetDeviceMemoryCommitment(
1819 VkDevice device,
1820 VkDeviceMemory memory,
1821 VkDeviceSize* pCommittedMemoryInBytes)
1822 {
1823 *pCommittedMemoryInBytes = 0;
1824 }
1825
1826 VkResult radv_BindBufferMemory(
1827 VkDevice device,
1828 VkBuffer _buffer,
1829 VkDeviceMemory _memory,
1830 VkDeviceSize memoryOffset)
1831 {
1832 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1833 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1834
1835 if (mem) {
1836 buffer->bo = mem->bo;
1837 buffer->offset = memoryOffset;
1838 } else {
1839 buffer->bo = NULL;
1840 buffer->offset = 0;
1841 }
1842
1843 return VK_SUCCESS;
1844 }
1845
1846 VkResult radv_BindImageMemory(
1847 VkDevice device,
1848 VkImage _image,
1849 VkDeviceMemory _memory,
1850 VkDeviceSize memoryOffset)
1851 {
1852 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1853 RADV_FROM_HANDLE(radv_image, image, _image);
1854
1855 if (mem) {
1856 image->bo = mem->bo;
1857 image->offset = memoryOffset;
1858 } else {
1859 image->bo = NULL;
1860 image->offset = 0;
1861 }
1862
1863 return VK_SUCCESS;
1864 }
1865
1866 VkResult radv_QueueBindSparse(
1867 VkQueue queue,
1868 uint32_t bindInfoCount,
1869 const VkBindSparseInfo* pBindInfo,
1870 VkFence fence)
1871 {
1872 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1873 }
1874
1875 VkResult radv_CreateFence(
1876 VkDevice _device,
1877 const VkFenceCreateInfo* pCreateInfo,
1878 const VkAllocationCallbacks* pAllocator,
1879 VkFence* pFence)
1880 {
1881 RADV_FROM_HANDLE(radv_device, device, _device);
1882 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1883 sizeof(*fence), 8,
1884 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1885
1886 if (!fence)
1887 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1888
1889 memset(fence, 0, sizeof(*fence));
1890 fence->submitted = false;
1891 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1892 fence->fence = device->ws->create_fence();
1893 if (!fence->fence) {
1894 vk_free2(&device->alloc, pAllocator, fence);
1895 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1896 }
1897
1898 *pFence = radv_fence_to_handle(fence);
1899
1900 return VK_SUCCESS;
1901 }
1902
1903 void radv_DestroyFence(
1904 VkDevice _device,
1905 VkFence _fence,
1906 const VkAllocationCallbacks* pAllocator)
1907 {
1908 RADV_FROM_HANDLE(radv_device, device, _device);
1909 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1910
1911 if (!fence)
1912 return;
1913 device->ws->destroy_fence(fence->fence);
1914 vk_free2(&device->alloc, pAllocator, fence);
1915 }
1916
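/* Vulkan hands us a relative timeout in nanoseconds, while the winsys
 * fence wait takes an absolute CLOCK_MONOTONIC deadline; convert it and
 * clamp so the addition cannot overflow UINT64_MAX.
 */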
1917 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1918 {
1919 uint64_t current_time;
1920 struct timespec tv;
1921
1922 clock_gettime(CLOCK_MONOTONIC, &tv);
1923 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1924
1925 timeout = MIN2(UINT64_MAX - current_time, timeout);
1926
1927 return current_time + timeout;
1928 }
1929
1930 VkResult radv_WaitForFences(
1931 VkDevice _device,
1932 uint32_t fenceCount,
1933 const VkFence* pFences,
1934 VkBool32 waitAll,
1935 uint64_t timeout)
1936 {
1937 RADV_FROM_HANDLE(radv_device, device, _device);
1938 timeout = radv_get_absolute_timeout(timeout);
1939
1940 if (!waitAll && fenceCount > 1) {
1941 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1942 }
1943
1944 for (uint32_t i = 0; i < fenceCount; ++i) {
1945 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1946 bool expired = false;
1947
1948 if (fence->signalled)
1949 continue;
1950
1951 if (!fence->submitted)
1952 return VK_TIMEOUT;
1953
1954 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1955 if (!expired)
1956 return VK_TIMEOUT;
1957
1958 fence->signalled = true;
1959 }
1960
1961 return VK_SUCCESS;
1962 }
1963
1964 VkResult radv_ResetFences(VkDevice device,
1965 uint32_t fenceCount,
1966 const VkFence *pFences)
1967 {
1968 for (unsigned i = 0; i < fenceCount; ++i) {
1969 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1970 fence->submitted = fence->signalled = false;
1971 }
1972
1973 return VK_SUCCESS;
1974 }
1975
1976 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1977 {
1978 RADV_FROM_HANDLE(radv_device, device, _device);
1979 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1980
1981 if (fence->signalled)
1982 return VK_SUCCESS;
1983 if (!fence->submitted)
1984 return VK_NOT_READY;
1985
1986 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1987 return VK_NOT_READY;
1988
1989 return VK_SUCCESS;
1990 }
1991
1992
1993 // Queue semaphore functions
1994
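/* Semaphores are thin wrappers: the VkSemaphore handle is the winsys
 * semaphore pointer itself, so create/destroy need no extra allocation.
 */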
1995 VkResult radv_CreateSemaphore(
1996 VkDevice _device,
1997 const VkSemaphoreCreateInfo* pCreateInfo,
1998 const VkAllocationCallbacks* pAllocator,
1999 VkSemaphore* pSemaphore)
2000 {
2001 RADV_FROM_HANDLE(radv_device, device, _device);
2002 struct radeon_winsys_sem *sem;
2003
2004 sem = device->ws->create_sem(device->ws);
2005 if (!sem)
2006 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2007
2008 *pSemaphore = (VkSemaphore)sem;
2009 return VK_SUCCESS;
2010 }
2011
2012 void radv_DestroySemaphore(
2013 VkDevice _device,
2014 VkSemaphore _semaphore,
2015 const VkAllocationCallbacks* pAllocator)
2016 {
2017 RADV_FROM_HANDLE(radv_device, device, _device);
2018 struct radeon_winsys_sem *sem;
2019 if (!_semaphore)
2020 return;
2021
2022 sem = (struct radeon_winsys_sem *)_semaphore;
2023 device->ws->destroy_sem(sem);
2024 }
2025
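/* Events are backed by an 8-byte GTT buffer that stays CPU-mapped for
 * the lifetime of the event; set/reset/status just write and read the
 * first qword.
 */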
2026 VkResult radv_CreateEvent(
2027 VkDevice _device,
2028 const VkEventCreateInfo* pCreateInfo,
2029 const VkAllocationCallbacks* pAllocator,
2030 VkEvent* pEvent)
2031 {
2032 RADV_FROM_HANDLE(radv_device, device, _device);
2033 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2034 sizeof(*event), 8,
2035 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2036
2037 if (!event)
2038 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2039
2040 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2041 RADEON_DOMAIN_GTT,
2042 RADEON_FLAG_CPU_ACCESS);
2043 if (!event->bo) {
2044 vk_free2(&device->alloc, pAllocator, event);
2045 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2046 }
2047
2048 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2049
2050 *pEvent = radv_event_to_handle(event);
2051
2052 return VK_SUCCESS;
2053 }
2054
2055 void radv_DestroyEvent(
2056 VkDevice _device,
2057 VkEvent _event,
2058 const VkAllocationCallbacks* pAllocator)
2059 {
2060 RADV_FROM_HANDLE(radv_device, device, _device);
2061 RADV_FROM_HANDLE(radv_event, event, _event);
2062
2063 if (!event)
2064 return;
2065 device->ws->buffer_destroy(event->bo);
2066 vk_free2(&device->alloc, pAllocator, event);
2067 }
2068
2069 VkResult radv_GetEventStatus(
2070 VkDevice _device,
2071 VkEvent _event)
2072 {
2073 RADV_FROM_HANDLE(radv_event, event, _event);
2074
2075 if (*event->map == 1)
2076 return VK_EVENT_SET;
2077 return VK_EVENT_RESET;
2078 }
2079
2080 VkResult radv_SetEvent(
2081 VkDevice _device,
2082 VkEvent _event)
2083 {
2084 RADV_FROM_HANDLE(radv_event, event, _event);
2085 *event->map = 1;
2086
2087 return VK_SUCCESS;
2088 }
2089
2090 VkResult radv_ResetEvent(
2091 VkDevice _device,
2092 VkEvent _event)
2093 {
2094 RADV_FROM_HANDLE(radv_event, event, _event);
2095 *event->map = 0;
2096
2097 return VK_SUCCESS;
2098 }
2099
2100 VkResult radv_CreateBuffer(
2101 VkDevice _device,
2102 const VkBufferCreateInfo* pCreateInfo,
2103 const VkAllocationCallbacks* pAllocator,
2104 VkBuffer* pBuffer)
2105 {
2106 RADV_FROM_HANDLE(radv_device, device, _device);
2107 struct radv_buffer *buffer;
2108
2109 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2110
2111 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2112 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2113 if (buffer == NULL)
2114 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2115
2116 buffer->size = pCreateInfo->size;
2117 buffer->usage = pCreateInfo->usage;
2118 buffer->bo = NULL;
2119 buffer->offset = 0;
2120
2121 *pBuffer = radv_buffer_to_handle(buffer);
2122
2123 return VK_SUCCESS;
2124 }
2125
2126 void radv_DestroyBuffer(
2127 VkDevice _device,
2128 VkBuffer _buffer,
2129 const VkAllocationCallbacks* pAllocator)
2130 {
2131 RADV_FROM_HANDLE(radv_device, device, _device);
2132 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2133
2134 if (!buffer)
2135 return;
2136
2137 vk_free2(&device->alloc, pAllocator, buffer);
2138 }
2139
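/* Pick the per-level tiling index, using the stencil variant of the
 * table when programming the stencil surface.
 */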
2140 static inline unsigned
2141 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2142 {
2143 if (stencil)
2144 return image->surface.stencil_tiling_index[level];
2145 else
2146 return image->surface.tiling_index[level];
2147 }
2148
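/* For 3D views the CB/DB slice range is addressed by depth rather than
 * by array layers.
 */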
2149 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2150 {
2151 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2152 }
2153
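/* Translate an image view into the CB_COLOR* register state used when it
 * is bound as a color attachment.
 */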
2154 static void
2155 radv_initialise_color_surface(struct radv_device *device,
2156 struct radv_color_buffer_info *cb,
2157 struct radv_image_view *iview)
2158 {
2159 const struct vk_format_description *desc;
2160 unsigned ntype, format, swap, endian;
2161 unsigned blend_clamp = 0, blend_bypass = 0;
2162 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2163 uint64_t va;
2164 const struct radeon_surf *surf = &iview->image->surface;
2165 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2166
2167 desc = vk_format_description(iview->vk_format);
2168
2169 memset(cb, 0, sizeof(*cb));
2170
2171 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2172 va += level_info->offset;
2173 cb->cb_color_base = va >> 8;
2174
2175 /* CMASK variables */
2176 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2177 va += iview->image->cmask.offset;
2178 cb->cb_color_cmask = va >> 8;
2179 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2180
2181 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2182 va += iview->image->dcc_offset;
2183 cb->cb_dcc_base = va >> 8;
2184
2185 uint32_t max_slice = radv_surface_layer_count(iview);
2186 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2187 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2188
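/* The pitch/slice registers use TILE_MAX encodings: pitch in units of
 * eight blocks minus one, slice in units of 8x8-block tiles minus one.
 */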
2189 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2190 pitch_tile_max = level_info->nblk_x / 8 - 1;
2191 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2192 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2193
2194 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2195 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2196
2197 /* Intensity is implemented as Red, so treat it that way. */
2198 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2199 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2200
2201 if (iview->image->samples > 1) {
2202 unsigned log_samples = util_logbase2(iview->image->samples);
2203
2204 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2205 S_028C74_NUM_FRAGMENTS(log_samples);
2206 }
2207
2208 if (iview->image->fmask.size) {
2209 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2210 if (device->physical_device->rad_info.chip_class >= CIK)
2211 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2212 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2213 cb->cb_color_fmask = va >> 8;
2214 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2215 } else {
2216 /* This must be set for fast clear to work without FMASK. */
2217 if (device->physical_device->rad_info.chip_class >= CIK)
2218 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2219 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2220 cb->cb_color_fmask = cb->cb_color_base;
2221 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2222 }
2223
2224 ntype = radv_translate_color_numformat(iview->vk_format,
2225 desc,
2226 vk_format_get_first_non_void_channel(iview->vk_format));
2227 format = radv_translate_colorformat(iview->vk_format);
2228 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2229 radv_finishme("Illegal color\n");
2230 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2231 endian = radv_colorformat_endian_swap(format);
2232
2233 /* blend clamp should be set for all NORM/SRGB types */
2234 if (ntype == V_028C70_NUMBER_UNORM ||
2235 ntype == V_028C70_NUMBER_SNORM ||
2236 ntype == V_028C70_NUMBER_SRGB)
2237 blend_clamp = 1;
2238
2239 /* Per the hardware docs, bypass blending for SINT/UINT and the
2240 8/24 COLOR variants. */
2241 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2242 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2243 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2244 blend_clamp = 0;
2245 blend_bypass = 1;
2246 }
2247 #if 0
2248 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2249 (format == V_028C70_COLOR_8 ||
2250 format == V_028C70_COLOR_8_8 ||
2251 format == V_028C70_COLOR_8_8_8_8))
2252 ->color_is_int8 = true;
2253 #endif
2254 cb->cb_color_info = S_028C70_FORMAT(format) |
2255 S_028C70_COMP_SWAP(swap) |
2256 S_028C70_BLEND_CLAMP(blend_clamp) |
2257 S_028C70_BLEND_BYPASS(blend_bypass) |
2258 S_028C70_SIMPLE_FLOAT(1) |
2259 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2260 ntype != V_028C70_NUMBER_SNORM &&
2261 ntype != V_028C70_NUMBER_SRGB &&
2262 format != V_028C70_COLOR_8_24 &&
2263 format != V_028C70_COLOR_24_8) |
2264 S_028C70_NUMBER_TYPE(ntype) |
2265 S_028C70_ENDIAN(endian);
2266 if (iview->image->samples > 1 &&
2267 iview->image->fmask.size)
2268 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2269
2270 if (iview->image->cmask.size &&
2271 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2272 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2273
2274 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2275 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2276
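/* On VI, pick the DCC block-size limits: 0/1/2 encode 64/128/256-byte
 * maximum uncompressed blocks, and MSAA surfaces with small texels need
 * the smaller limits. INDEPENDENT_64B_BLOCKS is always set.
 */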
2277 if (device->physical_device->rad_info.chip_class >= VI) {
2278 unsigned max_uncompressed_block_size = 2;
2279 if (iview->image->samples > 1) {
2280 if (iview->image->surface.bpe == 1)
2281 max_uncompressed_block_size = 0;
2282 else if (iview->image->surface.bpe == 2)
2283 max_uncompressed_block_size = 1;
2284 }
2285
2286 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2287 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2288 }
2289
2290 /* This must be set for fast clear to work without FMASK. */
2291 if (!iview->image->fmask.size &&
2292 device->physical_device->rad_info.chip_class == SI) {
2293 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2294 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2295 }
2296 }
2297
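/* Translate an image view into DB register state: format-dependent
 * polygon-offset scaling, Z/stencil base addresses, tiling setup and,
 * for the base level, the HTILE metadata surface.
 */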
2298 static void
2299 radv_initialise_ds_surface(struct radv_device *device,
2300 struct radv_ds_buffer_info *ds,
2301 struct radv_image_view *iview)
2302 {
2303 unsigned level = iview->base_mip;
2304 unsigned format;
2305 uint64_t va, s_offs, z_offs;
2306 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2307 memset(ds, 0, sizeof(*ds));
2308 switch (iview->vk_format) {
2309 case VK_FORMAT_D24_UNORM_S8_UINT:
2310 case VK_FORMAT_X8_D24_UNORM_PACK32:
2311 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2312 ds->offset_scale = 2.0f;
2313 break;
2314 case VK_FORMAT_D16_UNORM:
2315 case VK_FORMAT_D16_UNORM_S8_UINT:
2316 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2317 ds->offset_scale = 4.0f;
2318 break;
2319 case VK_FORMAT_D32_SFLOAT:
2320 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2321 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2322 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2323 ds->offset_scale = 1.0f;
2324 break;
2325 default:
2326 break;
2327 }
2328
2329 format = radv_translate_dbformat(iview->vk_format);
2330 if (format == V_028040_Z_INVALID) {
2331 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2332 }
2333
2334 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2335 s_offs = z_offs = va;
2336 z_offs += iview->image->surface.level[level].offset;
2337 s_offs += iview->image->surface.stencil_level[level].offset;
2338
2339 uint32_t max_slice = radv_surface_layer_count(iview);
2340 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2341 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2342 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2343 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2344
2345 if (iview->image->samples > 1)
2346 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2347
2348 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2349 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2350 else
2351 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2352
2353 if (device->physical_device->rad_info.chip_class >= CIK) {
2354 struct radeon_info *info = &device->physical_device->rad_info;
2355 unsigned tiling_index = iview->image->surface.tiling_index[level];
2356 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2357 unsigned macro_index = iview->image->surface.macro_tile_index;
2358 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2359 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2360 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2361
2362 ds->db_depth_info |=
2363 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2364 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2365 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2366 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2367 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2368 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2369 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2370 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2371 } else {
2372 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2373 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2374 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2375 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2376 }
2377
2378 if (iview->image->htile.size && !level) {
2379 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2380 S_028040_ALLOW_EXPCLEAR(1);
2381
2382 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2383 /* Workaround: For a not yet understood reason, the
2384 * combination of MSAA, fast stencil clear and stencil
2385 * decompress messes with subsequent stencil buffer
2386 * uses. Problem was reproduced on Verde, Bonaire,
2387 * Tonga, and Carrizo.
2388 *
2389 * Disabling EXPCLEAR works around the problem.
2390 *
2391 * Check piglit's arb_texture_multisample-stencil-clear
2392 * test if you want to try changing this.
2393 */
2394 if (iview->image->samples <= 1)
2395 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2396 } else
2397 /* Use all of the htile_buffer for depth if there's no stencil. */
2398 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2399
2400 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2401 iview->image->htile.offset;
2402 ds->db_htile_data_base = va >> 8;
2403 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2404 } else {
2405 ds->db_htile_data_base = 0;
2406 ds->db_htile_surface = 0;
2407 }
2408
2409 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2410 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2411
2412 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2413 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2414 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2415 }
2416
2417 VkResult radv_CreateFramebuffer(
2418 VkDevice _device,
2419 const VkFramebufferCreateInfo* pCreateInfo,
2420 const VkAllocationCallbacks* pAllocator,
2421 VkFramebuffer* pFramebuffer)
2422 {
2423 RADV_FROM_HANDLE(radv_device, device, _device);
2424 struct radv_framebuffer *framebuffer;
2425
2426 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2427
2428 size_t size = sizeof(*framebuffer) +
2429 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2430 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2431 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2432 if (framebuffer == NULL)
2433 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2434
2435 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2436 framebuffer->width = pCreateInfo->width;
2437 framebuffer->height = pCreateInfo->height;
2438 framebuffer->layers = pCreateInfo->layers;
2439 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2440 VkImageView _iview = pCreateInfo->pAttachments[i];
2441 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2442 framebuffer->attachments[i].attachment = iview;
2443 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2444 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2445 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2446 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2447 }
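/* Clamp the framebuffer dimensions to each attachment's extent so the
 * stored size never exceeds what the smallest view can back.
 */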
2448 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2449 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2450 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2451 }
2452
2453 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2454 return VK_SUCCESS;
2455 }
2456
2457 void radv_DestroyFramebuffer(
2458 VkDevice _device,
2459 VkFramebuffer _fb,
2460 const VkAllocationCallbacks* pAllocator)
2461 {
2462 RADV_FROM_HANDLE(radv_device, device, _device);
2463 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2464
2465 if (!fb)
2466 return;
2467 vk_free2(&device->alloc, pAllocator, fb);
2468 }
2469
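/* Helpers translating Vulkan sampler enums into the SQ_TEX fields packed
 * by radv_init_sampler() below.
 */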
2470 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2471 {
2472 switch (address_mode) {
2473 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2474 return V_008F30_SQ_TEX_WRAP;
2475 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2476 return V_008F30_SQ_TEX_MIRROR;
2477 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2478 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2479 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2480 return V_008F30_SQ_TEX_CLAMP_BORDER;
2481 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2482 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2483 default:
2484 unreachable("illegal tex wrap mode");
2485 break;
2486 }
2487 }
2488
2489 static unsigned
2490 radv_tex_compare(VkCompareOp op)
2491 {
2492 switch (op) {
2493 case VK_COMPARE_OP_NEVER:
2494 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2495 case VK_COMPARE_OP_LESS:
2496 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2497 case VK_COMPARE_OP_EQUAL:
2498 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2499 case VK_COMPARE_OP_LESS_OR_EQUAL:
2500 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2501 case VK_COMPARE_OP_GREATER:
2502 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2503 case VK_COMPARE_OP_NOT_EQUAL:
2504 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2505 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2506 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2507 case VK_COMPARE_OP_ALWAYS:
2508 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2509 default:
2510 unreachable("illegal compare mode");
2511 break;
2512 }
2513 }
2514
2515 static unsigned
2516 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2517 {
2518 switch (filter) {
2519 case VK_FILTER_NEAREST:
2520 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2521 V_008F38_SQ_TEX_XY_FILTER_POINT);
2522 case VK_FILTER_LINEAR:
2523 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2524 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2525 case VK_FILTER_CUBIC_IMG:
2526 default:
2527 fprintf(stderr, "illegal texture filter\n");
2528 return 0;
2529 }
2530 }
2531
2532 static unsigned
2533 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2534 {
2535 switch (mode) {
2536 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2537 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2538 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2539 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2540 default:
2541 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2542 }
2543 }
2544
2545 static unsigned
2546 radv_tex_bordercolor(VkBorderColor bcolor)
2547 {
2548 switch (bcolor) {
2549 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2550 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2551 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2552 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2553 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2554 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2555 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2556 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2557 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2558 default:
2559 break;
2560 }
2561 return 0;
2562 }
2563
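/* Map a maxAnisotropy sample count onto the hardware's log2 ratio
 * encoding: 1x..16x become 0..4.
 */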
2564 static unsigned
2565 radv_tex_aniso_filter(unsigned filter)
2566 {
2567 if (filter < 2)
2568 return 0;
2569 if (filter < 4)
2570 return 1;
2571 if (filter < 8)
2572 return 2;
2573 if (filter < 16)
2574 return 3;
2575 return 4;
2576 }
2577
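/* Pack the four 32-bit SQ sampler state words; COMPAT_MODE and
 * ANISO_OVERRIDE are gated on VI+.
 */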
2578 static void
2579 radv_init_sampler(struct radv_device *device,
2580 struct radv_sampler *sampler,
2581 const VkSamplerCreateInfo *pCreateInfo)
2582 {
2583 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2584 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2585 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2586 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2587
2588 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2589 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2590 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2591 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2592 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2593 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2594 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2595 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2596 S_008F30_DISABLE_CUBE_WRAP(0) |
2597 S_008F30_COMPAT_MODE(is_vi));
2598 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2599 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2600 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2601 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2602 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2603 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2604 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2605 S_008F38_MIP_POINT_PRECLAMP(1) |
2606 S_008F38_DISABLE_LSB_CEIL(1) |
2607 S_008F38_FILTER_PREC_FIX(1) |
2608 S_008F38_ANISO_OVERRIDE(is_vi));
2609 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2610 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2611 }
2612
2613 VkResult radv_CreateSampler(
2614 VkDevice _device,
2615 const VkSamplerCreateInfo* pCreateInfo,
2616 const VkAllocationCallbacks* pAllocator,
2617 VkSampler* pSampler)
2618 {
2619 RADV_FROM_HANDLE(radv_device, device, _device);
2620 struct radv_sampler *sampler;
2621
2622 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2623
2624 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2625 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2626 if (!sampler)
2627 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2628
2629 radv_init_sampler(device, sampler, pCreateInfo);
2630 *pSampler = radv_sampler_to_handle(sampler);
2631
2632 return VK_SUCCESS;
2633 }
2634
2635 void radv_DestroySampler(
2636 VkDevice _device,
2637 VkSampler _sampler,
2638 const VkAllocationCallbacks* pAllocator)
2639 {
2640 RADV_FROM_HANDLE(radv_device, device, _device);
2641 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2642
2643 if (!sampler)
2644 return;
2645 vk_free2(&device->alloc, pAllocator, sampler);
2646 }
2647
2648
2649 /* vk_icd.h does not declare this function, so we declare it here to
2650 * suppress Wmissing-prototypes.
2651 */
2652 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2653 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2654
2655 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2656 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2657 {
2658 /* For the full details on loader interface versioning, see
2659 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2660 * What follows is a condensed summary, to help you navigate the large and
2661 * confusing official doc.
2662 *
2663 * - Loader interface v0 is incompatible with later versions. We don't
2664 * support it.
2665 *
2666 * - In loader interface v1:
2667 * - The first ICD entrypoint called by the loader is
2668 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2669 * entrypoint.
2670 * - The ICD must statically expose no other Vulkan symbol unless it is
2671 * linked with -Bsymbolic.
2672 * - Each dispatchable Vulkan handle created by the ICD must be
2673 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2674 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2675 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2676 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2677 * such loader-managed surfaces.
2678 *
2679 * - Loader interface v2 differs from v1 in:
2680 * - The first ICD entrypoint called by the loader is
2681 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2682 * statically expose this entrypoint.
2683 *
2684 * - Loader interface v3 differs from v2 in:
2685 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2686 * vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
2687 * because the loader no longer does so.
2688 */
2689 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2690 return VK_SUCCESS;
2691 }