radv: Handle command buffers that need scratch memory.
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <dlfcn.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/strtod.h"

#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

struct radv_dispatch_table dtable;

static int
radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
{
        Dl_info info;
        struct stat st;
        if (!dladdr(ptr, &info) || !info.dli_fname) {
                return -1;
        }
        if (stat(info.dli_fname, &st)) {
                return -1;
        }
        *timestamp = st.st_mtim.tv_sec;
        return 0;
}

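/* The pipeline cache UUID mixes the Mesa and LLVM build timestamps with the
 * GPU family, so on-disk caches are invalidated whenever either component
 * changes. Byte layout of the 16-byte UUID written below:
 *
 *   [0..3]   Mesa build timestamp
 *   [4..7]   LLVM build timestamp
 *   [8..9]   radeon_family
 *   [10..]   the string "radv" (remaining bytes stay zero from the memset)
 */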
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
        uint32_t mesa_timestamp, llvm_timestamp;
        uint16_t f = family;
        memset(uuid, 0, VK_UUID_SIZE);
        if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
            radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
                return -1;

        memcpy(uuid, &mesa_timestamp, 4);
        memcpy((char*)uuid + 4, &llvm_timestamp, 4);
        memcpy((char*)uuid + 8, &f, 2);
        snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
        return 0;
}

static const VkExtensionProperties instance_extensions[] = {
        {
                .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
                .specVersion = 25,
        },
#ifdef VK_USE_PLATFORM_XCB_KHR
        {
                .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
                .specVersion = 6,
        },
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
        {
                .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
                .specVersion = 6,
        },
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
        {
                .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
                .specVersion = 5,
        },
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
        {
                .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
                .specVersion = 68,
        },
        {
                .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
                .specVersion = 1,
        },
        {
                .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
                .specVersion = 1,
        },
};

static VkResult
radv_extensions_register(struct radv_instance *instance,
                         struct radv_extensions *extensions,
                         const VkExtensionProperties *new_ext,
                         uint32_t num_ext)
{
        size_t new_size;
        VkExtensionProperties *new_ptr;

        assert(new_ext && num_ext > 0);

        if (!new_ext)
                return VK_ERROR_INITIALIZATION_FAILED;

        new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
        new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
                             new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

        /* Old array continues to be valid, update nothing */
        if (!new_ptr)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        memcpy(&new_ptr[extensions->num_ext], new_ext,
               num_ext * sizeof(VkExtensionProperties));
        extensions->ext_array = new_ptr;
        extensions->num_ext += num_ext;

        return VK_SUCCESS;
}
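
/* Typical use (this is exactly the call made from radv_physical_device_init()
 * below), registering the common device extensions once per enumerated GPU:
 *
 *   result = radv_extensions_register(instance, &device->extensions,
 *                                     common_device_extensions,
 *                                     ARRAY_SIZE(common_device_extensions));
 *
 * The array grows with realloc semantics, so calling this more than once
 * with different extension lists appends rather than replaces.
 */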

static void
radv_extensions_finish(struct radv_instance *instance,
                       struct radv_extensions *extensions)
{
        assert(extensions);

        if (!extensions)
162 radv_loge("Attemted to free invalid extension struct\n");

        if (extensions->ext_array)
                vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
                     size_t num_ext,
                     const char *name)
{
        assert(extensions && name);

        for (uint32_t i = 0; i < num_ext; i++) {
                if (strcmp(name, extensions[i].extensionName) == 0)
                        return true;
        }

        return false;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
                          struct radv_instance *instance,
                          const char *path)
{
        VkResult result;
        drmVersionPtr version;
        int fd;

        fd = open(path, O_RDWR | O_CLOEXEC);
        if (fd < 0)
                return VK_ERROR_INCOMPATIBLE_DRIVER;

        version = drmGetVersion(fd);
        if (!version) {
                close(fd);
                return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "failed to get version %s: %m", path);
        }

        if (strcmp(version->name, "amdgpu")) {
                drmFreeVersion(version);
                close(fd);
                return VK_ERROR_INCOMPATIBLE_DRIVER;
        }
        drmFreeVersion(version);

        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = instance;
        assert(strlen(path) < ARRAY_SIZE(device->path));
        strncpy(device->path, path, ARRAY_SIZE(device->path));

        device->ws = radv_amdgpu_winsys_create(fd);
        if (!device->ws) {
                result = VK_ERROR_INCOMPATIBLE_DRIVER;
                goto fail;
        }
        device->ws->query_info(device->ws, &device->rad_info);
        result = radv_init_wsi(device);
        if (result != VK_SUCCESS) {
                device->ws->destroy(device->ws);
                goto fail;
        }

        if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
                radv_finish_wsi(device);
                device->ws->destroy(device->ws);
                result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                                   "cannot generate UUID");
                goto fail;
        }

        result = radv_extensions_register(instance,
                                          &device->extensions,
                                          common_device_extensions,
                                          ARRAY_SIZE(common_device_extensions));
        if (result != VK_SUCCESS)
                goto fail;

        fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
        device->name = device->rad_info.name;
        close(fd);
        return VK_SUCCESS;

fail:
        close(fd);
        return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
        radv_extensions_finish(device->instance, &device->extensions);
        radv_finish_wsi(device);
        device->ws->destroy(device->ws);
}


static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
        return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
        return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
        free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
        .pUserData = NULL,
        .pfnAllocation = default_alloc_func,
        .pfnReallocation = default_realloc_func,
        .pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
        {"fastclears", RADV_DEBUG_FAST_CLEARS},
        {"nodcc", RADV_DEBUG_NO_DCC},
        {"shaders", RADV_DEBUG_DUMP_SHADERS},
        {"nocache", RADV_DEBUG_NO_CACHE},
        {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
        {"nohiz", RADV_DEBUG_NO_HIZ},
        {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
        {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
        {NULL, 0}
};
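
/* RADV_DEBUG is parsed below in radv_CreateInstance() via parse_debug_string()
 * as a comma-separated list of the option names above, e.g.:
 *
 *   RADV_DEBUG=nodcc,shaders vulkan-app
 */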

VkResult radv_CreateInstance(
        const VkInstanceCreateInfo*                 pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkInstance*                                 pInstance)
{
        struct radv_instance *instance;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

        uint32_t client_version;
        if (pCreateInfo->pApplicationInfo &&
            pCreateInfo->pApplicationInfo->apiVersion != 0) {
                client_version = pCreateInfo->pApplicationInfo->apiVersion;
        } else {
                client_version = VK_MAKE_VERSION(1, 0, 0);
        }

        if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
            client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
                return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "Client requested version %d.%d.%d",
                                 VK_VERSION_MAJOR(client_version),
                                 VK_VERSION_MINOR(client_version),
                                 VK_VERSION_PATCH(client_version));
        }

        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                if (!is_extension_enabled(instance_extensions,
                                          ARRAY_SIZE(instance_extensions),
                                          pCreateInfo->ppEnabledExtensionNames[i]))
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
        }

        instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                             VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
        if (!instance)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        memset(instance, 0, sizeof(*instance));

        instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

        if (pAllocator)
                instance->alloc = *pAllocator;
        else
                instance->alloc = default_alloc;

        instance->apiVersion = client_version;
        instance->physicalDeviceCount = -1;

        _mesa_locale_init();

        VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

        instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                                   radv_debug_options);

        *pInstance = radv_instance_to_handle(instance);

        return VK_SUCCESS;
}

void radv_DestroyInstance(
        VkInstance                                  _instance,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);

        for (int i = 0; i < instance->physicalDeviceCount; ++i) {
                radv_physical_device_finish(instance->physicalDevices + i);
        }

        VG(VALGRIND_DESTROY_MEMPOOL(instance));

        _mesa_locale_fini();

        vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
        VkInstance                                  _instance,
        uint32_t*                                   pPhysicalDeviceCount,
        VkPhysicalDevice*                           pPhysicalDevices)
{
        RADV_FROM_HANDLE(radv_instance, instance, _instance);
        VkResult result;

        if (instance->physicalDeviceCount < 0) {
                char path[20];
                instance->physicalDeviceCount = 0;
                for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
                        snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
                        result = radv_physical_device_init(instance->physicalDevices +
                                                           instance->physicalDeviceCount,
                                                           instance, path);
                        if (result == VK_SUCCESS)
                                ++instance->physicalDeviceCount;
                        else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
                                return result;
                }
        }

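        /* Standard Vulkan two-call idiom: a NULL pPhysicalDevices just
         * queries the count; otherwise fill at most *pPhysicalDeviceCount
         * handles and report VK_INCOMPLETE if some were left out.
         */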
        if (!pPhysicalDevices) {
                *pPhysicalDeviceCount = instance->physicalDeviceCount;
        } else {
                *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
                for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
                        pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
        }

        return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
                                                                     : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceFeatures*                   pFeatures)
{
        // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

        memset(pFeatures, 0, sizeof(*pFeatures));

        *pFeatures = (VkPhysicalDeviceFeatures) {
                .robustBufferAccess = true,
                .fullDrawIndexUint32 = true,
                .imageCubeArray = true,
                .independentBlend = true,
                .geometryShader = false,
                .tessellationShader = false,
                .sampleRateShading = false,
                .dualSrcBlend = true,
                .logicOp = true,
                .multiDrawIndirect = true,
                .drawIndirectFirstInstance = true,
                .depthClamp = true,
                .depthBiasClamp = true,
                .fillModeNonSolid = true,
                .depthBounds = true,
                .wideLines = true,
                .largePoints = true,
                .alphaToOne = true,
                .multiViewport = false,
                .samplerAnisotropy = true,
                .textureCompressionETC2 = false,
                .textureCompressionASTC_LDR = false,
                .textureCompressionBC = true,
                .occlusionQueryPrecise = true,
                .pipelineStatisticsQuery = false,
                .vertexPipelineStoresAndAtomics = true,
                .fragmentStoresAndAtomics = true,
                .shaderTessellationAndGeometryPointSize = true,
                .shaderImageGatherExtended = true,
                .shaderStorageImageExtendedFormats = true,
                .shaderStorageImageMultisample = false,
                .shaderUniformBufferArrayDynamicIndexing = true,
                .shaderSampledImageArrayDynamicIndexing = true,
                .shaderStorageBufferArrayDynamicIndexing = true,
                .shaderStorageImageArrayDynamicIndexing = true,
                .shaderStorageImageReadWithoutFormat = false,
                .shaderStorageImageWriteWithoutFormat = false,
                .shaderClipDistance = true,
                .shaderCullDistance = true,
                .shaderFloat64 = false,
                .shaderInt64 = false,
                .shaderInt16 = false,
                .variableMultisampleRate = false,
                .inheritedQueries = false,
        };
}

void radv_GetPhysicalDeviceFeatures2KHR(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
        return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceProperties*                 pProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        VkSampleCountFlags sample_counts = 0xf;
        VkPhysicalDeviceLimits limits = {
                .maxImageDimension1D = (1 << 14),
                .maxImageDimension2D = (1 << 14),
                .maxImageDimension3D = (1 << 11),
                .maxImageDimensionCube = (1 << 14),
                .maxImageArrayLayers = (1 << 11),
                .maxTexelBufferElements = 128 * 1024 * 1024,
                .maxUniformBufferRange = UINT32_MAX,
                .maxStorageBufferRange = UINT32_MAX,
                .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
                .maxMemoryAllocationCount = UINT32_MAX,
                .maxSamplerAllocationCount = 64 * 1024,
                .bufferImageGranularity = 64, /* A cache line */
                .sparseAddressSpaceSize = 0,
                .maxBoundDescriptorSets = MAX_SETS,
                .maxPerStageDescriptorSamplers = 64,
                .maxPerStageDescriptorUniformBuffers = 64,
                .maxPerStageDescriptorStorageBuffers = 64,
                .maxPerStageDescriptorSampledImages = 64,
                .maxPerStageDescriptorStorageImages = 64,
                .maxPerStageDescriptorInputAttachments = 64,
                .maxPerStageResources = 128,
                .maxDescriptorSetSamplers = 256,
                .maxDescriptorSetUniformBuffers = 256,
                .maxDescriptorSetUniformBuffersDynamic = 256,
                .maxDescriptorSetStorageBuffers = 256,
                .maxDescriptorSetStorageBuffersDynamic = 256,
                .maxDescriptorSetSampledImages = 256,
                .maxDescriptorSetStorageImages = 256,
                .maxDescriptorSetInputAttachments = 256,
                .maxVertexInputAttributes = 32,
                .maxVertexInputBindings = 32,
                .maxVertexInputAttributeOffset = 2047,
                .maxVertexInputBindingStride = 2048,
                .maxVertexOutputComponents = 128,
                .maxTessellationGenerationLevel = 0,
                .maxTessellationPatchSize = 0,
                .maxTessellationControlPerVertexInputComponents = 0,
                .maxTessellationControlPerVertexOutputComponents = 0,
                .maxTessellationControlPerPatchOutputComponents = 0,
                .maxTessellationControlTotalOutputComponents = 0,
                .maxTessellationEvaluationInputComponents = 0,
                .maxTessellationEvaluationOutputComponents = 0,
                .maxGeometryShaderInvocations = 32,
                .maxGeometryInputComponents = 64,
                .maxGeometryOutputComponents = 128,
                .maxGeometryOutputVertices = 256,
                .maxGeometryTotalOutputComponents = 1024,
                .maxFragmentInputComponents = 128,
                .maxFragmentOutputAttachments = 8,
                .maxFragmentDualSrcAttachments = 1,
                .maxFragmentCombinedOutputResources = 8,
                .maxComputeSharedMemorySize = 32768,
                .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
                .maxComputeWorkGroupInvocations = 2048,
                .maxComputeWorkGroupSize = {
                        2048,
                        2048,
                        2048
                },
                .subPixelPrecisionBits = 4 /* FIXME */,
                .subTexelPrecisionBits = 4 /* FIXME */,
                .mipmapPrecisionBits = 4 /* FIXME */,
                .maxDrawIndexedIndexValue = UINT32_MAX,
                .maxDrawIndirectCount = UINT32_MAX,
                .maxSamplerLodBias = 16,
                .maxSamplerAnisotropy = 16,
                .maxViewports = MAX_VIEWPORTS,
                .maxViewportDimensions = { (1 << 14), (1 << 14) },
                .viewportBoundsRange = { INT16_MIN, INT16_MAX },
                .viewportSubPixelBits = 13, /* We take a float? */
                .minMemoryMapAlignment = 4096, /* A page */
                .minTexelBufferOffsetAlignment = 1,
                .minUniformBufferOffsetAlignment = 4,
                .minStorageBufferOffsetAlignment = 4,
                .minTexelOffset = -32,
                .maxTexelOffset = 31,
                .minTexelGatherOffset = -32,
                .maxTexelGatherOffset = 31,
                .minInterpolationOffset = -2,
                .maxInterpolationOffset = 2,
                .subPixelInterpolationOffsetBits = 8,
                .maxFramebufferWidth = (1 << 14),
                .maxFramebufferHeight = (1 << 14),
                .maxFramebufferLayers = (1 << 10),
                .framebufferColorSampleCounts = sample_counts,
                .framebufferDepthSampleCounts = sample_counts,
                .framebufferStencilSampleCounts = sample_counts,
                .framebufferNoAttachmentsSampleCounts = sample_counts,
                .maxColorAttachments = MAX_RTS,
                .sampledImageColorSampleCounts = sample_counts,
                .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
                .sampledImageDepthSampleCounts = sample_counts,
                .sampledImageStencilSampleCounts = sample_counts,
                .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
                .maxSampleMaskWords = 1,
                .timestampComputeAndGraphics = false,
                .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
                .maxClipDistances = 8,
                .maxCullDistances = 8,
                .maxCombinedClipAndCullDistances = 8,
                .discreteQueuePriorities = 1,
                .pointSizeRange = { 0.125, 255.875 },
                .lineWidthRange = { 0.0, 7.9921875 },
                .pointSizeGranularity = (1.0 / 8.0),
                .lineWidthGranularity = (1.0 / 128.0),
                .strictLines = false, /* FINISHME */
                .standardSampleLocations = true,
                .optimalBufferCopyOffsetAlignment = 128,
                .optimalBufferCopyRowPitchAlignment = 128,
                .nonCoherentAtomSize = 64,
        };

        *pProperties = (VkPhysicalDeviceProperties) {
                .apiVersion = VK_MAKE_VERSION(1, 0, 5),
                .driverVersion = 1,
                .vendorID = 0x1002,
                .deviceID = pdevice->rad_info.pci_id,
                .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
                .limits = limits,
                .sparseProperties = {0}, /* Sparse resources are not supported yet. */
        };

        strcpy(pProperties->deviceName, pdevice->name);
        memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceProperties2KHR             *pProperties)
{
        return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pCount,
        VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
        int num_queue_families = 1;
        int idx;
        if (pdevice->rad_info.compute_rings > 0 &&
            pdevice->rad_info.chip_class >= CIK &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
                num_queue_families++;

        if (pQueueFamilyProperties == NULL) {
                *pCount = num_queue_families;
                return;
        }

        if (!*pCount)
                return;

        idx = 0;
        if (*pCount >= 1) {
                pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                        .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                                      VK_QUEUE_COMPUTE_BIT |
                                      VK_QUEUE_TRANSFER_BIT,
                        .queueCount = 1,
                        .timestampValidBits = 64,
                        .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
                };
                idx++;
        }

        if (pdevice->rad_info.compute_rings > 0 &&
            pdevice->rad_info.chip_class >= CIK &&
            !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
                if (*pCount > idx) {
                        pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                                .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
                                .queueCount = pdevice->rad_info.compute_rings,
                                .timestampValidBits = 64,
                                .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
                        };
                        idx++;
                }
        }
        *pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pCount,
        VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
        return radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice,
                                                           pCount,
                                                           &pQueueFamilyProperties->queueFamilyProperties);
}

void radv_GetPhysicalDeviceMemoryProperties(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

        STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

        pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                .heapIndex = RADV_MEM_HEAP_VRAM,
        };
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                .heapIndex = RADV_MEM_HEAP_GTT,
        };
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
        };
        pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
                .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                .heapIndex = RADV_MEM_HEAP_GTT,
        };

        STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

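        /* VRAM is exposed as two heaps: the CPU-visible aperture and the
         * remaining, non-visible part; GTT (system memory mapped through
         * the GART) is the third heap.
         */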
        pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
        pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
                .size = physical_device->rad_info.vram_size -
                        physical_device->rad_info.visible_vram_size,
                .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
        };
        pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
                .size = physical_device->rad_info.visible_vram_size,
                .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
        };
        pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
                .size = physical_device->rad_info.gart_size,
                .flags = 0,
        };
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
        VkPhysicalDevice                            physicalDevice,
        VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
        return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                                      &pMemoryProperties->memoryProperties);
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                int queue_family_index, int idx)
{
        queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        queue->device = device;
        queue->queue_family_index = queue_family_index;
        queue->queue_idx = idx;

        queue->hw_ctx = device->ws->ctx_create(device->ws);
        if (!queue->hw_ctx)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
        if (queue->hw_ctx)
                queue->device->ws->ctx_destroy(queue->hw_ctx);

        if (queue->preamble_cs)
                queue->device->ws->cs_destroy(queue->preamble_cs);
        if (queue->descriptor_bo)
                queue->device->ws->buffer_destroy(queue->descriptor_bo);
        if (queue->scratch_bo)
                queue->device->ws->buffer_destroy(queue->scratch_bo);
        if (queue->compute_scratch_bo)
                queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

VkResult radv_CreateDevice(
        VkPhysicalDevice                            physicalDevice,
        const VkDeviceCreateInfo*                   pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkDevice*                                   pDevice)
{
        RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
        VkResult result;
        struct radv_device *device;

        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                if (!is_extension_enabled(physical_device->extensions.ext_array,
                                          physical_device->extensions.num_ext,
                                          pCreateInfo->ppEnabledExtensionNames[i]))
                        return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
        }

        device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
                           sizeof(*device), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
        if (!device)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        memset(device, 0, sizeof(*device));

        device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
        device->instance = physical_device->instance;
        device->physical_device = physical_device;

        device->debug_flags = device->instance->debug_flags;

        device->ws = physical_device->ws;
        if (pAllocator)
                device->alloc = *pAllocator;
        else
                device->alloc = physical_device->instance->alloc;

        for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
                const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
                uint32_t qfi = queue_create->queueFamilyIndex;

                device->queues[qfi] = vk_alloc(&device->alloc,
                                               queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
                if (!device->queues[qfi]) {
                        result = VK_ERROR_OUT_OF_HOST_MEMORY;
                        goto fail;
                }

                memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

                device->queue_count[qfi] = queue_create->queueCount;

                for (unsigned q = 0; q < queue_create->queueCount; q++) {
                        result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
                        if (result != VK_SUCCESS)
                                goto fail;
                }
        }

#if HAVE_LLVM < 0x0400
        device->llvm_supports_spill = false;
#else
        device->llvm_supports_spill = true;
#endif

        /* The maximum number of scratch waves. Scratch space isn't divided
         * evenly between CUs. The number is only a function of the number of CUs.
         * We can decrease the constant to decrease the scratch buffer size.
         *
         * scratch_waves must be >= the maximum possible size of
         * 1 threadgroup, so that the hw doesn't hang from being unable
         * to start any.
         *
         * The recommended value is 4 per CU at most. Higher numbers don't
         * bring much benefit, but they still occupy chip resources (think
         * async compute). I've seen ~2% performance difference between 4 and 32.
         */
        uint32_t max_threads_per_block = 2048;
        device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
                                     max_threads_per_block / 64);
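        /* Worked example (illustrative only): on a hypothetical 16-CU part,
         * MAX2(32 * 16, 2048 / 64) = MAX2(512, 32) = 512 waves. The second
         * operand only matters for very small CU counts, where it guarantees
         * room for one full 2048-thread (32-wave) threadgroup.
         */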

        result = radv_device_init_meta(device);
        if (result != VK_SUCCESS)
                goto fail;

        radv_device_init_msaa(device);

        for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
                device->empty_cs[family] = device->ws->cs_create(device->ws, family);
                switch (family) {
                case RADV_QUEUE_GENERAL:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
                        radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
                        radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
                        break;
                case RADV_QUEUE_COMPUTE:
                        radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
                        radeon_emit(device->empty_cs[family], 0);
                        break;
                }
                device->ws->cs_finalize(device->empty_cs[family]);
        }

        if (getenv("RADV_TRACE_FILE")) {
                device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
                                                             RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
                if (!device->trace_bo)
                        goto fail;

                device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
                if (!device->trace_id_ptr)
                        goto fail;
        }

        *pDevice = radv_device_to_handle(device);
        return VK_SUCCESS;

fail:
        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
        }

        vk_free(&device->alloc, device);
        return result;
}

void radv_DestroyDevice(
        VkDevice                                    _device,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        if (device->trace_bo)
                device->ws->buffer_destroy(device->trace_bo);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++)
                        radv_queue_finish(&device->queues[i][q]);
                if (device->queue_count[i])
                        vk_free(&device->alloc, device->queues[i]);
        }
        radv_device_finish_meta(device);

        vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
        const char*                                 pLayerName,
        uint32_t*                                   pPropertyCount,
        VkExtensionProperties*                      pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = ARRAY_SIZE(instance_extensions);
                return VK_SUCCESS;
        }

        *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
        typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

        if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
                return VK_INCOMPLETE;

        return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
        VkPhysicalDevice                            physicalDevice,
        const char*                                 pLayerName,
        uint32_t*                                   pPropertyCount,
        VkExtensionProperties*                      pProperties)
{
        RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

        if (pProperties == NULL) {
                *pPropertyCount = pdevice->extensions.num_ext;
                return VK_SUCCESS;
        }

        *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
        typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

        if (*pPropertyCount < pdevice->extensions.num_ext)
                return VK_INCOMPLETE;

        return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
        uint32_t*                                   pPropertyCount,
        VkLayerProperties*                          pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = 0;
                return VK_SUCCESS;
        }

        /* None supported at this time */
        return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
        VkPhysicalDevice                            physicalDevice,
        uint32_t*                                   pPropertyCount,
        VkLayerProperties*                          pProperties)
{
        if (pProperties == NULL) {
                *pPropertyCount = 0;
                return VK_SUCCESS;
        }

        /* None supported at this time */
        return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
        VkDevice                                    _device,
        uint32_t                                    queueFamilyIndex,
        uint32_t                                    queueIndex,
        VkQueue*                                    pQueue)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

static void radv_dump_trace(struct radv_device *device,
                            struct radeon_winsys_cs *cs)
{
        const char *filename = getenv("RADV_TRACE_FILE");
        FILE *f = fopen(filename, "w");
        if (!f) {
                fprintf(stderr, "Failed to write trace dump to %s\n", filename);
                return;
        }

        fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
        device->ws->cs_dump(cs, f, *device->trace_id_ptr);
        fclose(f);
}

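/* Build (or reuse) the per-queue preamble that sets up scratch state.
 *
 * Command buffers are recorded before we know how much scratch (spill)
 * memory a submission will need, so the queue keeps the largest scratch
 * and compute-scratch BOs seen so far and prepends a small IB that points
 * the hardware at them. The preamble is only rebuilt when a submission
 * needs more scratch than the current buffers provide.
 */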
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
                     uint32_t compute_scratch_size,
                     struct radeon_winsys_cs **preamble_cs)
{
        struct radeon_winsys_bo *scratch_bo = NULL;
        struct radeon_winsys_bo *descriptor_bo = NULL;
        struct radeon_winsys_bo *compute_scratch_bo = NULL;
        struct radeon_winsys_cs *cs = NULL;

        if (!scratch_size && !compute_scratch_size) {
                *preamble_cs = NULL;
                return VK_SUCCESS;
        }

        if (scratch_size <= queue->scratch_size &&
            compute_scratch_size <= queue->compute_scratch_size) {
                *preamble_cs = queue->preamble_cs;
                return VK_SUCCESS;
        }

        if (scratch_size > queue->scratch_size) {
                scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                              scratch_size,
                                                              4096,
                                                              RADEON_DOMAIN_VRAM,
                                                              RADEON_FLAG_NO_CPU_ACCESS);
                if (!scratch_bo)
                        goto fail;
        } else
                scratch_bo = queue->scratch_bo;

        if (compute_scratch_size > queue->compute_scratch_size) {
                compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                      compute_scratch_size,
                                                                      4096,
                                                                      RADEON_DOMAIN_VRAM,
                                                                      RADEON_FLAG_NO_CPU_ACCESS);
                if (!compute_scratch_bo)
                        goto fail;

        } else
                compute_scratch_bo = queue->compute_scratch_bo;

        if (scratch_bo != queue->scratch_bo) {
                descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
                                                                 8,
                                                                 4096,
                                                                 RADEON_DOMAIN_VRAM,
                                                                 RADEON_FLAG_CPU_ACCESS);
                if (!descriptor_bo)
                        goto fail;
        } else
                descriptor_bo = queue->descriptor_bo;

        cs = queue->device->ws->cs_create(queue->device->ws,
                                          queue->queue_family_index ? RING_COMPUTE : RING_GFX);
        if (!cs)
                goto fail;


        if (scratch_bo)
                queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

        if (descriptor_bo)
                queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

        if (descriptor_bo != queue->descriptor_bo) {
                uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
                uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                 S_008F04_SWIZZLE_ENABLE(1);

                uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

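                /* First two dwords of a buffer resource descriptor (V#):
                 * dword 0 holds the low 32 bits of the scratch base address,
                 * dword 1 the high address bits plus the swizzle-enable bit.
                 * Shader stages find it through the SPI user-data registers
                 * written below.
                 */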
                map[0] = scratch_va;
                map[1] = rsrc1;

                queue->device->ws->buffer_unmap(descriptor_bo);
        }

        if (descriptor_bo) {
                uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
                                   R_00B130_SPI_SHADER_USER_DATA_VS_0,
                                   R_00B230_SPI_SHADER_USER_DATA_GS_0,
                                   R_00B330_SPI_SHADER_USER_DATA_ES_0,
                                   R_00B430_SPI_SHADER_USER_DATA_HS_0,
                                   R_00B530_SPI_SHADER_USER_DATA_LS_0};

                uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

                for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
                        radeon_set_sh_reg_seq(cs, regs[i], 2);
                        radeon_emit(cs, va);
                        radeon_emit(cs, va >> 32);
                }
        }

        if (compute_scratch_bo) {
                uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
                uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
                                 S_008F04_SWIZZLE_ENABLE(1);

                queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

                radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
                radeon_emit(cs, scratch_va);
                radeon_emit(cs, rsrc1);
        }

        if (!queue->device->ws->cs_finalize(cs))
                goto fail;

        if (queue->preamble_cs)
                queue->device->ws->cs_destroy(queue->preamble_cs);

        queue->preamble_cs = cs;

        if (scratch_bo != queue->scratch_bo) {
                if (queue->scratch_bo)
                        queue->device->ws->buffer_destroy(queue->scratch_bo);
                queue->scratch_bo = scratch_bo;
                queue->scratch_size = scratch_size;
        }

        if (compute_scratch_bo != queue->compute_scratch_bo) {
                if (queue->compute_scratch_bo)
                        queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
                queue->compute_scratch_bo = compute_scratch_bo;
                queue->compute_scratch_size = compute_scratch_size;
        }

        if (descriptor_bo != queue->descriptor_bo) {
                if (queue->descriptor_bo)
                        queue->device->ws->buffer_destroy(queue->descriptor_bo);

                queue->descriptor_bo = descriptor_bo;
        }

        *preamble_cs = cs;
        return VK_SUCCESS;
fail:
        if (cs)
                queue->device->ws->cs_destroy(cs);
        if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
                queue->device->ws->buffer_destroy(descriptor_bo);
        if (scratch_bo && scratch_bo != queue->scratch_bo)
                queue->device->ws->buffer_destroy(scratch_bo);
        if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
                queue->device->ws->buffer_destroy(compute_scratch_bo);
        return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

VkResult radv_QueueSubmit(
        VkQueue                                     _queue,
        uint32_t                                    submitCount,
        const VkSubmitInfo*                         pSubmits,
        VkFence                                     _fence)
{
        RADV_FROM_HANDLE(radv_queue, queue, _queue);
        RADV_FROM_HANDLE(radv_fence, fence, _fence);
        struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
        struct radeon_winsys_ctx *ctx = queue->hw_ctx;
        int ret;
        uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
        uint32_t scratch_size = 0;
        uint32_t compute_scratch_size = 0;
        struct radeon_winsys_cs *preamble_cs = NULL;
        VkResult result;

        /* Do this first so failing to allocate scratch buffers can't result in
         * partially executed submissions. */
        for (uint32_t i = 0; i < submitCount; i++) {
                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);

                        scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
                        compute_scratch_size = MAX2(compute_scratch_size,
                                                    cmd_buffer->compute_scratch_size_needed);
                }
        }

        result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, &preamble_cs);
        if (result != VK_SUCCESS)
                return result;

        for (uint32_t i = 0; i < submitCount; i++) {
                struct radeon_winsys_cs **cs_array;
                bool can_patch = true;
                uint32_t advance;

                if (!pSubmits[i].commandBufferCount)
                        continue;

                cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
                                  pSubmits[i].commandBufferCount);

                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
                        RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
                                         pSubmits[i].pCommandBuffers[j]);
                        assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

                        cs_array[j] = cmd_buffer->cs;
                        if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
                                can_patch = false;
                }

                for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
                        advance = MIN2(max_cs_submission,
                                       pSubmits[i].commandBufferCount - j);
                        bool b = j == 0;
                        bool e = j + advance == pSubmits[i].commandBufferCount;

                        if (queue->device->trace_bo)
                                *queue->device->trace_id_ptr = 0;

                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
                                                           advance, preamble_cs,
                                                           (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
                                                           b ? pSubmits[i].waitSemaphoreCount : 0,
                                                           (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
                                                           e ? pSubmits[i].signalSemaphoreCount : 0,
                                                           can_patch, base_fence);

                        if (ret) {
                                radv_loge("failed to submit CS %d\n", i);
                                abort();
                        }
                        if (queue->device->trace_bo) {
                                bool success = queue->device->ws->ctx_wait_idle(
                                                        queue->hw_ctx,
                                                        radv_queue_family_to_ring(
                                                                queue->queue_family_index),
                                                        queue->queue_idx);

                                if (!success) { /* Hang */
                                        radv_dump_trace(queue->device, cs_array[j]);
                                        abort();
                                }
                        }
                }
                free(cs_array);
        }

        if (fence) {
                if (!submitCount)
                        ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                                           &queue->device->empty_cs[queue->queue_family_index],
                                                           1, NULL, NULL, 0, NULL, 0,
                                                           false, base_fence);

                fence->submitted = true;
        }

        return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
        VkQueue                                     _queue)
{
        RADV_FROM_HANDLE(radv_queue, queue, _queue);

        queue->device->ws->ctx_wait_idle(queue->hw_ctx,
                                         radv_queue_family_to_ring(queue->queue_family_index),
                                         queue->queue_idx);
        return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
        VkDevice                                    _device)
{
        RADV_FROM_HANDLE(radv_device, device, _device);

        for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
                for (unsigned q = 0; q < device->queue_count[i]; q++) {
                        radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
                }
        }
        return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
        VkInstance                                  instance,
        const char*                                 pName)
{
        return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
        VkInstance                                  instance,
        const char*                                 pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
        VkInstance                                  instance,
        const char*                                 pName)
{
        return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
        VkDevice                                    device,
        const char*                                 pName)
{
        return radv_lookup_entrypoint(pName);
}

VkResult radv_AllocateMemory(
        VkDevice                                    _device,
        const VkMemoryAllocateInfo*                 pAllocateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkDeviceMemory*                             pMem)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_device_memory *mem;
        VkResult result;
        enum radeon_bo_domain domain;
        uint32_t flags = 0;
        assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

        if (pAllocateInfo->allocationSize == 0) {
                /* Apparently, this is allowed */
                *pMem = VK_NULL_HANDLE;
                return VK_SUCCESS;
        }

        mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (mem == NULL)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
        if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
            pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
                domain = RADEON_DOMAIN_GTT;
        else
                domain = RADEON_DOMAIN_VRAM;

        if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
                flags |= RADEON_FLAG_NO_CPU_ACCESS;
        else
                flags |= RADEON_FLAG_CPU_ACCESS;

        if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
                flags |= RADEON_FLAG_GTT_WC;

        mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
                                            domain, flags);

        if (!mem->bo) {
                result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
                goto fail;
        }
        mem->type_index = pAllocateInfo->memoryTypeIndex;

        *pMem = radv_device_memory_to_handle(mem);

        return VK_SUCCESS;

fail:
        vk_free2(&device->alloc, pAllocator, mem);

        return result;
}

void radv_FreeMemory(
        VkDevice                                    _device,
        VkDeviceMemory                              _mem,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

        if (mem == NULL)
                return;

        device->ws->buffer_destroy(mem->bo);
        mem->bo = NULL;

        vk_free2(&device->alloc, pAllocator, mem);
}

VkResult radv_MapMemory(
        VkDevice                                    _device,
        VkDeviceMemory                              _memory,
        VkDeviceSize                                offset,
        VkDeviceSize                                size,
        VkMemoryMapFlags                            flags,
        void**                                      ppData)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

        if (mem == NULL) {
                *ppData = NULL;
                return VK_SUCCESS;
        }

        *ppData = device->ws->buffer_map(mem->bo);
        if (*ppData) {
                *ppData += offset;
                return VK_SUCCESS;
        }

        return VK_ERROR_MEMORY_MAP_FAILED;
}

void radv_UnmapMemory(
        VkDevice                                    _device,
        VkDeviceMemory                              _memory)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

        if (mem == NULL)
                return;

        device->ws->buffer_unmap(mem->bo);
}

VkResult radv_FlushMappedMemoryRanges(
        VkDevice                                    _device,
        uint32_t                                    memoryRangeCount,
        const VkMappedMemoryRange*                  pMemoryRanges)
{
        return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
        VkDevice                                    _device,
        uint32_t                                    memoryRangeCount,
        const VkMappedMemoryRange*                  pMemoryRanges)
{
        return VK_SUCCESS;
}

void radv_GetBufferMemoryRequirements(
        VkDevice                                    device,
        VkBuffer                                    _buffer,
        VkMemoryRequirements*                       pMemoryRequirements)
{
        RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

        pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

        pMemoryRequirements->size = buffer->size;
        pMemoryRequirements->alignment = 16;
}

void radv_GetImageMemoryRequirements(
        VkDevice                                    device,
        VkImage                                     _image,
        VkMemoryRequirements*                       pMemoryRequirements)
{
        RADV_FROM_HANDLE(radv_image, image, _image);

        pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

        pMemoryRequirements->size = image->size;
        pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageSparseMemoryRequirements(
        VkDevice                                    device,
        VkImage                                     image,
        uint32_t*                                   pSparseMemoryRequirementCount,
        VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
        stub();
}

void radv_GetDeviceMemoryCommitment(
        VkDevice                                    device,
        VkDeviceMemory                              memory,
        VkDeviceSize*                               pCommittedMemoryInBytes)
{
        *pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory(
        VkDevice                                    device,
        VkBuffer                                    _buffer,
        VkDeviceMemory                              _memory,
        VkDeviceSize                                memoryOffset)
{
        RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
        RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

        if (mem) {
                buffer->bo = mem->bo;
                buffer->offset = memoryOffset;
        } else {
                buffer->bo = NULL;
                buffer->offset = 0;
        }

        return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
        VkDevice                                    device,
        VkImage                                     _image,
        VkDeviceMemory                              _memory,
        VkDeviceSize                                memoryOffset)
{
        RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
        RADV_FROM_HANDLE(radv_image, image, _image);

        if (mem) {
                image->bo = mem->bo;
                image->offset = memoryOffset;
        } else {
                image->bo = NULL;
                image->offset = 0;
        }

        return VK_SUCCESS;
}

VkResult radv_QueueBindSparse(
        VkQueue                                     queue,
        uint32_t                                    bindInfoCount,
        const VkBindSparseInfo*                     pBindInfo,
        VkFence                                     fence)
{
        stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

VkResult radv_CreateFence(
        VkDevice                                    _device,
        const VkFenceCreateInfo*                    pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkFence*                                    pFence)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
                                             sizeof(*fence), 8,
                                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if (!fence)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        memset(fence, 0, sizeof(*fence));
        fence->submitted = false;
        fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
        fence->fence = device->ws->create_fence();
        if (!fence->fence) {
                vk_free2(&device->alloc, pAllocator, fence);
                return VK_ERROR_OUT_OF_HOST_MEMORY;
        }

        *pFence = radv_fence_to_handle(fence);

        return VK_SUCCESS;
}

void radv_DestroyFence(
        VkDevice                                    _device,
        VkFence                                     _fence,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_fence, fence, _fence);

        if (!fence)
                return;
        device->ws->destroy_fence(fence->fence);
        vk_free2(&device->alloc, pAllocator, fence);
}

static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
        uint64_t current_time;
        struct timespec tv;

        clock_gettime(CLOCK_MONOTONIC, &tv);
        current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;

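        /* Clamp so that current_time + timeout below cannot overflow when
         * the application passes the conventional "infinite" timeout of
         * UINT64_MAX nanoseconds.
         */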
        timeout = MIN2(UINT64_MAX - current_time, timeout);

        return current_time + timeout;
}

VkResult radv_WaitForFences(
        VkDevice                                    _device,
        uint32_t                                    fenceCount,
        const VkFence*                              pFences,
        VkBool32                                    waitAll,
        uint64_t                                    timeout)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        timeout = radv_get_absolute_timeout(timeout);

        if (!waitAll && fenceCount > 1) {
                fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
        }

        for (uint32_t i = 0; i < fenceCount; ++i) {
                RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
                bool expired = false;

                if (fence->signalled)
                        continue;

                if (!fence->submitted)
                        return VK_TIMEOUT;

                expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
                if (!expired)
                        return VK_TIMEOUT;

                fence->signalled = true;
        }

        return VK_SUCCESS;
}

VkResult radv_ResetFences(VkDevice device,
                          uint32_t fenceCount,
                          const VkFence *pFences)
{
        for (unsigned i = 0; i < fenceCount; ++i) {
                RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
                fence->submitted = fence->signalled = false;
        }

        return VK_SUCCESS;
}

VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_fence, fence, _fence);

        if (fence->signalled)
                return VK_SUCCESS;
        if (!fence->submitted)
                return VK_NOT_READY;

        if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
                return VK_NOT_READY;

        return VK_SUCCESS;
}


// Queue semaphore functions

VkResult radv_CreateSemaphore(
        VkDevice                                    _device,
        const VkSemaphoreCreateInfo*                pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkSemaphore*                                pSemaphore)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radeon_winsys_sem *sem;

        sem = device->ws->create_sem(device->ws);
        if (!sem)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        *pSemaphore = (VkSemaphore)sem;
        return VK_SUCCESS;
}

void radv_DestroySemaphore(
        VkDevice                                    _device,
        VkSemaphore                                 _semaphore,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radeon_winsys_sem *sem;
        if (!_semaphore)
                return;

        sem = (struct radeon_winsys_sem *)_semaphore;
        device->ws->destroy_sem(sem);
}

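/* Events are backed by an 8-byte, CPU-visible GTT buffer that stays mapped
 * for the object's lifetime; the host-side set/reset/status entry points
 * below are plain stores and loads of that dword, and the command-buffer
 * paths are expected to write the same location from the GPU.
 */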
VkResult radv_CreateEvent(
        VkDevice                                    _device,
        const VkEventCreateInfo*                    pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkEvent*                                    pEvent)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
                                             sizeof(*event), 8,
                                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

        if (!event)
                return VK_ERROR_OUT_OF_HOST_MEMORY;

        event->bo = device->ws->buffer_create(device->ws, 8, 8,
                                              RADEON_DOMAIN_GTT,
                                              RADEON_FLAG_CPU_ACCESS);
        if (!event->bo) {
                vk_free2(&device->alloc, pAllocator, event);
                return VK_ERROR_OUT_OF_DEVICE_MEMORY;
        }

        event->map = (uint64_t*)device->ws->buffer_map(event->bo);

        *pEvent = radv_event_to_handle(event);

        return VK_SUCCESS;
}

void radv_DestroyEvent(
        VkDevice                                    _device,
        VkEvent                                     _event,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_event, event, _event);

        if (!event)
                return;
        device->ws->buffer_destroy(event->bo);
        vk_free2(&device->alloc, pAllocator, event);
}

VkResult radv_GetEventStatus(
        VkDevice                                    _device,
        VkEvent                                     _event)
{
        RADV_FROM_HANDLE(radv_event, event, _event);

        if (*event->map == 1)
                return VK_EVENT_SET;
        return VK_EVENT_RESET;
}

VkResult radv_SetEvent(
        VkDevice                                    _device,
        VkEvent                                     _event)
{
        RADV_FROM_HANDLE(radv_event, event, _event);
        *event->map = 1;

        return VK_SUCCESS;
}

VkResult radv_ResetEvent(
        VkDevice                                    _device,
        VkEvent                                     _event)
{
        RADV_FROM_HANDLE(radv_event, event, _event);
        *event->map = 0;

        return VK_SUCCESS;
}

VkResult radv_CreateBuffer(
        VkDevice                                    _device,
        const VkBufferCreateInfo*                   pCreateInfo,
        const VkAllocationCallbacks*                pAllocator,
        VkBuffer*                                   pBuffer)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_buffer *buffer;

        assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

        buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (buffer == NULL)
                return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

        buffer->size = pCreateInfo->size;
        buffer->usage = pCreateInfo->usage;
        buffer->bo = NULL;
        buffer->offset = 0;

        *pBuffer = radv_buffer_to_handle(buffer);

        return VK_SUCCESS;
}

void radv_DestroyBuffer(
        VkDevice                                    _device,
        VkBuffer                                    _buffer,
        const VkAllocationCallbacks*                pAllocator)
{
        RADV_FROM_HANDLE(radv_device, device, _device);
        RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

        if (!buffer)
                return;

        vk_free2(&device->alloc, pAllocator, buffer);
}

static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
        if (stencil)
                return image->surface.stencil_tiling_index[level];
        else
                return image->surface.tiling_index[level];
}

static void
radv_initialise_color_surface(struct radv_device *device,
                              struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
{
        const struct vk_format_description *desc;
        unsigned ntype, format, swap, endian;
        unsigned blend_clamp = 0, blend_bypass = 0;
        unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
        uint64_t va;
        const struct radeon_surf *surf = &iview->image->surface;
        const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];

        desc = vk_format_description(iview->vk_format);

        memset(cb, 0, sizeof(*cb));

        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += level_info->offset;
        cb->cb_color_base = va >> 8;

        /* CMASK variables */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->cmask.offset;
        cb->cb_color_cmask = va >> 8;
        cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->dcc_offset;
        cb->cb_dcc_base = va >> 8;

        uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
        cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
                S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);

        cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
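        /* The *_TILE_MAX fields use a minus-one encoding: pitch in units of
         * 8 elements, slice in units of 64 (one 8x8 tile).
         */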
1856 pitch_tile_max = level_info->nblk_x / 8 - 1;
1857 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1858 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
1859
1860 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1861 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1862
1863 /* Intensity is implemented as Red, so treat it that way. */
1864 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
1865 S_028C74_TILE_MODE_INDEX(tile_mode_index);
1866
1867 if (iview->image->samples > 1) {
1868 unsigned log_samples = util_logbase2(iview->image->samples);
1869
1870 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1871 S_028C74_NUM_FRAGMENTS(log_samples);
1872 }
1873
1874 if (iview->image->fmask.size) {
1875 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
1876 if (device->physical_device->rad_info.chip_class >= CIK)
1877 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
1878 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
1879 cb->cb_color_fmask = va >> 8;
1880 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
1881 } else {
1882 /* This must be set for fast clear to work without FMASK. */
1883 if (device->physical_device->rad_info.chip_class >= CIK)
1884 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1885 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1886 cb->cb_color_fmask = cb->cb_color_base;
1887 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1888 }
1889
1890 ntype = radv_translate_color_numformat(iview->vk_format,
1891 desc,
1892 vk_format_get_first_non_void_channel(iview->vk_format));
1893 format = radv_translate_colorformat(iview->vk_format);
1894 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
1895 radv_finishme("Illegal color\n");
1896 swap = radv_translate_colorswap(iview->vk_format, FALSE);
1897 endian = radv_colorformat_endian_swap(format);
1898
1899 /* blend clamp should be set for all NORM/SRGB types */
1900 if (ntype == V_028C70_NUMBER_UNORM ||
1901 ntype == V_028C70_NUMBER_SNORM ||
1902 ntype == V_028C70_NUMBER_SRGB)
1903 blend_clamp = 1;
1904
1905 	/* Set blend bypass, as the docs require, for SINT/UINT formats and
1906 	   the 8/24 COLOR variants. */
1907 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1908 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1909 format == V_028C70_COLOR_X24_8_32_FLOAT) {
1910 blend_clamp = 0;
1911 blend_bypass = 1;
1912 }
1913 #if 0
1914 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
1915 (format == V_028C70_COLOR_8 ||
1916 format == V_028C70_COLOR_8_8 ||
1917 format == V_028C70_COLOR_8_8_8_8))
1918 		cb->color_is_int8 = true; /* hypothetical field: radv does not store this yet, hence the #if 0 */
1919 #endif
1920 cb->cb_color_info = S_028C70_FORMAT(format) |
1921 S_028C70_COMP_SWAP(swap) |
1922 S_028C70_BLEND_CLAMP(blend_clamp) |
1923 S_028C70_BLEND_BYPASS(blend_bypass) |
1924 S_028C70_SIMPLE_FLOAT(1) |
1925 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
1926 ntype != V_028C70_NUMBER_SNORM &&
1927 ntype != V_028C70_NUMBER_SRGB &&
1928 format != V_028C70_COLOR_8_24 &&
1929 format != V_028C70_COLOR_24_8) |
1930 S_028C70_NUMBER_TYPE(ntype) |
1931 S_028C70_ENDIAN(endian);
1932 	if (iview->image->samples > 1 &&
1933 	    iview->image->fmask.size)
1934 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
1935
1936 if (iview->image->cmask.size &&
1937 (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
1938 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
1939
1940 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
1941 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
1942
1943 if (device->physical_device->rad_info.chip_class >= VI) {
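		/* MAX_UNCOMPRESSED_BLOCK_SIZE encodes 64B/128B/256B as 0/1/2;
		 * MSAA surfaces with 1- or 2-byte texels need the smaller
		 * block sizes. */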
1944 unsigned max_uncompressed_block_size = 2;
1945 if (iview->image->samples > 1) {
1946 if (iview->image->surface.bpe == 1)
1947 max_uncompressed_block_size = 0;
1948 else if (iview->image->surface.bpe == 2)
1949 max_uncompressed_block_size = 1;
1950 }
1951
1952 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1953 S_028C78_INDEPENDENT_64B_BLOCKS(1);
1954 }
1955
1956 /* This must be set for fast clear to work without FMASK. */
1957 if (!iview->image->fmask.size &&
1958 device->physical_device->rad_info.chip_class == SI) {
1959 unsigned bankh = util_logbase2(iview->image->surface.bankh);
1960 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1961 }
1962 }
1963
1964 static void
1965 radv_initialise_ds_surface(struct radv_device *device,
1966 struct radv_ds_buffer_info *ds,
1967 struct radv_image_view *iview)
1968 {
1969 unsigned level = iview->base_mip;
1970 unsigned format;
1971 uint64_t va, s_offs, z_offs;
1972 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
1973 memset(ds, 0, sizeof(*ds));
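	/* The poly-offset controls describe the depth format's precision
	 * (24-bit or 16-bit unorm, or 32-bit float) so that depth-bias units
	 * scale correctly; offset_scale is presumably the matching factor
	 * applied wherever the depth-bias state is emitted. */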
1974 switch (iview->vk_format) {
1975 case VK_FORMAT_D24_UNORM_S8_UINT:
1976 case VK_FORMAT_X8_D24_UNORM_PACK32:
1977 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1978 ds->offset_scale = 2.0f;
1979 break;
1980 case VK_FORMAT_D16_UNORM:
1981 case VK_FORMAT_D16_UNORM_S8_UINT:
1982 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1983 ds->offset_scale = 4.0f;
1984 break;
1985 case VK_FORMAT_D32_SFLOAT:
1986 case VK_FORMAT_D32_SFLOAT_S8_UINT:
1987 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1988 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1989 ds->offset_scale = 1.0f;
1990 break;
1991 default:
1992 break;
1993 }
1994
1995 format = radv_translate_dbformat(iview->vk_format);
1996 if (format == V_028040_Z_INVALID) {
1997 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
1998 }
1999
2000 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2001 s_offs = z_offs = va;
2002 z_offs += iview->image->surface.level[level].offset;
2003 s_offs += iview->image->surface.stencil_level[level].offset;
2004
2005 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2006 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2007 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2008 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2009 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2010
2011 if (iview->image->samples > 1)
2012 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2013
2014 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2015 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2016 else
2017 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2018
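	/* On CIK+, tiling parameters are decoded from the tile-mode arrays
	 * in radeon_info; on SI (the else branch) tile mode indices are
	 * programmed directly. */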
2019 if (device->physical_device->rad_info.chip_class >= CIK) {
2020 struct radeon_info *info = &device->physical_device->rad_info;
2021 unsigned tiling_index = iview->image->surface.tiling_index[level];
2022 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2023 unsigned macro_index = iview->image->surface.macro_tile_index;
2024 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2025 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2026 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2027
2028 ds->db_depth_info |=
2029 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2030 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2031 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2032 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2033 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2034 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2035 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2036 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2037 } else {
2038 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2039 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2040 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2041 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2042 }
2043
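	/* HTILE is only used for the base mip level. */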
2044 if (iview->image->htile.size && !level) {
2045 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2046 S_028040_ALLOW_EXPCLEAR(1);
2047
2048 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2049 			/* Workaround: for a reason that is not yet understood,
2050 			 * the combination of MSAA, fast stencil clear and
2051 			 * stencil decompress corrupts subsequent stencil buffer
2052 			 * uses. The problem was reproduced on Verde, Bonaire,
2053 			 * Tonga, and Carrizo.
2054 *
2055 * Disabling EXPCLEAR works around the problem.
2056 *
2057 * Check piglit's arb_texture_multisample-stencil-clear
2058 * test if you want to try changing this.
2059 */
2060 if (iview->image->samples <= 1)
2061 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2062 } else
2063 /* Use all of the htile_buffer for depth if there's no stencil. */
2064 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2065
2066 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2067 iview->image->htile.offset;
2068 ds->db_htile_data_base = va >> 8;
2069 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2070 } else {
2071 ds->db_htile_data_base = 0;
2072 ds->db_htile_surface = 0;
2073 }
2074
2075 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2076 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2077
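	/* Like the color pitch/slice above, the depth size and slice maxima
	 * are in units of 8x8 tiles, minus one. */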
2078 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2079 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2080 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2081 }
2082
2083 VkResult radv_CreateFramebuffer(
2084 VkDevice _device,
2085 const VkFramebufferCreateInfo* pCreateInfo,
2086 const VkAllocationCallbacks* pAllocator,
2087 VkFramebuffer* pFramebuffer)
2088 {
2089 RADV_FROM_HANDLE(radv_device, device, _device);
2090 struct radv_framebuffer *framebuffer;
2091
2092 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2093
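	/* The attachment array lives inline, immediately after the
	 * framebuffer struct. */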
2094 size_t size = sizeof(*framebuffer) +
2095 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2096 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2097 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2098 if (framebuffer == NULL)
2099 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2100
2101 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2102 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2103 VkImageView _iview = pCreateInfo->pAttachments[i];
2104 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2105 framebuffer->attachments[i].attachment = iview;
2106 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2107 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2108 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2109 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2110 }
2111 }
2112
2113 framebuffer->width = pCreateInfo->width;
2114 framebuffer->height = pCreateInfo->height;
2115 framebuffer->layers = pCreateInfo->layers;
2116
2117 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2118 return VK_SUCCESS;
2119 }
2120
2121 void radv_DestroyFramebuffer(
2122 VkDevice _device,
2123 VkFramebuffer _fb,
2124 const VkAllocationCallbacks* pAllocator)
2125 {
2126 RADV_FROM_HANDLE(radv_device, device, _device);
2127 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2128
2129 if (!fb)
2130 return;
2131 vk_free2(&device->alloc, pAllocator, fb);
2132 }
2133
2134 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2135 {
2136 switch (address_mode) {
2137 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2138 return V_008F30_SQ_TEX_WRAP;
2139 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2140 return V_008F30_SQ_TEX_MIRROR;
2141 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2142 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2143 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2144 return V_008F30_SQ_TEX_CLAMP_BORDER;
2145 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2146 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2147 default:
2148 unreachable("illegal tex wrap mode");
2149 break;
2150 }
2151 }
2152
2153 static unsigned
2154 radv_tex_compare(VkCompareOp op)
2155 {
2156 switch (op) {
2157 case VK_COMPARE_OP_NEVER:
2158 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2159 case VK_COMPARE_OP_LESS:
2160 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2161 case VK_COMPARE_OP_EQUAL:
2162 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2163 case VK_COMPARE_OP_LESS_OR_EQUAL:
2164 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2165 case VK_COMPARE_OP_GREATER:
2166 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2167 case VK_COMPARE_OP_NOT_EQUAL:
2168 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2169 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2170 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2171 case VK_COMPARE_OP_ALWAYS:
2172 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2173 default:
2174 unreachable("illegal compare mode");
2175 break;
2176 }
2177 }
2178
2179 static unsigned
2180 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2181 {
2182 switch (filter) {
2183 case VK_FILTER_NEAREST:
2184 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2185 V_008F38_SQ_TEX_XY_FILTER_POINT);
2186 case VK_FILTER_LINEAR:
2187 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2188 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2189 case VK_FILTER_CUBIC_IMG:
2190 default:
2191 		fprintf(stderr, "illegal texture filter\n");
2192 return 0;
2193 }
2194 }
2195
2196 static unsigned
2197 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2198 {
2199 switch (mode) {
2200 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2201 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2202 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2203 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2204 default:
2205 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2206 }
2207 }
2208
2209 static unsigned
2210 radv_tex_bordercolor(VkBorderColor bcolor)
2211 {
2212 switch (bcolor) {
2213 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2214 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2215 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2216 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2217 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2218 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2219 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2220 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2221 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2222 default:
2223 break;
2224 }
2225 return 0;
2226 }
2227
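/* Map maxAnisotropy to the MAX_ANISO_RATIO field: 1x -> 0, 2-3x -> 1,
 * 4-7x -> 2, 8-15x -> 3, 16x -> 4; equivalent to
 * MIN2(util_logbase2(filter), 4) for filter >= 1. */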
2228 static unsigned
2229 radv_tex_aniso_filter(unsigned filter)
2230 {
2231 if (filter < 2)
2232 return 0;
2233 if (filter < 4)
2234 return 1;
2235 if (filter < 8)
2236 return 2;
2237 if (filter < 16)
2238 return 3;
2239 return 4;
2240 }
2241
2242 static void
2243 radv_init_sampler(struct radv_device *device,
2244 struct radv_sampler *sampler,
2245 const VkSamplerCreateInfo *pCreateInfo)
2246 {
2247 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2248 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2249 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2250 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2251
2252 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2253 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2254 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2255 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2256 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2257 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2258 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2259 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2260 S_008F30_DISABLE_CUBE_WRAP(0) |
2261 S_008F30_COMPAT_MODE(is_vi));
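	/* LODs are programmed in 4.8 fixed point (S_FIXED(x, 8)), hence the
	 * clamp to the hardware's [0, 15] range. */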
2262 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2263 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2264 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2265 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2266 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2267 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2268 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2269 S_008F38_MIP_POINT_PRECLAMP(1) |
2270 S_008F38_DISABLE_LSB_CEIL(1) |
2271 S_008F38_FILTER_PREC_FIX(1) |
2272 S_008F38_ANISO_OVERRIDE(is_vi));
2273 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2274 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2275 }
2276
2277 VkResult radv_CreateSampler(
2278 VkDevice _device,
2279 const VkSamplerCreateInfo* pCreateInfo,
2280 const VkAllocationCallbacks* pAllocator,
2281 VkSampler* pSampler)
2282 {
2283 RADV_FROM_HANDLE(radv_device, device, _device);
2284 struct radv_sampler *sampler;
2285
2286 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2287
2288 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2289 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2290 if (!sampler)
2291 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2292
2293 radv_init_sampler(device, sampler, pCreateInfo);
2294 *pSampler = radv_sampler_to_handle(sampler);
2295
2296 return VK_SUCCESS;
2297 }
2298
2299 void radv_DestroySampler(
2300 VkDevice _device,
2301 VkSampler _sampler,
2302 const VkAllocationCallbacks* pAllocator)
2303 {
2304 RADV_FROM_HANDLE(radv_device, device, _device);
2305 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2306
2307 if (!sampler)
2308 return;
2309 vk_free2(&device->alloc, pAllocator, sampler);
2310 }
2311
2312
2313 /* vk_icd.h does not declare this function, so we declare it here to
2314  * suppress -Wmissing-prototypes.
2315 */
2316 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2317 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2318
2319 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2320 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2321 {
2322 /* For the full details on loader interface versioning, see
2323 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2324 * What follows is a condensed summary, to help you navigate the large and
2325 * confusing official doc.
2326 *
2327 * - Loader interface v0 is incompatible with later versions. We don't
2328 * support it.
2329 *
2330 * - In loader interface v1:
2331 * - The first ICD entrypoint called by the loader is
2332 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2333 * entrypoint.
2334 * - The ICD must statically expose no other Vulkan symbol unless it is
2335 * linked with -Bsymbolic.
2336 * - Each dispatchable Vulkan handle created by the ICD must be
2337 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2338 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2339 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2340 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2341 * such loader-managed surfaces.
2342 *
2343 * - Loader interface v2 differs from v1 in:
2344 * - The first ICD entrypoint called by the loader is
2345 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2346 * statically expose this entrypoint.
2347 *
2348 * - Loader interface v3 differs from v2 in:
2349 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2350 	 *    vkDestroySurfaceKHR(), and the other APIs that use VkSurfaceKHR,
2351 * because the loader no longer does so.
2352 */
2353 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2354 return VK_SUCCESS;
2355 }
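
/* A minimal usage sketch (hypothetical loader side, for illustration only):
 *
 *     uint32_t version = 4;  // highest interface version the loader supports
 *     vk_icdNegotiateLoaderICDInterfaceVersion(&version);
 *     // version is now min(loader maximum, 3): the negotiated version
 */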