radv: Expose VK_KHR_maintenance1
src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <dlfcn.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <sys/stat.h>
#include <time.h> /* for clock_gettime() in radv_get_absolute_timeout() */
34 #include "radv_private.h"
35 #include "radv_cs.h"
36 #include "util/strtod.h"
37
38 #include <xf86drm.h>
39 #include <amdgpu.h>
40 #include <amdgpu_drm.h>
41 #include "amdgpu_id.h"
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "util/debug.h"
47 struct radv_dispatch_table dtable;
48
49 static int
50 radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
51 {
52 Dl_info info;
53 struct stat st;
54 if (!dladdr(ptr, &info) || !info.dli_fname) {
55 return -1;
56 }
57 if (stat(info.dli_fname, &st)) {
58 return -1;
59 }
60 *timestamp = st.st_mtim.tv_sec;
61 return 0;
62 }
63
64 static int
65 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
66 {
67 uint32_t mesa_timestamp, llvm_timestamp;
68 uint16_t f = family;
69 memset(uuid, 0, VK_UUID_SIZE);
70 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
71 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
72 return -1;
73
74 memcpy(uuid, &mesa_timestamp, 4);
75 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
76 memcpy((char*)uuid + 8, &f, 2);
77 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
78 return 0;
79 }
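/* For illustration only: the UUID assembled above packs the mesa build
 * timestamp (bytes 0-3), the LLVM build timestamp (bytes 4-7), the GPU
 * family id (bytes 8-9) and the literal "radv" (from byte 10). A
 * hypothetical decoder, kept disabled as a sketch of the layout:
 */
#if 0
static void
radv_decode_cache_uuid(const uint8_t uuid[VK_UUID_SIZE])
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t family;

	memcpy(&mesa_timestamp, uuid, 4);
	memcpy(&llvm_timestamp, uuid + 4, 4);
	memcpy(&family, uuid + 8, 2);
	/* uuid + 10 holds the NUL-terminated tag "radv" */
}
#endif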
80
81 static const VkExtensionProperties instance_extensions[] = {
82 {
83 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
84 .specVersion = 25,
85 },
86 #ifdef VK_USE_PLATFORM_XCB_KHR
87 {
88 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
89 .specVersion = 6,
90 },
91 #endif
92 #ifdef VK_USE_PLATFORM_XLIB_KHR
93 {
94 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
95 .specVersion = 6,
96 },
97 #endif
98 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
99 {
100 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
101 .specVersion = 5,
102 },
103 #endif
104 };
105
106 static const VkExtensionProperties common_device_extensions[] = {
107 {
108 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
109 .specVersion = 1,
110 },
111 {
112 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
113 .specVersion = 1,
114 },
115 {
116 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
117 .specVersion = 68,
118 },
119 {
120 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
121 .specVersion = 1,
122 },
123 {
124 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
125 .specVersion = 1,
126 },
127 };
128
129 static VkResult
130 radv_extensions_register(struct radv_instance *instance,
131 struct radv_extensions *extensions,
132 const VkExtensionProperties *new_ext,
133 uint32_t num_ext)
134 {
135 size_t new_size;
136 VkExtensionProperties *new_ptr;
137
138 assert(new_ext && num_ext > 0);
139
140 if (!new_ext)
141 return VK_ERROR_INITIALIZATION_FAILED;
142
143 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
144 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
145 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
146
147 /* Old array continues to be valid, update nothing */
148 if (!new_ptr)
149 return VK_ERROR_OUT_OF_HOST_MEMORY;
150
151 memcpy(&new_ptr[extensions->num_ext], new_ext,
152 num_ext * sizeof(VkExtensionProperties));
153 extensions->ext_array = new_ptr;
154 extensions->num_ext += num_ext;
155
156 return VK_SUCCESS;
157 }
158
159 static void
160 radv_extensions_finish(struct radv_instance *instance,
161 struct radv_extensions *extensions)
162 {
163 assert(extensions);
164
165 if (!extensions) {
166 radv_loge("Attempted to free invalid extension struct\n");
return;
}
167
168 if (extensions->ext_array)
169 vk_free(&instance->alloc, extensions->ext_array);
170 }
171
172 static bool
173 is_extension_enabled(const VkExtensionProperties *extensions,
174 size_t num_ext,
175 const char *name)
176 {
177 assert(extensions && name);
178
179 for (uint32_t i = 0; i < num_ext; i++) {
180 if (strcmp(name, extensions[i].extensionName) == 0)
181 return true;
182 }
183
184 return false;
185 }
186
187 static VkResult
188 radv_physical_device_init(struct radv_physical_device *device,
189 struct radv_instance *instance,
190 const char *path)
191 {
192 VkResult result;
193 drmVersionPtr version;
194 int fd;
195
196 fd = open(path, O_RDWR | O_CLOEXEC);
197 if (fd < 0)
198 return VK_ERROR_INCOMPATIBLE_DRIVER;
199
200 version = drmGetVersion(fd);
201 if (!version) {
202 close(fd);
203 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
204 "failed to get version %s: %m", path);
205 }
206
207 if (strcmp(version->name, "amdgpu")) {
208 drmFreeVersion(version);
209 close(fd);
210 return VK_ERROR_INCOMPATIBLE_DRIVER;
211 }
212 drmFreeVersion(version);
213
214 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
215 device->instance = instance;
216 assert(strlen(path) < ARRAY_SIZE(device->path));
217 strncpy(device->path, path, ARRAY_SIZE(device->path));
218
219 device->ws = radv_amdgpu_winsys_create(fd);
220 if (!device->ws) {
221 result = VK_ERROR_INCOMPATIBLE_DRIVER;
222 goto fail;
223 }
224 device->ws->query_info(device->ws, &device->rad_info);
225 result = radv_init_wsi(device);
226 if (result != VK_SUCCESS) {
227 device->ws->destroy(device->ws);
228 goto fail;
229 }
230
231 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
232 radv_finish_wsi(device);
233 device->ws->destroy(device->ws);
234 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
235 "cannot generate UUID");
236 goto fail;
237 }
238
239 result = radv_extensions_register(instance,
240 &device->extensions,
241 common_device_extensions,
242 ARRAY_SIZE(common_device_extensions));
243 if (result != VK_SUCCESS)
244 goto fail;
245
246 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
247 device->name = device->rad_info.name;
248 close(fd);
249 return VK_SUCCESS;
250
251 fail:
252 close(fd);
253 return result;
254 }
255
256 static void
257 radv_physical_device_finish(struct radv_physical_device *device)
258 {
259 radv_extensions_finish(device->instance, &device->extensions);
260 radv_finish_wsi(device);
261 device->ws->destroy(device->ws);
262 }
263
264
265 static void *
266 default_alloc_func(void *pUserData, size_t size, size_t align,
267 VkSystemAllocationScope allocationScope)
268 {
269 return malloc(size);
270 }
271
272 static void *
273 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
274 size_t align, VkSystemAllocationScope allocationScope)
275 {
276 return realloc(pOriginal, size);
277 }
278
279 static void
280 default_free_func(void *pUserData, void *pMemory)
281 {
282 free(pMemory);
283 }
284
285 static const VkAllocationCallbacks default_alloc = {
286 .pUserData = NULL,
287 .pfnAllocation = default_alloc_func,
288 .pfnReallocation = default_realloc_func,
289 .pfnFree = default_free_func,
290 };
291
292 static const struct debug_control radv_debug_options[] = {
293 {"fastclears", RADV_DEBUG_FAST_CLEARS},
294 {"nodcc", RADV_DEBUG_NO_DCC},
295 {"shaders", RADV_DEBUG_DUMP_SHADERS},
296 {"nocache", RADV_DEBUG_NO_CACHE},
297 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
298 {"nohiz", RADV_DEBUG_NO_HIZ},
299 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
300 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
301 {NULL, 0}
302 };
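/* These flags come from the RADV_DEBUG environment variable, parsed below in
 * radv_CreateInstance as a comma-separated list, e.g. (hypothetical run):
 *
 *   RADV_DEBUG=nodcc,nohiz,shaders ./my_vulkan_app
 */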
303
304 VkResult radv_CreateInstance(
305 const VkInstanceCreateInfo* pCreateInfo,
306 const VkAllocationCallbacks* pAllocator,
307 VkInstance* pInstance)
308 {
309 struct radv_instance *instance;
310
311 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
312
313 uint32_t client_version;
314 if (pCreateInfo->pApplicationInfo &&
315 pCreateInfo->pApplicationInfo->apiVersion != 0) {
316 client_version = pCreateInfo->pApplicationInfo->apiVersion;
317 } else {
318 client_version = VK_MAKE_VERSION(1, 0, 0);
319 }
320
321 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
322 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
323 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
324 "Client requested version %d.%d.%d",
325 VK_VERSION_MAJOR(client_version),
326 VK_VERSION_MINOR(client_version),
327 VK_VERSION_PATCH(client_version));
328 }
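/* VK_MAKE_VERSION(major, minor, patch) packs (major << 22) | (minor << 12) |
 * patch, so the range accepted above is 0x00400000..0x00400fff, i.e. any
 * Vulkan 1.0.x apiVersion. */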
329
330 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
331 if (!is_extension_enabled(instance_extensions,
332 ARRAY_SIZE(instance_extensions),
333 pCreateInfo->ppEnabledExtensionNames[i]))
334 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
335 }
336
337 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
338 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
339 if (!instance)
340 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
341
342 memset(instance, 0, sizeof(*instance));
343
344 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
345
346 if (pAllocator)
347 instance->alloc = *pAllocator;
348 else
349 instance->alloc = default_alloc;
350
351 instance->apiVersion = client_version;
352 instance->physicalDeviceCount = -1;
353
354 _mesa_locale_init();
355
356 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
357
358 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
359 radv_debug_options);
360
361 *pInstance = radv_instance_to_handle(instance);
362
363 return VK_SUCCESS;
364 }
365
366 void radv_DestroyInstance(
367 VkInstance _instance,
368 const VkAllocationCallbacks* pAllocator)
369 {
370 RADV_FROM_HANDLE(radv_instance, instance, _instance);
371
372 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
373 radv_physical_device_finish(instance->physicalDevices + i);
374 }
375
376 VG(VALGRIND_DESTROY_MEMPOOL(instance));
377
378 _mesa_locale_fini();
379
380 vk_free(&instance->alloc, instance);
381 }
382
383 VkResult radv_EnumeratePhysicalDevices(
384 VkInstance _instance,
385 uint32_t* pPhysicalDeviceCount,
386 VkPhysicalDevice* pPhysicalDevices)
387 {
388 RADV_FROM_HANDLE(radv_instance, instance, _instance);
389 VkResult result;
390
391 if (instance->physicalDeviceCount < 0) {
392 char path[20];
393 instance->physicalDeviceCount = 0;
394 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
395 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
396 result = radv_physical_device_init(instance->physicalDevices +
397 instance->physicalDeviceCount,
398 instance, path);
399 if (result == VK_SUCCESS)
400 ++instance->physicalDeviceCount;
401 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
402 return result;
403 }
404 }
405
406 if (!pPhysicalDevices) {
407 *pPhysicalDeviceCount = instance->physicalDeviceCount;
408 } else {
409 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
410 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
411 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
412 }
413
414 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
415 : VK_SUCCESS;
416 }
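/* Callers use the standard Vulkan two-call idiom: query the count with a
 * NULL array, then fetch the handles. A minimal application-side sketch
 * (assumes a valid VkInstance "inst"; not driver code):
 */
#if 0
uint32_t count = 0;
vkEnumeratePhysicalDevices(inst, &count, NULL);   /* first call: count only */
VkPhysicalDevice *devs = malloc(count * sizeof(*devs));
vkEnumeratePhysicalDevices(inst, &count, devs);   /* second call: handles */
#endif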
417
418 void radv_GetPhysicalDeviceFeatures(
419 VkPhysicalDevice physicalDevice,
420 VkPhysicalDeviceFeatures* pFeatures)
421 {
422 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
423
424 memset(pFeatures, 0, sizeof(*pFeatures));
425
426 *pFeatures = (VkPhysicalDeviceFeatures) {
427 .robustBufferAccess = true,
428 .fullDrawIndexUint32 = true,
429 .imageCubeArray = true,
430 .independentBlend = true,
431 .geometryShader = false,
432 .tessellationShader = false,
433 .sampleRateShading = false,
434 .dualSrcBlend = true,
435 .logicOp = true,
436 .multiDrawIndirect = true,
437 .drawIndirectFirstInstance = true,
438 .depthClamp = true,
439 .depthBiasClamp = true,
440 .fillModeNonSolid = true,
441 .depthBounds = true,
442 .wideLines = true,
443 .largePoints = true,
444 .alphaToOne = true,
445 .multiViewport = false,
446 .samplerAnisotropy = true,
447 .textureCompressionETC2 = false,
448 .textureCompressionASTC_LDR = false,
449 .textureCompressionBC = true,
450 .occlusionQueryPrecise = true,
451 .pipelineStatisticsQuery = false,
452 .vertexPipelineStoresAndAtomics = true,
453 .fragmentStoresAndAtomics = true,
454 .shaderTessellationAndGeometryPointSize = true,
455 .shaderImageGatherExtended = true,
456 .shaderStorageImageExtendedFormats = true,
457 .shaderStorageImageMultisample = false,
458 .shaderUniformBufferArrayDynamicIndexing = true,
459 .shaderSampledImageArrayDynamicIndexing = true,
460 .shaderStorageBufferArrayDynamicIndexing = true,
461 .shaderStorageImageArrayDynamicIndexing = true,
462 .shaderStorageImageReadWithoutFormat = false,
463 .shaderStorageImageWriteWithoutFormat = false,
464 .shaderClipDistance = true,
465 .shaderCullDistance = true,
466 .shaderFloat64 = false,
467 .shaderInt64 = false,
468 .shaderInt16 = false,
470 .variableMultisampleRate = false,
471 .inheritedQueries = false,
472 };
473 }
474
475 void radv_GetPhysicalDeviceFeatures2KHR(
476 VkPhysicalDevice physicalDevice,
477 VkPhysicalDeviceFeatures2KHR *pFeatures)
478 {
479 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
480 }
481
482 void radv_GetPhysicalDeviceProperties(
483 VkPhysicalDevice physicalDevice,
484 VkPhysicalDeviceProperties* pProperties)
485 {
486 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
487 VkSampleCountFlags sample_counts = 0xf;
488 VkPhysicalDeviceLimits limits = {
489 .maxImageDimension1D = (1 << 14),
490 .maxImageDimension2D = (1 << 14),
491 .maxImageDimension3D = (1 << 11),
492 .maxImageDimensionCube = (1 << 14),
493 .maxImageArrayLayers = (1 << 11),
494 .maxTexelBufferElements = 128 * 1024 * 1024,
495 .maxUniformBufferRange = UINT32_MAX,
496 .maxStorageBufferRange = UINT32_MAX,
497 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
498 .maxMemoryAllocationCount = UINT32_MAX,
499 .maxSamplerAllocationCount = 64 * 1024,
500 .bufferImageGranularity = 64, /* A cache line */
501 .sparseAddressSpaceSize = 0,
502 .maxBoundDescriptorSets = MAX_SETS,
503 .maxPerStageDescriptorSamplers = 64,
504 .maxPerStageDescriptorUniformBuffers = 64,
505 .maxPerStageDescriptorStorageBuffers = 64,
506 .maxPerStageDescriptorSampledImages = 64,
507 .maxPerStageDescriptorStorageImages = 64,
508 .maxPerStageDescriptorInputAttachments = 64,
509 .maxPerStageResources = 128,
510 .maxDescriptorSetSamplers = 256,
511 .maxDescriptorSetUniformBuffers = 256,
512 .maxDescriptorSetUniformBuffersDynamic = 256,
513 .maxDescriptorSetStorageBuffers = 256,
514 .maxDescriptorSetStorageBuffersDynamic = 256,
515 .maxDescriptorSetSampledImages = 256,
516 .maxDescriptorSetStorageImages = 256,
517 .maxDescriptorSetInputAttachments = 256,
518 .maxVertexInputAttributes = 32,
519 .maxVertexInputBindings = 32,
520 .maxVertexInputAttributeOffset = 2047,
521 .maxVertexInputBindingStride = 2048,
522 .maxVertexOutputComponents = 128,
523 .maxTessellationGenerationLevel = 0,
524 .maxTessellationPatchSize = 0,
525 .maxTessellationControlPerVertexInputComponents = 0,
526 .maxTessellationControlPerVertexOutputComponents = 0,
527 .maxTessellationControlPerPatchOutputComponents = 0,
528 .maxTessellationControlTotalOutputComponents = 0,
529 .maxTessellationEvaluationInputComponents = 0,
530 .maxTessellationEvaluationOutputComponents = 0,
531 .maxGeometryShaderInvocations = 32,
532 .maxGeometryInputComponents = 64,
533 .maxGeometryOutputComponents = 128,
534 .maxGeometryOutputVertices = 256,
535 .maxGeometryTotalOutputComponents = 1024,
536 .maxFragmentInputComponents = 128,
537 .maxFragmentOutputAttachments = 8,
538 .maxFragmentDualSrcAttachments = 1,
539 .maxFragmentCombinedOutputResources = 8,
540 .maxComputeSharedMemorySize = 32768,
541 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
542 .maxComputeWorkGroupInvocations = 2048,
543 .maxComputeWorkGroupSize = {
544 2048,
545 2048,
546 2048
547 },
548 .subPixelPrecisionBits = 4 /* FIXME */,
549 .subTexelPrecisionBits = 4 /* FIXME */,
550 .mipmapPrecisionBits = 4 /* FIXME */,
551 .maxDrawIndexedIndexValue = UINT32_MAX,
552 .maxDrawIndirectCount = UINT32_MAX,
553 .maxSamplerLodBias = 16,
554 .maxSamplerAnisotropy = 16,
555 .maxViewports = MAX_VIEWPORTS,
556 .maxViewportDimensions = { (1 << 14), (1 << 14) },
557 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
558 .viewportSubPixelBits = 13, /* We take a float? */
559 .minMemoryMapAlignment = 4096, /* A page */
560 .minTexelBufferOffsetAlignment = 1,
561 .minUniformBufferOffsetAlignment = 4,
562 .minStorageBufferOffsetAlignment = 4,
563 .minTexelOffset = -32,
564 .maxTexelOffset = 31,
565 .minTexelGatherOffset = -32,
566 .maxTexelGatherOffset = 31,
567 .minInterpolationOffset = -2,
568 .maxInterpolationOffset = 2,
569 .subPixelInterpolationOffsetBits = 8,
570 .maxFramebufferWidth = (1 << 14),
571 .maxFramebufferHeight = (1 << 14),
572 .maxFramebufferLayers = (1 << 10),
573 .framebufferColorSampleCounts = sample_counts,
574 .framebufferDepthSampleCounts = sample_counts,
575 .framebufferStencilSampleCounts = sample_counts,
576 .framebufferNoAttachmentsSampleCounts = sample_counts,
577 .maxColorAttachments = MAX_RTS,
578 .sampledImageColorSampleCounts = sample_counts,
579 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
580 .sampledImageDepthSampleCounts = sample_counts,
581 .sampledImageStencilSampleCounts = sample_counts,
582 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
583 .maxSampleMaskWords = 1,
584 .timestampComputeAndGraphics = false,
585 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, /* crystal freq is in kHz, period in ns */
586 .maxClipDistances = 8,
587 .maxCullDistances = 8,
588 .maxCombinedClipAndCullDistances = 8,
589 .discreteQueuePriorities = 1,
590 .pointSizeRange = { 0.125, 255.875 },
591 .lineWidthRange = { 0.0, 7.9921875 },
592 .pointSizeGranularity = (1.0 / 8.0),
593 .lineWidthGranularity = (1.0 / 128.0),
594 .strictLines = false, /* FINISHME */
595 .standardSampleLocations = true,
596 .optimalBufferCopyOffsetAlignment = 128,
597 .optimalBufferCopyRowPitchAlignment = 128,
598 .nonCoherentAtomSize = 64,
599 };
600
601 *pProperties = (VkPhysicalDeviceProperties) {
602 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
603 .driverVersion = 1,
604 .vendorID = 0x1002,
605 .deviceID = pdevice->rad_info.pci_id,
606 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
607 .limits = limits,
608 .sparseProperties = {0}, /* Sparse resources are not yet supported. */
609 };
610
611 strcpy(pProperties->deviceName, pdevice->name);
612 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
613 }
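/* Worked example for timestampPeriod above, assuming clock_crystal_freq is
 * reported in kHz: a 27000 kHz counter ticks 27,000,000 times per second,
 * so 1000000.0 / 27000 ~= 37.04 ns elapse per timestamp increment. */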
614
615 void radv_GetPhysicalDeviceProperties2KHR(
616 VkPhysicalDevice physicalDevice,
617 VkPhysicalDeviceProperties2KHR *pProperties)
618 {
619 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
620 }
621
622 void radv_GetPhysicalDeviceQueueFamilyProperties(
623 VkPhysicalDevice physicalDevice,
624 uint32_t* pCount,
625 VkQueueFamilyProperties* pQueueFamilyProperties)
626 {
627 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
628 int num_queue_families = 1;
629 int idx;
630 if (pdevice->rad_info.compute_rings > 0 &&
631 pdevice->rad_info.chip_class >= CIK &&
632 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
633 num_queue_families++;
634
635 if (pQueueFamilyProperties == NULL) {
636 *pCount = num_queue_families;
637 return;
638 }
639
640 if (!*pCount)
641 return;
642
643 idx = 0;
644 if (*pCount >= 1) {
645 pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
646 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
647 VK_QUEUE_COMPUTE_BIT |
648 VK_QUEUE_TRANSFER_BIT,
649 .queueCount = 1,
650 .timestampValidBits = 64,
651 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
652 };
653 idx++;
654 }
655
656 if (pdevice->rad_info.compute_rings > 0 &&
657 pdevice->rad_info.chip_class >= CIK &&
658 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
659 if (*pCount > idx) {
660 pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
661 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
662 .queueCount = pdevice->rad_info.compute_rings,
663 .timestampValidBits = 64,
664 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
665 };
666 idx++;
667 }
668 }
669 *pCount = idx;
670 }
671
672 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
673 VkPhysicalDevice physicalDevice,
674 uint32_t* pCount,
675 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
676 {
677 radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice,
678 pCount,
679 &pQueueFamilyProperties->queueFamilyProperties);
680 }
681
682 void radv_GetPhysicalDeviceMemoryProperties(
683 VkPhysicalDevice physicalDevice,
684 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
685 {
686 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
687
688 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
689
690 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
691 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
692 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
693 .heapIndex = RADV_MEM_HEAP_VRAM,
694 };
695 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
696 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
697 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
698 .heapIndex = RADV_MEM_HEAP_GTT,
699 };
700 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
701 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
702 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
703 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
704 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
705 };
706 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
707 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
708 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
709 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
710 .heapIndex = RADV_MEM_HEAP_GTT,
711 };
712
713 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
714
715 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
716 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
717 .size = physical_device->rad_info.vram_size -
718 physical_device->rad_info.visible_vram_size,
719 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
720 };
721 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
722 .size = physical_device->rad_info.visible_vram_size,
723 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
724 };
725 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
726 .size = physical_device->rad_info.gart_size,
727 .flags = 0,
728 };
729 }
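/* Applications pick an allocation type by intersecting a resource's
 * memoryTypeBits with the property flags they need. A minimal sketch of
 * that loop (application side, hypothetical helper):
 */
#if 0
static int32_t
find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
                 uint32_t type_bits, VkMemoryPropertyFlags wanted)
{
	for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
		if ((type_bits & (1u << i)) &&
		    (props->memoryTypes[i].propertyFlags & wanted) == wanted)
			return i;
	}
	return -1; /* no compatible memory type */
}
#endif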
730
731 void radv_GetPhysicalDeviceMemoryProperties2KHR(
732 VkPhysicalDevice physicalDevice,
733 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
734 {
735 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
736 &pMemoryProperties->memoryProperties);
737 }
738
739 static int
740 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
741 int queue_family_index, int idx)
742 {
743 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
744 queue->device = device;
745 queue->queue_family_index = queue_family_index;
746 queue->queue_idx = idx;
747
748 queue->hw_ctx = device->ws->ctx_create(device->ws);
749 if (!queue->hw_ctx)
750 return VK_ERROR_OUT_OF_HOST_MEMORY;
751
752 return VK_SUCCESS;
753 }
754
755 static void
756 radv_queue_finish(struct radv_queue *queue)
757 {
758 if (queue->hw_ctx)
759 queue->device->ws->ctx_destroy(queue->hw_ctx);
760
761 if (queue->preamble_cs)
762 queue->device->ws->cs_destroy(queue->preamble_cs);
763 if (queue->descriptor_bo)
764 queue->device->ws->buffer_destroy(queue->descriptor_bo);
765 if (queue->scratch_bo)
766 queue->device->ws->buffer_destroy(queue->scratch_bo);
767 if (queue->compute_scratch_bo)
768 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
769 }
770
771 VkResult radv_CreateDevice(
772 VkPhysicalDevice physicalDevice,
773 const VkDeviceCreateInfo* pCreateInfo,
774 const VkAllocationCallbacks* pAllocator,
775 VkDevice* pDevice)
776 {
777 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
778 VkResult result;
779 struct radv_device *device;
780
781 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
782 if (!is_extension_enabled(physical_device->extensions.ext_array,
783 physical_device->extensions.num_ext,
784 pCreateInfo->ppEnabledExtensionNames[i]))
785 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
786 }
787
788 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
789 sizeof(*device), 8,
790 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
791 if (!device)
792 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
793
794 memset(device, 0, sizeof(*device));
795
796 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
797 device->instance = physical_device->instance;
798 device->physical_device = physical_device;
799
800 device->debug_flags = device->instance->debug_flags;
801
802 device->ws = physical_device->ws;
803 if (pAllocator)
804 device->alloc = *pAllocator;
805 else
806 device->alloc = physical_device->instance->alloc;
807
808 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
809 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
810 uint32_t qfi = queue_create->queueFamilyIndex;
811
812 device->queues[qfi] = vk_alloc(&device->alloc,
813 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
814 if (!device->queues[qfi]) {
815 result = VK_ERROR_OUT_OF_HOST_MEMORY;
816 goto fail;
817 }
818
819 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
820
821 device->queue_count[qfi] = queue_create->queueCount;
822
823 for (unsigned q = 0; q < queue_create->queueCount; q++) {
824 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
825 if (result != VK_SUCCESS)
826 goto fail;
827 }
828 }
829
830 #if HAVE_LLVM < 0x0400
831 device->llvm_supports_spill = false;
832 #else
833 device->llvm_supports_spill = true;
834 #endif
835
836 /* The maximum number of scratch waves. Scratch space isn't divided
837 * evenly between CUs. The number is only a function of the number of CUs.
838 * We can decrease the constant to decrease the scratch buffer size.
839 *
840 * device->scratch_waves must be >= the maximum possible size of
841 * 1 threadgroup, so that the hw doesn't hang from being unable
842 * to start any.
843 *
844 * The recommended value is 4 per CU at most. Higher numbers don't
845 * bring much benefit, but they still occupy chip resources (think
846 * async compute). I've seen ~2% performance difference between 4 and 32.
847 */
848 uint32_t max_threads_per_block = 2048;
849 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
850 max_threads_per_block / 64);
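/* Worked example: with 16 CUs this is MAX2(32 * 16, 2048 / 64) =
 * MAX2(512, 32) = 512 scratch waves; the 2048 / 64 = 32 floor only exists
 * so that one full 2048-thread threadgroup can always start. */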
851
852 result = radv_device_init_meta(device);
853 if (result != VK_SUCCESS)
854 goto fail;
855
856 radv_device_init_msaa(device);
857
858 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
859 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
860 switch (family) {
861 case RADV_QUEUE_GENERAL:
862 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
863 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
864 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
865 break;
866 case RADV_QUEUE_COMPUTE:
867 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
868 radeon_emit(device->empty_cs[family], 0);
869 break;
870 }
871 device->ws->cs_finalize(device->empty_cs[family]);
872 }
873
874 if (getenv("RADV_TRACE_FILE")) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY; /* was left uninitialized on the goto fail paths below */
875 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
876 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
877 if (!device->trace_bo)
878 goto fail;
879
880 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
881 if (!device->trace_id_ptr)
882 goto fail;
883 }
884
885 *pDevice = radv_device_to_handle(device);
886 return VK_SUCCESS;
887
888 fail:
889 if (device->trace_bo)
890 device->ws->buffer_destroy(device->trace_bo);
891
892 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
893 for (unsigned q = 0; q < device->queue_count[i]; q++)
894 radv_queue_finish(&device->queues[i][q]);
895 if (device->queue_count[i])
896 vk_free(&device->alloc, device->queues[i]);
897 }
898
899 vk_free(&device->alloc, device);
900 return result;
901 }
902
903 void radv_DestroyDevice(
904 VkDevice _device,
905 const VkAllocationCallbacks* pAllocator)
906 {
907 RADV_FROM_HANDLE(radv_device, device, _device);
908
909 if (device->trace_bo)
910 device->ws->buffer_destroy(device->trace_bo);
911
912 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
913 for (unsigned q = 0; q < device->queue_count[i]; q++)
914 radv_queue_finish(&device->queues[i][q]);
915 if (device->queue_count[i])
916 vk_free(&device->alloc, device->queues[i]);
917 }
918 radv_device_finish_meta(device);
919
920 vk_free(&device->alloc, device);
921 }
922
923 VkResult radv_EnumerateInstanceExtensionProperties(
924 const char* pLayerName,
925 uint32_t* pPropertyCount,
926 VkExtensionProperties* pProperties)
927 {
928 if (pProperties == NULL) {
929 *pPropertyCount = ARRAY_SIZE(instance_extensions);
930 return VK_SUCCESS;
931 }
932
933 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
934 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
935
936 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
937 return VK_INCOMPLETE;
938
939 return VK_SUCCESS;
940 }
941
942 VkResult radv_EnumerateDeviceExtensionProperties(
943 VkPhysicalDevice physicalDevice,
944 const char* pLayerName,
945 uint32_t* pPropertyCount,
946 VkExtensionProperties* pProperties)
947 {
948 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
949
950 if (pProperties == NULL) {
951 *pPropertyCount = pdevice->extensions.num_ext;
952 return VK_SUCCESS;
953 }
954
955 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
956 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
957
958 if (*pPropertyCount < pdevice->extensions.num_ext)
959 return VK_INCOMPLETE;
960
961 return VK_SUCCESS;
962 }
963
964 VkResult radv_EnumerateInstanceLayerProperties(
965 uint32_t* pPropertyCount,
966 VkLayerProperties* pProperties)
967 {
968 if (pProperties == NULL) {
969 *pPropertyCount = 0;
970 return VK_SUCCESS;
971 }
972
973 /* None supported at this time */
974 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
975 }
976
977 VkResult radv_EnumerateDeviceLayerProperties(
978 VkPhysicalDevice physicalDevice,
979 uint32_t* pPropertyCount,
980 VkLayerProperties* pProperties)
981 {
982 if (pProperties == NULL) {
983 *pPropertyCount = 0;
984 return VK_SUCCESS;
985 }
986
987 /* None supported at this time */
988 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
989 }
990
991 void radv_GetDeviceQueue(
992 VkDevice _device,
993 uint32_t queueFamilyIndex,
994 uint32_t queueIndex,
995 VkQueue* pQueue)
996 {
997 RADV_FROM_HANDLE(radv_device, device, _device);
998
999 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1000 }
1001
1002 static void radv_dump_trace(struct radv_device *device,
1003 struct radeon_winsys_cs *cs)
1004 {
1005 const char *filename = getenv("RADV_TRACE_FILE");
1006 FILE *f = fopen(filename, "w");
1007 if (!f) {
1008 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1009 return;
1010 }
1011
1012 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1013 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1014 fclose(f);
1015 }
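/* Usage note: tracing is driven entirely by the environment, e.g.
 * (hypothetical invocation):
 *
 *   RADV_TRACE_FILE=/tmp/radv-hang.txt ./my_vulkan_app
 *
 * radv_CreateDevice then allocates trace_bo, radv_QueueSubmit waits for the
 * context to idle after each submission, and on a detected hang this
 * function writes the last trace id plus the CS contents to that file. */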
1016
1017 static VkResult
1018 radv_get_preamble_cs(struct radv_queue *queue,
1019 uint32_t scratch_size,
1020 uint32_t compute_scratch_size,
1021 struct radeon_winsys_cs **preamble_cs)
1022 {
1023 struct radeon_winsys_bo *scratch_bo = NULL;
1024 struct radeon_winsys_bo *descriptor_bo = NULL;
1025 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1026 struct radeon_winsys_cs *cs = NULL;
1027
1028 if (!scratch_size && !compute_scratch_size) {
1029 *preamble_cs = NULL;
1030 return VK_SUCCESS;
1031 }
1032
1033 if (scratch_size <= queue->scratch_size &&
1034 compute_scratch_size <= queue->compute_scratch_size) {
1035 *preamble_cs = queue->preamble_cs;
1036 return VK_SUCCESS;
1037 }
1038
1039 if (scratch_size > queue->scratch_size) {
1040 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1041 scratch_size,
1042 4096,
1043 RADEON_DOMAIN_VRAM,
1044 RADEON_FLAG_NO_CPU_ACCESS);
1045 if (!scratch_bo)
1046 goto fail;
1047 } else
1048 scratch_bo = queue->scratch_bo;
1049
1050 if (compute_scratch_size > queue->compute_scratch_size) {
1051 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1052 compute_scratch_size,
1053 4096,
1054 RADEON_DOMAIN_VRAM,
1055 RADEON_FLAG_NO_CPU_ACCESS);
1056 if (!compute_scratch_bo)
1057 goto fail;
1058
1059 } else
1060 compute_scratch_bo = queue->compute_scratch_bo;
1061
1062 if (scratch_bo != queue->scratch_bo) {
1063 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1064 8,
1065 4096,
1066 RADEON_DOMAIN_VRAM,
1067 RADEON_FLAG_CPU_ACCESS);
1068 if (!descriptor_bo)
1069 goto fail;
1070 } else
1071 descriptor_bo = queue->descriptor_bo;
1072
1073 cs = queue->device->ws->cs_create(queue->device->ws,
1074 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1075 if (!cs)
1076 goto fail;
1077
1078
1079 if (scratch_bo)
1080 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1081
1082 if (descriptor_bo)
1083 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1084
1085 if (descriptor_bo != queue->descriptor_bo) {
1086 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1087 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1088 S_008F04_SWIZZLE_ENABLE(1);
1089
1090 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1091
1092 map[0] = scratch_va; /* dword 0: low 32 bits of the scratch buffer VA */
1093 map[1] = rsrc1; /* dword 1: high VA bits | swizzle enable */
1094
1095 queue->device->ws->buffer_unmap(descriptor_bo);
1096 }
1097
1098 if (descriptor_bo) {
1099 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1100 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1101 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1102 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1103 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1104 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1105
1106 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1107
1108 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1109 radeon_set_sh_reg_seq(cs, regs[i], 2);
1110 radeon_emit(cs, va);
1111 radeon_emit(cs, va >> 32);
1112 }
1113 }
1114
1115 if (compute_scratch_bo) {
1116 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1117 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1118 S_008F04_SWIZZLE_ENABLE(1);
1119
1120 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1121
1122 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1123 radeon_emit(cs, scratch_va);
1124 radeon_emit(cs, rsrc1);
1125 }
1126
1127 if (!queue->device->ws->cs_finalize(cs))
1128 goto fail;
1129
1130 if (queue->preamble_cs)
1131 queue->device->ws->cs_destroy(queue->preamble_cs);
1132
1133 queue->preamble_cs = cs;
1134
1135 if (scratch_bo != queue->scratch_bo) {
1136 if (queue->scratch_bo)
1137 queue->device->ws->buffer_destroy(queue->scratch_bo);
1138 queue->scratch_bo = scratch_bo;
1139 queue->scratch_size = scratch_size;
1140 }
1141
1142 if (compute_scratch_bo != queue->compute_scratch_bo) {
1143 if (queue->compute_scratch_bo)
1144 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1145 queue->compute_scratch_bo = compute_scratch_bo;
1146 queue->compute_scratch_size = compute_scratch_size;
1147 }
1148
1149 if (descriptor_bo != queue->descriptor_bo) {
1150 if (queue->descriptor_bo)
1151 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1152
1153 queue->descriptor_bo = descriptor_bo;
1154 }
1155
1156 *preamble_cs = cs;
1157 return VK_SUCCESS;
1158 fail:
1159 if (cs)
1160 queue->device->ws->cs_destroy(cs);
1161 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1162 queue->device->ws->buffer_destroy(descriptor_bo);
1163 if (scratch_bo && scratch_bo != queue->scratch_bo)
1164 queue->device->ws->buffer_destroy(scratch_bo);
1165 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1166 queue->device->ws->buffer_destroy(compute_scratch_bo);
1167 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1168 }
1169
1170 VkResult radv_QueueSubmit(
1171 VkQueue _queue,
1172 uint32_t submitCount,
1173 const VkSubmitInfo* pSubmits,
1174 VkFence _fence)
1175 {
1176 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1177 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1178 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1179 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1180 int ret;
1181 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1182 uint32_t scratch_size = 0;
1183 uint32_t compute_scratch_size = 0;
1184 struct radeon_winsys_cs *preamble_cs = NULL;
1185 VkResult result;
1186
1187 /* Do this first so failing to allocate scratch buffers can't result in
1188 * partially executed submissions. */
1189 for (uint32_t i = 0; i < submitCount; i++) {
1190 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1191 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1192 pSubmits[i].pCommandBuffers[j]);
1193
1194 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1195 compute_scratch_size = MAX2(compute_scratch_size,
1196 cmd_buffer->compute_scratch_size_needed);
1197 }
1198 }
1199
1200 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, &preamble_cs);
1201 if (result != VK_SUCCESS)
1202 return result;
1203
1204 for (uint32_t i = 0; i < submitCount; i++) {
1205 struct radeon_winsys_cs **cs_array;
1206 bool can_patch = true;
1207 uint32_t advance;
1208
1209 if (!pSubmits[i].commandBufferCount)
1210 continue;
1211
1212 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1213 pSubmits[i].commandBufferCount);
1214
1215 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1216 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1217 pSubmits[i].pCommandBuffers[j]);
1218 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1219
1220 cs_array[j] = cmd_buffer->cs;
1221 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1222 can_patch = false;
1223 }
1224
1225 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
1226 advance = MIN2(max_cs_submission,
1227 pSubmits[i].commandBufferCount - j);
1228 bool b = j == 0;
1229 bool e = j + advance == pSubmits[i].commandBufferCount;
1230
1231 if (queue->device->trace_bo)
1232 *queue->device->trace_id_ptr = 0;
1233
1234 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1235 advance, preamble_cs,
1236 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1237 b ? pSubmits[i].waitSemaphoreCount : 0,
1238 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1239 e ? pSubmits[i].signalSemaphoreCount : 0,
1240 can_patch, base_fence);
1241
1242 if (ret) {
1243 radv_loge("failed to submit CS %d\n", i);
1244 abort();
1245 }
1246 if (queue->device->trace_bo) {
1247 bool success = queue->device->ws->ctx_wait_idle(
1248 queue->hw_ctx,
1249 radv_queue_family_to_ring(
1250 queue->queue_family_index),
1251 queue->queue_idx);
1252
1253 if (!success) { /* Hang */
1254 radv_dump_trace(queue->device, cs_array[j]);
1255 abort();
1256 }
1257 }
1258 }
1259 free(cs_array);
1260 }
1261
1262 if (fence) {
1263 if (!submitCount)
1264 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1265 &queue->device->empty_cs[queue->queue_family_index],
1266 1, NULL, NULL, 0, NULL, 0,
1267 false, base_fence);
1268
1269 fence->submitted = true;
1270 }
1271
1272 return VK_SUCCESS;
1273 }
1274
1275 VkResult radv_QueueWaitIdle(
1276 VkQueue _queue)
1277 {
1278 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1279
1280 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1281 radv_queue_family_to_ring(queue->queue_family_index),
1282 queue->queue_idx);
1283 return VK_SUCCESS;
1284 }
1285
1286 VkResult radv_DeviceWaitIdle(
1287 VkDevice _device)
1288 {
1289 RADV_FROM_HANDLE(radv_device, device, _device);
1290
1291 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1292 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1293 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1294 }
1295 }
1296 return VK_SUCCESS;
1297 }
1298
1299 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1300 VkInstance instance,
1301 const char* pName)
1302 {
1303 return radv_lookup_entrypoint(pName);
1304 }
1305
1306 /* The loader wants us to expose a second GetInstanceProcAddr function
1307 * to work around certain LD_PRELOAD issues seen in apps.
1308 */
1309 PUBLIC
1310 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1311 VkInstance instance,
1312 const char* pName);
1313
1314 PUBLIC
1315 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1316 VkInstance instance,
1317 const char* pName)
1318 {
1319 return radv_GetInstanceProcAddr(instance, pName);
1320 }
1321
1322 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1323 VkDevice device,
1324 const char* pName)
1325 {
1326 return radv_lookup_entrypoint(pName);
1327 }
1328
1329 VkResult radv_AllocateMemory(
1330 VkDevice _device,
1331 const VkMemoryAllocateInfo* pAllocateInfo,
1332 const VkAllocationCallbacks* pAllocator,
1333 VkDeviceMemory* pMem)
1334 {
1335 RADV_FROM_HANDLE(radv_device, device, _device);
1336 struct radv_device_memory *mem;
1337 VkResult result;
1338 enum radeon_bo_domain domain;
1339 uint32_t flags = 0;
1340 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1341
1342 if (pAllocateInfo->allocationSize == 0) {
1343 /* Apparently, this is allowed */
1344 *pMem = VK_NULL_HANDLE;
1345 return VK_SUCCESS;
1346 }
1347
1348 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1349 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1350 if (mem == NULL)
1351 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1352
1353 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1354 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1355 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1356 domain = RADEON_DOMAIN_GTT;
1357 else
1358 domain = RADEON_DOMAIN_VRAM;
1359
1360 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1361 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1362 else
1363 flags |= RADEON_FLAG_CPU_ACCESS;
1364
1365 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1366 flags |= RADEON_FLAG_GTT_WC;
1367
1368 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
1369 domain, flags);
1370
1371 if (!mem->bo) {
1372 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1373 goto fail;
1374 }
1375 mem->type_index = pAllocateInfo->memoryTypeIndex;
1376
1377 *pMem = radv_device_memory_to_handle(mem);
1378
1379 return VK_SUCCESS;
1380
1381 fail:
1382 vk_free2(&device->alloc, pAllocator, mem);
1383
1384 return result;
1385 }
1386
1387 void radv_FreeMemory(
1388 VkDevice _device,
1389 VkDeviceMemory _mem,
1390 const VkAllocationCallbacks* pAllocator)
1391 {
1392 RADV_FROM_HANDLE(radv_device, device, _device);
1393 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1394
1395 if (mem == NULL)
1396 return;
1397
1398 device->ws->buffer_destroy(mem->bo);
1399 mem->bo = NULL;
1400
1401 vk_free2(&device->alloc, pAllocator, mem);
1402 }
1403
1404 VkResult radv_MapMemory(
1405 VkDevice _device,
1406 VkDeviceMemory _memory,
1407 VkDeviceSize offset,
1408 VkDeviceSize size,
1409 VkMemoryMapFlags flags,
1410 void** ppData)
1411 {
1412 RADV_FROM_HANDLE(radv_device, device, _device);
1413 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1414
1415 if (mem == NULL) {
1416 *ppData = NULL;
1417 return VK_SUCCESS;
1418 }
1419
1420 *ppData = device->ws->buffer_map(mem->bo);
1421 if (*ppData) {
1422 *ppData += offset;
1423 return VK_SUCCESS;
1424 }
1425
1426 return VK_ERROR_MEMORY_MAP_FAILED;
1427 }
1428
1429 void radv_UnmapMemory(
1430 VkDevice _device,
1431 VkDeviceMemory _memory)
1432 {
1433 RADV_FROM_HANDLE(radv_device, device, _device);
1434 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1435
1436 if (mem == NULL)
1437 return;
1438
1439 device->ws->buffer_unmap(mem->bo);
1440 }
1441
1442 VkResult radv_FlushMappedMemoryRanges(
1443 VkDevice _device,
1444 uint32_t memoryRangeCount,
1445 const VkMappedMemoryRange* pMemoryRanges)
1446 {
1447 return VK_SUCCESS;
1448 }
1449
1450 VkResult radv_InvalidateMappedMemoryRanges(
1451 VkDevice _device,
1452 uint32_t memoryRangeCount,
1453 const VkMappedMemoryRange* pMemoryRanges)
1454 {
1455 return VK_SUCCESS;
1456 }
1457
1458 void radv_GetBufferMemoryRequirements(
1459 VkDevice device,
1460 VkBuffer _buffer,
1461 VkMemoryRequirements* pMemoryRequirements)
1462 {
1463 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1464
1465 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1466
1467 pMemoryRequirements->size = buffer->size;
1468 pMemoryRequirements->alignment = 16;
1469 }
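/* The size/alignment reported here feed the usual create -> query ->
 * allocate -> bind sequence. An application-side sketch (assumes a device
 * "dev" and a filled VkBufferCreateInfo "info"; memoryTypeIndex would be
 * chosen from reqs.memoryTypeBits as sketched earlier):
 */
#if 0
VkBuffer buf;
VkMemoryRequirements reqs;
VkDeviceMemory mem;

vkCreateBuffer(dev, &info, NULL, &buf);
vkGetBufferMemoryRequirements(dev, buf, &reqs);

VkMemoryAllocateInfo alloc = {
	.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
	.allocationSize = reqs.size,
	.memoryTypeIndex = 0, /* pick from reqs.memoryTypeBits */
};
vkAllocateMemory(dev, &alloc, NULL, &mem);
vkBindBufferMemory(dev, buf, mem, 0 /* offset must honor reqs.alignment */);
#endif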
1470
1471 void radv_GetImageMemoryRequirements(
1472 VkDevice device,
1473 VkImage _image,
1474 VkMemoryRequirements* pMemoryRequirements)
1475 {
1476 RADV_FROM_HANDLE(radv_image, image, _image);
1477
1478 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1479
1480 pMemoryRequirements->size = image->size;
1481 pMemoryRequirements->alignment = image->alignment;
1482 }
1483
1484 void radv_GetImageSparseMemoryRequirements(
1485 VkDevice device,
1486 VkImage image,
1487 uint32_t* pSparseMemoryRequirementCount,
1488 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1489 {
1490 stub();
1491 }
1492
1493 void radv_GetDeviceMemoryCommitment(
1494 VkDevice device,
1495 VkDeviceMemory memory,
1496 VkDeviceSize* pCommittedMemoryInBytes)
1497 {
1498 *pCommittedMemoryInBytes = 0;
1499 }
1500
1501 VkResult radv_BindBufferMemory(
1502 VkDevice device,
1503 VkBuffer _buffer,
1504 VkDeviceMemory _memory,
1505 VkDeviceSize memoryOffset)
1506 {
1507 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1508 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1509
1510 if (mem) {
1511 buffer->bo = mem->bo;
1512 buffer->offset = memoryOffset;
1513 } else {
1514 buffer->bo = NULL;
1515 buffer->offset = 0;
1516 }
1517
1518 return VK_SUCCESS;
1519 }
1520
1521 VkResult radv_BindImageMemory(
1522 VkDevice device,
1523 VkImage _image,
1524 VkDeviceMemory _memory,
1525 VkDeviceSize memoryOffset)
1526 {
1527 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1528 RADV_FROM_HANDLE(radv_image, image, _image);
1529
1530 if (mem) {
1531 image->bo = mem->bo;
1532 image->offset = memoryOffset;
1533 } else {
1534 image->bo = NULL;
1535 image->offset = 0;
1536 }
1537
1538 return VK_SUCCESS;
1539 }
1540
1541 VkResult radv_QueueBindSparse(
1542 VkQueue queue,
1543 uint32_t bindInfoCount,
1544 const VkBindSparseInfo* pBindInfo,
1545 VkFence fence)
1546 {
1547 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1548 }
1549
1550 VkResult radv_CreateFence(
1551 VkDevice _device,
1552 const VkFenceCreateInfo* pCreateInfo,
1553 const VkAllocationCallbacks* pAllocator,
1554 VkFence* pFence)
1555 {
1556 RADV_FROM_HANDLE(radv_device, device, _device);
1557 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1558 sizeof(*fence), 8,
1559 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1560
1561 if (!fence)
1562 return VK_ERROR_OUT_OF_HOST_MEMORY;
1563
1564 memset(fence, 0, sizeof(*fence));
1565 fence->submitted = false;
1566 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1567 fence->fence = device->ws->create_fence();
1568 if (!fence->fence) {
1569 vk_free2(&device->alloc, pAllocator, fence);
1570 return VK_ERROR_OUT_OF_HOST_MEMORY;
1571 }
1572
1573 *pFence = radv_fence_to_handle(fence);
1574
1575 return VK_SUCCESS;
1576 }
1577
1578 void radv_DestroyFence(
1579 VkDevice _device,
1580 VkFence _fence,
1581 const VkAllocationCallbacks* pAllocator)
1582 {
1583 RADV_FROM_HANDLE(radv_device, device, _device);
1584 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1585
1586 if (!fence)
1587 return;
1588 device->ws->destroy_fence(fence->fence);
1589 vk_free2(&device->alloc, pAllocator, fence);
1590 }
1591
1592 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1593 {
1594 uint64_t current_time;
1595 struct timespec tv;
1596
1597 clock_gettime(CLOCK_MONOTONIC, &tv);
1598 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1599
1600 timeout = MIN2(UINT64_MAX - current_time, timeout);
1601
1602 return current_time + timeout;
1603 }
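/* Worked example: if current_time is 100 and the caller passes a relative
 * timeout of UINT64_MAX - 50, MIN2 clamps it to UINT64_MAX - 100, so the
 * sum saturates at UINT64_MAX instead of wrapping past zero. */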
1604
1605 VkResult radv_WaitForFences(
1606 VkDevice _device,
1607 uint32_t fenceCount,
1608 const VkFence* pFences,
1609 VkBool32 waitAll,
1610 uint64_t timeout)
1611 {
1612 RADV_FROM_HANDLE(radv_device, device, _device);
1613 timeout = radv_get_absolute_timeout(timeout);
1614
1615 if (!waitAll && fenceCount > 1) {
1616 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1617 }
1618
1619 for (uint32_t i = 0; i < fenceCount; ++i) {
1620 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1621 bool expired = false;
1622
1623 if (fence->signalled)
1624 continue;
1625
1626 if (!fence->submitted)
1627 return VK_TIMEOUT;
1628
1629 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1630 if (!expired)
1631 return VK_TIMEOUT;
1632
1633 fence->signalled = true;
1634 }
1635
1636 return VK_SUCCESS;
1637 }
1638
1639 VkResult radv_ResetFences(VkDevice device,
1640 uint32_t fenceCount,
1641 const VkFence *pFences)
1642 {
1643 for (unsigned i = 0; i < fenceCount; ++i) {
1644 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1645 fence->submitted = fence->signalled = false;
1646 }
1647
1648 return VK_SUCCESS;
1649 }
1650
1651 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1652 {
1653 RADV_FROM_HANDLE(radv_device, device, _device);
1654 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1655
1656 if (fence->signalled)
1657 return VK_SUCCESS;
1658 if (!fence->submitted)
1659 return VK_NOT_READY;
1660
1661 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1662 return VK_NOT_READY;
1663
1664 return VK_SUCCESS;
1665 }
1666
1667
1668 // Queue semaphore functions
1669
1670 VkResult radv_CreateSemaphore(
1671 VkDevice _device,
1672 const VkSemaphoreCreateInfo* pCreateInfo,
1673 const VkAllocationCallbacks* pAllocator,
1674 VkSemaphore* pSemaphore)
1675 {
1676 RADV_FROM_HANDLE(radv_device, device, _device);
1677 struct radeon_winsys_sem *sem;
1678
1679 sem = device->ws->create_sem(device->ws);
1680 if (!sem)
1681 return VK_ERROR_OUT_OF_HOST_MEMORY;
1682
1683 *pSemaphore = (VkSemaphore)sem;
1684 return VK_SUCCESS;
1685 }
1686
1687 void radv_DestroySemaphore(
1688 VkDevice _device,
1689 VkSemaphore _semaphore,
1690 const VkAllocationCallbacks* pAllocator)
1691 {
1692 RADV_FROM_HANDLE(radv_device, device, _device);
1693 struct radeon_winsys_sem *sem;
1694 if (!_semaphore)
1695 return;
1696
1697 sem = (struct radeon_winsys_sem *)_semaphore;
1698 device->ws->destroy_sem(sem);
1699 }
1700
1701 VkResult radv_CreateEvent(
1702 VkDevice _device,
1703 const VkEventCreateInfo* pCreateInfo,
1704 const VkAllocationCallbacks* pAllocator,
1705 VkEvent* pEvent)
1706 {
1707 RADV_FROM_HANDLE(radv_device, device, _device);
1708 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
1709 sizeof(*event), 8,
1710 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1711
1712 if (!event)
1713 return VK_ERROR_OUT_OF_HOST_MEMORY;
1714
1715 event->bo = device->ws->buffer_create(device->ws, 8, 8,
1716 RADEON_DOMAIN_GTT,
1717 RADEON_FLAG_CPU_ACCESS);
1718 if (!event->bo) {
1719 vk_free2(&device->alloc, pAllocator, event);
1720 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1721 }
1722
1723 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
1724
1725 *pEvent = radv_event_to_handle(event);
1726
1727 return VK_SUCCESS;
1728 }
1729
1730 void radv_DestroyEvent(
1731 VkDevice _device,
1732 VkEvent _event,
1733 const VkAllocationCallbacks* pAllocator)
1734 {
1735 RADV_FROM_HANDLE(radv_device, device, _device);
1736 RADV_FROM_HANDLE(radv_event, event, _event);
1737
1738 if (!event)
1739 return;
1740 device->ws->buffer_destroy(event->bo);
1741 vk_free2(&device->alloc, pAllocator, event);
1742 }
1743
1744 VkResult radv_GetEventStatus(
1745 VkDevice _device,
1746 VkEvent _event)
1747 {
1748 RADV_FROM_HANDLE(radv_event, event, _event);
1749
1750 if (*event->map == 1)
1751 return VK_EVENT_SET;
1752 return VK_EVENT_RESET;
1753 }
1754
1755 VkResult radv_SetEvent(
1756 VkDevice _device,
1757 VkEvent _event)
1758 {
1759 RADV_FROM_HANDLE(radv_event, event, _event);
1760 *event->map = 1;
1761
1762 return VK_SUCCESS;
1763 }
1764
1765 VkResult radv_ResetEvent(
1766 VkDevice _device,
1767 VkEvent _event)
1768 {
1769 RADV_FROM_HANDLE(radv_event, event, _event);
1770 *event->map = 0;
1771
1772 return VK_SUCCESS;
1773 }
1774
1775 VkResult radv_CreateBuffer(
1776 VkDevice _device,
1777 const VkBufferCreateInfo* pCreateInfo,
1778 const VkAllocationCallbacks* pAllocator,
1779 VkBuffer* pBuffer)
1780 {
1781 RADV_FROM_HANDLE(radv_device, device, _device);
1782 struct radv_buffer *buffer;
1783
1784 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1785
1786 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1787 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1788 if (buffer == NULL)
1789 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1790
1791 buffer->size = pCreateInfo->size;
1792 buffer->usage = pCreateInfo->usage;
1793 buffer->bo = NULL;
1794 buffer->offset = 0;
1795
1796 *pBuffer = radv_buffer_to_handle(buffer);
1797
1798 return VK_SUCCESS;
1799 }
1800
1801 void radv_DestroyBuffer(
1802 VkDevice _device,
1803 VkBuffer _buffer,
1804 const VkAllocationCallbacks* pAllocator)
1805 {
1806 RADV_FROM_HANDLE(radv_device, device, _device);
1807 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1808
1809 if (!buffer)
1810 return;
1811
1812 vk_free2(&device->alloc, pAllocator, buffer);
1813 }
1814
1815 static inline unsigned
1816 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
1817 {
1818 if (stencil)
1819 return image->surface.stencil_tiling_index[level];
1820 else
1821 return image->surface.tiling_index[level];
1822 }
1823
1824 static void
1825 radv_initialise_color_surface(struct radv_device *device,
1826 struct radv_color_buffer_info *cb,
1827 struct radv_image_view *iview)
1828 {
1829 const struct vk_format_description *desc;
1830 unsigned ntype, format, swap, endian;
1831 unsigned blend_clamp = 0, blend_bypass = 0;
1832 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
1833 uint64_t va;
1834 const struct radeon_surf *surf = &iview->image->surface;
1835 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
1836
1837 desc = vk_format_description(iview->vk_format);
1838
1839 memset(cb, 0, sizeof(*cb));
1840
1841 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1842 va += level_info->offset;
1843 cb->cb_color_base = va >> 8;
1844
1845 /* CMASK variables */
1846 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1847 va += iview->image->cmask.offset;
1848 cb->cb_color_cmask = va >> 8;
1849 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
1850
1851 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
1852 va += iview->image->dcc_offset;
1853 cb->cb_dcc_base = va >> 8;
1854
1855 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
1856 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
1857 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
1858
1859 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
1860 pitch_tile_max = level_info->nblk_x / 8 - 1;
1861 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1862 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
1863
1864 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1865 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1866
1867 /* Intensity is implemented as Red, so treat it that way. */
1868 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
1869 S_028C74_TILE_MODE_INDEX(tile_mode_index);
1870
1871 if (iview->image->samples > 1) {
1872 unsigned log_samples = util_logbase2(iview->image->samples);
1873
1874 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
1875 S_028C74_NUM_FRAGMENTS(log_samples);
1876 }
1877
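	/* FMASK holds the per-pixel fragment indices used for MSAA color
	 * compression. Without FMASK, these registers must still point at the
	 * color surface itself for fast clears to work. */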
1878 if (iview->image->fmask.size) {
1879 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
1880 if (device->physical_device->rad_info.chip_class >= CIK)
1881 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
1882 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
1883 cb->cb_color_fmask = va >> 8;
1884 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
1885 } else {
1886 /* This must be set for fast clear to work without FMASK. */
1887 if (device->physical_device->rad_info.chip_class >= CIK)
1888 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1889 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1890 cb->cb_color_fmask = cb->cb_color_base;
1891 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1892 }
1893
1894 ntype = radv_translate_color_numformat(iview->vk_format,
1895 desc,
1896 vk_format_get_first_non_void_channel(iview->vk_format));
1897 format = radv_translate_colorformat(iview->vk_format);
1898 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color format");
1900 swap = radv_translate_colorswap(iview->vk_format, FALSE);
1901 endian = radv_colorformat_endian_swap(format);
1902
1903 /* blend clamp should be set for all NORM/SRGB types */
1904 if (ntype == V_028C70_NUMBER_UNORM ||
1905 ntype == V_028C70_NUMBER_SNORM ||
1906 ntype == V_028C70_NUMBER_SRGB)
1907 blend_clamp = 1;
1908
1909 /* set blend bypass according to docs if SINT/UINT or
1910 8/24 COLOR variants */
1911 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
1912 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
1913 format == V_028C70_COLOR_X24_8_32_FLOAT) {
1914 blend_clamp = 0;
1915 blend_bypass = 1;
1916 }
#if 0
	/* Disabled: this presumably wants to flag 8-bit integer color formats
	 * (a cb->color_is_int8 field or similar) for the blend state, but the
	 * destination of the assignment was never wired up. */
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		cb->color_is_int8 = true;
#endif
1924 cb->cb_color_info = S_028C70_FORMAT(format) |
1925 S_028C70_COMP_SWAP(swap) |
1926 S_028C70_BLEND_CLAMP(blend_clamp) |
1927 S_028C70_BLEND_BYPASS(blend_bypass) |
1928 S_028C70_SIMPLE_FLOAT(1) |
1929 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
1930 ntype != V_028C70_NUMBER_SNORM &&
1931 ntype != V_028C70_NUMBER_SRGB &&
1932 format != V_028C70_COLOR_8_24 &&
1933 format != V_028C70_COLOR_24_8) |
1934 S_028C70_NUMBER_TYPE(ntype) |
1935 S_028C70_ENDIAN(endian);
	if (iview->image->samples > 1 && iview->image->fmask.size)
		cb->cb_color_info |= S_028C70_COMPRESSION(1);
1939
1940 if (iview->image->cmask.size &&
1941 (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
1942 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
1943
1944 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
1945 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
1946
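	/* Delta color compression (DCC) control, VI+ only. Note (assumption
	 * carried over from the radeonsi counterpart): the
	 * MAX_UNCOMPRESSED_BLOCK_SIZE encodings 0/1/2 mean 64B/128B/256B. */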
1947 if (device->physical_device->rad_info.chip_class >= VI) {
1948 unsigned max_uncompressed_block_size = 2;
1949 if (iview->image->samples > 1) {
1950 if (iview->image->surface.bpe == 1)
1951 max_uncompressed_block_size = 0;
1952 else if (iview->image->surface.bpe == 2)
1953 max_uncompressed_block_size = 1;
1954 }
1955
1956 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1957 S_028C78_INDEPENDENT_64B_BLOCKS(1);
1958 }
1959
1960 /* This must be set for fast clear to work without FMASK. */
1961 if (!iview->image->fmask.size &&
1962 device->physical_device->rad_info.chip_class == SI) {
1963 unsigned bankh = util_logbase2(iview->image->surface.bankh);
1964 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1965 }
1966 }
1967
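/* Fill the pre-baked DB_* register state for a depth/stencil view.
 * POLY_OFFSET_NEG_NUM_DB_BITS encodes the depth format's precision (24 or
 * 16 unorm bits, or 23 mantissa bits for D32_SFLOAT) so that polygon
 * offset units are scaled correctly. */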
1968 static void
1969 radv_initialise_ds_surface(struct radv_device *device,
1970 struct radv_ds_buffer_info *ds,
1971 struct radv_image_view *iview)
1972 {
1973 unsigned level = iview->base_mip;
1974 unsigned format;
1975 uint64_t va, s_offs, z_offs;
1976 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
1977 memset(ds, 0, sizeof(*ds));
1978 switch (iview->vk_format) {
1979 case VK_FORMAT_D24_UNORM_S8_UINT:
1980 case VK_FORMAT_X8_D24_UNORM_PACK32:
1981 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
1982 ds->offset_scale = 2.0f;
1983 break;
1984 case VK_FORMAT_D16_UNORM:
1985 case VK_FORMAT_D16_UNORM_S8_UINT:
1986 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
1987 ds->offset_scale = 4.0f;
1988 break;
1989 case VK_FORMAT_D32_SFLOAT:
1990 case VK_FORMAT_D32_SFLOAT_S8_UINT:
1991 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
1992 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
1993 ds->offset_scale = 1.0f;
1994 break;
1995 default:
1996 break;
1997 }
1998
1999 format = radv_translate_dbformat(iview->vk_format);
2000 if (format == V_028040_Z_INVALID) {
2001 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2002 }
2003
2004 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2005 s_offs = z_offs = va;
2006 z_offs += iview->image->surface.level[level].offset;
2007 s_offs += iview->image->surface.stencil_level[level].offset;
2008
2009 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2010 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2011 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2012 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2013 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2014
2015 if (iview->image->samples > 1)
2016 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2017
2018 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2019 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2020 else
2021 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2022
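	/* CIK+ decodes the full tiling layout from the kernel-supplied tile
	 * mode arrays; SI instead programs just a tile mode index per
	 * surface. */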
2023 if (device->physical_device->rad_info.chip_class >= CIK) {
2024 struct radeon_info *info = &device->physical_device->rad_info;
2025 unsigned tiling_index = iview->image->surface.tiling_index[level];
2026 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2027 unsigned macro_index = iview->image->surface.macro_tile_index;
2028 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2029 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2030 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2031
2032 ds->db_depth_info |=
2033 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2034 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2035 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2036 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2037 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2038 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2039 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2040 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2041 } else {
2042 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2043 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2044 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2045 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2046 }
2047
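	/* HTILE is the depth/stencil metadata surface backing HiZ and fast
	 * clears; it is only hooked up for the base mip level here. */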
2048 if (iview->image->htile.size && !level) {
2049 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2050 S_028040_ALLOW_EXPCLEAR(1);
2051
2052 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2053 /* Workaround: For a not yet understood reason, the
2054 * combination of MSAA, fast stencil clear and stencil
2055 * decompress messes with subsequent stencil buffer
2056 * uses. Problem was reproduced on Verde, Bonaire,
2057 * Tonga, and Carrizo.
2058 *
2059 * Disabling EXPCLEAR works around the problem.
2060 *
2061 * Check piglit's arb_texture_multisample-stencil-clear
2062 * test if you want to try changing this.
2063 */
2064 if (iview->image->samples <= 1)
2065 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else {
			/* Use all of the htile_buffer for depth if there's no stencil. */
			ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
		}
2069
2070 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2071 iview->image->htile.offset;
2072 ds->db_htile_data_base = va >> 8;
2073 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2074 } else {
2075 ds->db_htile_data_base = 0;
2076 ds->db_htile_surface = 0;
2077 }
2078
2079 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2080 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2081
2082 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2083 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2084 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2085 }
2086
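/* Framebuffers pre-bake the CB/DS register state for every attachment at
 * creation time, so command buffer recording only needs to copy these
 * values into the command stream. */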
2087 VkResult radv_CreateFramebuffer(
2088 VkDevice _device,
2089 const VkFramebufferCreateInfo* pCreateInfo,
2090 const VkAllocationCallbacks* pAllocator,
2091 VkFramebuffer* pFramebuffer)
2092 {
2093 RADV_FROM_HANDLE(radv_device, device, _device);
2094 struct radv_framebuffer *framebuffer;
2095
2096 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2097
2098 size_t size = sizeof(*framebuffer) +
2099 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2100 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2101 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2102 if (framebuffer == NULL)
2103 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2104
2105 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2106 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2107 VkImageView _iview = pCreateInfo->pAttachments[i];
2108 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2109 framebuffer->attachments[i].attachment = iview;
2110 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2111 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2112 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2113 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2114 }
2115 }
2116
2117 framebuffer->width = pCreateInfo->width;
2118 framebuffer->height = pCreateInfo->height;
2119 framebuffer->layers = pCreateInfo->layers;
2120
2121 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2122 return VK_SUCCESS;
2123 }
2124
2125 void radv_DestroyFramebuffer(
2126 VkDevice _device,
2127 VkFramebuffer _fb,
2128 const VkAllocationCallbacks* pAllocator)
2129 {
2130 RADV_FROM_HANDLE(radv_device, device, _device);
2131 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2132
2133 if (!fb)
2134 return;
2135 vk_free2(&device->alloc, pAllocator, fb);
2136 }
2137
2138 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2139 {
2140 switch (address_mode) {
2141 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2142 return V_008F30_SQ_TEX_WRAP;
2143 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2144 return V_008F30_SQ_TEX_MIRROR;
2145 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2146 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2147 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2148 return V_008F30_SQ_TEX_CLAMP_BORDER;
2149 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2150 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2151 default:
2152 unreachable("illegal tex wrap mode");
2153 break;
2154 }
2155 }
2156
2157 static unsigned
2158 radv_tex_compare(VkCompareOp op)
2159 {
2160 switch (op) {
2161 case VK_COMPARE_OP_NEVER:
2162 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2163 case VK_COMPARE_OP_LESS:
2164 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2165 case VK_COMPARE_OP_EQUAL:
2166 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2167 case VK_COMPARE_OP_LESS_OR_EQUAL:
2168 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2169 case VK_COMPARE_OP_GREATER:
2170 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2171 case VK_COMPARE_OP_NOT_EQUAL:
2172 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2173 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2174 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2175 case VK_COMPARE_OP_ALWAYS:
2176 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2177 default:
2178 unreachable("illegal compare mode");
2179 break;
2180 }
2181 }
2182
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
			V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter\n");
		return 0;
	}
}
2199
2200 static unsigned
2201 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2202 {
2203 switch (mode) {
2204 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2205 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2206 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2207 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2208 default:
2209 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2210 }
2211 }
2212
2213 static unsigned
2214 radv_tex_bordercolor(VkBorderColor bcolor)
2215 {
2216 switch (bcolor) {
2217 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2218 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2219 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2220 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2221 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2222 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2223 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2224 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2225 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2226 default:
2227 break;
2228 }
2229 return 0;
2230 }
2231
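/* Map VkSamplerCreateInfo::maxAnisotropy (1..16) to the hardware's log2
 * encoding: 0 = 1x, 1 = 2x, 2 = 4x, 3 = 8x, 4 = 16x. */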
2232 static unsigned
2233 radv_tex_aniso_filter(unsigned filter)
2234 {
2235 if (filter < 2)
2236 return 0;
2237 if (filter < 4)
2238 return 1;
2239 if (filter < 8)
2240 return 2;
2241 if (filter < 16)
2242 return 3;
2243 return 4;
2244 }
2245
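/* Pack the four sampler state dwords consumed by the texture unit. The LOD
 * fields are unsigned 4.8 fixed point (hence CLAMP to 0..15 and
 * S_FIXED(..., 8)); the LOD bias is signed fixed point with the same
 * fractional precision. */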
2246 static void
2247 radv_init_sampler(struct radv_device *device,
2248 struct radv_sampler *sampler,
2249 const VkSamplerCreateInfo *pCreateInfo)
2250 {
2251 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2252 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2253 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2254 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2255
2256 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2257 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2258 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2259 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2260 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2261 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2262 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2263 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2264 S_008F30_DISABLE_CUBE_WRAP(0) |
2265 S_008F30_COMPAT_MODE(is_vi));
2266 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2267 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2268 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2269 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2270 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2271 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2272 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2273 S_008F38_MIP_POINT_PRECLAMP(1) |
2274 S_008F38_DISABLE_LSB_CEIL(1) |
2275 S_008F38_FILTER_PREC_FIX(1) |
2276 S_008F38_ANISO_OVERRIDE(is_vi));
2277 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2278 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2279 }
2280
2281 VkResult radv_CreateSampler(
2282 VkDevice _device,
2283 const VkSamplerCreateInfo* pCreateInfo,
2284 const VkAllocationCallbacks* pAllocator,
2285 VkSampler* pSampler)
2286 {
2287 RADV_FROM_HANDLE(radv_device, device, _device);
2288 struct radv_sampler *sampler;
2289
2290 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2291
2292 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2293 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2294 if (!sampler)
2295 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2296
2297 radv_init_sampler(device, sampler, pCreateInfo);
2298 *pSampler = radv_sampler_to_handle(sampler);
2299
2300 return VK_SUCCESS;
2301 }
2302
2303 void radv_DestroySampler(
2304 VkDevice _device,
2305 VkSampler _sampler,
2306 const VkAllocationCallbacks* pAllocator)
2307 {
2308 RADV_FROM_HANDLE(radv_device, device, _device);
2309 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2310
2311 if (!sampler)
2312 return;
2313 vk_free2(&device->alloc, pAllocator, sampler);
2314 }
2315
2316
2317 /* vk_icd.h does not declare this function, so we declare it here to
2318 * suppress Wmissing-prototypes.
2319 */
2320 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2321 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2322
2323 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2324 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2325 {
2326 /* For the full details on loader interface versioning, see
2327 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2328 * What follows is a condensed summary, to help you navigate the large and
2329 * confusing official doc.
2330 *
2331 * - Loader interface v0 is incompatible with later versions. We don't
2332 * support it.
2333 *
2334 * - In loader interface v1:
2335 * - The first ICD entrypoint called by the loader is
2336 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2337 * entrypoint.
2338 * - The ICD must statically expose no other Vulkan symbol unless it is
2339 * linked with -Bsymbolic.
2340 * - Each dispatchable Vulkan handle created by the ICD must be
2341 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2342 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2343 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2344 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2345 * such loader-managed surfaces.
2346 *
2347 * - Loader interface v2 differs from v1 in:
2348 * - The first ICD entrypoint called by the loader is
2349 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2350 * statically expose this entrypoint.
2351 *
2352 * - Loader interface v3 differs from v2 in:
2353 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
 *      vkDestroySurfaceKHR(), and all other API that uses VkSurfaceKHR,
2355 * because the loader no longer does so.
2356 */
2357 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2358 return VK_SUCCESS;
2359 }