radv: Incorporate GPU family into cache UUID.
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <dlfcn.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include "radv_private.h"
35 #include "util/strtod.h"
36
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "util/debug.h"
46 struct radv_dispatch_table dtable;
47
48 static int
49 radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
50 {
51 Dl_info info;
52 struct stat st;
53 if (!dladdr(ptr, &info) || !info.dli_fname) {
54 return -1;
55 }
56 if (stat(info.dli_fname, &st)) {
57 return -1;
58 }
59 *timestamp = st.st_mtim.tv_sec;
60 return 0;
61 }
62
63 static int
64 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
65 {
66 uint32_t mesa_timestamp, llvm_timestamp;
67 uint16_t f = family;
68 memset(uuid, 0, VK_UUID_SIZE);
69 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
70 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
71 return -1;
72
73 memcpy(uuid, &mesa_timestamp, 4);
74 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
75 memcpy((char*)uuid + 8, &f, 2);
76 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
77 return 0;
78 }
79
80 static VkResult
81 radv_physical_device_init(struct radv_physical_device *device,
82 struct radv_instance *instance,
83 const char *path)
84 {
85 VkResult result;
86 drmVersionPtr version;
87 int fd;
88
89 fd = open(path, O_RDWR | O_CLOEXEC);
90 if (fd < 0)
91 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
92 "failed to open %s: %m", path);
93
94 version = drmGetVersion(fd);
95 if (!version) {
96 close(fd);
97 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
98 "failed to get version %s: %m", path);
99 }
100
101 if (strcmp(version->name, "amdgpu")) {
102 drmFreeVersion(version);
103 close(fd);
104 return VK_ERROR_INCOMPATIBLE_DRIVER;
105 }
106 drmFreeVersion(version);
107
108 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
109 device->instance = instance;
110 assert(strlen(path) < ARRAY_SIZE(device->path));
111 strncpy(device->path, path, ARRAY_SIZE(device->path));
112
113 device->ws = radv_amdgpu_winsys_create(fd);
114 if (!device->ws) {
115 result = VK_ERROR_INCOMPATIBLE_DRIVER;
116 goto fail;
117 }
118 device->ws->query_info(device->ws, &device->rad_info);
119 result = radv_init_wsi(device);
120 if (result != VK_SUCCESS) {
121 device->ws->destroy(device->ws);
122 goto fail;
123 }
124
125 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
126 radv_finish_wsi(device);
127 device->ws->destroy(device->ws);
128 goto fail;
129 }
130
131 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
132 device->name = device->rad_info.name;
133 return VK_SUCCESS;
134
135 fail:
136 close(fd);
137 return result;
138 }
139
/* Tear down a physical device initialized by radv_physical_device_init().
 * WSI must be finished before the winsys is destroyed, since WSI state
 * was created on top of it; destroying the winsys also closes its fd. */
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
    radv_finish_wsi(device);
    device->ws->destroy(device->ws);
}
146
/* Instance-level extensions advertised by the driver.  Surface
 * extensions for each window system are compiled in only when the
 * corresponding platform was enabled at build time. */
static const VkExtensionProperties global_extensions[] = {
    {
        .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
        .specVersion = 25,
    },
#ifdef VK_USE_PLATFORM_XCB_KHR
    {
        .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
        .specVersion = 6,
    },
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
    {
        .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
        .specVersion = 6,
    },
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
    {
        .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
        .specVersion = 5,
    },
#endif
};
171
/* Device-level extensions advertised by the driver. */
static const VkExtensionProperties device_extensions[] = {
    {
        .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
        .specVersion = 68,
    },
};
178
/* Fallback VkAllocationCallbacks used when the application supplies
 * none.  NOTE(review): the 'align' argument is ignored and plain
 * malloc/realloc are used — presumably all internal allocations request
 * alignment <= alignof(max_align_t); confirm before requesting larger
 * alignments. */
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
    return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
    return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
    free(pMemory);
}

/* Callback table wired to the C heap; pUserData is unused. */
static const VkAllocationCallbacks default_alloc = {
    .pUserData = NULL,
    .pfnAllocation = default_alloc_func,
    .pfnReallocation = default_realloc_func,
    .pfnFree = default_free_func,
};
205
206 VkResult radv_CreateInstance(
207 const VkInstanceCreateInfo* pCreateInfo,
208 const VkAllocationCallbacks* pAllocator,
209 VkInstance* pInstance)
210 {
211 struct radv_instance *instance;
212
213 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
214
215 uint32_t client_version;
216 if (pCreateInfo->pApplicationInfo &&
217 pCreateInfo->pApplicationInfo->apiVersion != 0) {
218 client_version = pCreateInfo->pApplicationInfo->apiVersion;
219 } else {
220 client_version = VK_MAKE_VERSION(1, 0, 0);
221 }
222
223 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
224 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
225 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
226 "Client requested version %d.%d.%d",
227 VK_VERSION_MAJOR(client_version),
228 VK_VERSION_MINOR(client_version),
229 VK_VERSION_PATCH(client_version));
230 }
231
232 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
233 bool found = false;
234 for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
235 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
236 global_extensions[j].extensionName) == 0) {
237 found = true;
238 break;
239 }
240 }
241 if (!found)
242 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
243 }
244
245 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
246 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
247 if (!instance)
248 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
249
250 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
251
252 if (pAllocator)
253 instance->alloc = *pAllocator;
254 else
255 instance->alloc = default_alloc;
256
257 instance->apiVersion = client_version;
258 instance->physicalDeviceCount = -1;
259
260 _mesa_locale_init();
261
262 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
263
264 *pInstance = radv_instance_to_handle(instance);
265
266 return VK_SUCCESS;
267 }
268
269 void radv_DestroyInstance(
270 VkInstance _instance,
271 const VkAllocationCallbacks* pAllocator)
272 {
273 RADV_FROM_HANDLE(radv_instance, instance, _instance);
274
275 if (instance->physicalDeviceCount > 0) {
276 /* We support at most one physical device. */
277 assert(instance->physicalDeviceCount == 1);
278 radv_physical_device_finish(&instance->physicalDevice);
279 }
280
281 VG(VALGRIND_DESTROY_MEMPOOL(instance));
282
283 _mesa_locale_fini();
284
285 vk_free(&instance->alloc, instance);
286 }
287
288 VkResult radv_EnumeratePhysicalDevices(
289 VkInstance _instance,
290 uint32_t* pPhysicalDeviceCount,
291 VkPhysicalDevice* pPhysicalDevices)
292 {
293 RADV_FROM_HANDLE(radv_instance, instance, _instance);
294 VkResult result;
295
296 if (instance->physicalDeviceCount < 0) {
297 char path[20];
298 for (unsigned i = 0; i < 8; i++) {
299 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
300 result = radv_physical_device_init(&instance->physicalDevice,
301 instance, path);
302 if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
303 break;
304 }
305
306 if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
307 instance->physicalDeviceCount = 0;
308 } else if (result == VK_SUCCESS) {
309 instance->physicalDeviceCount = 1;
310 } else {
311 return result;
312 }
313 }
314
315 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
316 * otherwise it's an inout parameter.
317 *
318 * The Vulkan spec (git aaed022) says:
319 *
320 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
321 * that is initialized with the number of devices the application is
322 * prepared to receive handles to. pname:pPhysicalDevices is pointer to
323 * an array of at least this many VkPhysicalDevice handles [...].
324 *
325 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
326 * overwrites the contents of the variable pointed to by
327 * pPhysicalDeviceCount with the number of physical devices in in the
328 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
329 * pPhysicalDeviceCount with the number of physical handles written to
330 * pPhysicalDevices.
331 */
332 if (!pPhysicalDevices) {
333 *pPhysicalDeviceCount = instance->physicalDeviceCount;
334 } else if (*pPhysicalDeviceCount >= 1) {
335 pPhysicalDevices[0] = radv_physical_device_to_handle(&instance->physicalDevice);
336 *pPhysicalDeviceCount = 1;
337 } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
338 return VK_INCOMPLETE;
339 } else {
340 *pPhysicalDeviceCount = 0;
341 }
342
343 return VK_SUCCESS;
344 }
345
346 void radv_GetPhysicalDeviceFeatures(
347 VkPhysicalDevice physicalDevice,
348 VkPhysicalDeviceFeatures* pFeatures)
349 {
350 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
351
352 memset(pFeatures, 0, sizeof(*pFeatures));
353
354 *pFeatures = (VkPhysicalDeviceFeatures) {
355 .robustBufferAccess = true,
356 .fullDrawIndexUint32 = true,
357 .imageCubeArray = true,
358 .independentBlend = true,
359 .geometryShader = false,
360 .tessellationShader = false,
361 .sampleRateShading = false,
362 .dualSrcBlend = true,
363 .logicOp = true,
364 .multiDrawIndirect = true,
365 .drawIndirectFirstInstance = true,
366 .depthClamp = true,
367 .depthBiasClamp = true,
368 .fillModeNonSolid = true,
369 .depthBounds = true,
370 .wideLines = true,
371 .largePoints = true,
372 .alphaToOne = true,
373 .multiViewport = false,
374 .samplerAnisotropy = false, /* FINISHME */
375 .textureCompressionETC2 = false,
376 .textureCompressionASTC_LDR = false,
377 .textureCompressionBC = true,
378 .occlusionQueryPrecise = true,
379 .pipelineStatisticsQuery = false,
380 .vertexPipelineStoresAndAtomics = true,
381 .fragmentStoresAndAtomics = true,
382 .shaderTessellationAndGeometryPointSize = true,
383 .shaderImageGatherExtended = false,
384 .shaderStorageImageExtendedFormats = false,
385 .shaderStorageImageMultisample = false,
386 .shaderUniformBufferArrayDynamicIndexing = true,
387 .shaderSampledImageArrayDynamicIndexing = true,
388 .shaderStorageBufferArrayDynamicIndexing = true,
389 .shaderStorageImageArrayDynamicIndexing = true,
390 .shaderStorageImageReadWithoutFormat = false,
391 .shaderStorageImageWriteWithoutFormat = true,
392 .shaderClipDistance = true,
393 .shaderCullDistance = true,
394 .shaderFloat64 = false,
395 .shaderInt64 = false,
396 .shaderInt16 = false,
397 .alphaToOne = true,
398 .variableMultisampleRate = false,
399 .inheritedQueries = false,
400 };
401 }
402
/* Report device limits and identification.  Most limits are static
 * driver-chosen values; only timestampPeriod is derived from the GPU's
 * crystal clock.  pipelineCacheUUID is the build fingerprint computed
 * by radv_device_get_cache_uuid(). */
void radv_GetPhysicalDeviceProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties*                 pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    /* All four sample counts (1/2/4/8) advertised for every
     * framebuffer/image category below. */
    VkSampleCountFlags sample_counts = 0xf;
    VkPhysicalDeviceLimits limits = {
        .maxImageDimension1D                      = (1 << 14),
        .maxImageDimension2D                      = (1 << 14),
        .maxImageDimension3D                      = (1 << 11),
        .maxImageDimensionCube                    = (1 << 14),
        .maxImageArrayLayers                      = (1 << 11),
        .maxTexelBufferElements                   = 128 * 1024 * 1024,
        .maxUniformBufferRange                    = UINT32_MAX,
        .maxStorageBufferRange                    = UINT32_MAX,
        .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
        .maxMemoryAllocationCount                 = UINT32_MAX,
        .maxSamplerAllocationCount                = 64 * 1024,
        .bufferImageGranularity                   = 64, /* A cache line */
        .sparseAddressSpaceSize                   = 0,  /* no sparse support */
        .maxBoundDescriptorSets                   = MAX_SETS,
        .maxPerStageDescriptorSamplers            = 64,
        .maxPerStageDescriptorUniformBuffers      = 64,
        .maxPerStageDescriptorStorageBuffers      = 64,
        .maxPerStageDescriptorSampledImages       = 64,
        .maxPerStageDescriptorStorageImages       = 64,
        .maxPerStageDescriptorInputAttachments    = 64,
        .maxPerStageResources                     = 128,
        .maxDescriptorSetSamplers                 = 256,
        .maxDescriptorSetUniformBuffers           = 256,
        .maxDescriptorSetUniformBuffersDynamic    = 256,
        .maxDescriptorSetStorageBuffers           = 256,
        .maxDescriptorSetStorageBuffersDynamic    = 256,
        .maxDescriptorSetSampledImages            = 256,
        .maxDescriptorSetStorageImages            = 256,
        .maxDescriptorSetInputAttachments         = 256,
        .maxVertexInputAttributes                 = 32,
        .maxVertexInputBindings                   = 32,
        .maxVertexInputAttributeOffset            = 2047,
        .maxVertexInputBindingStride              = 2048,
        .maxVertexOutputComponents                = 128,
        /* Tessellation is not implemented (see pFeatures above), so all
         * tessellation limits are zero. */
        .maxTessellationGenerationLevel           = 0,
        .maxTessellationPatchSize                 = 0,
        .maxTessellationControlPerVertexInputComponents = 0,
        .maxTessellationControlPerVertexOutputComponents = 0,
        .maxTessellationControlPerPatchOutputComponents = 0,
        .maxTessellationControlTotalOutputComponents = 0,
        .maxTessellationEvaluationInputComponents = 0,
        .maxTessellationEvaluationOutputComponents = 0,
        .maxGeometryShaderInvocations             = 32,
        .maxGeometryInputComponents               = 64,
        .maxGeometryOutputComponents              = 128,
        .maxGeometryOutputVertices                = 256,
        .maxGeometryTotalOutputComponents         = 1024,
        .maxFragmentInputComponents               = 128,
        .maxFragmentOutputAttachments             = 8,
        .maxFragmentDualSrcAttachments            = 2,
        .maxFragmentCombinedOutputResources       = 8,
        .maxComputeSharedMemorySize               = 32768,
        .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
        .maxComputeWorkGroupInvocations           = 16 * 1024,
        .maxComputeWorkGroupSize = {
            16 * 1024/*devinfo->max_cs_threads*/,
            16 * 1024,
            16 * 1024
        },
        .subPixelPrecisionBits                    = 4 /* FIXME */,
        .subTexelPrecisionBits                    = 4 /* FIXME */,
        .mipmapPrecisionBits                      = 4 /* FIXME */,
        .maxDrawIndexedIndexValue                 = UINT32_MAX,
        .maxDrawIndirectCount                     = UINT32_MAX,
        .maxSamplerLodBias                        = 16,
        .maxSamplerAnisotropy                     = 16,
        .maxViewports                             = MAX_VIEWPORTS,
        .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
        .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
        .viewportSubPixelBits                     = 13, /* We take a float? */
        .minMemoryMapAlignment                    = 4096, /* A page */
        .minTexelBufferOffsetAlignment            = 1,
        .minUniformBufferOffsetAlignment          = 4,
        .minStorageBufferOffsetAlignment          = 4,
        .minTexelOffset                           = -8,
        .maxTexelOffset                           = 7,
        .minTexelGatherOffset                     = -8,
        .maxTexelGatherOffset                     = 7,
        .minInterpolationOffset                   = 0, /* FIXME */
        .maxInterpolationOffset                   = 0, /* FIXME */
        .subPixelInterpolationOffsetBits          = 0, /* FIXME */
        .maxFramebufferWidth                      = (1 << 14),
        .maxFramebufferHeight                     = (1 << 14),
        .maxFramebufferLayers                     = (1 << 10),
        .framebufferColorSampleCounts             = sample_counts,
        .framebufferDepthSampleCounts             = sample_counts,
        .framebufferStencilSampleCounts           = sample_counts,
        .framebufferNoAttachmentsSampleCounts     = sample_counts,
        .maxColorAttachments                      = MAX_RTS,
        .sampledImageColorSampleCounts            = sample_counts,
        .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
        .sampledImageDepthSampleCounts            = sample_counts,
        .sampledImageStencilSampleCounts          = sample_counts,
        .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
        .maxSampleMaskWords                       = 1,
        .timestampComputeAndGraphics              = false,
        /* Convert the crystal clock (kHz) into nanoseconds per tick. */
        .timestampPeriod                          = 100000.0 / pdevice->rad_info.clock_crystal_freq,
        .maxClipDistances                         = 8,
        .maxCullDistances                         = 8,
        .maxCombinedClipAndCullDistances          = 8,
        .discreteQueuePriorities                  = 1,
        .pointSizeRange                           = { 0.125, 255.875 },
        .lineWidthRange                           = { 0.0, 7.9921875 },
        .pointSizeGranularity                     = (1.0 / 8.0),
        .lineWidthGranularity                     = (1.0 / 128.0),
        .strictLines                              = false, /* FINISHME */
        .standardSampleLocations                  = true,
        .optimalBufferCopyOffsetAlignment         = 128,
        .optimalBufferCopyRowPitchAlignment       = 128,
        .nonCoherentAtomSize                      = 64,
    };

    *pProperties = (VkPhysicalDeviceProperties) {
        .apiVersion = VK_MAKE_VERSION(1, 0, 5),
        .driverVersion = 1,
        .vendorID = 0x1002, /* PCI vendor id: AMD/ATI */
        .deviceID = pdevice->rad_info.pci_id,
        .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
        .limits = limits,
        .sparseProperties = {0}, /* sparse resources not implemented */
    };

    /* NOTE(review): unbounded strcpy — presumably rad_info.name (the
     * kernel-provided marketing name) is always shorter than
     * VK_MAX_PHYSICAL_DEVICE_NAME_SIZE; confirm, or bound the copy. */
    strcpy(pProperties->deviceName, pdevice->name);
    memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}
535
536 void radv_GetPhysicalDeviceQueueFamilyProperties(
537 VkPhysicalDevice physicalDevice,
538 uint32_t* pCount,
539 VkQueueFamilyProperties* pQueueFamilyProperties)
540 {
541 if (pQueueFamilyProperties == NULL) {
542 *pCount = 1;
543 return;
544 }
545 assert(*pCount >= 1);
546
547 *pQueueFamilyProperties = (VkQueueFamilyProperties) {
548 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
549 VK_QUEUE_COMPUTE_BIT |
550 VK_QUEUE_TRANSFER_BIT,
551 .queueCount = 1,
552 .timestampValidBits = 64,
553 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
554 };
555 }
556
/* Describe the memory types/heaps exposed to applications.
 *
 * Heaps:  0 = invisible VRAM, 1 = CPU-visible VRAM, 2 = GART.
 * Types:  0 = device-local (invisible VRAM)
 *         1 = host-visible write-combined GART
 *         2 = device-local + host-visible (visible VRAM)
 *         3 = host-visible cached GART
 * radv_AllocateMemory() hard-codes these indices; keep them in sync. */
void radv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

    pMemoryProperties->memoryTypeCount = 4;
    pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
        .heapIndex = 0,
    };
    pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = 2,
    };
    pMemoryProperties->memoryTypes[2] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        .heapIndex = 1,
    };
    pMemoryProperties->memoryTypes[3] = (VkMemoryType) {
        .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
        .heapIndex = 2,
    };

    pMemoryProperties->memoryHeapCount = 3;
    /* Heap 0 is the VRAM the CPU cannot see (total minus the visible
     * aperture). */
    pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
        .size = physical_device->rad_info.vram_size -
                physical_device->rad_info.visible_vram_size,
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
    pMemoryProperties->memoryHeaps[1] = (VkMemoryHeap) {
        .size = physical_device->rad_info.visible_vram_size,
        .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    };
    pMemoryProperties->memoryHeaps[2] = (VkMemoryHeap) {
        .size = physical_device->rad_info.gart_size,
        .flags = 0,
    };
}
601
/* Initialize the device's single queue object.  Always succeeds; the
 * VkResult return is kept for symmetry with other init functions. */
static VkResult
radv_queue_init(struct radv_device *device, struct radv_queue *queue)
{
    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    queue->device = device;

    return VK_SUCCESS;
}
610
/* Queue teardown counterpart of radv_queue_init(); the queue currently
 * owns no resources, so there is nothing to release. */
static void
radv_queue_finish(struct radv_queue *queue)
{
}
615
616 VkResult radv_CreateDevice(
617 VkPhysicalDevice physicalDevice,
618 const VkDeviceCreateInfo* pCreateInfo,
619 const VkAllocationCallbacks* pAllocator,
620 VkDevice* pDevice)
621 {
622 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
623 VkResult result;
624 struct radv_device *device;
625
626 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
627 bool found = false;
628 for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
629 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
630 device_extensions[j].extensionName) == 0) {
631 found = true;
632 break;
633 }
634 }
635 if (!found)
636 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
637 }
638
639 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
640 sizeof(*device), 8,
641 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
642 if (!device)
643 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
644
645 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
646 device->instance = physical_device->instance;
647
648 device->ws = physical_device->ws;
649 if (pAllocator)
650 device->alloc = *pAllocator;
651 else
652 device->alloc = physical_device->instance->alloc;
653
654 device->hw_ctx = device->ws->ctx_create(device->ws);
655 if (!device->hw_ctx) {
656 result = VK_ERROR_OUT_OF_HOST_MEMORY;
657 goto fail_free;
658 }
659
660 radv_queue_init(device, &device->queue);
661
662 result = radv_device_init_meta(device);
663 if (result != VK_SUCCESS) {
664 device->ws->ctx_destroy(device->hw_ctx);
665 goto fail_free;
666 }
667 device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
668 device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", false);
669
670 if (device->allow_fast_clears && device->allow_dcc)
671 radv_finishme("DCC fast clears have not been tested\n");
672
673 radv_device_init_msaa(device);
674 device->empty_cs = device->ws->cs_create(device->ws, RING_GFX);
675 radeon_emit(device->empty_cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
676 radeon_emit(device->empty_cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
677 radeon_emit(device->empty_cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));
678 device->ws->cs_finalize(device->empty_cs);
679 *pDevice = radv_device_to_handle(device);
680 return VK_SUCCESS;
681 fail_free:
682 vk_free(&device->alloc, device);
683 return result;
684 }
685
/* Destroy a logical device created by radv_CreateDevice().
 * NOTE(review): device->empty_cs (created in radv_CreateDevice) is not
 * released here — looks like a leak unless ctx_destroy reclaims it;
 * verify against the winsys implementation. */
void radv_DestroyDevice(
    VkDevice                                    _device,
    const VkAllocationCallbacks*                pAllocator)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    device->ws->ctx_destroy(device->hw_ctx);
    radv_queue_finish(&device->queue);
    radv_device_finish_meta(device);

    vk_free(&device->alloc, device);
}
698
699 VkResult radv_EnumerateInstanceExtensionProperties(
700 const char* pLayerName,
701 uint32_t* pPropertyCount,
702 VkExtensionProperties* pProperties)
703 {
704 unsigned i;
705 if (pProperties == NULL) {
706 *pPropertyCount = ARRAY_SIZE(global_extensions);
707 return VK_SUCCESS;
708 }
709
710 for (i = 0; i < *pPropertyCount; i++)
711 memcpy(&pProperties[i], &global_extensions[i], sizeof(VkExtensionProperties));
712
713 *pPropertyCount = i;
714 if (i < ARRAY_SIZE(global_extensions))
715 return VK_INCOMPLETE;
716
717 return VK_SUCCESS;
718 }
719
720 VkResult radv_EnumerateDeviceExtensionProperties(
721 VkPhysicalDevice physicalDevice,
722 const char* pLayerName,
723 uint32_t* pPropertyCount,
724 VkExtensionProperties* pProperties)
725 {
726 unsigned i;
727
728 if (pProperties == NULL) {
729 *pPropertyCount = ARRAY_SIZE(device_extensions);
730 return VK_SUCCESS;
731 }
732
733 for (i = 0; i < *pPropertyCount; i++)
734 memcpy(&pProperties[i], &device_extensions[i], sizeof(VkExtensionProperties));
735
736 *pPropertyCount = i;
737 if (i < ARRAY_SIZE(device_extensions))
738 return VK_INCOMPLETE;
739 return VK_SUCCESS;
740 }
741
742 VkResult radv_EnumerateInstanceLayerProperties(
743 uint32_t* pPropertyCount,
744 VkLayerProperties* pProperties)
745 {
746 if (pProperties == NULL) {
747 *pPropertyCount = 0;
748 return VK_SUCCESS;
749 }
750
751 /* None supported at this time */
752 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
753 }
754
755 VkResult radv_EnumerateDeviceLayerProperties(
756 VkPhysicalDevice physicalDevice,
757 uint32_t* pPropertyCount,
758 VkLayerProperties* pProperties)
759 {
760 if (pProperties == NULL) {
761 *pPropertyCount = 0;
762 return VK_SUCCESS;
763 }
764
765 /* None supported at this time */
766 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
767 }
768
/* Return the device's single queue.  Only index 0 is valid.
 * NOTE(review): queueNodeIndex is ignored — presumably only family 0
 * exists (see radv_GetPhysicalDeviceQueueFamilyProperties); an assert
 * on it would catch misuse. */
void radv_GetDeviceQueue(
    VkDevice                                    _device,
    uint32_t                                    queueNodeIndex,
    uint32_t                                    queueIndex,
    VkQueue*                                    pQueue)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    assert(queueIndex == 0);

    *pQueue = radv_queue_to_handle(&device->queue);
}
781
782 VkResult radv_QueueSubmit(
783 VkQueue _queue,
784 uint32_t submitCount,
785 const VkSubmitInfo* pSubmits,
786 VkFence _fence)
787 {
788 RADV_FROM_HANDLE(radv_queue, queue, _queue);
789 RADV_FROM_HANDLE(radv_fence, fence, _fence);
790 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
791 struct radeon_winsys_ctx *ctx = queue->device->hw_ctx;
792 int ret;
793
794 for (uint32_t i = 0; i < submitCount; i++) {
795 struct radeon_winsys_cs **cs_array;
796 bool can_patch = true;
797
798 if (!pSubmits[i].commandBufferCount)
799 continue;
800
801 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
802 pSubmits[i].commandBufferCount);
803
804 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
805 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
806 pSubmits[i].pCommandBuffers[j]);
807 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
808
809 cs_array[j] = cmd_buffer->cs;
810 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
811 can_patch = false;
812 }
813 ret = queue->device->ws->cs_submit(ctx, cs_array,
814 pSubmits[i].commandBufferCount,
815 can_patch, base_fence);
816 if (ret)
817 radv_loge("failed to submit CS %d\n", i);
818 free(cs_array);
819 }
820
821 if (fence) {
822 if (!submitCount)
823 ret = queue->device->ws->cs_submit(ctx, &queue->device->empty_cs,
824 1, false, base_fence);
825
826 fence->submitted = true;
827 }
828
829 return VK_SUCCESS;
830 }
831
/* Block until the queue's hardware context has drained.  Since the
 * device has a single queue sharing one context, this is equivalent to
 * a device-wide wait. */
VkResult radv_QueueWaitIdle(
    VkQueue                                     _queue)
{
    RADV_FROM_HANDLE(radv_queue, queue, _queue);

    queue->device->ws->ctx_wait_idle(queue->device->hw_ctx);
    return VK_SUCCESS;
}
840
/* Block until all work on the device's hardware context completes. */
VkResult radv_DeviceWaitIdle(
    VkDevice                                    _device)
{
    RADV_FROM_HANDLE(radv_device, device, _device);

    device->ws->ctx_wait_idle(device->hw_ctx);
    return VK_SUCCESS;
}
849
/* Resolve a Vulkan entry point by name.  The instance handle is unused:
 * lookup goes through the generated static dispatch table. */
PFN_vkVoidFunction radv_GetInstanceProcAddr(
    VkInstance                                  instance,
    const char*                                 pName)
{
    return radv_lookup_entrypoint(pName);
}
856
857 /* The loader wants us to expose a second GetInstanceProcAddr function
858 * to work around certain LD_PRELOAD issues seen in apps.
859 */
/* Exported ICD entry point required by the Vulkan loader; simply
 * forwards to radv_GetInstanceProcAddr.  Declared first to give the
 * PUBLIC symbol a prototype. */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance                                  instance,
    const char*                                 pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance                                  instance,
    const char*                                 pName)
{
    return radv_GetInstanceProcAddr(instance, pName);
}
872
/* Resolve a device-level entry point by name; same table lookup as the
 * instance variant (no per-device dispatch yet). */
PFN_vkVoidFunction radv_GetDeviceProcAddr(
    VkDevice                                    device,
    const char*                                 pName)
{
    return radv_lookup_entrypoint(pName);
}
879
/* Allocate device memory.  The memoryTypeIndex values map to the table
 * advertised in radv_GetPhysicalDeviceMemoryProperties():
 *   0 = invisible VRAM, 1 = WC GART, 2 = visible VRAM, 3 = cached GART.
 * A zero-sized allocation returns VK_NULL_HANDLE, which the spec
 * permits. */
VkResult radv_AllocateMemory(
    VkDevice                                    _device,
    const VkMemoryAllocateInfo*                 pAllocateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDeviceMemory*                             pMem)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    struct radv_device_memory *mem;
    VkResult result;
    enum radeon_bo_domain domain;
    uint32_t flags = 0;
    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

    if (pAllocateInfo->allocationSize == 0) {
        /* Apparently, this is allowed */
        *pMem = VK_NULL_HANDLE;
        return VK_SUCCESS;
    }

    mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
    if (mem == NULL)
        return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    /* Round up to the 4 KiB page size. */
    uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
    /* Types 1 and 3 live in GART (system memory); 0 and 2 in VRAM. */
    if (pAllocateInfo->memoryTypeIndex == 1 || pAllocateInfo->memoryTypeIndex == 3)
        domain = RADEON_DOMAIN_GTT;
    else
        domain = RADEON_DOMAIN_VRAM;

    /* Only type 0 (invisible VRAM) is not CPU-mappable. */
    if (pAllocateInfo->memoryTypeIndex == 0)
        flags |= RADEON_FLAG_NO_CPU_ACCESS;
    else
        flags |= RADEON_FLAG_CPU_ACCESS;

    /* Type 1 is the write-combined (uncached) GART type. */
    if (pAllocateInfo->memoryTypeIndex == 1)
        flags |= RADEON_FLAG_GTT_WC;

    /* 32 KiB alignment satisfies every image/buffer requirement. */
    mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
                                        domain, flags);

    if (!mem->bo) {
        result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
        goto fail;
    }
    mem->type_index = pAllocateInfo->memoryTypeIndex;

    *pMem = radv_device_memory_to_handle(mem);

    return VK_SUCCESS;

fail:
    vk_free2(&device->alloc, pAllocator, mem);

    return result;
}
936
937 void radv_FreeMemory(
938 VkDevice _device,
939 VkDeviceMemory _mem,
940 const VkAllocationCallbacks* pAllocator)
941 {
942 RADV_FROM_HANDLE(radv_device, device, _device);
943 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
944
945 if (mem == NULL)
946 return;
947
948 device->ws->buffer_destroy(mem->bo);
949 mem->bo = NULL;
950
951 vk_free2(&device->alloc, pAllocator, mem);
952 }
953
954 VkResult radv_MapMemory(
955 VkDevice _device,
956 VkDeviceMemory _memory,
957 VkDeviceSize offset,
958 VkDeviceSize size,
959 VkMemoryMapFlags flags,
960 void** ppData)
961 {
962 RADV_FROM_HANDLE(radv_device, device, _device);
963 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
964
965 if (mem == NULL) {
966 *ppData = NULL;
967 return VK_SUCCESS;
968 }
969
970 *ppData = device->ws->buffer_map(mem->bo);
971 if (*ppData) {
972 *ppData += offset;
973 return VK_SUCCESS;
974 }
975
976 return VK_ERROR_MEMORY_MAP_FAILED;
977 }
978
/* Unmap memory previously mapped by radv_MapMemory.  Unmapping
 * VK_NULL_HANDLE is a no-op (matches the NULL map it produced). */
void radv_UnmapMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _memory)
{
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

    if (mem == NULL)
        return;

    device->ws->buffer_unmap(mem->bo);
}
991
/* No-op: every host-visible memory type advertised by this driver is
 * also HOST_COHERENT, so there is nothing to flush. */
VkResult radv_FlushMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
    return VK_SUCCESS;
}
999
/* No-op for the same reason as radv_FlushMappedMemoryRanges: all
 * host-visible memory types are HOST_COHERENT. */
VkResult radv_InvalidateMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
    return VK_SUCCESS;
}
1007
/* Report memory requirements for a buffer. */
void radv_GetBufferMemoryRequirements(
    VkDevice                                    device,
    VkBuffer                                    _buffer,
    VkMemoryRequirements*                       pMemoryRequirements)
{
    RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

    /* The Vulkan spec (git aaed022) says:
     *
     *    memoryTypeBits is a bitfield and contains one bit set for every
     *    supported memory type for the resource. The bit `1<<i` is set if and
     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
     *    structure for the physical device is supported.
     *
     * Buffers may live in memory types 0-2 (0x7); type 3 (host-cached
     * GART) is deliberately excluded here.
     */
    pMemoryRequirements->memoryTypeBits = 0x7;

    pMemoryRequirements->size = buffer->size;
    pMemoryRequirements->alignment = 16;
}
1029
/* Report memory requirements for an image; size/alignment were computed
 * at image creation time. */
void radv_GetImageMemoryRequirements(
    VkDevice                                    device,
    VkImage                                     _image,
    VkMemoryRequirements*                       pMemoryRequirements)
{
    RADV_FROM_HANDLE(radv_image, image, _image);

    /* The Vulkan spec (git aaed022) says:
     *
     *    memoryTypeBits is a bitfield and contains one bit set for every
     *    supported memory type for the resource. The bit `1<<i` is set if and
     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
     *    structure for the physical device is supported.
     *
     * Images may live in memory types 0-2 (0x7); type 3 (host-cached
     * GART) is deliberately excluded here.
     */
    pMemoryRequirements->memoryTypeBits = 0x7;

    pMemoryRequirements->size = image->size;
    pMemoryRequirements->alignment = image->alignment;
}
1051
void radv_GetImageSparseMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     image,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
	/* Sparse images are not implemented yet; stub() just flags the
	 * unimplemented entry point. No requirements are written back. */
	stub();
}
1060
void radv_GetDeviceMemoryCommitment(
	VkDevice                                    device,
	VkDeviceMemory                              memory,
	VkDeviceSize*                               pCommittedMemoryInBytes)
{
	/* Lazily-allocated memory is not supported, so report a committed
	 * size of zero for every allocation. */
	*pCommittedMemoryInBytes = 0;
}
1068
1069 VkResult radv_BindBufferMemory(
1070 VkDevice device,
1071 VkBuffer _buffer,
1072 VkDeviceMemory _memory,
1073 VkDeviceSize memoryOffset)
1074 {
1075 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1076 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1077
1078 if (mem) {
1079 buffer->bo = mem->bo;
1080 buffer->offset = memoryOffset;
1081 } else {
1082 buffer->bo = NULL;
1083 buffer->offset = 0;
1084 }
1085
1086 return VK_SUCCESS;
1087 }
1088
1089 VkResult radv_BindImageMemory(
1090 VkDevice device,
1091 VkImage _image,
1092 VkDeviceMemory _memory,
1093 VkDeviceSize memoryOffset)
1094 {
1095 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1096 RADV_FROM_HANDLE(radv_image, image, _image);
1097
1098 if (mem) {
1099 image->bo = mem->bo;
1100 image->offset = memoryOffset;
1101 } else {
1102 image->bo = NULL;
1103 image->offset = 0;
1104 }
1105
1106 return VK_SUCCESS;
1107 }
1108
VkResult radv_QueueBindSparse(
	VkQueue                                     queue,
	uint32_t                                    bindInfoCount,
	const VkBindSparseInfo*                     pBindInfo,
	VkFence                                     fence)
{
	/* Sparse binding is not implemented; flag the call and fail. */
	stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}
1117
1118 VkResult radv_CreateFence(
1119 VkDevice _device,
1120 const VkFenceCreateInfo* pCreateInfo,
1121 const VkAllocationCallbacks* pAllocator,
1122 VkFence* pFence)
1123 {
1124 RADV_FROM_HANDLE(radv_device, device, _device);
1125 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1126 sizeof(*fence), 8,
1127 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1128
1129 if (!fence)
1130 return VK_ERROR_OUT_OF_HOST_MEMORY;
1131
1132 memset(fence, 0, sizeof(*fence));
1133 fence->submitted = false;
1134 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1135 fence->fence = device->ws->create_fence();
1136
1137
1138 *pFence = radv_fence_to_handle(fence);
1139
1140 return VK_SUCCESS;
1141 }
1142
1143 void radv_DestroyFence(
1144 VkDevice _device,
1145 VkFence _fence,
1146 const VkAllocationCallbacks* pAllocator)
1147 {
1148 RADV_FROM_HANDLE(radv_device, device, _device);
1149 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1150
1151 if (!fence)
1152 return;
1153 device->ws->destroy_fence(fence->fence);
1154 vk_free2(&device->alloc, pAllocator, fence);
1155 }
1156
1157 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1158 {
1159 uint64_t current_time;
1160 struct timespec tv;
1161
1162 clock_gettime(CLOCK_MONOTONIC, &tv);
1163 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1164
1165 timeout = MIN2(UINT64_MAX - current_time, timeout);
1166
1167 return current_time + timeout;
1168 }
1169
VkResult radv_WaitForFences(
	VkDevice                                    _device,
	uint32_t                                    fenceCount,
	const VkFence*                              pFences,
	VkBool32                                    waitAll,
	uint64_t                                    timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	/* Turn the caller's relative timeout into one absolute deadline so
	 * all the sequential waits below share a single time budget. */
	timeout = radv_get_absolute_timeout(timeout);

	/* waitAll == false ("return when any fence signals") is not
	 * implemented; the loop below always waits for every fence. */
	if (!waitAll && fenceCount > 1) {
		fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
	}

	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		/* Previously observed as signalled; nothing to wait on. */
		if (fence->signalled)
			continue;

		/* A fence that was never submitted cannot signal within the
		 * timeout, so report VK_TIMEOUT immediately. */
		if (!fence->submitted)
			return VK_TIMEOUT;

		expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
		if (!expired)
			return VK_TIMEOUT;

		/* Cache the result so later queries skip the winsys call. */
		fence->signalled = true;
	}

	return VK_SUCCESS;
}
1203
1204 VkResult radv_ResetFences(VkDevice device,
1205 uint32_t fenceCount,
1206 const VkFence *pFences)
1207 {
1208 for (unsigned i = 0; i < fenceCount; ++i) {
1209 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1210 fence->submitted = fence->signalled = false;
1211 }
1212
1213 return VK_SUCCESS;
1214 }
1215
1216 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1217 {
1218 RADV_FROM_HANDLE(radv_device, device, _device);
1219 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1220
1221 if (fence->signalled)
1222 return VK_SUCCESS;
1223 if (!fence->submitted)
1224 return VK_NOT_READY;
1225
1226 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1227 return VK_NOT_READY;
1228
1229 return VK_SUCCESS;
1230 }
1231
1232
1233 // Queue semaphore functions
1234
VkResult radv_CreateSemaphore(
	VkDevice                                    device,
	const VkSemaphoreCreateInfo*                pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSemaphore*                                pSemaphore)
{
	/* The DRM execbuffer ioctl always executes in-order, even between
	 * different rings. As such, there's nothing to do for the user space
	 * semaphore.
	 */

	/* Return a dummy non-null handle; no state is allocated, and
	 * radv_DestroySemaphore correspondingly frees nothing. */
	*pSemaphore = (VkSemaphore)1;

	return VK_SUCCESS;
}
1249
void radv_DestroySemaphore(
	VkDevice                                    device,
	VkSemaphore                                 semaphore,
	const VkAllocationCallbacks*                pAllocator)
{
	/* Semaphores carry no driver state (radv_CreateSemaphore returns a
	 * dummy constant handle), so there is nothing to free here. */
}
1256
1257 VkResult radv_CreateEvent(
1258 VkDevice _device,
1259 const VkEventCreateInfo* pCreateInfo,
1260 const VkAllocationCallbacks* pAllocator,
1261 VkEvent* pEvent)
1262 {
1263 RADV_FROM_HANDLE(radv_device, device, _device);
1264 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
1265 sizeof(*event), 8,
1266 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1267
1268 if (!event)
1269 return VK_ERROR_OUT_OF_HOST_MEMORY;
1270
1271 event->bo = device->ws->buffer_create(device->ws, 8, 8,
1272 RADEON_DOMAIN_GTT,
1273 RADEON_FLAG_CPU_ACCESS);
1274 if (!event->bo) {
1275 vk_free2(&device->alloc, pAllocator, event);
1276 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1277 }
1278
1279 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
1280
1281 *pEvent = radv_event_to_handle(event);
1282
1283 return VK_SUCCESS;
1284 }
1285
1286 void radv_DestroyEvent(
1287 VkDevice _device,
1288 VkEvent _event,
1289 const VkAllocationCallbacks* pAllocator)
1290 {
1291 RADV_FROM_HANDLE(radv_device, device, _device);
1292 RADV_FROM_HANDLE(radv_event, event, _event);
1293
1294 if (!event)
1295 return;
1296 device->ws->buffer_destroy(event->bo);
1297 vk_free2(&device->alloc, pAllocator, event);
1298 }
1299
1300 VkResult radv_GetEventStatus(
1301 VkDevice _device,
1302 VkEvent _event)
1303 {
1304 RADV_FROM_HANDLE(radv_event, event, _event);
1305
1306 if (*event->map == 1)
1307 return VK_EVENT_SET;
1308 return VK_EVENT_RESET;
1309 }
1310
VkResult radv_SetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	/* Write 1 through the persistent CPU mapping to mark the event set;
	 * radv_GetEventStatus reads this same location. */
	*event->map = 1;

	return VK_SUCCESS;
}
1320
VkResult radv_ResetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	/* Write 0 through the persistent CPU mapping to mark the event
	 * reset; radv_GetEventStatus reads this same location. */
	*event->map = 0;

	return VK_SUCCESS;
}
1330
1331 VkResult radv_CreateBuffer(
1332 VkDevice _device,
1333 const VkBufferCreateInfo* pCreateInfo,
1334 const VkAllocationCallbacks* pAllocator,
1335 VkBuffer* pBuffer)
1336 {
1337 RADV_FROM_HANDLE(radv_device, device, _device);
1338 struct radv_buffer *buffer;
1339
1340 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1341
1342 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1343 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1344 if (buffer == NULL)
1345 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1346
1347 buffer->size = pCreateInfo->size;
1348 buffer->usage = pCreateInfo->usage;
1349 buffer->bo = NULL;
1350 buffer->offset = 0;
1351
1352 *pBuffer = radv_buffer_to_handle(buffer);
1353
1354 return VK_SUCCESS;
1355 }
1356
1357 void radv_DestroyBuffer(
1358 VkDevice _device,
1359 VkBuffer _buffer,
1360 const VkAllocationCallbacks* pAllocator)
1361 {
1362 RADV_FROM_HANDLE(radv_device, device, _device);
1363 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1364
1365 if (!buffer)
1366 return;
1367
1368 vk_free2(&device->alloc, pAllocator, buffer);
1369 }
1370
1371 static inline unsigned
1372 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
1373 {
1374 if (stencil)
1375 return image->surface.stencil_tiling_index[level];
1376 else
1377 return image->surface.tiling_index[level];
1378 }
1379
/* Fill the CB_* hardware register values for a color attachment view.
 * The results are consumed when the framebuffer is bound; no state is
 * emitted here. Register field semantics come from the sid.h macros. */
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
	uint64_t va;
	const struct radeon_surf *surf = &iview->image->surface;
	const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Base address of the selected mip level; registers hold addresses
	 * in 256-byte units, hence the >> 8 shifts throughout. */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += level_info->offset;
	cb->cb_color_base = va >> 8;

	/* CMASK variables */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;
	cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

	/* DCC base is programmed unconditionally; DCC_ENABLE below gates it. */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;

	/* Layer range visible through this view. */
	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);

	cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
	/* Tile counts: 8x8-pixel tiles, encoded as (count - 1). */
	pitch_tile_max = level_info->nblk_x / 8 - 1;
	slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
	tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);

	cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
	cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
		S_028C74_TILE_MODE_INDEX(tile_mode_index);

	if (iview->image->samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (iview->image->fmask.size) {
		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		if (device->instance->physicalDevice.rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		if (device->instance->physicalDevice.rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
		cb->cb_color_fmask = cb->cb_color_base;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
	}

	/* Derive the hardware number format / channel order from the
	 * Vulkan format. */
	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	/* FMASK compression only makes sense for MSAA surfaces. */
	if (iview->image->samples > 1)
		if (iview->image->fmask.size)
			cb->cb_color_info |= S_028C70_COMPRESSION(1);

	if (iview->image->cmask.size && device->allow_fast_clears)
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (iview->image->surface.dcc_size && level_info->dcc_enabled)
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	if (device->instance->physicalDevice.rad_info.chip_class >= VI) {
		/* DCC block-size tuning; smaller max block for thin MSAA
		 * formats. Values are register encodings, not byte sizes. */
		unsigned max_uncompressed_block_size = 2;
		if (iview->image->samples > 1) {
			if (iview->image->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (iview->image->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
			S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK. */
	if (!iview->image->fmask.size &&
	    device->instance->physicalDevice.rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(iview->image->surface.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}
}
1521
/* Fill the DB_* hardware register values for a depth/stencil attachment
 * view. Like the color path, this only computes register words; nothing
 * is emitted to a command stream here. */
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format;
	uint64_t va, s_offs, z_offs;
	const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
	memset(ds, 0, sizeof(*ds));
	/* Polygon-offset scaling depends on the depth format's precision;
	 * the NEG_NUM_DB_BITS values are format-specific encodings. */
	switch (iview->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->vk_format);
	if (format == V_028040_Z_INVALID) {
		fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
	}

	/* Depth and stencil planes have separate per-level offsets within
	 * the same BO; addresses are programmed in 256-byte units. */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;
	z_offs += iview->image->surface.level[level].offset;
	s_offs += iview->image->surface.stencil_level[level].offset;

	/* Layer range visible through this view. */
	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);
	ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
	ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);

	if (iview->image->samples > 1)
		ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));

	/* The surface flags say whether a stencil plane exists at all. */
	if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
		/* CIK+ programs tiling parameters directly from the decoded
		 * tile/macrotile mode arrays rather than via an index. */
		struct radeon_info *info = &device->instance->physicalDevice.rad_info;
		unsigned tiling_index = iview->image->surface.tiling_index[level];
		unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
		unsigned macro_index = iview->image->surface.macro_tile_index;
		unsigned tile_mode = info->si_tile_mode_array[tiling_index];
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		ds->db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		/* SI uses a tile-mode-index register field instead. */
		unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
		ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(iview->image, level, true);
		ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HTILE (hierarchical Z) is only used for the base mip level. */
	if (iview->image->htile.size && !level) {
		ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			S_028040_ALLOW_EXPCLEAR(1);

		if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (iview->image->samples <= 1)
				ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else
			/* Use all of the htile_buffer for depth if there's no stencil. */
			ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
		     iview->image->htile.offset;
		ds->db_htile_data_base = va >> 8;
		ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		ds->db_htile_data_base = 0;
		ds->db_htile_surface = 0;
	}

	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;

	/* Sizes in 8x8 tile units, encoded as (count - 1). */
	ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
		S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
	ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
}
1639
1640 VkResult radv_CreateFramebuffer(
1641 VkDevice _device,
1642 const VkFramebufferCreateInfo* pCreateInfo,
1643 const VkAllocationCallbacks* pAllocator,
1644 VkFramebuffer* pFramebuffer)
1645 {
1646 RADV_FROM_HANDLE(radv_device, device, _device);
1647 struct radv_framebuffer *framebuffer;
1648
1649 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
1650
1651 size_t size = sizeof(*framebuffer) +
1652 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
1653 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
1654 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1655 if (framebuffer == NULL)
1656 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1657
1658 framebuffer->attachment_count = pCreateInfo->attachmentCount;
1659 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
1660 VkImageView _iview = pCreateInfo->pAttachments[i];
1661 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
1662 framebuffer->attachments[i].attachment = iview;
1663 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
1664 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
1665 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
1666 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
1667 }
1668 }
1669
1670 framebuffer->width = pCreateInfo->width;
1671 framebuffer->height = pCreateInfo->height;
1672 framebuffer->layers = pCreateInfo->layers;
1673
1674 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
1675 return VK_SUCCESS;
1676 }
1677
1678 void radv_DestroyFramebuffer(
1679 VkDevice _device,
1680 VkFramebuffer _fb,
1681 const VkAllocationCallbacks* pAllocator)
1682 {
1683 RADV_FROM_HANDLE(radv_device, device, _device);
1684 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
1685
1686 if (!fb)
1687 return;
1688 vk_free2(&device->alloc, pAllocator, fb);
1689 }
1690
/* Map a Vulkan sampler address mode to the SQ_TEX clamp/wrap encoding
 * used in the sampler state words (S_008F30_CLAMP_*). */
static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	default:
		unreachable("illegal tex wrap mode");
		break;
	}
}
1709
/* Map a Vulkan compare op to the SQ_TEX depth-compare encoding used in
 * the sampler state words (S_008F30_DEPTH_COMPARE_FUNC). */
static unsigned
radv_tex_compare(VkCompareOp op)
{
	switch (op) {
	case VK_COMPARE_OP_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case VK_COMPARE_OP_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case VK_COMPARE_OP_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case VK_COMPARE_OP_LESS_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case VK_COMPARE_OP_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case VK_COMPARE_OP_NOT_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case VK_COMPARE_OP_GREATER_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case VK_COMPARE_OP_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		unreachable("illegal compare mode");
		break;
	}
}
1735
/* Map a Vulkan filter to the SQ_TEX XY filter encoding, selecting the
 * anisotropic variant when anisotropy is enabled (max_ansio > 1).
 * VK_FILTER_CUBIC_IMG is unsupported and falls through to the error. */
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
			V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		fprintf(stderr, "illegal texture filter");
		return 0;
	}
}
1752
/* Map a Vulkan mipmap mode to the SQ_TEX Z (mip) filter encoding. */
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
	switch (mode) {
	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}
1765
/* Map a Vulkan border color to the SQ_TEX border-color-type encoding;
 * float and int variants of the same color share one hardware value.
 * Unknown values fall through to 0. */
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
	switch (bcolor) {
	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	default:
		break;
	}
	return 0;
}
1784
/* Encode a max-anisotropy sample count as the hardware ratio field:
 * 0 -> 1x, 1 -> 2x, 2 -> 4x, 3 -> 8x, 4 -> 16x. Equivalent to
 * floor(log2(filter)) clamped to [0, 4]. */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	unsigned ratio = 0;

	while (ratio < 4 && (filter >> (ratio + 1)) != 0)
		ratio++;

	return ratio;
}
1798
/* Pack a VkSamplerCreateInfo into the four 32-bit hardware sampler state
 * words (S_008F30/34/38/3C fields). */
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	/* Anisotropy only applies when enabled and > 1x. */
	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
					(uint32_t) pCreateInfo->maxAnisotropy : 0;
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi;
	is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI);

	if (!is_vi && max_aniso > 0) {
		/* Pre-VI hardware needs a shader-side workaround that is not
		 * implemented yet, so anisotropy is force-disabled here. */
		radv_finishme("Anisotropic filtering must be disabled manually "
			      "by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n");
		max_aniso = max_aniso_ratio = 0;
	}

	/* Word 0: addressing, depth compare, anisotropy setup. */
	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi));
	/* Word 1: LOD range as unsigned 4.8 fixed point, clamped to [0, 15]. */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	/* Word 2: LOD bias (signed fixed point) and the filter modes. */
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(1) |
			     S_008F38_DISABLE_LSB_CEIL(1) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	/* Word 3: border color; only the standard colors are supported, so
	 * the border color table pointer stays 0. */
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}
1840
1841 VkResult radv_CreateSampler(
1842 VkDevice _device,
1843 const VkSamplerCreateInfo* pCreateInfo,
1844 const VkAllocationCallbacks* pAllocator,
1845 VkSampler* pSampler)
1846 {
1847 RADV_FROM_HANDLE(radv_device, device, _device);
1848 struct radv_sampler *sampler;
1849
1850 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
1851
1852 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
1853 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1854 if (!sampler)
1855 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1856
1857 radv_init_sampler(device, sampler, pCreateInfo);
1858 *pSampler = radv_sampler_to_handle(sampler);
1859
1860 return VK_SUCCESS;
1861 }
1862
1863 void radv_DestroySampler(
1864 VkDevice _device,
1865 VkSampler _sampler,
1866 const VkAllocationCallbacks* pAllocator)
1867 {
1868 RADV_FROM_HANDLE(radv_device, device, _device);
1869 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
1870
1871 if (!sampler)
1872 return;
1873 vk_free2(&device->alloc, pAllocator, sampler);
1874 }