/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <dlfcn.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/strtod.h"

#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

struct radv_dispatch_table dtable;

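/* The pipeline cache UUID below is derived from the on-disk timestamps of
 * the radv and LLVM binaries: if either module is rebuilt, the UUID changes
 * and stale shader caches are invalidated.
 */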
static int
radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
{
	Dl_info info;
	struct stat st;
	if (!dladdr(ptr, &info) || !info.dli_fname) {
		return -1;
	}
	if (stat(info.dli_fname, &st)) {
		return -1;
	}
	*timestamp = st.st_mtim.tv_sec;
	return 0;
}

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

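	/* Resulting layout: bytes 0-3 mesa build timestamp, bytes 4-7 LLVM
	 * build timestamp, bytes 8-9 GPU family, bytes 10+ the literal
	 * string "radv". */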
	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

static const VkExtensionProperties instance_extensions[] = {
	{
		.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
		.specVersion = 25,
	},
#ifdef VK_USE_PLATFORM_XCB_KHR
	{
		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
	{
		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
	{
		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
		.specVersion = 5,
	},
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
	{
		.extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
		.specVersion = 68,
	},
	{
		.extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
		.specVersion = 1,
	},
};

static VkResult
radv_extensions_register(struct radv_instance *instance,
			 struct radv_extensions *extensions,
			 const VkExtensionProperties *new_ext,
			 uint32_t num_ext)
{
	size_t new_size;
	VkExtensionProperties *new_ptr;

	assert(new_ext && num_ext > 0);

	if (!new_ext)
		return VK_ERROR_INITIALIZATION_FAILED;

	new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
	new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
			     new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

	/* Old array continues to be valid, update nothing */
	if (!new_ptr)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memcpy(&new_ptr[extensions->num_ext], new_ext,
	       num_ext * sizeof(VkExtensionProperties));
	extensions->ext_array = new_ptr;
	extensions->num_ext += num_ext;

	return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
		       struct radv_extensions *extensions)
{
	assert(extensions);

	if (!extensions)
		radv_loge("Attempted to free invalid extension struct\n");

	if (extensions->ext_array)
		vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
		     size_t num_ext,
		     const char *name)
{
	assert(extensions && name);

	for (uint32_t i = 0; i < num_ext; i++) {
		if (strcmp(name, extensions[i].extensionName) == 0)
			return true;
	}

	return false;
}

static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  const char *path)
{
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return VK_ERROR_INCOMPATIBLE_DRIVER;

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}
	device->ws->query_info(device->ws, &device->rad_info);
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
		radv_finish_wsi(device);
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	result = radv_extensions_register(instance,
					  &device->extensions,
					  common_device_extensions,
					  ARRAY_SIZE(common_device_extensions));
	if (result != VK_SUCCESS)
		goto fail;

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
	device->name = device->rad_info.name;
	close(fd);
	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_extensions_finish(device->instance, &device->extensions);
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
}

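/* Default allocator used when the app passes no VkAllocationCallbacks.
 * Note that the align parameter is ignored: malloc's default alignment is
 * at least that of max_align_t (16 on typical 64-bit Linux), which covers
 * the 8-byte alignments radv requests internally.
 */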
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

static const struct debug_control radv_debug_options[] = {
	{"fastclears", RADV_DEBUG_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{NULL, 0}
};
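
/* These options are parsed from the RADV_DEBUG environment variable in
 * radv_CreateInstance below; a comma-separated list such as
 * RADV_DEBUG=nodcc,shaders enables several at once.
 */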

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(instance_extensions,
					  ARRAY_SIZE(instance_extensions),
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			     VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(instance, 0, sizeof(*instance));

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		char path[20];
		instance->physicalDeviceCount = 0;
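		/* Probe the render nodes /dev/dri/renderD128 ..
		 * renderD(127 + RADV_MAX_DRM_DEVICES); nodes that are not
		 * amdgpu fail with VK_ERROR_INCOMPATIBLE_DRIVER and are
		 * simply skipped. */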
		for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
			snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance, path);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				return result;
		}
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	// RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = false,
		.tessellationShader = false,
		.sampleRateShading = false,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = false,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = false,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = false,
		.shaderStorageImageWriteWithoutFormat = false,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = false,
		.shaderInt64 = false,
		.shaderInt16 = false,
		.variableMultisampleRate = false,
		.inheritedQueries = false,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2KHR *pFeatures)
{
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;
	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0,
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = 64,
		.maxPerStageDescriptorUniformBuffers = 64,
		.maxPerStageDescriptorStorageBuffers = 64,
		.maxPerStageDescriptorSampledImages = 64,
		.maxPerStageDescriptorStorageImages = 64,
		.maxPerStageDescriptorInputAttachments = 64,
		.maxPerStageResources = 128,
		.maxDescriptorSetSamplers = 256,
		.maxDescriptorSetUniformBuffers = 256,
		.maxDescriptorSetUniformBuffersDynamic = 256,
		.maxDescriptorSetStorageBuffers = 256,
		.maxDescriptorSetStorageBuffersDynamic = 256,
		.maxDescriptorSetSampledImages = 256,
		.maxDescriptorSetStorageImages = 256,
		.maxDescriptorSetInputAttachments = 256,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 0,
		.maxTessellationPatchSize = 0,
		.maxTessellationControlPerVertexInputComponents = 0,
		.maxTessellationControlPerVertexOutputComponents = 0,
		.maxTessellationControlPerPatchOutputComponents = 0,
		.maxTessellationControlTotalOutputComponents = 0,
		.maxTessellationEvaluationInputComponents = 0,
		.maxTessellationEvaluationOutputComponents = 0,
		.maxGeometryShaderInvocations = 32,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = false,
		.timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = VK_MAKE_VERSION(1, 0, 5),
		.driverVersion = 1,
		.vendorID = 0x1002,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
		.limits = limits,
		.sparseProperties = {0}, /* radv doesn't support sparse yet. */
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties2KHR *pProperties)
{
	return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties* pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
				.queueCount = pdevice->rad_info.compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
	return radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice,
							   pCount,
							   &pQueueFamilyProperties->queueFamilyProperties);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
				 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
				 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};

	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
		.size = physical_device->rad_info.vram_size -
			physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
		.size = physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
		.size = physical_device->rad_info.gart_size,
		.flags = 0,
	};
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
	return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
						      &pMemoryProperties->memoryProperties);
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		int queue_family_index, int idx)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;

	queue->hw_ctx = device->ws->ctx_create(device->ws);
	if (!queue->hw_ctx)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->preamble_cs)
		queue->device->ws->cs_destroy(queue->preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice physicalDevice,
	const VkDeviceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDevice* pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(physical_device->extensions.ext_array,
					  physical_device->extensions.num_ext,
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
			   sizeof(*device), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(device, 0, sizeof(*device));

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->debug_flags = device->instance->debug_flags;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * device->scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);
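	/* For example, a Polaris10 with its 36 CUs gets MAX2(32 * 36,
	 * 2048 / 64) = 1152 scratch waves, well above the 2048 / 64 = 32
	 * waves a single maximum-size threadgroup can need. */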

	radv_device_init_gs_info(device);

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (getenv("RADV_TRACE_FILE")) {
		device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
							     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
		if (!device->trace_bo) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}

		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
		if (!device->trace_id_ptr) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
	}

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice _device,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}
	radv_device_finish_meta(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
	const char* pLayerName,
	uint32_t* pPropertyCount,
	VkExtensionProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = ARRAY_SIZE(instance_extensions);
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
	typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

	if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
	VkPhysicalDevice physicalDevice,
	const char* pLayerName,
	uint32_t* pPropertyCount,
	VkExtensionProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pProperties == NULL) {
		*pPropertyCount = pdevice->extensions.num_ext;
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
	typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

	if (*pPropertyCount < pdevice->extensions.num_ext)
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice _device,
	uint32_t queueFamilyIndex,
	uint32_t queueIndex,
	VkQueue* pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

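/* GPU-hang debugging aid: with RADV_TRACE_FILE set, radv_QueueSubmit
 * submits one CS at a time and waits for idle after each; if the wait
 * fails, the trace ID last written into device->trace_bo tells us how far
 * the CS got, and the CS is dumped here.
 */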
static void radv_dump_trace(struct radv_device *device,
			    struct radeon_winsys_cs *cs)
{
	const char *filename = getenv("RADV_TRACE_FILE");
	FILE *f = fopen(filename, "w");
	if (!f) {
		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
		return;
	}

	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
	device->ws->cs_dump(cs, f, *device->trace_id_ptr);
	fclose(f);
}

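/* Fills four 4-dword buffer descriptors starting at map[4] (map[0..1] are
 * reserved for the scratch rsrc written in radv_get_preamble_cs): a
 * swizzled ESGS entry for the ES->GS writes, a linear GS entry for ES->GS
 * reads, a linear VS entry for GS->VS reads, and a swizzled GS->VS write
 * entry whose stride and size the shader patches at runtime.
 */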
static void
fill_geom_rings(struct radv_queue *queue,
		uint32_t *map,
		uint32_t esgs_ring_size,
		struct radeon_winsys_bo *esgs_ring_bo,
		uint32_t gsvs_ring_size,
		struct radeon_winsys_bo *gsvs_ring_bo)
{
	uint64_t esgs_va, gsvs_va;
	esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
	gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
	uint32_t *desc = &map[4];

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	   index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(3) |
		  S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	   elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(1) |
		  S_008F0C_ADD_TID_ENABLE(true);
}

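/* Build (or reuse) the per-queue preamble CS. The preamble is submitted
 * ahead of the user command streams and binds the scratch buffers and GS
 * rings; it is only rebuilt when a command buffer needs more space than
 * the queue currently has, so the sizes cached on the queue only grow.
 */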
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
		     uint32_t scratch_size,
		     uint32_t compute_scratch_size,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_cs **preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_cs *cs = NULL;

	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
		*preamble_cs = NULL;
		return VK_SUCCESS;
	}

	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size) {
		*preamble_cs = queue->preamble_cs;
		return VK_SUCCESS;
	}

	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
							      scratch_size,
							      4096,
							      RADEON_DOMAIN_VRAM,
							      RADEON_FLAG_NO_CPU_ACCESS);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
								      compute_scratch_size,
								      4096,
								      RADEON_DOMAIN_VRAM,
								      RADEON_FLAG_NO_CPU_ACCESS);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								RADEON_FLAG_NO_CPU_ACCESS);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								RADEON_FLAG_NO_CPU_ACCESS);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo)
			size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
		else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
								 size,
								 4096,
								 RADEON_DOMAIN_VRAM,
								 RADEON_FLAG_CPU_ACCESS);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

	cs = queue->device->ws->cs_create(queue->device->ws,
					  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
	if (!cs)
		goto fail;

	if (scratch_bo)
		queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

	if (esgs_ring_bo)
		queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

	if (gsvs_ring_bo)
		queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

	if (descriptor_bo)
		queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

	if (descriptor_bo != queue->descriptor_bo) {
		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

		if (scratch_bo) {
			uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
					 S_008F04_SWIZZLE_ENABLE(1);
			map[0] = scratch_va;
			map[1] = rsrc1;
		}

		if (esgs_ring_bo || gsvs_ring_bo)
			fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);

		queue->device->ws->buffer_unmap(descriptor_bo);
	}

	if (esgs_ring_bo || gsvs_ring_bo) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));

		if (queue->device->physical_device->rad_info.chip_class >= CIK) {
			radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
			radeon_emit(cs, esgs_ring_size >> 8);
			radeon_emit(cs, gsvs_ring_size >> 8);
		} else {
			radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
			radeon_emit(cs, esgs_ring_size >> 8);
			radeon_emit(cs, gsvs_ring_size >> 8);
		}
	}

	if (descriptor_bo) {
		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
				   R_00B230_SPI_SHADER_USER_DATA_GS_0,
				   R_00B330_SPI_SHADER_USER_DATA_ES_0,
				   R_00B430_SPI_SHADER_USER_DATA_HS_0,
				   R_00B530_SPI_SHADER_USER_DATA_LS_0};

		uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
			radeon_set_sh_reg_seq(cs, regs[i], 2);
			radeon_emit(cs, va);
			radeon_emit(cs, va >> 32);
		}
	}

	if (compute_scratch_bo) {
		uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
		uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
				 S_008F04_SWIZZLE_ENABLE(1);

		queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

		radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
		radeon_emit(cs, scratch_va);
		radeon_emit(cs, rsrc1);
	}

	if (!queue->device->ws->cs_finalize(cs))
		goto fail;

	if (queue->preamble_cs)
		queue->device->ws->cs_destroy(queue->preamble_cs);

	queue->preamble_cs = cs;

	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	*preamble_cs = cs;
	return VK_SUCCESS;
fail:
	if (cs)
		queue->device->ws->cs_destroy(cs);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

VkResult radv_QueueSubmit(
	VkQueue _queue,
	uint32_t submitCount,
	const VkSubmitInfo* pSubmits,
	VkFence _fence)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);
	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
	int ret;
	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
	uint32_t scratch_size = 0;
	uint32_t compute_scratch_size = 0;
	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
	struct radeon_winsys_cs *preamble_cs = NULL;
	VkResult result;

	/* Do this first so failing to allocate scratch buffers can't result in
	 * partially executed submissions. */
	for (uint32_t i = 0; i < submitCount; i++) {
		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);

			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
			compute_scratch_size = MAX2(compute_scratch_size,
						    cmd_buffer->compute_scratch_size_needed);
			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
		}
	}

	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
	if (result != VK_SUCCESS)
		return result;

	for (uint32_t i = 0; i < submitCount; i++) {
		struct radeon_winsys_cs **cs_array;
		bool can_patch = true;
		uint32_t advance;

		if (!pSubmits[i].commandBufferCount)
			continue;

		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
				  pSubmits[i].commandBufferCount);
		if (!cs_array)
			return VK_ERROR_OUT_OF_HOST_MEMORY;

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);
			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

			cs_array[j] = cmd_buffer->cs;
			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
				can_patch = false;
		}

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
			advance = MIN2(max_cs_submission,
				       pSubmits[i].commandBufferCount - j);
			bool b = j == 0;
			bool e = j + advance == pSubmits[i].commandBufferCount;

			if (queue->device->trace_bo)
				*queue->device->trace_id_ptr = 0;

			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
							   advance, preamble_cs,
							   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
							   b ? pSubmits[i].waitSemaphoreCount : 0,
							   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
							   e ? pSubmits[i].signalSemaphoreCount : 0,
							   can_patch, base_fence);

			if (ret) {
				radv_loge("failed to submit CS %d\n", i);
				abort();
			}
			if (queue->device->trace_bo) {
				bool success = queue->device->ws->ctx_wait_idle(
							queue->hw_ctx,
							radv_queue_family_to_ring(
								queue->queue_family_index),
							queue->queue_idx);

				if (!success) { /* Hang */
					radv_dump_trace(queue->device, cs_array[j]);
					abort();
				}
			}
		}
		free(cs_array);
	}

	if (fence) {
		if (!submitCount)
			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
							   &queue->device->empty_cs[queue->queue_family_index],
							   1, NULL, NULL, 0, NULL, 0,
							   false, base_fence);

		fence->submitted = true;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
	VkQueue _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
					 radv_queue_family_to_ring(queue->queue_family_index),
					 queue->queue_idx);
	return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
	VkDevice _device)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++) {
			radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
		}
	}
	return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice device,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

VkResult radv_AllocateMemory(
	VkDevice _device,
	const VkMemoryAllocateInfo* pAllocateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDeviceMemory* pMem)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;
	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
		domain = RADEON_DOMAIN_GTT;
	else
		domain = RADEON_DOMAIN_VRAM;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
		flags |= RADEON_FLAG_NO_CPU_ACCESS;
	else
		flags |= RADEON_FLAG_CPU_ACCESS;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
		flags |= RADEON_FLAG_GTT_WC;

	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
					    domain, flags);

	if (!mem->bo) {
		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
		goto fail;
	}
	mem->type_index = pAllocateInfo->memoryTypeIndex;

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}

void radv_FreeMemory(
	VkDevice _device,
	VkDeviceMemory _mem,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

	if (mem == NULL)
		return;

	device->ws->buffer_destroy(mem->bo);
	mem->bo = NULL;

	vk_free2(&device->alloc, pAllocator, mem);
}

VkResult radv_MapMemory(
	VkDevice _device,
	VkDeviceMemory _memory,
	VkDeviceSize offset,
	VkDeviceSize size,
	VkMemoryMapFlags flags,
	void** ppData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL) {
		*ppData = NULL;
		return VK_SUCCESS;
	}

	*ppData = device->ws->buffer_map(mem->bo);
	if (*ppData) {
		*ppData += offset;
		return VK_SUCCESS;
	}

	return VK_ERROR_MEMORY_MAP_FAILED;
}

void radv_UnmapMemory(
	VkDevice _device,
	VkDeviceMemory _memory)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL)
		return;

	device->ws->buffer_unmap(mem->bo);
}

VkResult radv_FlushMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange* pMemoryRanges)
{
	return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange* pMemoryRanges)
{
	return VK_SUCCESS;
}

void radv_GetBufferMemoryRequirements(
	VkDevice device,
	VkBuffer _buffer,
	VkMemoryRequirements* pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = buffer->size;
	pMemoryRequirements->alignment = 16;
}

void radv_GetImageMemoryRequirements(
	VkDevice device,
	VkImage _image,
	VkMemoryRequirements* pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = image->size;
	pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageSparseMemoryRequirements(
	VkDevice device,
	VkImage image,
	uint32_t* pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
{
	stub();
}

void radv_GetDeviceMemoryCommitment(
	VkDevice device,
	VkDeviceMemory memory,
	VkDeviceSize* pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory(
	VkDevice device,
	VkBuffer _buffer,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (mem) {
		buffer->bo = mem->bo;
		buffer->offset = memoryOffset;
	} else {
		buffer->bo = NULL;
		buffer->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
	VkDevice device,
	VkImage _image,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (mem) {
		image->bo = mem->bo;
		image->offset = memoryOffset;
	} else {
		image->bo = NULL;
		image->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueBindSparse(
	VkQueue queue,
	uint32_t bindInfoCount,
	const VkBindSparseInfo* pBindInfo,
	VkFence fence)
{
	stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

VkResult radv_CreateFence(
	VkDevice _device,
	const VkFenceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkFence* pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memset(fence, 0, sizeof(*fence));
	fence->submitted = false;
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->fence = device->ws->create_fence();
	if (!fence->fence) {
		vk_free2(&device->alloc, pAllocator, fence);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

void radv_DestroyFence(
	VkDevice _device,
	VkFence _fence,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;
	device->ws->destroy_fence(fence->fence);
	vk_free2(&device->alloc, pAllocator, fence);
}

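/* Convert a relative timeout into an absolute CLOCK_MONOTONIC deadline;
 * the MIN2 clamp keeps current_time + timeout from overflowing when the
 * caller passes UINT64_MAX ("wait forever").
 */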
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time;
	struct timespec tv;

	clock_gettime(CLOCK_MONOTONIC, &tv);
	current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}
1816
1817 VkResult radv_WaitForFences(
1818 VkDevice _device,
1819 uint32_t fenceCount,
1820 const VkFence* pFences,
1821 VkBool32 waitAll,
1822 uint64_t timeout)
1823 {
1824 RADV_FROM_HANDLE(radv_device, device, _device);
1825 timeout = radv_get_absolute_timeout(timeout);
1826
1827 if (!waitAll && fenceCount > 1) {
1828 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1829 }
1830
1831 for (uint32_t i = 0; i < fenceCount; ++i) {
1832 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1833 bool expired = false;
1834 		bool signaled = false;
1835 if (fence->signalled)
1836 continue;
1837
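		/* TODO: returning immediately for an unsubmitted fence is a
		 * simplification; strictly we should keep waiting until the
		 * fence is submitted or the timeout expires.
		 */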
1838 if (!fence->submitted)
1839 return VK_TIMEOUT;
1840
1841 		signaled = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1842 		if (!signaled)
1843 return VK_TIMEOUT;
1844
1845 fence->signalled = true;
1846 }
1847
1848 return VK_SUCCESS;
1849 }
1850
1851 VkResult radv_ResetFences(VkDevice device,
1852 uint32_t fenceCount,
1853 const VkFence *pFences)
1854 {
1855 for (unsigned i = 0; i < fenceCount; ++i) {
1856 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1857 fence->submitted = fence->signalled = false;
1858 }
1859
1860 return VK_SUCCESS;
1861 }
1862
1863 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1864 {
1865 RADV_FROM_HANDLE(radv_device, device, _device);
1866 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1867
1868 if (fence->signalled)
1869 return VK_SUCCESS;
1870 if (!fence->submitted)
1871 return VK_NOT_READY;
1872
1873 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1874 return VK_NOT_READY;
1875
1876 return VK_SUCCESS;
1877 }
1878
1879
1880 // Queue semaphore functions
1881
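/* Semaphores are bare winsys objects; the pointer doubles as the Vulkan
 * handle, so creation and destruction need no extra wrapping.
 */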
1882 VkResult radv_CreateSemaphore(
1883 VkDevice _device,
1884 const VkSemaphoreCreateInfo* pCreateInfo,
1885 const VkAllocationCallbacks* pAllocator,
1886 VkSemaphore* pSemaphore)
1887 {
1888 RADV_FROM_HANDLE(radv_device, device, _device);
1889 struct radeon_winsys_sem *sem;
1890
1891 sem = device->ws->create_sem(device->ws);
1892 if (!sem)
1893 return VK_ERROR_OUT_OF_HOST_MEMORY;
1894
1895 *pSemaphore = (VkSemaphore)sem;
1896 return VK_SUCCESS;
1897 }
1898
1899 void radv_DestroySemaphore(
1900 VkDevice _device,
1901 VkSemaphore _semaphore,
1902 const VkAllocationCallbacks* pAllocator)
1903 {
1904 RADV_FROM_HANDLE(radv_device, device, _device);
1905 struct radeon_winsys_sem *sem;
1906 if (!_semaphore)
1907 return;
1908
1909 sem = (struct radeon_winsys_sem *)_semaphore;
1910 device->ws->destroy_sem(sem);
1911 }
1912
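/* An event is an 8-byte CPU-visible GTT buffer. The host reads and writes
 * the mapping directly below, and the GPU updates the same location from
 * the command stream.
 */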
1913 VkResult radv_CreateEvent(
1914 VkDevice _device,
1915 const VkEventCreateInfo* pCreateInfo,
1916 const VkAllocationCallbacks* pAllocator,
1917 VkEvent* pEvent)
1918 {
1919 RADV_FROM_HANDLE(radv_device, device, _device);
1920 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
1921 sizeof(*event), 8,
1922 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1923
1924 if (!event)
1925 return VK_ERROR_OUT_OF_HOST_MEMORY;
1926
1927 event->bo = device->ws->buffer_create(device->ws, 8, 8,
1928 RADEON_DOMAIN_GTT,
1929 RADEON_FLAG_CPU_ACCESS);
1930 if (!event->bo) {
1931 vk_free2(&device->alloc, pAllocator, event);
1932 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1933 }
1934
1935 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
1936
1937 *pEvent = radv_event_to_handle(event);
1938
1939 return VK_SUCCESS;
1940 }
1941
1942 void radv_DestroyEvent(
1943 VkDevice _device,
1944 VkEvent _event,
1945 const VkAllocationCallbacks* pAllocator)
1946 {
1947 RADV_FROM_HANDLE(radv_device, device, _device);
1948 RADV_FROM_HANDLE(radv_event, event, _event);
1949
1950 if (!event)
1951 return;
1952 device->ws->buffer_destroy(event->bo);
1953 vk_free2(&device->alloc, pAllocator, event);
1954 }
1955
1956 VkResult radv_GetEventStatus(
1957 VkDevice _device,
1958 VkEvent _event)
1959 {
1960 RADV_FROM_HANDLE(radv_event, event, _event);
1961
1962 if (*event->map == 1)
1963 return VK_EVENT_SET;
1964 return VK_EVENT_RESET;
1965 }
1966
1967 VkResult radv_SetEvent(
1968 VkDevice _device,
1969 VkEvent _event)
1970 {
1971 RADV_FROM_HANDLE(radv_event, event, _event);
1972 *event->map = 1;
1973
1974 return VK_SUCCESS;
1975 }
1976
1977 VkResult radv_ResetEvent(
1978 VkDevice _device,
1979 VkEvent _event)
1980 {
1981 RADV_FROM_HANDLE(radv_event, event, _event);
1982 *event->map = 0;
1983
1984 return VK_SUCCESS;
1985 }
1986
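/* Creating a buffer allocates no GPU memory: only size and usage are
 * recorded, and the backing BO is attached later by BindBufferMemory.
 */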
1987 VkResult radv_CreateBuffer(
1988 VkDevice _device,
1989 const VkBufferCreateInfo* pCreateInfo,
1990 const VkAllocationCallbacks* pAllocator,
1991 VkBuffer* pBuffer)
1992 {
1993 RADV_FROM_HANDLE(radv_device, device, _device);
1994 struct radv_buffer *buffer;
1995
1996 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1997
1998 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1999 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2000 if (buffer == NULL)
2001 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2002
2003 buffer->size = pCreateInfo->size;
2004 buffer->usage = pCreateInfo->usage;
2005 buffer->bo = NULL;
2006 buffer->offset = 0;
2007
2008 *pBuffer = radv_buffer_to_handle(buffer);
2009
2010 return VK_SUCCESS;
2011 }
2012
2013 void radv_DestroyBuffer(
2014 VkDevice _device,
2015 VkBuffer _buffer,
2016 const VkAllocationCallbacks* pAllocator)
2017 {
2018 RADV_FROM_HANDLE(radv_device, device, _device);
2019 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2020
2021 if (!buffer)
2022 return;
2023
2024 vk_free2(&device->alloc, pAllocator, buffer);
2025 }
2026
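/* Return the per-level tile mode index, using the separate stencil tiling
 * table when the stencil plane is requested.
 */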
2027 static inline unsigned
2028 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2029 {
2030 if (stencil)
2031 return image->surface.stencil_tiling_index[level];
2032 else
2033 return image->surface.tiling_index[level];
2034 }
2035
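/* Fill the CB_COLOR* register state for a color attachment: base address,
 * pitch/slice tiling, CMASK/FMASK/DCC metadata and the number format.
 */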
2036 static void
2037 radv_initialise_color_surface(struct radv_device *device,
2038 struct radv_color_buffer_info *cb,
2039 struct radv_image_view *iview)
2040 {
2041 const struct vk_format_description *desc;
2042 unsigned ntype, format, swap, endian;
2043 unsigned blend_clamp = 0, blend_bypass = 0;
2044 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2045 uint64_t va;
2046 const struct radeon_surf *surf = &iview->image->surface;
2047 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2048
2049 desc = vk_format_description(iview->vk_format);
2050
2051 memset(cb, 0, sizeof(*cb));
2052
2053 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2054 va += level_info->offset;
2055 cb->cb_color_base = va >> 8;
2056
2057 /* CMASK variables */
2058 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2059 va += iview->image->cmask.offset;
2060 cb->cb_color_cmask = va >> 8;
2061 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2062
2063 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2064 va += iview->image->dcc_offset;
2065 cb->cb_dcc_base = va >> 8;
2066
2067 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2068 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2069 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2070
2071 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2072 pitch_tile_max = level_info->nblk_x / 8 - 1;
2073 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2074 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2075
2076 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2077 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2078
2079 /* Intensity is implemented as Red, so treat it that way. */
2080 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2081 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2082
2083 if (iview->image->samples > 1) {
2084 unsigned log_samples = util_logbase2(iview->image->samples);
2085
2086 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2087 S_028C74_NUM_FRAGMENTS(log_samples);
2088 }
2089
2090 if (iview->image->fmask.size) {
2091 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2092 if (device->physical_device->rad_info.chip_class >= CIK)
2093 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2094 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2095 cb->cb_color_fmask = va >> 8;
2096 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2097 } else {
2098 /* This must be set for fast clear to work without FMASK. */
2099 if (device->physical_device->rad_info.chip_class >= CIK)
2100 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2101 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2102 cb->cb_color_fmask = cb->cb_color_base;
2103 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2104 }
2105
2106 ntype = radv_translate_color_numformat(iview->vk_format,
2107 desc,
2108 vk_format_get_first_non_void_channel(iview->vk_format));
2109 format = radv_translate_colorformat(iview->vk_format);
2110 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2111 		radv_finishme("Illegal color");
2112 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2113 endian = radv_colorformat_endian_swap(format);
2114
2115 /* blend clamp should be set for all NORM/SRGB types */
2116 if (ntype == V_028C70_NUMBER_UNORM ||
2117 ntype == V_028C70_NUMBER_SNORM ||
2118 ntype == V_028C70_NUMBER_SRGB)
2119 blend_clamp = 1;
2120
2121 /* set blend bypass according to docs if SINT/UINT or
2122 8/24 COLOR variants */
2123 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2124 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2125 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2126 blend_clamp = 0;
2127 blend_bypass = 1;
2128 }
2129 #if 0
2130 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2131 (format == V_028C70_COLOR_8 ||
2132 format == V_028C70_COLOR_8_8 ||
2133 format == V_028C70_COLOR_8_8_8_8))
2134 ->color_is_int8 = true;
2135 #endif
2136 cb->cb_color_info = S_028C70_FORMAT(format) |
2137 S_028C70_COMP_SWAP(swap) |
2138 S_028C70_BLEND_CLAMP(blend_clamp) |
2139 S_028C70_BLEND_BYPASS(blend_bypass) |
2140 S_028C70_SIMPLE_FLOAT(1) |
2141 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2142 ntype != V_028C70_NUMBER_SNORM &&
2143 ntype != V_028C70_NUMBER_SRGB &&
2144 format != V_028C70_COLOR_8_24 &&
2145 format != V_028C70_COLOR_24_8) |
2146 S_028C70_NUMBER_TYPE(ntype) |
2147 S_028C70_ENDIAN(endian);
2148 	if (iview->image->samples > 1 &&
2149 	    iview->image->fmask.size)
2150 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
2151
2152 if (iview->image->cmask.size &&
2153 (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
2154 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2155
2156 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2157 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2158
2159 if (device->physical_device->rad_info.chip_class >= VI) {
2160 unsigned max_uncompressed_block_size = 2;
2161 if (iview->image->samples > 1) {
2162 if (iview->image->surface.bpe == 1)
2163 max_uncompressed_block_size = 0;
2164 else if (iview->image->surface.bpe == 2)
2165 max_uncompressed_block_size = 1;
2166 }
2167
2168 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2169 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2170 }
2171
2172 /* This must be set for fast clear to work without FMASK. */
2173 if (!iview->image->fmask.size &&
2174 device->physical_device->rad_info.chip_class == SI) {
2175 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2176 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2177 }
2178 }
2179
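/* Fill the DB_* register state for a depth/stencil attachment, including
 * tiling parameters; HTILE is only enabled for the base mip level.
 */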
2180 static void
2181 radv_initialise_ds_surface(struct radv_device *device,
2182 struct radv_ds_buffer_info *ds,
2183 struct radv_image_view *iview)
2184 {
2185 unsigned level = iview->base_mip;
2186 unsigned format;
2187 uint64_t va, s_offs, z_offs;
2188 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2189 memset(ds, 0, sizeof(*ds));
2190 switch (iview->vk_format) {
2191 case VK_FORMAT_D24_UNORM_S8_UINT:
2192 case VK_FORMAT_X8_D24_UNORM_PACK32:
2193 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2194 ds->offset_scale = 2.0f;
2195 break;
2196 case VK_FORMAT_D16_UNORM:
2197 case VK_FORMAT_D16_UNORM_S8_UINT:
2198 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2199 ds->offset_scale = 4.0f;
2200 break;
2201 case VK_FORMAT_D32_SFLOAT:
2202 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2203 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2204 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2205 ds->offset_scale = 1.0f;
2206 break;
2207 default:
2208 break;
2209 }
2210
2211 format = radv_translate_dbformat(iview->vk_format);
2212 if (format == V_028040_Z_INVALID) {
2213 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2214 }
2215
2216 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2217 s_offs = z_offs = va;
2218 z_offs += iview->image->surface.level[level].offset;
2219 s_offs += iview->image->surface.stencil_level[level].offset;
2220
2221 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2222 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2223 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2224 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2225 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2226
2227 if (iview->image->samples > 1)
2228 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2229
2230 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2231 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2232 else
2233 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2234
2235 if (device->physical_device->rad_info.chip_class >= CIK) {
2236 struct radeon_info *info = &device->physical_device->rad_info;
2237 unsigned tiling_index = iview->image->surface.tiling_index[level];
2238 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2239 unsigned macro_index = iview->image->surface.macro_tile_index;
2240 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2241 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2242 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2243
2244 ds->db_depth_info |=
2245 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2246 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2247 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2248 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2249 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2250 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2251 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2252 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2253 } else {
2254 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2255 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2256 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2257 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2258 }
2259
2260 if (iview->image->htile.size && !level) {
2261 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2262 S_028040_ALLOW_EXPCLEAR(1);
2263
2264 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2265 /* Workaround: For a not yet understood reason, the
2266 * combination of MSAA, fast stencil clear and stencil
2267 * decompress messes with subsequent stencil buffer
2268 		 * uses. The problem was reproduced on Verde, Bonaire,
2269 * Tonga, and Carrizo.
2270 *
2271 * Disabling EXPCLEAR works around the problem.
2272 *
2273 * Check piglit's arb_texture_multisample-stencil-clear
2274 * test if you want to try changing this.
2275 */
2276 if (iview->image->samples <= 1)
2277 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2278 } else
2279 /* Use all of the htile_buffer for depth if there's no stencil. */
2280 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2281
2282 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2283 iview->image->htile.offset;
2284 ds->db_htile_data_base = va >> 8;
2285 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2286 } else {
2287 ds->db_htile_data_base = 0;
2288 ds->db_htile_surface = 0;
2289 }
2290
2291 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2292 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2293
2294 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2295 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2296 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2297 }
2298
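/* Framebuffers precompute the color and depth/stencil register state for
 * every attachment at creation time, so binding one later only requires
 * emitting the saved values.
 */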
2299 VkResult radv_CreateFramebuffer(
2300 VkDevice _device,
2301 const VkFramebufferCreateInfo* pCreateInfo,
2302 const VkAllocationCallbacks* pAllocator,
2303 VkFramebuffer* pFramebuffer)
2304 {
2305 RADV_FROM_HANDLE(radv_device, device, _device);
2306 struct radv_framebuffer *framebuffer;
2307
2308 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2309
2310 size_t size = sizeof(*framebuffer) +
2311 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2312 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2313 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2314 if (framebuffer == NULL)
2315 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2316
2317 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2318 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2319 VkImageView _iview = pCreateInfo->pAttachments[i];
2320 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2321 framebuffer->attachments[i].attachment = iview;
2322 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2323 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2324 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2325 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2326 }
2327 }
2328
2329 framebuffer->width = pCreateInfo->width;
2330 framebuffer->height = pCreateInfo->height;
2331 framebuffer->layers = pCreateInfo->layers;
2332
2333 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2334 return VK_SUCCESS;
2335 }
2336
2337 void radv_DestroyFramebuffer(
2338 VkDevice _device,
2339 VkFramebuffer _fb,
2340 const VkAllocationCallbacks* pAllocator)
2341 {
2342 RADV_FROM_HANDLE(radv_device, device, _device);
2343 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2344
2345 if (!fb)
2346 return;
2347 vk_free2(&device->alloc, pAllocator, fb);
2348 }
2349
2350 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2351 {
2352 switch (address_mode) {
2353 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2354 return V_008F30_SQ_TEX_WRAP;
2355 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2356 return V_008F30_SQ_TEX_MIRROR;
2357 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2358 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2359 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2360 return V_008F30_SQ_TEX_CLAMP_BORDER;
2361 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2362 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2363 default:
2364 unreachable("illegal tex wrap mode");
2365 break;
2366 }
2367 }
2368
2369 static unsigned
2370 radv_tex_compare(VkCompareOp op)
2371 {
2372 switch (op) {
2373 case VK_COMPARE_OP_NEVER:
2374 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2375 case VK_COMPARE_OP_LESS:
2376 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2377 case VK_COMPARE_OP_EQUAL:
2378 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2379 case VK_COMPARE_OP_LESS_OR_EQUAL:
2380 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2381 case VK_COMPARE_OP_GREATER:
2382 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2383 case VK_COMPARE_OP_NOT_EQUAL:
2384 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2385 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2386 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2387 case VK_COMPARE_OP_ALWAYS:
2388 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2389 default:
2390 unreachable("illegal compare mode");
2391 break;
2392 }
2393 }
2394
2395 static unsigned
2396 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2397 {
2398 	switch (filter) {
2399 	case VK_FILTER_NEAREST:
2400 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2401 			 V_008F38_SQ_TEX_XY_FILTER_POINT);
2402 	case VK_FILTER_LINEAR:
2403 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2404 			 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2405 	case VK_FILTER_CUBIC_IMG:
2406 	default:
2407 		fprintf(stderr, "illegal texture filter\n");
2408 return 0;
2409 }
2410 }
2411
2412 static unsigned
2413 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2414 {
2415 switch (mode) {
2416 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2417 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2418 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2419 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2420 default:
2421 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2422 }
2423 }
2424
2425 static unsigned
2426 radv_tex_bordercolor(VkBorderColor bcolor)
2427 {
2428 switch (bcolor) {
2429 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2430 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2431 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2432 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2433 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2434 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2435 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2436 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2437 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2438 default:
2439 break;
2440 }
2441 return 0;
2442 }
2443
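/* Map the maximum anisotropy to the hardware's log2 encoding:
 * 1 -> 0 (off), 2-3 -> 1 (2x), 4-7 -> 2 (4x), 8-15 -> 3 (8x), 16+ -> 4 (16x).
 */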
2444 static unsigned
2445 radv_tex_aniso_filter(unsigned filter)
2446 {
2447 if (filter < 2)
2448 return 0;
2449 if (filter < 4)
2450 return 1;
2451 if (filter < 8)
2452 return 2;
2453 if (filter < 16)
2454 return 3;
2455 return 4;
2456 }
2457
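/* Pack the VkSamplerCreateInfo state into the four SQ_IMG_SAMP_WORD dwords
 * consumed by the texture units; LOD fields use a 4.8 fixed-point encoding
 * via S_FIXED.
 */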
2458 static void
2459 radv_init_sampler(struct radv_device *device,
2460 struct radv_sampler *sampler,
2461 const VkSamplerCreateInfo *pCreateInfo)
2462 {
2463 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2464 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2465 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2466 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2467
2468 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2469 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2470 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2471 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2472 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2473 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2474 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2475 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2476 S_008F30_DISABLE_CUBE_WRAP(0) |
2477 S_008F30_COMPAT_MODE(is_vi));
2478 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2479 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2480 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2481 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2482 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2483 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2484 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2485 S_008F38_MIP_POINT_PRECLAMP(1) |
2486 S_008F38_DISABLE_LSB_CEIL(1) |
2487 S_008F38_FILTER_PREC_FIX(1) |
2488 S_008F38_ANISO_OVERRIDE(is_vi));
2489 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2490 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2491 }
2492
2493 VkResult radv_CreateSampler(
2494 VkDevice _device,
2495 const VkSamplerCreateInfo* pCreateInfo,
2496 const VkAllocationCallbacks* pAllocator,
2497 VkSampler* pSampler)
2498 {
2499 RADV_FROM_HANDLE(radv_device, device, _device);
2500 struct radv_sampler *sampler;
2501
2502 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2503
2504 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2505 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2506 if (!sampler)
2507 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2508
2509 radv_init_sampler(device, sampler, pCreateInfo);
2510 *pSampler = radv_sampler_to_handle(sampler);
2511
2512 return VK_SUCCESS;
2513 }
2514
2515 void radv_DestroySampler(
2516 VkDevice _device,
2517 VkSampler _sampler,
2518 const VkAllocationCallbacks* pAllocator)
2519 {
2520 RADV_FROM_HANDLE(radv_device, device, _device);
2521 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2522
2523 if (!sampler)
2524 return;
2525 vk_free2(&device->alloc, pAllocator, sampler);
2526 }
2527
2528
2529 /* vk_icd.h does not declare this function, so we declare it here to
2530 * suppress Wmissing-prototypes.
2531 */
2532 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2533 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2534
2535 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2536 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2537 {
2538 /* For the full details on loader interface versioning, see
2539 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2540 * What follows is a condensed summary, to help you navigate the large and
2541 * confusing official doc.
2542 *
2543 * - Loader interface v0 is incompatible with later versions. We don't
2544 * support it.
2545 *
2546 * - In loader interface v1:
2547 * - The first ICD entrypoint called by the loader is
2548 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2549 * entrypoint.
2550 * - The ICD must statically expose no other Vulkan symbol unless it is
2551 * linked with -Bsymbolic.
2552 * - Each dispatchable Vulkan handle created by the ICD must be
2553 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2554 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2555 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2556 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2557 * such loader-managed surfaces.
2558 *
2559 * - Loader interface v2 differs from v1 in:
2560 * - The first ICD entrypoint called by the loader is
2561 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2562 * statically expose this entrypoint.
2563 *
2564 * - Loader interface v3 differs from v2 in:
2565 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2566 	 *      vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
2567 * because the loader no longer does so.
2568 */
2569 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2570 return VK_SUCCESS;
2571 }