radv/geom: check if esgs and gsvs rings exist before filling geom rings
[mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <dlfcn.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include "radv_private.h"
#include "radv_cs.h"
#include "util/strtod.h"

#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "amdgpu_id.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "util/debug.h"

struct radv_dispatch_table dtable;

static int
radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
{
	Dl_info info;
	struct stat st;
	if (!dladdr(ptr, &info) || !info.dli_fname) {
		return -1;
	}
	if (stat(info.dli_fname, &st)) {
		return -1;
	}
	*timestamp = st.st_mtim.tv_sec;
	return 0;
}

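/* The pipeline cache UUID is assembled from build timestamps and the GPU
 * family, so caches are invalidated whenever the driver, LLVM or the target
 * GPU changes:
 *
 *   bytes  0-3  mesa (radv) build timestamp
 *   bytes  4-7  LLVM build timestamp
 *   bytes  8-9  radeon_family id
 *   bytes 10+   the string "radv" (zero padded to VK_UUID_SIZE)
 */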
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);
	if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

static const VkExtensionProperties instance_extensions[] = {
	{
		.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
		.specVersion = 25,
	},
#ifdef VK_USE_PLATFORM_XCB_KHR
	{
		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
	{
		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
		.specVersion = 6,
	},
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
	{
		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
		.specVersion = 5,
	},
#endif
};

static const VkExtensionProperties common_device_extensions[] = {
	{
		.extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
		.specVersion = 68,
	},
	{
		.extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
		.specVersion = 1,
	},
	{
		.extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
		.specVersion = 1,
	},
};

static VkResult
radv_extensions_register(struct radv_instance *instance,
			 struct radv_extensions *extensions,
			 const VkExtensionProperties *new_ext,
			 uint32_t num_ext)
{
	size_t new_size;
	VkExtensionProperties *new_ptr;

	assert(new_ext && num_ext > 0);

	if (!new_ext)
		return VK_ERROR_INITIALIZATION_FAILED;

	new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
	new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
			     new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

	/* Old array continues to be valid, update nothing */
	if (!new_ptr)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memcpy(&new_ptr[extensions->num_ext], new_ext,
	       num_ext * sizeof(VkExtensionProperties));
	extensions->ext_array = new_ptr;
	extensions->num_ext += num_ext;

	return VK_SUCCESS;
}

static void
radv_extensions_finish(struct radv_instance *instance,
		       struct radv_extensions *extensions)
{
	assert(extensions);

	if (!extensions) {
		radv_loge("Attempted to free invalid extension struct\n");
		return;
	}

	if (extensions->ext_array)
		vk_free(&instance->alloc, extensions->ext_array);
}

static bool
is_extension_enabled(const VkExtensionProperties *extensions,
		     size_t num_ext,
		     const char *name)
{
	assert(extensions && name);

	for (uint32_t i = 0; i < num_ext; i++) {
		if (strcmp(name, extensions[i].extensionName) == 0)
			return true;
	}

	return false;
}

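/* Open the DRM render node at the given path, check that it is an amdgpu
 * device, and bring up the winsys, WSI layer and device extension list for
 * it. Nodes driven by other kernel drivers return
 * VK_ERROR_INCOMPATIBLE_DRIVER so that enumeration simply skips them. */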
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  const char *path)
{
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return VK_ERROR_INCOMPATIBLE_DRIVER;

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}
	device->ws->query_info(device->ws, &device->rad_info);
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
		radv_finish_wsi(device);
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	result = radv_extensions_register(instance,
					  &device->extensions,
					  common_device_extensions,
					  ARRAY_SIZE(common_device_extensions));
	if (result != VK_SUCCESS)
		goto fail;

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
	device->name = device->rad_info.name;
	close(fd);
	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_extensions_finish(device->instance, &device->extensions);
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
}


static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
		   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
		     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

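/* Flags understood in the comma-separated RADV_DEBUG environment variable,
 * parsed with parse_debug_string() in radv_CreateInstance() below. */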
static const struct debug_control radv_debug_options[] = {
	{"fastclears", RADV_DEBUG_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{NULL, 0}
};

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkInstance* pInstance)
{
	struct radv_instance *instance;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(instance_extensions,
					  ARRAY_SIZE(instance_extensions),
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			     VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(instance, 0, sizeof(*instance));

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance _instance,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_free(&instance->alloc, instance);
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance _instance,
	uint32_t* pPhysicalDeviceCount,
	VkPhysicalDevice* pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		char path[20];
		instance->physicalDeviceCount = 0;
		for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
			snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
			result = radv_physical_device_init(instance->physicalDevices +
							   instance->physicalDeviceCount,
							   instance, path);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				return result;
		}
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
								      : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures* pFeatures)
{
	// RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess = true,
		.fullDrawIndexUint32 = true,
		.imageCubeArray = true,
		.independentBlend = true,
		.geometryShader = true,
		.tessellationShader = false,
		.sampleRateShading = false,
		.dualSrcBlend = true,
		.logicOp = true,
		.multiDrawIndirect = true,
		.drawIndirectFirstInstance = true,
		.depthClamp = true,
		.depthBiasClamp = true,
		.fillModeNonSolid = true,
		.depthBounds = true,
		.wideLines = true,
		.largePoints = true,
		.alphaToOne = true,
		.multiViewport = true,
		.samplerAnisotropy = true,
		.textureCompressionETC2 = false,
		.textureCompressionASTC_LDR = false,
		.textureCompressionBC = true,
		.occlusionQueryPrecise = true,
		.pipelineStatisticsQuery = false,
		.vertexPipelineStoresAndAtomics = true,
		.fragmentStoresAndAtomics = true,
		.shaderTessellationAndGeometryPointSize = true,
		.shaderImageGatherExtended = true,
		.shaderStorageImageExtendedFormats = true,
		.shaderStorageImageMultisample = false,
		.shaderUniformBufferArrayDynamicIndexing = true,
		.shaderSampledImageArrayDynamicIndexing = true,
		.shaderStorageBufferArrayDynamicIndexing = true,
		.shaderStorageImageArrayDynamicIndexing = true,
		.shaderStorageImageReadWithoutFormat = false,
		.shaderStorageImageWriteWithoutFormat = false,
		.shaderClipDistance = true,
		.shaderCullDistance = true,
		.shaderFloat64 = false,
		.shaderInt64 = false,
		.shaderInt16 = false,
		.variableMultisampleRate = false,
		.inheritedQueries = false,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceFeatures2KHR *pFeatures)
{
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;
	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D = (1 << 14),
		.maxImageDimension2D = (1 << 14),
		.maxImageDimension3D = (1 << 11),
		.maxImageDimensionCube = (1 << 14),
		.maxImageArrayLayers = (1 << 11),
		.maxTexelBufferElements = 128 * 1024 * 1024,
		.maxUniformBufferRange = UINT32_MAX,
		.maxStorageBufferRange = UINT32_MAX,
		.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount = UINT32_MAX,
		.maxSamplerAllocationCount = 64 * 1024,
		.bufferImageGranularity = 64, /* A cache line */
		.sparseAddressSpaceSize = 0,
		.maxBoundDescriptorSets = MAX_SETS,
		.maxPerStageDescriptorSamplers = 64,
		.maxPerStageDescriptorUniformBuffers = 64,
		.maxPerStageDescriptorStorageBuffers = 64,
		.maxPerStageDescriptorSampledImages = 64,
		.maxPerStageDescriptorStorageImages = 64,
		.maxPerStageDescriptorInputAttachments = 64,
		.maxPerStageResources = 128,
		.maxDescriptorSetSamplers = 256,
		.maxDescriptorSetUniformBuffers = 256,
		.maxDescriptorSetUniformBuffersDynamic = 256,
		.maxDescriptorSetStorageBuffers = 256,
		.maxDescriptorSetStorageBuffersDynamic = 256,
		.maxDescriptorSetSampledImages = 256,
		.maxDescriptorSetStorageImages = 256,
		.maxDescriptorSetInputAttachments = 256,
		.maxVertexInputAttributes = 32,
		.maxVertexInputBindings = 32,
		.maxVertexInputAttributeOffset = 2047,
		.maxVertexInputBindingStride = 2048,
		.maxVertexOutputComponents = 128,
		.maxTessellationGenerationLevel = 0,
		.maxTessellationPatchSize = 0,
		.maxTessellationControlPerVertexInputComponents = 0,
		.maxTessellationControlPerVertexOutputComponents = 0,
		.maxTessellationControlPerPatchOutputComponents = 0,
		.maxTessellationControlTotalOutputComponents = 0,
		.maxTessellationEvaluationInputComponents = 0,
		.maxTessellationEvaluationOutputComponents = 0,
		.maxGeometryShaderInvocations = 32,
		.maxGeometryInputComponents = 64,
		.maxGeometryOutputComponents = 128,
		.maxGeometryOutputVertices = 256,
		.maxGeometryTotalOutputComponents = 1024,
		.maxFragmentInputComponents = 128,
		.maxFragmentOutputAttachments = 8,
		.maxFragmentDualSrcAttachments = 1,
		.maxFragmentCombinedOutputResources = 8,
		.maxComputeSharedMemorySize = 32768,
		.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits = 4 /* FIXME */,
		.subTexelPrecisionBits = 4 /* FIXME */,
		.mipmapPrecisionBits = 4 /* FIXME */,
		.maxDrawIndexedIndexValue = UINT32_MAX,
		.maxDrawIndirectCount = UINT32_MAX,
		.maxSamplerLodBias = 16,
		.maxSamplerAnisotropy = 16,
		.maxViewports = MAX_VIEWPORTS,
		.maxViewportDimensions = { (1 << 14), (1 << 14) },
		.viewportBoundsRange = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits = 13, /* We take a float? */
		.minMemoryMapAlignment = 4096, /* A page */
		.minTexelBufferOffsetAlignment = 1,
		.minUniformBufferOffsetAlignment = 4,
		.minStorageBufferOffsetAlignment = 4,
		.minTexelOffset = -32,
		.maxTexelOffset = 31,
		.minTexelGatherOffset = -32,
		.maxTexelGatherOffset = 31,
		.minInterpolationOffset = -2,
		.maxInterpolationOffset = 2,
		.subPixelInterpolationOffsetBits = 8,
		.maxFramebufferWidth = (1 << 14),
		.maxFramebufferHeight = (1 << 14),
		.maxFramebufferLayers = (1 << 10),
		.framebufferColorSampleCounts = sample_counts,
		.framebufferDepthSampleCounts = sample_counts,
		.framebufferStencilSampleCounts = sample_counts,
		.framebufferNoAttachmentsSampleCounts = sample_counts,
		.maxColorAttachments = MAX_RTS,
		.sampledImageColorSampleCounts = sample_counts,
		.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts = sample_counts,
		.sampledImageStencilSampleCounts = sample_counts,
		.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords = 1,
		.timestampComputeAndGraphics = false,
		.timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances = 8,
		.maxCullDistances = 8,
		.maxCombinedClipAndCullDistances = 8,
		.discreteQueuePriorities = 1,
		.pointSizeRange = { 0.125, 255.875 },
		.lineWidthRange = { 0.0, 7.9921875 },
		.pointSizeGranularity = (1.0 / 8.0),
		.lineWidthGranularity = (1.0 / 128.0),
		.strictLines = false, /* FINISHME */
		.standardSampleLocations = true,
		.optimalBufferCopyOffsetAlignment = 128,
		.optimalBufferCopyRowPitchAlignment = 128,
		.nonCoherentAtomSize = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = VK_MAKE_VERSION(1, 0, 5),
		.driverVersion = 1,
		.vendorID = 0x1002,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
		.limits = limits,
		.sparseProperties = {0}, /* FINISHME: sparse resources are not supported yet. */
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceProperties2KHR *pProperties)
{
	return radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties* pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
				      VK_QUEUE_COMPUTE_BIT |
				      VK_QUEUE_TRANSFER_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.compute_rings > 0 &&
	    pdevice->rad_info.chip_class >= CIK &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
				.queueCount = pdevice->rad_info.compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice physicalDevice,
	uint32_t* pCount,
	VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
{
	return radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice,
							   pCount,
							   &pQueueFamilyProperties->queueFamilyProperties);
}

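/* Three heaps (CPU-invisible VRAM, CPU-visible VRAM, GTT) and four memory
 * types on top of them. The type indices are significant: they are reused
 * directly as RADV_MEM_TYPE_* values by radv_AllocateMemory() below to pick
 * the buffer domain and flags. */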
void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);

	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
				 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
	};
	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
				 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
				 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
		.heapIndex = RADV_MEM_HEAP_GTT,
	};

	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);

	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
		.size = physical_device->rad_info.vram_size -
			physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
		.size = physical_device->rad_info.visible_vram_size,
		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
	};
	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
		.size = physical_device->rad_info.gart_size,
		.flags = 0,
	};
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
{
	return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
						      &pMemoryProperties->memoryProperties);
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		int queue_family_index, int idx)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;

	queue->hw_ctx = device->ws->ctx_create(device->ws);
	if (!queue->hw_ctx)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->preamble_cs)
		queue->device->ws->cs_destroy(queue->preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

VkResult radv_CreateDevice(
	VkPhysicalDevice physicalDevice,
	const VkDeviceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDevice* pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		if (!is_extension_enabled(physical_device->extensions.ext_array,
					  physical_device->extensions.num_ext,
					  pCreateInfo->ppEnabledExtensionNames[i]))
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
	}

	device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
			   sizeof(*device), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(device, 0, sizeof(*device));

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->debug_flags = device->instance->debug_flags;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

#if HAVE_LLVM < 0x0400
	device->llvm_supports_spill = false;
#else
	device->llvm_supports_spill = true;
#endif

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * device->scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);
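	/* Illustrative numbers: a 36 CU part (e.g. Polaris10) gets
	 * 32 * 36 = 1152 scratch waves, so the max_threads_per_block / 64 = 32
	 * lower bound only matters for very small (hypothetical single-CU)
	 * parts. */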

	radv_device_init_gs_info(device);

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (getenv("RADV_TRACE_FILE")) {
		device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
							     RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
		if (!device->trace_bo) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}

		device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
		if (!device->trace_id_ptr) {
			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
			goto fail;
		}
	}

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);
	return result;
}

void radv_DestroyDevice(
	VkDevice _device,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}
	radv_device_finish_meta(device);

	vk_free(&device->alloc, device);
}

VkResult radv_EnumerateInstanceExtensionProperties(
	const char* pLayerName,
	uint32_t* pPropertyCount,
	VkExtensionProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = ARRAY_SIZE(instance_extensions);
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
	typed_memcpy(pProperties, instance_extensions, *pPropertyCount);

	if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateDeviceExtensionProperties(
	VkPhysicalDevice physicalDevice,
	const char* pLayerName,
	uint32_t* pPropertyCount,
	VkExtensionProperties* pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pProperties == NULL) {
		*pPropertyCount = pdevice->extensions.num_ext;
		return VK_SUCCESS;
	}

	*pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
	typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);

	if (*pPropertyCount < pdevice->extensions.num_ext)
		return VK_INCOMPLETE;

	return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceLayerProperties(
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult radv_EnumerateDeviceLayerProperties(
	VkPhysicalDevice physicalDevice,
	uint32_t* pPropertyCount,
	VkLayerProperties* pProperties)
{
	if (pProperties == NULL) {
		*pPropertyCount = 0;
		return VK_SUCCESS;
	}

	/* None supported at this time */
	return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void radv_GetDeviceQueue(
	VkDevice _device,
	uint32_t queueFamilyIndex,
	uint32_t queueIndex,
	VkQueue* pQueue)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	*pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
}

static void radv_dump_trace(struct radv_device *device,
			    struct radeon_winsys_cs *cs)
{
	const char *filename = getenv("RADV_TRACE_FILE");
	FILE *f = fopen(filename, "w");
	if (!f) {
		fprintf(stderr, "Failed to write trace dump to %s\n", filename);
		return;
	}

	fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
	device->ws->cs_dump(cs, f, *device->trace_id_ptr);
	fclose(f);
}

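/* Write the geometry ring buffer descriptors that the shaders expect to find
 * at map[4..19] (map[0..1] holds the graphics scratch rsrc words):
 *
 *   slot 0: ES entry for the ES->GS ring (swizzled, ADD_TID)
 *   slot 1: GS entry for the ES->GS ring (linear)
 *   slot 2: VS entry for the GS->VS ring (linear)
 *   slot 3: GS entry for the GS->VS ring (swizzled; the shader patches the
 *           stride and num_records fields at runtime)
 *
 * Either ring BO may be NULL when only the other ring is in use; its
 * descriptors are then written with a zero base address. */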
static void
fill_geom_rings(struct radv_queue *queue,
		uint32_t *map,
		uint32_t esgs_ring_size,
		struct radeon_winsys_bo *esgs_ring_bo,
		uint32_t gsvs_ring_size,
		struct radeon_winsys_bo *gsvs_ring_bo)
{
	uint64_t esgs_va = 0, gsvs_va = 0;
	uint32_t *desc = &map[4];

	if (esgs_ring_bo)
		esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
	if (gsvs_ring_bo)
		gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);

	/* stride 0, num records - size, add tid, swizzle, elsize4,
	   index stride 64 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(3) |
		  S_008F0C_ADD_TID_ENABLE(true);

	desc += 4;
	/* GS entry for ES->GS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = esgs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = esgs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);

	desc += 4;
	/* VS entry for GS->VS ring */
	/* stride 0, num records - size, elsize0,
	   index stride 0 */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(false);
	desc[2] = gsvs_ring_size;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(0) |
		  S_008F0C_INDEX_STRIDE(0) |
		  S_008F0C_ADD_TID_ENABLE(false);
	desc += 4;

	/* stride gsvs_itemsize, num records 64
	   elsize 4, index stride 16 */
	/* shader will patch stride and desc[2] */
	desc[0] = gsvs_va;
	desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
		  S_008F04_STRIDE(0) |
		  S_008F04_SWIZZLE_ENABLE(true);
	desc[2] = 0;
	desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
		  S_008F0C_ELEMENT_SIZE(1) |
		  S_008F0C_INDEX_STRIDE(1) |
		  S_008F0C_ADD_TID_ENABLE(true);
}

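/* Build (or reuse) the per-queue preamble CS that sets up the scratch
 * buffers and geometry rings before the application command buffers run.
 * Buffer sizes only ever grow: if everything requested already fits, the
 * current preamble is reused; otherwise bigger buffers and a fresh
 * descriptor BO are allocated, a new preamble is recorded, and the old
 * objects are destroyed. */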
static VkResult
radv_get_preamble_cs(struct radv_queue *queue,
		     uint32_t scratch_size,
		     uint32_t compute_scratch_size,
		     uint32_t esgs_ring_size,
		     uint32_t gsvs_ring_size,
		     struct radeon_winsys_cs **preamble_cs)
{
	struct radeon_winsys_bo *scratch_bo = NULL;
	struct radeon_winsys_bo *descriptor_bo = NULL;
	struct radeon_winsys_bo *compute_scratch_bo = NULL;
	struct radeon_winsys_bo *esgs_ring_bo = NULL;
	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
	struct radeon_winsys_cs *cs = NULL;

	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
		*preamble_cs = NULL;
		return VK_SUCCESS;
	}

	if (scratch_size <= queue->scratch_size &&
	    compute_scratch_size <= queue->compute_scratch_size &&
	    esgs_ring_size <= queue->esgs_ring_size &&
	    gsvs_ring_size <= queue->gsvs_ring_size) {
		*preamble_cs = queue->preamble_cs;
		return VK_SUCCESS;
	}

	if (scratch_size > queue->scratch_size) {
		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
							      scratch_size,
							      4096,
							      RADEON_DOMAIN_VRAM,
							      RADEON_FLAG_NO_CPU_ACCESS);
		if (!scratch_bo)
			goto fail;
	} else
		scratch_bo = queue->scratch_bo;

	if (compute_scratch_size > queue->compute_scratch_size) {
		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
								      compute_scratch_size,
								      4096,
								      RADEON_DOMAIN_VRAM,
								      RADEON_FLAG_NO_CPU_ACCESS);
		if (!compute_scratch_bo)
			goto fail;

	} else
		compute_scratch_bo = queue->compute_scratch_bo;

	if (esgs_ring_size > queue->esgs_ring_size) {
		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								esgs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								RADEON_FLAG_NO_CPU_ACCESS);
		if (!esgs_ring_bo)
			goto fail;
	} else {
		esgs_ring_bo = queue->esgs_ring_bo;
		esgs_ring_size = queue->esgs_ring_size;
	}

	if (gsvs_ring_size > queue->gsvs_ring_size) {
		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
								gsvs_ring_size,
								4096,
								RADEON_DOMAIN_VRAM,
								RADEON_FLAG_NO_CPU_ACCESS);
		if (!gsvs_ring_bo)
			goto fail;
	} else {
		gsvs_ring_bo = queue->gsvs_ring_bo;
		gsvs_ring_size = queue->gsvs_ring_size;
	}

	if (scratch_bo != queue->scratch_bo ||
	    esgs_ring_bo != queue->esgs_ring_bo ||
	    gsvs_ring_bo != queue->gsvs_ring_bo) {
		uint32_t size = 0;
		if (gsvs_ring_bo || esgs_ring_bo)
			size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
		else if (scratch_bo)
			size = 8; /* 2 dword */

		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
								 size,
								 4096,
								 RADEON_DOMAIN_VRAM,
								 RADEON_FLAG_CPU_ACCESS);
		if (!descriptor_bo)
			goto fail;
	} else
		descriptor_bo = queue->descriptor_bo;

	cs = queue->device->ws->cs_create(queue->device->ws,
					  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
	if (!cs)
		goto fail;


	if (scratch_bo)
		queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);

	if (esgs_ring_bo)
		queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);

	if (gsvs_ring_bo)
		queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);

	if (descriptor_bo)
		queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);

	if (descriptor_bo != queue->descriptor_bo) {
		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

		if (scratch_bo) {
			uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
					 S_008F04_SWIZZLE_ENABLE(1);
			map[0] = scratch_va;
			map[1] = rsrc1;
		}

		if (esgs_ring_bo || gsvs_ring_bo)
			fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);

		queue->device->ws->buffer_unmap(descriptor_bo);
	}

	if (esgs_ring_bo || gsvs_ring_bo) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));

		if (queue->device->physical_device->rad_info.chip_class >= CIK) {
			radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
			radeon_emit(cs, esgs_ring_size >> 8);
			radeon_emit(cs, gsvs_ring_size >> 8);
		} else {
			radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
			radeon_emit(cs, esgs_ring_size >> 8);
			radeon_emit(cs, gsvs_ring_size >> 8);
		}
	}

	if (descriptor_bo) {
		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
				   R_00B230_SPI_SHADER_USER_DATA_GS_0,
				   R_00B330_SPI_SHADER_USER_DATA_ES_0,
				   R_00B430_SPI_SHADER_USER_DATA_HS_0,
				   R_00B530_SPI_SHADER_USER_DATA_LS_0};

		uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);

		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
			radeon_set_sh_reg_seq(cs, regs[i], 2);
			radeon_emit(cs, va);
			radeon_emit(cs, va >> 32);
		}
	}

	if (compute_scratch_bo) {
		uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
		uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
				 S_008F04_SWIZZLE_ENABLE(1);

		queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);

		radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
		radeon_emit(cs, scratch_va);
		radeon_emit(cs, rsrc1);
	}

	if (!queue->device->ws->cs_finalize(cs))
		goto fail;

	if (queue->preamble_cs)
		queue->device->ws->cs_destroy(queue->preamble_cs);

	queue->preamble_cs = cs;

	if (scratch_bo != queue->scratch_bo) {
		if (queue->scratch_bo)
			queue->device->ws->buffer_destroy(queue->scratch_bo);
		queue->scratch_bo = scratch_bo;
		queue->scratch_size = scratch_size;
	}

	if (compute_scratch_bo != queue->compute_scratch_bo) {
		if (queue->compute_scratch_bo)
			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
		queue->compute_scratch_bo = compute_scratch_bo;
		queue->compute_scratch_size = compute_scratch_size;
	}

	if (esgs_ring_bo != queue->esgs_ring_bo) {
		if (queue->esgs_ring_bo)
			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
		queue->esgs_ring_bo = esgs_ring_bo;
		queue->esgs_ring_size = esgs_ring_size;
	}

	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
		if (queue->gsvs_ring_bo)
			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
		queue->gsvs_ring_bo = gsvs_ring_bo;
		queue->gsvs_ring_size = gsvs_ring_size;
	}

	if (descriptor_bo != queue->descriptor_bo) {
		if (queue->descriptor_bo)
			queue->device->ws->buffer_destroy(queue->descriptor_bo);

		queue->descriptor_bo = descriptor_bo;
	}

	*preamble_cs = cs;
	return VK_SUCCESS;
fail:
	if (cs)
		queue->device->ws->cs_destroy(cs);
	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
		queue->device->ws->buffer_destroy(descriptor_bo);
	if (scratch_bo && scratch_bo != queue->scratch_bo)
		queue->device->ws->buffer_destroy(scratch_bo);
	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(compute_scratch_bo);
	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(esgs_ring_bo);
	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(gsvs_ring_bo);
	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

VkResult radv_QueueSubmit(
	VkQueue _queue,
	uint32_t submitCount,
	const VkSubmitInfo* pSubmits,
	VkFence _fence)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);
	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
	int ret;
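	/* When a trace file is requested, submit one CS at a time so that a
	 * GPU hang can be pinned down to a single command buffer. */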
	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
	uint32_t scratch_size = 0;
	uint32_t compute_scratch_size = 0;
	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
	struct radeon_winsys_cs *preamble_cs = NULL;
	VkResult result;

	/* Do this first so failing to allocate scratch buffers can't result in
	 * partially executed submissions. */
	for (uint32_t i = 0; i < submitCount; i++) {
		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);

			scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
			compute_scratch_size = MAX2(compute_scratch_size,
						    cmd_buffer->compute_scratch_size_needed);
			esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
			gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
		}
	}

	result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
	if (result != VK_SUCCESS)
		return result;

	for (uint32_t i = 0; i < submitCount; i++) {
		struct radeon_winsys_cs **cs_array;
		bool can_patch = true;
		uint32_t advance;

		if (!pSubmits[i].commandBufferCount)
			continue;

		cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
				  pSubmits[i].commandBufferCount);

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
					 pSubmits[i].pCommandBuffers[j]);
			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

			cs_array[j] = cmd_buffer->cs;
			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
				can_patch = false;
		}

		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
			advance = MIN2(max_cs_submission,
				       pSubmits[i].commandBufferCount - j);
			bool b = j == 0;
			bool e = j + advance == pSubmits[i].commandBufferCount;

			if (queue->device->trace_bo)
				*queue->device->trace_id_ptr = 0;

			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
							   advance, preamble_cs,
							   (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
							   b ? pSubmits[i].waitSemaphoreCount : 0,
							   (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
							   e ? pSubmits[i].signalSemaphoreCount : 0,
							   can_patch, base_fence);

			if (ret) {
				radv_loge("failed to submit CS %d\n", i);
				abort();
			}
			if (queue->device->trace_bo) {
				bool success = queue->device->ws->ctx_wait_idle(
							queue->hw_ctx,
							radv_queue_family_to_ring(
								queue->queue_family_index),
							queue->queue_idx);

				if (!success) { /* Hang */
					radv_dump_trace(queue->device, cs_array[j]);
					abort();
				}
			}
		}
		free(cs_array);
	}

	if (fence) {
		if (!submitCount)
			ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
							   &queue->device->empty_cs[queue->queue_family_index],
							   1, NULL, NULL, 0, NULL, 0,
							   false, base_fence);

		fence->submitted = true;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueWaitIdle(
	VkQueue _queue)
{
	RADV_FROM_HANDLE(radv_queue, queue, _queue);

	queue->device->ws->ctx_wait_idle(queue->hw_ctx,
					 radv_queue_family_to_ring(queue->queue_family_index),
					 queue->queue_idx);
	return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
	VkDevice _device)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++) {
			radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
		}
	}
	return VK_SUCCESS;
}

PFN_vkVoidFunction radv_GetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
	VkInstance instance,
	const char* pName)
{
	return radv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction radv_GetDeviceProcAddr(
	VkDevice device,
	const char* pName)
{
	return radv_lookup_entrypoint(pName);
}

VkResult radv_AllocateMemory(
	VkDevice _device,
	const VkMemoryAllocateInfo* pAllocateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkDeviceMemory* pMem)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_device_memory *mem;
	VkResult result;
	enum radeon_bo_domain domain;
	uint32_t flags = 0;
	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

	if (pAllocateInfo->allocationSize == 0) {
		/* Apparently, this is allowed */
		*pMem = VK_NULL_HANDLE;
		return VK_SUCCESS;
	}

	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
			VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (mem == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
		domain = RADEON_DOMAIN_GTT;
	else
		domain = RADEON_DOMAIN_VRAM;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
		flags |= RADEON_FLAG_NO_CPU_ACCESS;
	else
		flags |= RADEON_FLAG_CPU_ACCESS;

	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
		flags |= RADEON_FLAG_GTT_WC;

	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
					    domain, flags);

	if (!mem->bo) {
		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
		goto fail;
	}
	mem->type_index = pAllocateInfo->memoryTypeIndex;

	*pMem = radv_device_memory_to_handle(mem);

	return VK_SUCCESS;

fail:
	vk_free2(&device->alloc, pAllocator, mem);

	return result;
}

void radv_FreeMemory(
	VkDevice _device,
	VkDeviceMemory _mem,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

	if (mem == NULL)
		return;

	device->ws->buffer_destroy(mem->bo);
	mem->bo = NULL;

	vk_free2(&device->alloc, pAllocator, mem);
}

VkResult radv_MapMemory(
	VkDevice _device,
	VkDeviceMemory _memory,
	VkDeviceSize offset,
	VkDeviceSize size,
	VkMemoryMapFlags flags,
	void** ppData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL) {
		*ppData = NULL;
		return VK_SUCCESS;
	}

	*ppData = device->ws->buffer_map(mem->bo);
	if (*ppData) {
		*ppData += offset;
		return VK_SUCCESS;
	}

	return VK_ERROR_MEMORY_MAP_FAILED;
}

void radv_UnmapMemory(
	VkDevice _device,
	VkDeviceMemory _memory)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

	if (mem == NULL)
		return;

	device->ws->buffer_unmap(mem->bo);
}

VkResult radv_FlushMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange* pMemoryRanges)
{
	return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
	VkDevice _device,
	uint32_t memoryRangeCount,
	const VkMappedMemoryRange* pMemoryRanges)
{
	return VK_SUCCESS;
}

void radv_GetBufferMemoryRequirements(
	VkDevice device,
	VkBuffer _buffer,
	VkMemoryRequirements* pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = buffer->size;
	pMemoryRequirements->alignment = 16;
}

void radv_GetImageMemoryRequirements(
	VkDevice device,
	VkImage _image,
	VkMemoryRequirements* pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);

	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = image->size;
	pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageSparseMemoryRequirements(
	VkDevice device,
	VkImage image,
	uint32_t* pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
{
	stub();
}

void radv_GetDeviceMemoryCommitment(
	VkDevice device,
	VkDeviceMemory memory,
	VkDeviceSize* pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory(
	VkDevice device,
	VkBuffer _buffer,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (mem) {
		buffer->bo = mem->bo;
		buffer->offset = memoryOffset;
	} else {
		buffer->bo = NULL;
		buffer->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
	VkDevice device,
	VkImage _image,
	VkDeviceMemory _memory,
	VkDeviceSize memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (mem) {
		image->bo = mem->bo;
		image->offset = memoryOffset;
	} else {
		image->bo = NULL;
		image->offset = 0;
	}

	return VK_SUCCESS;
}

VkResult radv_QueueBindSparse(
	VkQueue queue,
	uint32_t bindInfoCount,
	const VkBindSparseInfo* pBindInfo,
	VkFence fence)
{
	stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

VkResult radv_CreateFence(
	VkDevice _device,
	const VkFenceCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkFence* pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memset(fence, 0, sizeof(*fence));
	fence->submitted = false;
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->fence = device->ws->create_fence();
	if (!fence->fence) {
		vk_free2(&device->alloc, pAllocator, fence);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

void radv_DestroyFence(
	VkDevice _device,
	VkFence _fence,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;
	device->ws->destroy_fence(fence->fence);
	vk_free2(&device->alloc, pAllocator, fence);
}

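/* Vulkan hands us a relative timeout while the winsys waits on an absolute
 * deadline; convert, clamping so that a huge relative timeout (such as
 * UINT64_MAX) does not overflow the addition. */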
1807 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1808 {
1809 uint64_t current_time;
1810 struct timespec tv;
1811
1812 clock_gettime(CLOCK_MONOTONIC, &tv);
1813 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1814
1815 timeout = MIN2(UINT64_MAX - current_time, timeout);
1816
1817 return current_time + timeout;
1818 }
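
/* Worked example (not compiled) of the clamping above: with a relative
 * timeout of UINT64_MAX ("wait forever"), MIN2 keeps the addition from
 * wrapping, so the absolute deadline saturates at UINT64_MAX instead of
 * overflowing to a point in the past. The clock value is made up.
 */
#if 0
static void example_timeout_saturation(void)
{
	uint64_t now = 1000;			/* pretend clock reading */
	uint64_t t = UINT64_MAX;		/* app asked to wait forever */
	t = MIN2(UINT64_MAX - now, t);		/* t == UINT64_MAX - 1000 */
	assert(now + t == UINT64_MAX);		/* saturates, no wraparound */
}
#endif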
1819
1820 VkResult radv_WaitForFences(
1821 VkDevice _device,
1822 uint32_t fenceCount,
1823 const VkFence* pFences,
1824 VkBool32 waitAll,
1825 uint64_t timeout)
1826 {
1827 RADV_FROM_HANDLE(radv_device, device, _device);
1828 timeout = radv_get_absolute_timeout(timeout);
1829
1830 if (!waitAll && fenceCount > 1) {
1831 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1832 }
1833
1834 for (uint32_t i = 0; i < fenceCount; ++i) {
1835 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1836 bool expired = false;
1837
1838 if (fence->signalled)
1839 continue;
1840
1841 if (!fence->submitted)
1842 return VK_TIMEOUT;
1843
1844 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1845 if (!expired)
1846 return VK_TIMEOUT;
1847
1848 fence->signalled = true;
1849 }
1850
1851 return VK_SUCCESS;
1852 }
1853
1854 VkResult radv_ResetFences(VkDevice device,
1855 uint32_t fenceCount,
1856 const VkFence *pFences)
1857 {
1858 for (unsigned i = 0; i < fenceCount; ++i) {
1859 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1860 fence->submitted = fence->signalled = false;
1861 }
1862
1863 return VK_SUCCESS;
1864 }
1865
1866 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1867 {
1868 RADV_FROM_HANDLE(radv_device, device, _device);
1869 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1870
1871 if (fence->signalled)
1872 return VK_SUCCESS;
1873 if (!fence->submitted)
1874 return VK_NOT_READY;
1875
1876 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1877 return VK_NOT_READY;
1878
1879 return VK_SUCCESS;
1880 }
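
/* Hedged sketch (not compiled) of the fence round trip the entry points
 * above implement, from the application's point of view; the queue and
 * submit info are assumed to exist and error handling is trimmed.
 */
#if 0
static void example_fence_round_trip(VkDevice device, VkQueue queue,
                                     const VkSubmitInfo *submit_info)
{
	VkFence fence;
	const VkFenceCreateInfo fence_info = {
		.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
	};
	vkCreateFence(device, &fence_info, NULL, &fence);
	vkQueueSubmit(queue, 1, submit_info, fence);
	vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
	vkResetFences(device, 1, &fence);
	vkDestroyFence(device, fence, NULL);
}
#endif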
1881
1882
1883 // Queue semaphore functions
1884
1885 VkResult radv_CreateSemaphore(
1886 VkDevice _device,
1887 const VkSemaphoreCreateInfo* pCreateInfo,
1888 const VkAllocationCallbacks* pAllocator,
1889 VkSemaphore* pSemaphore)
1890 {
1891 RADV_FROM_HANDLE(radv_device, device, _device);
1892 struct radeon_winsys_sem *sem;
1893
1894 sem = device->ws->create_sem(device->ws);
1895 if (!sem)
1896 return VK_ERROR_OUT_OF_HOST_MEMORY;
1897
1898 *pSemaphore = (VkSemaphore)sem;
1899 return VK_SUCCESS;
1900 }
1901
1902 void radv_DestroySemaphore(
1903 VkDevice _device,
1904 VkSemaphore _semaphore,
1905 const VkAllocationCallbacks* pAllocator)
1906 {
1907 RADV_FROM_HANDLE(radv_device, device, _device);
1908 struct radeon_winsys_sem *sem;
1909 if (!_semaphore)
1910 return;
1911
1912 sem = (struct radeon_winsys_sem *)_semaphore;
1913 device->ws->destroy_sem(sem);
1914 }
1915
1916 VkResult radv_CreateEvent(
1917 VkDevice _device,
1918 const VkEventCreateInfo* pCreateInfo,
1919 const VkAllocationCallbacks* pAllocator,
1920 VkEvent* pEvent)
1921 {
1922 RADV_FROM_HANDLE(radv_device, device, _device);
1923 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
1924 sizeof(*event), 8,
1925 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1926
1927 if (!event)
1928 return VK_ERROR_OUT_OF_HOST_MEMORY;
1929
1930 event->bo = device->ws->buffer_create(device->ws, 8, 8,
1931 RADEON_DOMAIN_GTT,
1932 RADEON_FLAG_CPU_ACCESS);
1933 if (!event->bo) {
1934 vk_free2(&device->alloc, pAllocator, event);
1935 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1936 }
1937
1938 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
1939
1940 *pEvent = radv_event_to_handle(event);
1941
1942 return VK_SUCCESS;
1943 }
1944
1945 void radv_DestroyEvent(
1946 VkDevice _device,
1947 VkEvent _event,
1948 const VkAllocationCallbacks* pAllocator)
1949 {
1950 RADV_FROM_HANDLE(radv_device, device, _device);
1951 RADV_FROM_HANDLE(radv_event, event, _event);
1952
1953 if (!event)
1954 return;
1955 device->ws->buffer_destroy(event->bo);
1956 vk_free2(&device->alloc, pAllocator, event);
1957 }
1958
1959 VkResult radv_GetEventStatus(
1960 VkDevice _device,
1961 VkEvent _event)
1962 {
1963 RADV_FROM_HANDLE(radv_event, event, _event);
1964
1965 if (*event->map == 1)
1966 return VK_EVENT_SET;
1967 return VK_EVENT_RESET;
1968 }
1969
1970 VkResult radv_SetEvent(
1971 VkDevice _device,
1972 VkEvent _event)
1973 {
1974 RADV_FROM_HANDLE(radv_event, event, _event);
1975 *event->map = 1;
1976
1977 return VK_SUCCESS;
1978 }
1979
1980 VkResult radv_ResetEvent(
1981 VkDevice _device,
1982 VkEvent _event)
1983 {
1984 RADV_FROM_HANDLE(radv_event, event, _event);
1985 *event->map = 0;
1986
1987 return VK_SUCCESS;
1988 }
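
/* Illustrative sketch (not compiled): since an event is backed by a
 * single CPU-visible dword in a GTT buffer, the host-side entry points
 * above boil down to plain memory accesses through event->map.
 */
#if 0
static void example_event_round_trip(VkDevice device)
{
	VkEvent event;
	const VkEventCreateInfo event_info = {
		.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
	};
	vkCreateEvent(device, &event_info, NULL, &event);
	vkSetEvent(device, event);	/* writes 1 through the mapping */
	assert(vkGetEventStatus(device, event) == VK_EVENT_SET);
	vkResetEvent(device, event);	/* writes 0 */
	vkDestroyEvent(device, event, NULL);
}
#endif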
1989
1990 VkResult radv_CreateBuffer(
1991 VkDevice _device,
1992 const VkBufferCreateInfo* pCreateInfo,
1993 const VkAllocationCallbacks* pAllocator,
1994 VkBuffer* pBuffer)
1995 {
1996 RADV_FROM_HANDLE(radv_device, device, _device);
1997 struct radv_buffer *buffer;
1998
1999 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2000
2001 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2002 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2003 if (buffer == NULL)
2004 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2005
2006 buffer->size = pCreateInfo->size;
2007 buffer->usage = pCreateInfo->usage;
2008 buffer->bo = NULL;
2009 buffer->offset = 0;
2010
2011 *pBuffer = radv_buffer_to_handle(buffer);
2012
2013 return VK_SUCCESS;
2014 }
2015
2016 void radv_DestroyBuffer(
2017 VkDevice _device,
2018 VkBuffer _buffer,
2019 const VkAllocationCallbacks* pAllocator)
2020 {
2021 RADV_FROM_HANDLE(radv_device, device, _device);
2022 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2023
2024 if (!buffer)
2025 return;
2026
2027 vk_free2(&device->alloc, pAllocator, buffer);
2028 }
2029
2030 static inline unsigned
2031 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2032 {
2033 if (stencil)
2034 return image->surface.stencil_tiling_index[level];
2035 else
2036 return image->surface.tiling_index[level];
2037 }
2038
2039 static void
2040 radv_initialise_color_surface(struct radv_device *device,
2041 struct radv_color_buffer_info *cb,
2042 struct radv_image_view *iview)
2043 {
2044 const struct vk_format_description *desc;
2045 unsigned ntype, format, swap, endian;
2046 unsigned blend_clamp = 0, blend_bypass = 0;
2047 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2048 uint64_t va;
2049 const struct radeon_surf *surf = &iview->image->surface;
2050 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2051
2052 desc = vk_format_description(iview->vk_format);
2053
2054 memset(cb, 0, sizeof(*cb));
2055
2056 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2057 va += level_info->offset;
2058 cb->cb_color_base = va >> 8;
2059
2060 /* CMASK variables */
2061 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2062 va += iview->image->cmask.offset;
2063 cb->cb_color_cmask = va >> 8;
2064 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2065
2066 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2067 va += iview->image->dcc_offset;
2068 cb->cb_dcc_base = va >> 8;
2069
2070 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2071 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2072 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2073
2074 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2075 pitch_tile_max = level_info->nblk_x / 8 - 1;
2076 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2077 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2078
2079 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2080 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2081
2082 /* Intensity is implemented as Red, so treat it that way. */
2083 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2084 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2085
2086 if (iview->image->samples > 1) {
2087 unsigned log_samples = util_logbase2(iview->image->samples);
2088
2089 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2090 S_028C74_NUM_FRAGMENTS(log_samples);
2091 }
2092
2093 if (iview->image->fmask.size) {
2094 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2095 if (device->physical_device->rad_info.chip_class >= CIK)
2096 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2097 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2098 cb->cb_color_fmask = va >> 8;
2099 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2100 } else {
2101 /* This must be set for fast clear to work without FMASK. */
2102 if (device->physical_device->rad_info.chip_class >= CIK)
2103 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2104 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2105 cb->cb_color_fmask = cb->cb_color_base;
2106 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2107 }
2108
2109 ntype = radv_translate_color_numformat(iview->vk_format,
2110 desc,
2111 vk_format_get_first_non_void_channel(iview->vk_format));
2112 format = radv_translate_colorformat(iview->vk_format);
2113 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2114 radv_finishme("Illegal color\n");
2115 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2116 endian = radv_colorformat_endian_swap(format);
2117
2118 /* blend clamp should be set for all NORM/SRGB types */
2119 if (ntype == V_028C70_NUMBER_UNORM ||
2120 ntype == V_028C70_NUMBER_SNORM ||
2121 ntype == V_028C70_NUMBER_SRGB)
2122 blend_clamp = 1;
2123
2124 /* Set blend bypass according to the docs for SINT/UINT and the
2125 8_24/24_8 COLOR variants. */
2126 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2127 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2128 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2129 blend_clamp = 0;
2130 blend_bypass = 1;
2131 }
2132 #if 0
2133 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2134 (format == V_028C70_COLOR_8 ||
2135 format == V_028C70_COLOR_8_8 ||
2136 format == V_028C70_COLOR_8_8_8_8))
2137 ->color_is_int8 = true;
2138 #endif
2139 cb->cb_color_info = S_028C70_FORMAT(format) |
2140 S_028C70_COMP_SWAP(swap) |
2141 S_028C70_BLEND_CLAMP(blend_clamp) |
2142 S_028C70_BLEND_BYPASS(blend_bypass) |
2143 S_028C70_SIMPLE_FLOAT(1) |
2144 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2145 ntype != V_028C70_NUMBER_SNORM &&
2146 ntype != V_028C70_NUMBER_SRGB &&
2147 format != V_028C70_COLOR_8_24 &&
2148 format != V_028C70_COLOR_24_8) |
2149 S_028C70_NUMBER_TYPE(ntype) |
2150 S_028C70_ENDIAN(endian);
2151 if (iview->image->samples > 1 &&
2152 iview->image->fmask.size)
2153 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2154
2155 if (iview->image->cmask.size &&
2156 (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
2157 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2158
2159 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2160 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2161
2162 if (device->physical_device->rad_info.chip_class >= VI) {
2163 unsigned max_uncompressed_block_size = 2;
2164 if (iview->image->samples > 1) {
2165 if (iview->image->surface.bpe == 1)
2166 max_uncompressed_block_size = 0;
2167 else if (iview->image->surface.bpe == 2)
2168 max_uncompressed_block_size = 1;
2169 }
2170
2171 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2172 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2173 }
2174
2175 /* This must be set for fast clear to work without FMASK. */
2176 if (!iview->image->fmask.size &&
2177 device->physical_device->rad_info.chip_class == SI) {
2178 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2179 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2180 }
2181 }
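
/* Worked example (illustrative, not compiled) of the register packing
 * used above: for an array view with base_layer == 2 and
 * layer_count == 4, cb_color_view packs SLICE_START = 2 and
 * SLICE_MAX = 2 + 4 - 1 = 5 into one dword via the shift/mask helpers
 * generated into sid.h.
 */
#if 0
	/* inside some function: */
	uint32_t view = S_028C6C_SLICE_START(2) | S_028C6C_SLICE_MAX(5);
#endif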
2182
2183 static void
2184 radv_initialise_ds_surface(struct radv_device *device,
2185 struct radv_ds_buffer_info *ds,
2186 struct radv_image_view *iview)
2187 {
2188 unsigned level = iview->base_mip;
2189 unsigned format;
2190 uint64_t va, s_offs, z_offs;
2191 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2192 memset(ds, 0, sizeof(*ds));
2193 switch (iview->vk_format) {
2194 case VK_FORMAT_D24_UNORM_S8_UINT:
2195 case VK_FORMAT_X8_D24_UNORM_PACK32:
2196 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2197 ds->offset_scale = 2.0f;
2198 break;
2199 case VK_FORMAT_D16_UNORM:
2200 case VK_FORMAT_D16_UNORM_S8_UINT:
2201 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2202 ds->offset_scale = 4.0f;
2203 break;
2204 case VK_FORMAT_D32_SFLOAT:
2205 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2206 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2207 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2208 ds->offset_scale = 1.0f;
2209 break;
2210 default:
2211 break;
2212 }
2213
2214 format = radv_translate_dbformat(iview->vk_format);
2215 if (format == V_028040_Z_INVALID) {
2216 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2217 }
2218
2219 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2220 s_offs = z_offs = va;
2221 z_offs += iview->image->surface.level[level].offset;
2222 s_offs += iview->image->surface.stencil_level[level].offset;
2223
2224 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2225 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2226 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2227 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2228 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2229
2230 if (iview->image->samples > 1)
2231 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2232
2233 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2234 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2235 else
2236 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2237
2238 if (device->physical_device->rad_info.chip_class >= CIK) {
2239 struct radeon_info *info = &device->physical_device->rad_info;
2240 unsigned tiling_index = iview->image->surface.tiling_index[level];
2241 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2242 unsigned macro_index = iview->image->surface.macro_tile_index;
2243 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2244 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2245 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2246
2247 ds->db_depth_info |=
2248 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2249 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2250 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2251 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2252 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2253 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2254 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2255 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2256 } else {
2257 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2258 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2259 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2260 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2261 }
2262
2263 if (iview->image->htile.size && !level) {
2264 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2265 S_028040_ALLOW_EXPCLEAR(1);
2266
2267 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2268 /* Workaround: For a not yet understood reason, the
2269 * combination of MSAA, fast stencil clear and stencil
2270 * decompress messes with subsequent stencil buffer
2271 * uses. Problem was reproduced on Verde, Bonaire,
2272 * Tonga, and Carrizo.
2273 *
2274 * Disabling EXPCLEAR works around the problem.
2275 *
2276 * Check piglit's arb_texture_multisample-stencil-clear
2277 * test if you want to try changing this.
2278 */
2279 if (iview->image->samples <= 1)
2280 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2281 } else
2282 /* Use all of the htile_buffer for depth if there's no stencil. */
2283 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2284
2285 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2286 iview->image->htile.offset;
2287 ds->db_htile_data_base = va >> 8;
2288 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2289 } else {
2290 ds->db_htile_data_base = 0;
2291 ds->db_htile_surface = 0;
2292 }
2293
2294 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2295 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2296
2297 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2298 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2299 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2300 }
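
/* Worked example of the tile math above (values illustrative): a
 * 256x256 level with 8x8 pixel tiles has nblk_x == nblk_y == 256, so
 * PITCH_TILE_MAX = 256 / 8 - 1 = 31 and
 * SLICE_TILE_MAX = (256 * 256) / 64 - 1 = 1023.
 */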
2301
2302 VkResult radv_CreateFramebuffer(
2303 VkDevice _device,
2304 const VkFramebufferCreateInfo* pCreateInfo,
2305 const VkAllocationCallbacks* pAllocator,
2306 VkFramebuffer* pFramebuffer)
2307 {
2308 RADV_FROM_HANDLE(radv_device, device, _device);
2309 struct radv_framebuffer *framebuffer;
2310
2311 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2312
2313 size_t size = sizeof(*framebuffer) +
2314 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2315 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2316 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2317 if (framebuffer == NULL)
2318 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2319
2320 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2321 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2322 VkImageView _iview = pCreateInfo->pAttachments[i];
2323 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2324 framebuffer->attachments[i].attachment = iview;
2325 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2326 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2327 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2328 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2329 }
2330 }
2331
2332 framebuffer->width = pCreateInfo->width;
2333 framebuffer->height = pCreateInfo->height;
2334 framebuffer->layers = pCreateInfo->layers;
2335
2336 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2337 return VK_SUCCESS;
2338 }
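
/* Hedged usage sketch (not compiled) of the entry point above; the
 * render pass and color attachment view are assumed to exist, and the
 * extent values are arbitrary.
 */
#if 0
static void example_create_framebuffer(VkDevice device, VkRenderPass render_pass,
                                       VkImageView color_view, VkFramebuffer *fb)
{
	const VkFramebufferCreateInfo fb_info = {
		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
		.renderPass = render_pass,
		.attachmentCount = 1,
		.pAttachments = &color_view,
		.width = 1920,
		.height = 1080,
		.layers = 1,
	};
	vkCreateFramebuffer(device, &fb_info, NULL, fb);
}
#endif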
2339
2340 void radv_DestroyFramebuffer(
2341 VkDevice _device,
2342 VkFramebuffer _fb,
2343 const VkAllocationCallbacks* pAllocator)
2344 {
2345 RADV_FROM_HANDLE(radv_device, device, _device);
2346 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2347
2348 if (!fb)
2349 return;
2350 vk_free2(&device->alloc, pAllocator, fb);
2351 }
2352
2353 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2354 {
2355 switch (address_mode) {
2356 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2357 return V_008F30_SQ_TEX_WRAP;
2358 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2359 return V_008F30_SQ_TEX_MIRROR;
2360 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2361 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2362 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2363 return V_008F30_SQ_TEX_CLAMP_BORDER;
2364 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2365 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2366 default:
2367 unreachable("illegal tex wrap mode");
2368 break;
2369 }
2370 }
2371
2372 static unsigned
2373 radv_tex_compare(VkCompareOp op)
2374 {
2375 switch (op) {
2376 case VK_COMPARE_OP_NEVER:
2377 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2378 case VK_COMPARE_OP_LESS:
2379 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2380 case VK_COMPARE_OP_EQUAL:
2381 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2382 case VK_COMPARE_OP_LESS_OR_EQUAL:
2383 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2384 case VK_COMPARE_OP_GREATER:
2385 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2386 case VK_COMPARE_OP_NOT_EQUAL:
2387 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2388 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2389 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2390 case VK_COMPARE_OP_ALWAYS:
2391 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2392 default:
2393 unreachable("illegal compare mode");
2394 break;
2395 }
2396 }
2397
2398 static unsigned
2399 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2400 {
2401 switch (filter) {
2402 case VK_FILTER_NEAREST:
2403 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2404 V_008F38_SQ_TEX_XY_FILTER_POINT);
2405 case VK_FILTER_LINEAR:
2406 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2407 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2408 case VK_FILTER_CUBIC_IMG:
2409 default:
2410 fprintf(stderr, "illegal texture filter\n");
2411 return 0;
2412 }
2413 }
2414
2415 static unsigned
2416 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2417 {
2418 switch (mode) {
2419 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2420 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2421 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2422 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2423 default:
2424 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2425 }
2426 }
2427
2428 static unsigned
2429 radv_tex_bordercolor(VkBorderColor bcolor)
2430 {
2431 switch (bcolor) {
2432 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2433 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2434 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2435 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2436 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2437 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2438 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2439 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2440 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2441 default:
2442 break;
2443 }
2444 return 0;
2445 }
2446
2447 static unsigned
2448 radv_tex_aniso_filter(unsigned filter)
2449 {
2450 if (filter < 2)
2451 return 0;
2452 if (filter < 4)
2453 return 1;
2454 if (filter < 8)
2455 return 2;
2456 if (filter < 16)
2457 return 3;
2458 return 4;
2459 }
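
/* The ladder above computes a log2 clamped to [0, 4]; an equivalent
 * formulation (a sketch, behaviourally identical for any filter value)
 * using helpers already used elsewhere in this file would be:
 */
#if 0
static unsigned
radv_tex_aniso_filter_log2(unsigned filter)
{
	if (filter < 2)
		return 0;
	return MIN2(util_logbase2(filter), 4);
}
#endif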
2460
2461 static void
2462 radv_init_sampler(struct radv_device *device,
2463 struct radv_sampler *sampler,
2464 const VkSamplerCreateInfo *pCreateInfo)
2465 {
2466 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2467 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2468 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2469 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2470
2471 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2472 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2473 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2474 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2475 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2476 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2477 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2478 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2479 S_008F30_DISABLE_CUBE_WRAP(0) |
2480 S_008F30_COMPAT_MODE(is_vi));
2481 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2482 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2483 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2484 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2485 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2486 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2487 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2488 S_008F38_MIP_POINT_PRECLAMP(1) |
2489 S_008F38_DISABLE_LSB_CEIL(1) |
2490 S_008F38_FILTER_PREC_FIX(1) |
2491 S_008F38_ANISO_OVERRIDE(is_vi));
2492 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2493 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2494 }
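
/* S_FIXED() above converts a float to fixed point with the given number
 * of fractional bits; with 8 fractional bits the LOD fields are encoded
 * in 1/256 steps. A paraphrased expansion and a worked value follow
 * (the authoritative macro lives in the shared headers):
 */
#if 0
#define S_FIXED(value, frac_bits)	((int)((value) * (1 << (frac_bits))))
	/* e.g. S_FIXED(1.5f, 8) == (int)(1.5 * 256) == 384 */
#endif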
2495
2496 VkResult radv_CreateSampler(
2497 VkDevice _device,
2498 const VkSamplerCreateInfo* pCreateInfo,
2499 const VkAllocationCallbacks* pAllocator,
2500 VkSampler* pSampler)
2501 {
2502 RADV_FROM_HANDLE(radv_device, device, _device);
2503 struct radv_sampler *sampler;
2504
2505 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2506
2507 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2508 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2509 if (!sampler)
2510 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2511
2512 radv_init_sampler(device, sampler, pCreateInfo);
2513 *pSampler = radv_sampler_to_handle(sampler);
2514
2515 return VK_SUCCESS;
2516 }
2517
2518 void radv_DestroySampler(
2519 VkDevice _device,
2520 VkSampler _sampler,
2521 const VkAllocationCallbacks* pAllocator)
2522 {
2523 RADV_FROM_HANDLE(radv_device, device, _device);
2524 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2525
2526 if (!sampler)
2527 return;
2528 vk_free2(&device->alloc, pAllocator, sampler);
2529 }
2530
2531
2532 /* vk_icd.h does not declare this function, so we declare it here to
2533 * suppress -Wmissing-prototypes.
2534 */
2535 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2536 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2537
2538 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2539 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2540 {
2541 /* For the full details on loader interface versioning, see
2542 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2543 * What follows is a condensed summary to help you navigate the large and
2544 * confusing official doc.
2545 *
2546 * - Loader interface v0 is incompatible with later versions. We don't
2547 * support it.
2548 *
2549 * - In loader interface v1:
2550 * - The first ICD entrypoint called by the loader is
2551 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2552 * entrypoint.
2553 * - The ICD must statically expose no other Vulkan symbol unless it is
2554 * linked with -Bsymbolic.
2555 * - Each dispatchable Vulkan handle created by the ICD must be
2556 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2557 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2558 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2559 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2560 * such loader-managed surfaces.
2561 *
2562 * - Loader interface v2 differs from v1 in:
2563 * - The first ICD entrypoint called by the loader is
2564 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2565 * statically expose this entrypoint.
2566 *
2567 * - Loader interface v3 differs from v2 in:
2568 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2569 * vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
2570 * because the loader no longer does so.
2571 */
2572 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2573 return VK_SUCCESS;
2574 }
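
/* Hedged sketch (not compiled) of the negotiation from the loader's
 * side: it passes in the highest interface version it supports and
 * proceeds with whatever the ICD writes back, i.e. min(loader max,
 * ICD max).
 */
#if 0
static void example_negotiate(void)
{
	uint32_t version = 3;	/* the loader's own maximum */
	if (vk_icdNegotiateLoaderICDInterfaceVersion(&version) == VK_SUCCESS) {
		/* version now holds the negotiated interface version */
	}
}
#endif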