radv: disable gfx init on CIK for now
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <dlfcn.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include "radv_private.h"
35 #include "radv_cs.h"
36 #include "util/strtod.h"
37
38 #include <xf86drm.h>
39 #include <amdgpu.h>
40 #include <amdgpu_drm.h>
41 #include "amdgpu_id.h"
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "util/debug.h"
47 struct radv_dispatch_table dtable;
48
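/* Derive a build timestamp for the shared object containing a given
 * function pointer: dladdr() resolves the pointer to the file on disk and
 * stat() reports its mtime. Used below to build a pipeline-cache UUID that
 * changes whenever the driver or LLVM binaries change. */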
49 static int
50 radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
51 {
52 Dl_info info;
53 struct stat st;
54 if (!dladdr(ptr, &info) || !info.dli_fname) {
55 return -1;
56 }
57 if (stat(info.dli_fname, &st)) {
58 return -1;
59 }
60 *timestamp = st.st_mtim.tv_sec;
61 return 0;
62 }
63
64 static int
65 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
66 {
67 uint32_t mesa_timestamp, llvm_timestamp;
68 uint16_t f = family;
69 memset(uuid, 0, VK_UUID_SIZE);
70 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
71 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
72 return -1;
73
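/* UUID layout: bytes 0-3 Mesa build timestamp, bytes 4-7 LLVM build
 * timestamp, bytes 8-9 chip family, bytes 10+ the literal string "radv". */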
74 memcpy(uuid, &mesa_timestamp, 4);
75 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
76 memcpy((char*)uuid + 8, &f, 2);
77 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
78 return 0;
79 }
80
81 static const VkExtensionProperties instance_extensions[] = {
82 {
83 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
84 .specVersion = 25,
85 },
86 #ifdef VK_USE_PLATFORM_XCB_KHR
87 {
88 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
89 .specVersion = 6,
90 },
91 #endif
92 #ifdef VK_USE_PLATFORM_XLIB_KHR
93 {
94 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
95 .specVersion = 6,
96 },
97 #endif
98 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
99 {
100 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
101 .specVersion = 5,
102 },
103 #endif
104 };
105
106 static const VkExtensionProperties common_device_extensions[] = {
107 {
108 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
109 .specVersion = 1,
110 },
111 {
112 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
113 .specVersion = 1,
114 },
115 {
116 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
117 .specVersion = 68,
118 },
119 {
120 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
121 .specVersion = 1,
122 },
123 {
124 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
125 .specVersion = 1,
126 },
127 {
128 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
129 .specVersion = 1,
130 },
131 };
132
133 static VkResult
134 radv_extensions_register(struct radv_instance *instance,
135 struct radv_extensions *extensions,
136 const VkExtensionProperties *new_ext,
137 uint32_t num_ext)
138 {
139 size_t new_size;
140 VkExtensionProperties *new_ptr;
141
142 assert(new_ext && num_ext > 0);
143
144 if (!new_ext)
145 return VK_ERROR_INITIALIZATION_FAILED;
146
147 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
148 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
149 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
150
151 /* Old array continues to be valid, update nothing */
152 if (!new_ptr)
153 return VK_ERROR_OUT_OF_HOST_MEMORY;
154
155 memcpy(&new_ptr[extensions->num_ext], new_ext,
156 num_ext * sizeof(VkExtensionProperties));
157 extensions->ext_array = new_ptr;
158 extensions->num_ext += num_ext;
159
160 return VK_SUCCESS;
161 }
162
163 static void
164 radv_extensions_finish(struct radv_instance *instance,
165 struct radv_extensions *extensions)
166 {
167 assert(extensions);
168
169 if (!extensions) {
170 radv_loge("Attempted to free invalid extension struct\n");
return;
}
171
172 if (extensions->ext_array)
173 vk_free(&instance->alloc, extensions->ext_array);
174 }
175
176 static bool
177 is_extension_enabled(const VkExtensionProperties *extensions,
178 size_t num_ext,
179 const char *name)
180 {
181 assert(extensions && name);
182
183 for (uint32_t i = 0; i < num_ext; i++) {
184 if (strcmp(name, extensions[i].extensionName) == 0)
185 return true;
186 }
187
188 return false;
189 }
190
191 static VkResult
192 radv_physical_device_init(struct radv_physical_device *device,
193 struct radv_instance *instance,
194 const char *path)
195 {
196 VkResult result;
197 drmVersionPtr version;
198 int fd;
199
200 fd = open(path, O_RDWR | O_CLOEXEC);
201 if (fd < 0)
202 return VK_ERROR_INCOMPATIBLE_DRIVER;
203
204 version = drmGetVersion(fd);
205 if (!version) {
206 close(fd);
207 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
208 "failed to get version %s: %m", path);
209 }
210
211 if (strcmp(version->name, "amdgpu")) {
212 drmFreeVersion(version);
213 close(fd);
214 return VK_ERROR_INCOMPATIBLE_DRIVER;
215 }
216 drmFreeVersion(version);
217
218 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
219 device->instance = instance;
220 assert(strlen(path) < ARRAY_SIZE(device->path));
221 strncpy(device->path, path, ARRAY_SIZE(device->path));
222
223 device->ws = radv_amdgpu_winsys_create(fd);
224 if (!device->ws) {
225 result = VK_ERROR_INCOMPATIBLE_DRIVER;
226 goto fail;
227 }
228 device->ws->query_info(device->ws, &device->rad_info);
229 result = radv_init_wsi(device);
230 if (result != VK_SUCCESS) {
231 device->ws->destroy(device->ws);
232 goto fail;
233 }
234
235 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
236 radv_finish_wsi(device);
237 device->ws->destroy(device->ws);
238 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
239 "cannot generate UUID");
240 goto fail;
241 }
242
243 result = radv_extensions_register(instance,
244 &device->extensions,
245 common_device_extensions,
246 ARRAY_SIZE(common_device_extensions));
247 if (result != VK_SUCCESS)
248 goto fail;
249
250 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
251 device->name = device->rad_info.name;
252 close(fd);
253 return VK_SUCCESS;
254
255 fail:
256 close(fd);
257 return result;
258 }
259
260 static void
261 radv_physical_device_finish(struct radv_physical_device *device)
262 {
263 radv_extensions_finish(device->instance, &device->extensions);
264 radv_finish_wsi(device);
265 device->ws->destroy(device->ws);
266 }
267
268
269 static void *
270 default_alloc_func(void *pUserData, size_t size, size_t align,
271 VkSystemAllocationScope allocationScope)
272 {
273 return malloc(size);
274 }
275
276 static void *
277 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
278 size_t align, VkSystemAllocationScope allocationScope)
279 {
280 return realloc(pOriginal, size);
281 }
282
283 static void
284 default_free_func(void *pUserData, void *pMemory)
285 {
286 free(pMemory);
287 }
288
289 static const VkAllocationCallbacks default_alloc = {
290 .pUserData = NULL,
291 .pfnAllocation = default_alloc_func,
292 .pfnReallocation = default_realloc_func,
293 .pfnFree = default_free_func,
294 };
295
296 static const struct debug_control radv_debug_options[] = {
297 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
298 {"nodcc", RADV_DEBUG_NO_DCC},
299 {"shaders", RADV_DEBUG_DUMP_SHADERS},
300 {"nocache", RADV_DEBUG_NO_CACHE},
301 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
302 {"nohiz", RADV_DEBUG_NO_HIZ},
303 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
304 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
305 {NULL, 0}
306 };
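
/* Debug flags are read from the RADV_DEBUG environment variable as a
 * comma-separated list of the names above, e.g. (hypothetical invocation):
 *
 *   RADV_DEBUG=nodcc,shaders ./my-vulkan-app
 */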
307
308 VkResult radv_CreateInstance(
309 const VkInstanceCreateInfo* pCreateInfo,
310 const VkAllocationCallbacks* pAllocator,
311 VkInstance* pInstance)
312 {
313 struct radv_instance *instance;
314
315 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
316
317 uint32_t client_version;
318 if (pCreateInfo->pApplicationInfo &&
319 pCreateInfo->pApplicationInfo->apiVersion != 0) {
320 client_version = pCreateInfo->pApplicationInfo->apiVersion;
321 } else {
322 client_version = VK_MAKE_VERSION(1, 0, 0);
323 }
324
325 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
326 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
327 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
328 "Client requested version %d.%d.%d",
329 VK_VERSION_MAJOR(client_version),
330 VK_VERSION_MINOR(client_version),
331 VK_VERSION_PATCH(client_version));
332 }
333
334 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
335 if (!is_extension_enabled(instance_extensions,
336 ARRAY_SIZE(instance_extensions),
337 pCreateInfo->ppEnabledExtensionNames[i]))
338 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
339 }
340
341 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
342 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
343 if (!instance)
344 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
345
346 memset(instance, 0, sizeof(*instance));
347
348 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
349
350 if (pAllocator)
351 instance->alloc = *pAllocator;
352 else
353 instance->alloc = default_alloc;
354
355 instance->apiVersion = client_version;
356 instance->physicalDeviceCount = -1;
357
358 _mesa_locale_init();
359
360 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
361
362 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
363 radv_debug_options);
364
365 *pInstance = radv_instance_to_handle(instance);
366
367 return VK_SUCCESS;
368 }
369
370 void radv_DestroyInstance(
371 VkInstance _instance,
372 const VkAllocationCallbacks* pAllocator)
373 {
374 RADV_FROM_HANDLE(radv_instance, instance, _instance);
375
376 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
377 radv_physical_device_finish(instance->physicalDevices + i);
378 }
379
380 VG(VALGRIND_DESTROY_MEMPOOL(instance));
381
382 _mesa_locale_fini();
383
384 vk_free(&instance->alloc, instance);
385 }
386
387 VkResult radv_EnumeratePhysicalDevices(
388 VkInstance _instance,
389 uint32_t* pPhysicalDeviceCount,
390 VkPhysicalDevice* pPhysicalDevices)
391 {
392 RADV_FROM_HANDLE(radv_instance, instance, _instance);
393 VkResult result;
394
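/* Standard Vulkan two-call idiom: when pPhysicalDevices is NULL only the
 * count is returned; otherwise up to *pPhysicalDeviceCount handles are
 * written and VK_INCOMPLETE signals that the caller's array was too small. */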
395 if (instance->physicalDeviceCount < 0) {
396 char path[20];
397 instance->physicalDeviceCount = 0;
398 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
399 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
400 result = radv_physical_device_init(instance->physicalDevices +
401 instance->physicalDeviceCount,
402 instance, path);
403 if (result == VK_SUCCESS)
404 ++instance->physicalDeviceCount;
405 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
406 return result;
407 }
408 }
409
410 if (!pPhysicalDevices) {
411 *pPhysicalDeviceCount = instance->physicalDeviceCount;
412 } else {
413 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
414 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
415 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
416 }
417
418 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
419 : VK_SUCCESS;
420 }
421
422 void radv_GetPhysicalDeviceFeatures(
423 VkPhysicalDevice physicalDevice,
424 VkPhysicalDeviceFeatures* pFeatures)
425 {
426 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
427
428 memset(pFeatures, 0, sizeof(*pFeatures));
429
430 *pFeatures = (VkPhysicalDeviceFeatures) {
431 .robustBufferAccess = true,
432 .fullDrawIndexUint32 = true,
433 .imageCubeArray = true,
434 .independentBlend = true,
435 .geometryShader = true,
436 .tessellationShader = false,
437 .sampleRateShading = false,
438 .dualSrcBlend = true,
439 .logicOp = true,
440 .multiDrawIndirect = true,
441 .drawIndirectFirstInstance = true,
442 .depthClamp = true,
443 .depthBiasClamp = true,
444 .fillModeNonSolid = true,
445 .depthBounds = true,
446 .wideLines = true,
447 .largePoints = true,
448 .alphaToOne = true,
449 .multiViewport = true,
450 .samplerAnisotropy = true,
451 .textureCompressionETC2 = false,
452 .textureCompressionASTC_LDR = false,
453 .textureCompressionBC = true,
454 .occlusionQueryPrecise = true,
455 .pipelineStatisticsQuery = false,
456 .vertexPipelineStoresAndAtomics = true,
457 .fragmentStoresAndAtomics = true,
458 .shaderTessellationAndGeometryPointSize = true,
459 .shaderImageGatherExtended = true,
460 .shaderStorageImageExtendedFormats = true,
461 .shaderStorageImageMultisample = false,
462 .shaderUniformBufferArrayDynamicIndexing = true,
463 .shaderSampledImageArrayDynamicIndexing = true,
464 .shaderStorageBufferArrayDynamicIndexing = true,
465 .shaderStorageImageArrayDynamicIndexing = true,
466 .shaderStorageImageReadWithoutFormat = false,
467 .shaderStorageImageWriteWithoutFormat = false,
468 .shaderClipDistance = true,
469 .shaderCullDistance = true,
470 .shaderFloat64 = true,
471 .shaderInt64 = false,
472 .shaderInt16 = false,
474 .variableMultisampleRate = false,
475 .inheritedQueries = false,
476 };
477 }
478
479 void radv_GetPhysicalDeviceFeatures2KHR(
480 VkPhysicalDevice physicalDevice,
481 VkPhysicalDeviceFeatures2KHR *pFeatures)
482 {
483 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
484 }
485
486 void radv_GetPhysicalDeviceProperties(
487 VkPhysicalDevice physicalDevice,
488 VkPhysicalDeviceProperties* pProperties)
489 {
490 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
491 VkSampleCountFlags sample_counts = 0xf;
492 VkPhysicalDeviceLimits limits = {
493 .maxImageDimension1D = (1 << 14),
494 .maxImageDimension2D = (1 << 14),
495 .maxImageDimension3D = (1 << 11),
496 .maxImageDimensionCube = (1 << 14),
497 .maxImageArrayLayers = (1 << 11),
498 .maxTexelBufferElements = 128 * 1024 * 1024,
499 .maxUniformBufferRange = UINT32_MAX,
500 .maxStorageBufferRange = UINT32_MAX,
501 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
502 .maxMemoryAllocationCount = UINT32_MAX,
503 .maxSamplerAllocationCount = 64 * 1024,
504 .bufferImageGranularity = 64, /* A cache line */
505 .sparseAddressSpaceSize = 0,
506 .maxBoundDescriptorSets = MAX_SETS,
507 .maxPerStageDescriptorSamplers = 64,
508 .maxPerStageDescriptorUniformBuffers = 64,
509 .maxPerStageDescriptorStorageBuffers = 64,
510 .maxPerStageDescriptorSampledImages = 64,
511 .maxPerStageDescriptorStorageImages = 64,
512 .maxPerStageDescriptorInputAttachments = 64,
513 .maxPerStageResources = 128,
514 .maxDescriptorSetSamplers = 256,
515 .maxDescriptorSetUniformBuffers = 256,
516 .maxDescriptorSetUniformBuffersDynamic = 256,
517 .maxDescriptorSetStorageBuffers = 256,
518 .maxDescriptorSetStorageBuffersDynamic = 256,
519 .maxDescriptorSetSampledImages = 256,
520 .maxDescriptorSetStorageImages = 256,
521 .maxDescriptorSetInputAttachments = 256,
522 .maxVertexInputAttributes = 32,
523 .maxVertexInputBindings = 32,
524 .maxVertexInputAttributeOffset = 2047,
525 .maxVertexInputBindingStride = 2048,
526 .maxVertexOutputComponents = 128,
527 .maxTessellationGenerationLevel = 0,
528 .maxTessellationPatchSize = 0,
529 .maxTessellationControlPerVertexInputComponents = 0,
530 .maxTessellationControlPerVertexOutputComponents = 0,
531 .maxTessellationControlPerPatchOutputComponents = 0,
532 .maxTessellationControlTotalOutputComponents = 0,
533 .maxTessellationEvaluationInputComponents = 0,
534 .maxTessellationEvaluationOutputComponents = 0,
535 .maxGeometryShaderInvocations = 32,
536 .maxGeometryInputComponents = 64,
537 .maxGeometryOutputComponents = 128,
538 .maxGeometryOutputVertices = 256,
539 .maxGeometryTotalOutputComponents = 1024,
540 .maxFragmentInputComponents = 128,
541 .maxFragmentOutputAttachments = 8,
542 .maxFragmentDualSrcAttachments = 1,
543 .maxFragmentCombinedOutputResources = 8,
544 .maxComputeSharedMemorySize = 32768,
545 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
546 .maxComputeWorkGroupInvocations = 2048,
547 .maxComputeWorkGroupSize = {
548 2048,
549 2048,
550 2048
551 },
552 .subPixelPrecisionBits = 4 /* FIXME */,
553 .subTexelPrecisionBits = 4 /* FIXME */,
554 .mipmapPrecisionBits = 4 /* FIXME */,
555 .maxDrawIndexedIndexValue = UINT32_MAX,
556 .maxDrawIndirectCount = UINT32_MAX,
557 .maxSamplerLodBias = 16,
558 .maxSamplerAnisotropy = 16,
559 .maxViewports = MAX_VIEWPORTS,
560 .maxViewportDimensions = { (1 << 14), (1 << 14) },
561 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
562 .viewportSubPixelBits = 13, /* We take a float? */
563 .minMemoryMapAlignment = 4096, /* A page */
564 .minTexelBufferOffsetAlignment = 1,
565 .minUniformBufferOffsetAlignment = 4,
566 .minStorageBufferOffsetAlignment = 4,
567 .minTexelOffset = -32,
568 .maxTexelOffset = 31,
569 .minTexelGatherOffset = -32,
570 .maxTexelGatherOffset = 31,
571 .minInterpolationOffset = -2,
572 .maxInterpolationOffset = 2,
573 .subPixelInterpolationOffsetBits = 8,
574 .maxFramebufferWidth = (1 << 14),
575 .maxFramebufferHeight = (1 << 14),
576 .maxFramebufferLayers = (1 << 10),
577 .framebufferColorSampleCounts = sample_counts,
578 .framebufferDepthSampleCounts = sample_counts,
579 .framebufferStencilSampleCounts = sample_counts,
580 .framebufferNoAttachmentsSampleCounts = sample_counts,
581 .maxColorAttachments = MAX_RTS,
582 .sampledImageColorSampleCounts = sample_counts,
583 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
584 .sampledImageDepthSampleCounts = sample_counts,
585 .sampledImageStencilSampleCounts = sample_counts,
586 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
587 .maxSampleMaskWords = 1,
588 .timestampComputeAndGraphics = false,
589 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq,
590 .maxClipDistances = 8,
591 .maxCullDistances = 8,
592 .maxCombinedClipAndCullDistances = 8,
593 .discreteQueuePriorities = 1,
594 .pointSizeRange = { 0.125, 255.875 },
595 .lineWidthRange = { 0.0, 7.9921875 },
596 .pointSizeGranularity = (1.0 / 8.0),
597 .lineWidthGranularity = (1.0 / 128.0),
598 .strictLines = false, /* FINISHME */
599 .standardSampleLocations = true,
600 .optimalBufferCopyOffsetAlignment = 128,
601 .optimalBufferCopyRowPitchAlignment = 128,
602 .nonCoherentAtomSize = 64,
603 };
604
605 *pProperties = (VkPhysicalDeviceProperties) {
606 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
607 .driverVersion = 1,
608 .vendorID = 0x1002,
609 .deviceID = pdevice->rad_info.pci_id,
610 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
611 .limits = limits,
612 .sparseProperties = {0}, /* Sparse resources are not supported yet. */
613 };
614
615 strcpy(pProperties->deviceName, pdevice->name);
616 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
617 }
618
619 void radv_GetPhysicalDeviceProperties2KHR(
620 VkPhysicalDevice physicalDevice,
621 VkPhysicalDeviceProperties2KHR *pProperties)
622 {
623 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
624 }
625
626 static void radv_get_physical_device_queue_family_properties(
627 struct radv_physical_device* pdevice,
628 uint32_t* pCount,
629 VkQueueFamilyProperties** pQueueFamilyProperties)
630 {
631 int num_queue_families = 1;
632 int idx;
633 if (pdevice->rad_info.compute_rings > 0 &&
634 pdevice->rad_info.chip_class >= CIK &&
635 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
636 num_queue_families++;
637
638 if (pQueueFamilyProperties == NULL) {
639 *pCount = num_queue_families;
640 return;
641 }
642
643 if (!*pCount)
644 return;
645
646 idx = 0;
647 if (*pCount >= 1) {
648 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
649 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
650 VK_QUEUE_COMPUTE_BIT |
651 VK_QUEUE_TRANSFER_BIT,
652 .queueCount = 1,
653 .timestampValidBits = 64,
654 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
655 };
656 idx++;
657 }
658
659 if (pdevice->rad_info.compute_rings > 0 &&
660 pdevice->rad_info.chip_class >= CIK &&
661 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
662 if (*pCount > idx) {
663 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
664 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
665 .queueCount = pdevice->rad_info.compute_rings,
666 .timestampValidBits = 64,
667 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
668 };
669 idx++;
670 }
671 }
672 *pCount = idx;
673 }
674
675 void radv_GetPhysicalDeviceQueueFamilyProperties(
676 VkPhysicalDevice physicalDevice,
677 uint32_t* pCount,
678 VkQueueFamilyProperties* pQueueFamilyProperties)
679 {
680 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
681 if (!pQueueFamilyProperties) {
682 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
683 return;
684 }
685 VkQueueFamilyProperties *properties[] = {
686 pQueueFamilyProperties + 0,
687 pQueueFamilyProperties + 1,
688 pQueueFamilyProperties + 2,
689 };
690 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
691 assert(*pCount <= 3);
692 }
693
694 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
695 VkPhysicalDevice physicalDevice,
696 uint32_t* pCount,
697 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
698 {
699 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
700 if (!pQueueFamilyProperties) {
701 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
702 return;
703 }
704 VkQueueFamilyProperties *properties[] = {
705 &pQueueFamilyProperties[0].queueFamilyProperties,
706 &pQueueFamilyProperties[1].queueFamilyProperties,
707 &pQueueFamilyProperties[2].queueFamilyProperties,
708 };
709 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
710 assert(*pCount <= 3);
711 }
712
713 void radv_GetPhysicalDeviceMemoryProperties(
714 VkPhysicalDevice physicalDevice,
715 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
716 {
717 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
718
719 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
720
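/* Four memory types backed by three heaps: CPU-invisible VRAM, the
 * CPU-visible VRAM window, and GTT (system memory accessed through the
 * GART); GTT is exposed both write-combined and cached. */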
721 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
722 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
723 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
724 .heapIndex = RADV_MEM_HEAP_VRAM,
725 };
726 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
727 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
728 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
729 .heapIndex = RADV_MEM_HEAP_GTT,
730 };
731 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
732 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
733 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
734 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
735 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
736 };
737 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
738 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
739 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
740 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
741 .heapIndex = RADV_MEM_HEAP_GTT,
742 };
743
744 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
745
746 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
747 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
748 .size = physical_device->rad_info.vram_size -
749 physical_device->rad_info.visible_vram_size,
750 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
751 };
752 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
753 .size = physical_device->rad_info.visible_vram_size,
754 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
755 };
756 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
757 .size = physical_device->rad_info.gart_size,
758 .flags = 0,
759 };
760 }
761
762 void radv_GetPhysicalDeviceMemoryProperties2KHR(
763 VkPhysicalDevice physicalDevice,
764 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
765 {
766 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
767 &pMemoryProperties->memoryProperties);
768 }
769
770 static VkResult
771 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
772 int queue_family_index, int idx)
773 {
774 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
775 queue->device = device;
776 queue->queue_family_index = queue_family_index;
777 queue->queue_idx = idx;
778
779 queue->hw_ctx = device->ws->ctx_create(device->ws);
780 if (!queue->hw_ctx)
781 return VK_ERROR_OUT_OF_HOST_MEMORY;
782
783 return VK_SUCCESS;
784 }
785
786 static void
787 radv_queue_finish(struct radv_queue *queue)
788 {
789 if (queue->hw_ctx)
790 queue->device->ws->ctx_destroy(queue->hw_ctx);
791
792 if (queue->preamble_cs)
793 queue->device->ws->cs_destroy(queue->preamble_cs);
794 if (queue->descriptor_bo)
795 queue->device->ws->buffer_destroy(queue->descriptor_bo);
796 if (queue->scratch_bo)
797 queue->device->ws->buffer_destroy(queue->scratch_bo);
798 if (queue->esgs_ring_bo)
799 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
800 if (queue->gsvs_ring_bo)
801 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
802 if (queue->compute_scratch_bo)
803 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
804 }
805
806 static void
807 radv_device_init_gs_info(struct radv_device *device)
808 {
809 switch (device->physical_device->rad_info.family) {
810 case CHIP_OLAND:
811 case CHIP_HAINAN:
812 case CHIP_KAVERI:
813 case CHIP_KABINI:
814 case CHIP_MULLINS:
815 case CHIP_ICELAND:
816 case CHIP_CARRIZO:
817 case CHIP_STONEY:
818 device->gs_table_depth = 16;
819 return;
820 case CHIP_TAHITI:
821 case CHIP_PITCAIRN:
822 case CHIP_VERDE:
823 case CHIP_BONAIRE:
824 case CHIP_HAWAII:
825 case CHIP_TONGA:
826 case CHIP_FIJI:
827 case CHIP_POLARIS10:
828 case CHIP_POLARIS11:
829 device->gs_table_depth = 32;
830 return;
831 default:
832 unreachable("unknown GPU");
833 }
834 }
835
836 VkResult radv_CreateDevice(
837 VkPhysicalDevice physicalDevice,
838 const VkDeviceCreateInfo* pCreateInfo,
839 const VkAllocationCallbacks* pAllocator,
840 VkDevice* pDevice)
841 {
842 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
843 VkResult result;
844 struct radv_device *device;
845
846 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
847 if (!is_extension_enabled(physical_device->extensions.ext_array,
848 physical_device->extensions.num_ext,
849 pCreateInfo->ppEnabledExtensionNames[i]))
850 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
851 }
852
853 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
854 sizeof(*device), 8,
855 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
856 if (!device)
857 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
858
859 memset(device, 0, sizeof(*device));
860
861 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
862 device->instance = physical_device->instance;
863 device->physical_device = physical_device;
864
865 device->debug_flags = device->instance->debug_flags;
866
867 device->ws = physical_device->ws;
868 if (pAllocator)
869 device->alloc = *pAllocator;
870 else
871 device->alloc = physical_device->instance->alloc;
872
873 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
874 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
875 uint32_t qfi = queue_create->queueFamilyIndex;
876
877 device->queues[qfi] = vk_alloc(&device->alloc,
878 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
879 if (!device->queues[qfi]) {
880 result = VK_ERROR_OUT_OF_HOST_MEMORY;
881 goto fail;
882 }
883
884 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
885
886 device->queue_count[qfi] = queue_create->queueCount;
887
888 for (unsigned q = 0; q < queue_create->queueCount; q++) {
889 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
890 if (result != VK_SUCCESS)
891 goto fail;
892 }
893 }
894
895 #if HAVE_LLVM < 0x0400
896 device->llvm_supports_spill = false;
897 #else
898 device->llvm_supports_spill = true;
899 #endif
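/* Register spilling in LLVM's AMDGPU backend is only usable from LLVM 4.0
 * on, which is what the compile-time check above encodes. */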
900
901 /* The maximum number of scratch waves. Scratch space isn't divided
902 * evenly between CUs. The number is only a function of the number of CUs.
903 * We can decrease the constant to decrease the scratch buffer size.
904 *
905 * device->scratch_waves must be >= the maximum possible size of
906 * 1 threadgroup, so that the hw doesn't hang from being unable
907 * to start any.
908 *
909 * The recommended value is 4 per CU at most. Higher numbers don't
910 * bring much benefit, but they still occupy chip resources (think
911 * async compute). I've seen ~2% performance difference between 4 and 32.
912 */
913 uint32_t max_threads_per_block = 2048;
914 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
915 max_threads_per_block / 64);
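/* Worked example (hypothetical CU count): with 16 compute units this gives
 * MAX2(32 * 16, 2048 / 64) = 512 scratch waves; the 2048 / 64 = 32 wave
 * floor only matters for tiny CU counts, since one threadgroup of 2048
 * threads needs at most 32 waves of 64 lanes. */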
916
917 radv_device_init_gs_info(device);
918
919 result = radv_device_init_meta(device);
920 if (result != VK_SUCCESS)
921 goto fail;
922
923 radv_device_init_msaa(device);
924
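/* Pre-build one minimal "empty" command stream per queue family; these are
 * submitted whenever a fence or semaphores must be signalled without any
 * real work (see radv_QueueSubmit below). */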
925 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
926 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
927 switch (family) {
928 case RADV_QUEUE_GENERAL:
929 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
930 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
931 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
932 break;
933 case RADV_QUEUE_COMPUTE:
934 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
935 radeon_emit(device->empty_cs[family], 0);
936 break;
937 }
938 device->ws->cs_finalize(device->empty_cs[family]);
939 }
940
941 if (getenv("RADV_TRACE_FILE")) {
942 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
943 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
944 if (!device->trace_bo) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
945 goto fail;
}
946
947 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
948 if (!device->trace_id_ptr) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
949 goto fail;
}
950 }
951
952 /* temporarily disabled on CIK */
953 if (device->physical_device->rad_info.chip_class > CIK)
954 cik_create_gfx_config(device);
955
956 *pDevice = radv_device_to_handle(device);
957 return VK_SUCCESS;
958
959 fail:
960 if (device->trace_bo)
961 device->ws->buffer_destroy(device->trace_bo);
962
963 if (device->gfx_init)
964 device->ws->buffer_destroy(device->gfx_init);
965
966 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
967 for (unsigned q = 0; q < device->queue_count[i]; q++)
968 radv_queue_finish(&device->queues[i][q]);
969 if (device->queue_count[i])
970 vk_free(&device->alloc, device->queues[i]);
971 }
972
973 vk_free(&device->alloc, device);
974 return result;
975 }
976
977 void radv_DestroyDevice(
978 VkDevice _device,
979 const VkAllocationCallbacks* pAllocator)
980 {
981 RADV_FROM_HANDLE(radv_device, device, _device);
982
983 if (device->trace_bo)
984 device->ws->buffer_destroy(device->trace_bo);
985
986 if (device->gfx_init)
987 device->ws->buffer_destroy(device->gfx_init);
988
989 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
990 for (unsigned q = 0; q < device->queue_count[i]; q++)
991 radv_queue_finish(&device->queues[i][q]);
992 if (device->queue_count[i])
993 vk_free(&device->alloc, device->queues[i]);
994 }
995 radv_device_finish_meta(device);
996
997 vk_free(&device->alloc, device);
998 }
999
1000 VkResult radv_EnumerateInstanceExtensionProperties(
1001 const char* pLayerName,
1002 uint32_t* pPropertyCount,
1003 VkExtensionProperties* pProperties)
1004 {
1005 if (pProperties == NULL) {
1006 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1007 return VK_SUCCESS;
1008 }
1009
1010 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1011 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1012
1013 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1014 return VK_INCOMPLETE;
1015
1016 return VK_SUCCESS;
1017 }
1018
1019 VkResult radv_EnumerateDeviceExtensionProperties(
1020 VkPhysicalDevice physicalDevice,
1021 const char* pLayerName,
1022 uint32_t* pPropertyCount,
1023 VkExtensionProperties* pProperties)
1024 {
1025 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1026
1027 if (pProperties == NULL) {
1028 *pPropertyCount = pdevice->extensions.num_ext;
1029 return VK_SUCCESS;
1030 }
1031
1032 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1033 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1034
1035 if (*pPropertyCount < pdevice->extensions.num_ext)
1036 return VK_INCOMPLETE;
1037
1038 return VK_SUCCESS;
1039 }
1040
1041 VkResult radv_EnumerateInstanceLayerProperties(
1042 uint32_t* pPropertyCount,
1043 VkLayerProperties* pProperties)
1044 {
1045 if (pProperties == NULL) {
1046 *pPropertyCount = 0;
1047 return VK_SUCCESS;
1048 }
1049
1050 /* None supported at this time */
1051 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1052 }
1053
1054 VkResult radv_EnumerateDeviceLayerProperties(
1055 VkPhysicalDevice physicalDevice,
1056 uint32_t* pPropertyCount,
1057 VkLayerProperties* pProperties)
1058 {
1059 if (pProperties == NULL) {
1060 *pPropertyCount = 0;
1061 return VK_SUCCESS;
1062 }
1063
1064 /* None supported at this time */
1065 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1066 }
1067
1068 void radv_GetDeviceQueue(
1069 VkDevice _device,
1070 uint32_t queueFamilyIndex,
1071 uint32_t queueIndex,
1072 VkQueue* pQueue)
1073 {
1074 RADV_FROM_HANDLE(radv_device, device, _device);
1075
1076 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1077 }
1078
1079 static void radv_dump_trace(struct radv_device *device,
1080 struct radeon_winsys_cs *cs)
1081 {
1082 const char *filename = getenv("RADV_TRACE_FILE");
1083 FILE *f = fopen(filename, "w");
1084 if (!f) {
1085 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1086 return;
1087 }
1088
1089 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1090 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1091 fclose(f);
1092 }
1093
1094 static void
1095 fill_geom_rings(struct radv_queue *queue,
1096 uint32_t *map,
1097 uint32_t esgs_ring_size,
1098 struct radeon_winsys_bo *esgs_ring_bo,
1099 uint32_t gsvs_ring_size,
1100 struct radeon_winsys_bo *gsvs_ring_bo)
1101 {
1102 uint64_t esgs_va = 0, gsvs_va = 0;
1103 uint32_t *desc = &map[4];
1104
1105 if (esgs_ring_bo)
1106 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1107 if (gsvs_ring_bo)
1108 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1109
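/* Each ring entry below is a 4-dword GCN buffer resource descriptor (V#):
 * word 0: base address bits [31:0]
 * word 1: base address bits [47:32] | stride | swizzle enable
 * word 2: number of records (here, the ring size in bytes)
 * word 3: dst_sel/num_format/data_format | element size | index stride | add_tid */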
1110 /* stride 0, num records - size, add tid, swizzle, elsize4,
1111 index stride 64 */
1112 desc[0] = esgs_va;
1113 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1114 S_008F04_STRIDE(0) |
1115 S_008F04_SWIZZLE_ENABLE(true);
1116 desc[2] = esgs_ring_size;
1117 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1118 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1119 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1120 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1121 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1122 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1123 S_008F0C_ELEMENT_SIZE(1) |
1124 S_008F0C_INDEX_STRIDE(3) |
1125 S_008F0C_ADD_TID_ENABLE(true);
1126
1127 desc += 4;
1128 /* GS entry for ES->GS ring */
1129 /* stride 0, num records - size, elsize0,
1130 index stride 0 */
1131 desc[0] = esgs_va;
1132 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1133 S_008F04_STRIDE(0) |
1134 S_008F04_SWIZZLE_ENABLE(false);
1135 desc[2] = esgs_ring_size;
1136 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1137 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1138 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1139 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1140 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1141 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1142 S_008F0C_ELEMENT_SIZE(0) |
1143 S_008F0C_INDEX_STRIDE(0) |
1144 S_008F0C_ADD_TID_ENABLE(false);
1145
1146 desc += 4;
1147 /* VS entry for GS->VS ring */
1148 /* stride 0, num records - size, elsize0,
1149 index stride 0 */
1150 desc[0] = gsvs_va;
1151 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1152 S_008F04_STRIDE(0) |
1153 S_008F04_SWIZZLE_ENABLE(false);
1154 desc[2] = gsvs_ring_size;
1155 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1156 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1157 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1158 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1159 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1160 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1161 S_008F0C_ELEMENT_SIZE(0) |
1162 S_008F0C_INDEX_STRIDE(0) |
1163 S_008F0C_ADD_TID_ENABLE(false);
1164 desc += 4;
1165
1166 /* stride gsvs_itemsize, num records 64
1167 elsize 4, index stride 16 */
1168 /* shader will patch stride and desc[2] */
1169 desc[0] = gsvs_va;
1170 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1171 S_008F04_STRIDE(0) |
1172 S_008F04_SWIZZLE_ENABLE(true);
1173 desc[2] = 0;
1174 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1175 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1176 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1177 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1178 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1179 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1180 S_008F0C_ELEMENT_SIZE(1) |
1181 S_008F0C_INDEX_STRIDE(1) |
1182 S_008F0C_ADD_TID_ENABLE(true);
1183 }
1184
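/* (Re)build the per-queue preamble command stream: it binds the scratch
 * buffer and the ESGS/GSVS rings before the user's command buffers run,
 * and is only rebuilt when a submission needs larger buffers than the
 * queue currently owns. */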
1185 static VkResult
1186 radv_get_preamble_cs(struct radv_queue *queue,
1187 uint32_t scratch_size,
1188 uint32_t compute_scratch_size,
1189 uint32_t esgs_ring_size,
1190 uint32_t gsvs_ring_size,
1191 struct radeon_winsys_cs **preamble_cs)
1192 {
1193 struct radeon_winsys_bo *scratch_bo = NULL;
1194 struct radeon_winsys_bo *descriptor_bo = NULL;
1195 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1196 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1197 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1198 struct radeon_winsys_cs *cs = NULL;
1199
1200 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) {
1201 *preamble_cs = NULL;
1202 return VK_SUCCESS;
1203 }
1204
1205 if (scratch_size <= queue->scratch_size &&
1206 compute_scratch_size <= queue->compute_scratch_size &&
1207 esgs_ring_size <= queue->esgs_ring_size &&
1208 gsvs_ring_size <= queue->gsvs_ring_size) {
1209 *preamble_cs = queue->preamble_cs;
1210 return VK_SUCCESS;
1211 }
1212
1213 if (scratch_size > queue->scratch_size) {
1214 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1215 scratch_size,
1216 4096,
1217 RADEON_DOMAIN_VRAM,
1218 RADEON_FLAG_NO_CPU_ACCESS);
1219 if (!scratch_bo)
1220 goto fail;
1221 } else
1222 scratch_bo = queue->scratch_bo;
1223
1224 if (compute_scratch_size > queue->compute_scratch_size) {
1225 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1226 compute_scratch_size,
1227 4096,
1228 RADEON_DOMAIN_VRAM,
1229 RADEON_FLAG_NO_CPU_ACCESS);
1230 if (!compute_scratch_bo)
1231 goto fail;
1232
1233 } else
1234 compute_scratch_bo = queue->compute_scratch_bo;
1235
1236 if (esgs_ring_size > queue->esgs_ring_size) {
1237 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1238 esgs_ring_size,
1239 4096,
1240 RADEON_DOMAIN_VRAM,
1241 RADEON_FLAG_NO_CPU_ACCESS);
1242 if (!esgs_ring_bo)
1243 goto fail;
1244 } else {
1245 esgs_ring_bo = queue->esgs_ring_bo;
1246 esgs_ring_size = queue->esgs_ring_size;
1247 }
1248
1249 if (gsvs_ring_size > queue->gsvs_ring_size) {
1250 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1251 gsvs_ring_size,
1252 4096,
1253 RADEON_DOMAIN_VRAM,
1254 RADEON_FLAG_NO_CPU_ACCESS);
1255 if (!gsvs_ring_bo)
1256 goto fail;
1257 } else {
1258 gsvs_ring_bo = queue->gsvs_ring_bo;
1259 gsvs_ring_size = queue->gsvs_ring_size;
1260 }
1261
1262 if (scratch_bo != queue->scratch_bo ||
1263 esgs_ring_bo != queue->esgs_ring_bo ||
1264 gsvs_ring_bo != queue->gsvs_ring_bo) {
1265 uint32_t size = 0;
1266 if (gsvs_ring_bo || esgs_ring_bo)
1267 size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
1268 else if (scratch_bo)
1269 size = 8; /* 2 dword */
1270
1271 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1272 size,
1273 4096,
1274 RADEON_DOMAIN_VRAM,
1275 RADEON_FLAG_CPU_ACCESS);
1276 if (!descriptor_bo)
1277 goto fail;
1278 } else
1279 descriptor_bo = queue->descriptor_bo;
1280
1281 cs = queue->device->ws->cs_create(queue->device->ws,
1282 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1283 if (!cs)
1284 goto fail;
1285
1286
1287 if (scratch_bo)
1288 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1289
1290 if (esgs_ring_bo)
1291 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1292
1293 if (gsvs_ring_bo)
1294 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1295
1296 if (descriptor_bo)
1297 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1298
1299 if (descriptor_bo != queue->descriptor_bo) {
1300 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
if (!map)
goto fail;
1301
1302 if (scratch_bo) {
1303 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1304 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1305 S_008F04_SWIZZLE_ENABLE(1);
1306 map[0] = scratch_va;
1307 map[1] = rsrc1;
1308 }
1309
1310 if (esgs_ring_bo || gsvs_ring_bo)
1311 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1312
1313 queue->device->ws->buffer_unmap(descriptor_bo);
1314 }
1315
1316 if (esgs_ring_bo || gsvs_ring_bo) {
1317 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1318 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1319 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1320 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1321
1322 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1323 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1324 radeon_emit(cs, esgs_ring_size >> 8);
1325 radeon_emit(cs, gsvs_ring_size >> 8);
1326 } else {
1327 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1328 radeon_emit(cs, esgs_ring_size >> 8);
1329 radeon_emit(cs, gsvs_ring_size >> 8);
1330 }
1331 }
1332
1333 if (descriptor_bo) {
1334 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1335 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1336 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1337 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1338 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1339 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1340
1341 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1342
1343 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1344 radeon_set_sh_reg_seq(cs, regs[i], 2);
1345 radeon_emit(cs, va);
1346 radeon_emit(cs, va >> 32);
1347 }
1348 }
1349
1350 if (compute_scratch_bo) {
1351 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1352 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1353 S_008F04_SWIZZLE_ENABLE(1);
1354
1355 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1356
1357 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1358 radeon_emit(cs, scratch_va);
1359 radeon_emit(cs, rsrc1);
1360 }
1361
1362 if (!queue->device->ws->cs_finalize(cs))
1363 goto fail;
1364
1365 if (queue->preamble_cs)
1366 queue->device->ws->cs_destroy(queue->preamble_cs);
1367
1368 queue->preamble_cs = cs;
1369
1370 if (scratch_bo != queue->scratch_bo) {
1371 if (queue->scratch_bo)
1372 queue->device->ws->buffer_destroy(queue->scratch_bo);
1373 queue->scratch_bo = scratch_bo;
1374 queue->scratch_size = scratch_size;
1375 }
1376
1377 if (compute_scratch_bo != queue->compute_scratch_bo) {
1378 if (queue->compute_scratch_bo)
1379 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1380 queue->compute_scratch_bo = compute_scratch_bo;
1381 queue->compute_scratch_size = compute_scratch_size;
1382 }
1383
1384 if (esgs_ring_bo != queue->esgs_ring_bo) {
1385 if (queue->esgs_ring_bo)
1386 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1387 queue->esgs_ring_bo = esgs_ring_bo;
1388 queue->esgs_ring_size = esgs_ring_size;
1389 }
1390
1391 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1392 if (queue->gsvs_ring_bo)
1393 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1394 queue->gsvs_ring_bo = gsvs_ring_bo;
1395 queue->gsvs_ring_size = gsvs_ring_size;
1396 }
1397
1398 if (descriptor_bo != queue->descriptor_bo) {
1399 if (queue->descriptor_bo)
1400 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1401
1402 queue->descriptor_bo = descriptor_bo;
1403 }
1404
1405 *preamble_cs = cs;
1406 return VK_SUCCESS;
1407 fail:
1408 if (cs)
1409 queue->device->ws->cs_destroy(cs);
1410 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1411 queue->device->ws->buffer_destroy(descriptor_bo);
1412 if (scratch_bo && scratch_bo != queue->scratch_bo)
1413 queue->device->ws->buffer_destroy(scratch_bo);
1414 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1415 queue->device->ws->buffer_destroy(compute_scratch_bo);
1416 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1417 queue->device->ws->buffer_destroy(esgs_ring_bo);
1418 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1419 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1420 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1421 }
1422
1423 VkResult radv_QueueSubmit(
1424 VkQueue _queue,
1425 uint32_t submitCount,
1426 const VkSubmitInfo* pSubmits,
1427 VkFence _fence)
1428 {
1429 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1430 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1431 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1432 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1433 int ret;
1434 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
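/* With RADV_TRACE_FILE set, command streams are submitted one at a time
 * (max_cs_submission == 1) so a GPU hang can be attributed to a single
 * command buffer by the trace logic further down. */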
1435 uint32_t scratch_size = 0;
1436 uint32_t compute_scratch_size = 0;
1437 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1438 struct radeon_winsys_cs *preamble_cs = NULL;
1439 VkResult result;
1440 bool fence_emitted = false;
1441
1442 /* Do this first so failing to allocate scratch buffers can't result in
1443 * partially executed submissions. */
1444 for (uint32_t i = 0; i < submitCount; i++) {
1445 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1446 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1447 pSubmits[i].pCommandBuffers[j]);
1448
1449 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1450 compute_scratch_size = MAX2(compute_scratch_size,
1451 cmd_buffer->compute_scratch_size_needed);
1452 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1453 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1454 }
1455 }
1456
1457 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size, esgs_ring_size, gsvs_ring_size, &preamble_cs);
1458 if (result != VK_SUCCESS)
1459 return result;
1460
1461 for (uint32_t i = 0; i < submitCount; i++) {
1462 struct radeon_winsys_cs **cs_array;
1463 bool can_patch = true;
1464 uint32_t advance;
1465 int draw_cmd_buffers_count = 0;
1466
1467 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1468 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1469 pSubmits[i].pCommandBuffers[j]);
1470 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1471 if (cmd_buffer->no_draws)
1472 continue;
1473 draw_cmd_buffers_count++;
1474 }
1475
1476 if (!draw_cmd_buffers_count) {
1477 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1478 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1479 &queue->device->empty_cs[queue->queue_family_index],
1480 1, NULL,
1481 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1482 pSubmits[i].waitSemaphoreCount,
1483 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1484 pSubmits[i].signalSemaphoreCount,
1485 false, base_fence);
1486 if (ret) {
1487 radv_loge("failed to submit CS %d\n", i);
1488 abort();
1489 }
1490 fence_emitted = true;
1491 }
1492 continue;
1493 }
1494
1495 cs_array = malloc(sizeof(struct radeon_winsys_cs *) * draw_cmd_buffers_count);
if (!cs_array)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1496
1497 int draw_cmd_buffer_idx = 0;
1498 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1499 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1500 pSubmits[i].pCommandBuffers[j]);
1501 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1502 if (cmd_buffer->no_draws)
1503 continue;
1504
1505 cs_array[draw_cmd_buffer_idx] = cmd_buffer->cs;
1506 draw_cmd_buffer_idx++;
1507 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1508 can_patch = false;
1509 }
1510
1511 for (uint32_t j = 0; j < draw_cmd_buffers_count; j += advance) {
1512 advance = MIN2(max_cs_submission,
1513 draw_cmd_buffers_count - j);
1514 bool b = j == 0;
1515 bool e = j + advance == draw_cmd_buffers_count;
1516
1517 if (queue->device->trace_bo)
1518 *queue->device->trace_id_ptr = 0;
1519
1520 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1521 advance, preamble_cs,
1522 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1523 b ? pSubmits[i].waitSemaphoreCount : 0,
1524 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1525 e ? pSubmits[i].signalSemaphoreCount : 0,
1526 can_patch, base_fence);
1527
1528 if (ret) {
1529 radv_loge("failed to submit CS %d\n", i);
1530 abort();
1531 }
1532 fence_emitted = true;
1533 if (queue->device->trace_bo) {
1534 bool success = queue->device->ws->ctx_wait_idle(
1535 queue->hw_ctx,
1536 radv_queue_family_to_ring(
1537 queue->queue_family_index),
1538 queue->queue_idx);
1539
1540 if (!success) { /* Hang */
1541 radv_dump_trace(queue->device, cs_array[j]);
1542 abort();
1543 }
1544 }
1545 }
1546 free(cs_array);
1547 }
1548
1549 if (fence) {
1550 if (!fence_emitted)
1551 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1552 &queue->device->empty_cs[queue->queue_family_index],
1553 1, NULL, NULL, 0, NULL, 0,
1554 false, base_fence);
1555
1556 fence->submitted = true;
1557 }
1558
1559 return VK_SUCCESS;
1560 }
1561
1562 VkResult radv_QueueWaitIdle(
1563 VkQueue _queue)
1564 {
1565 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1566
1567 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1568 radv_queue_family_to_ring(queue->queue_family_index),
1569 queue->queue_idx);
1570 return VK_SUCCESS;
1571 }
1572
1573 VkResult radv_DeviceWaitIdle(
1574 VkDevice _device)
1575 {
1576 RADV_FROM_HANDLE(radv_device, device, _device);
1577
1578 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1579 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1580 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1581 }
1582 }
1583 return VK_SUCCESS;
1584 }
1585
1586 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1587 VkInstance instance,
1588 const char* pName)
1589 {
1590 return radv_lookup_entrypoint(pName);
1591 }
1592
1593 /* The loader wants us to expose a second GetInstanceProcAddr function
1594 * to work around certain LD_PRELOAD issues seen in apps.
1595 */
1596 PUBLIC
1597 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1598 VkInstance instance,
1599 const char* pName);
1600
1601 PUBLIC
1602 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1603 VkInstance instance,
1604 const char* pName)
1605 {
1606 return radv_GetInstanceProcAddr(instance, pName);
1607 }
1608
1609 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1610 VkDevice device,
1611 const char* pName)
1612 {
1613 return radv_lookup_entrypoint(pName);
1614 }
1615
1616 VkResult radv_AllocateMemory(
1617 VkDevice _device,
1618 const VkMemoryAllocateInfo* pAllocateInfo,
1619 const VkAllocationCallbacks* pAllocator,
1620 VkDeviceMemory* pMem)
1621 {
1622 RADV_FROM_HANDLE(radv_device, device, _device);
1623 struct radv_device_memory *mem;
1624 VkResult result;
1625 enum radeon_bo_domain domain;
1626 uint32_t flags = 0;
1627 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1628
1629 if (pAllocateInfo->allocationSize == 0) {
1630 /* Apparently, this is allowed */
1631 *pMem = VK_NULL_HANDLE;
1632 return VK_SUCCESS;
1633 }
1634
1635 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1636 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1637 if (mem == NULL)
1638 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1639
1640 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1641 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1642 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1643 domain = RADEON_DOMAIN_GTT;
1644 else
1645 domain = RADEON_DOMAIN_VRAM;
1646
1647 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1648 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1649 else
1650 flags |= RADEON_FLAG_CPU_ACCESS;
1651
1652 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1653 flags |= RADEON_FLAG_GTT_WC;
1654
1655 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
1656 domain, flags);
1657
1658 if (!mem->bo) {
1659 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1660 goto fail;
1661 }
1662 mem->type_index = pAllocateInfo->memoryTypeIndex;
1663
1664 *pMem = radv_device_memory_to_handle(mem);
1665
1666 return VK_SUCCESS;
1667
1668 fail:
1669 vk_free2(&device->alloc, pAllocator, mem);
1670
1671 return result;
1672 }
1673
1674 void radv_FreeMemory(
1675 VkDevice _device,
1676 VkDeviceMemory _mem,
1677 const VkAllocationCallbacks* pAllocator)
1678 {
1679 RADV_FROM_HANDLE(radv_device, device, _device);
1680 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1681
1682 if (mem == NULL)
1683 return;
1684
1685 device->ws->buffer_destroy(mem->bo);
1686 mem->bo = NULL;
1687
1688 vk_free2(&device->alloc, pAllocator, mem);
1689 }
1690
1691 VkResult radv_MapMemory(
1692 VkDevice _device,
1693 VkDeviceMemory _memory,
1694 VkDeviceSize offset,
1695 VkDeviceSize size,
1696 VkMemoryMapFlags flags,
1697 void** ppData)
1698 {
1699 RADV_FROM_HANDLE(radv_device, device, _device);
1700 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1701
1702 if (mem == NULL) {
1703 *ppData = NULL;
1704 return VK_SUCCESS;
1705 }
1706
1707 *ppData = device->ws->buffer_map(mem->bo);
1708 if (*ppData) {
1709 *ppData = (char *)*ppData + offset;
1710 return VK_SUCCESS;
1711 }
1712
1713 return VK_ERROR_MEMORY_MAP_FAILED;
1714 }
1715
1716 void radv_UnmapMemory(
1717 VkDevice _device,
1718 VkDeviceMemory _memory)
1719 {
1720 RADV_FROM_HANDLE(radv_device, device, _device);
1721 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1722
1723 if (mem == NULL)
1724 return;
1725
1726 device->ws->buffer_unmap(mem->bo);
1727 }
1728
1729 VkResult radv_FlushMappedMemoryRanges(
1730 VkDevice _device,
1731 uint32_t memoryRangeCount,
1732 const VkMappedMemoryRange* pMemoryRanges)
1733 {
1734 return VK_SUCCESS;
1735 }
1736
1737 VkResult radv_InvalidateMappedMemoryRanges(
1738 VkDevice _device,
1739 uint32_t memoryRangeCount,
1740 const VkMappedMemoryRange* pMemoryRanges)
1741 {
1742 return VK_SUCCESS;
1743 }
1744
1745 void radv_GetBufferMemoryRequirements(
1746 VkDevice device,
1747 VkBuffer _buffer,
1748 VkMemoryRequirements* pMemoryRequirements)
1749 {
1750 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1751
1752 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1753
1754 pMemoryRequirements->size = buffer->size;
1755 pMemoryRequirements->alignment = 16;
1756 }
1757
1758 void radv_GetImageMemoryRequirements(
1759 VkDevice device,
1760 VkImage _image,
1761 VkMemoryRequirements* pMemoryRequirements)
1762 {
1763 RADV_FROM_HANDLE(radv_image, image, _image);
1764
1765 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1766
1767 pMemoryRequirements->size = image->size;
1768 pMemoryRequirements->alignment = image->alignment;
1769 }
1770
1771 void radv_GetImageSparseMemoryRequirements(
1772 VkDevice device,
1773 VkImage image,
1774 uint32_t* pSparseMemoryRequirementCount,
1775 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1776 {
1777 stub();
1778 }
1779
1780 void radv_GetDeviceMemoryCommitment(
1781 VkDevice device,
1782 VkDeviceMemory memory,
1783 VkDeviceSize* pCommittedMemoryInBytes)
1784 {
1785 *pCommittedMemoryInBytes = 0;
1786 }
1787
1788 VkResult radv_BindBufferMemory(
1789 VkDevice device,
1790 VkBuffer _buffer,
1791 VkDeviceMemory _memory,
1792 VkDeviceSize memoryOffset)
1793 {
1794 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1795 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1796
1797 if (mem) {
1798 buffer->bo = mem->bo;
1799 buffer->offset = memoryOffset;
1800 } else {
1801 buffer->bo = NULL;
1802 buffer->offset = 0;
1803 }
1804
1805 return VK_SUCCESS;
1806 }
1807
1808 VkResult radv_BindImageMemory(
1809 VkDevice device,
1810 VkImage _image,
1811 VkDeviceMemory _memory,
1812 VkDeviceSize memoryOffset)
1813 {
1814 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1815 RADV_FROM_HANDLE(radv_image, image, _image);
1816
1817 if (mem) {
1818 image->bo = mem->bo;
1819 image->offset = memoryOffset;
1820 } else {
1821 image->bo = NULL;
1822 image->offset = 0;
1823 }
1824
1825 return VK_SUCCESS;
1826 }
1827
1828 VkResult radv_QueueBindSparse(
1829 VkQueue queue,
1830 uint32_t bindInfoCount,
1831 const VkBindSparseInfo* pBindInfo,
1832 VkFence fence)
1833 {
1834 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1835 }
1836
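/* Fences pair a winsys fence with CPU-side submitted/signalled flags, so
 * status queries can be answered without a kernel round-trip once a fence
 * is known to be signalled.
 */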
1837 VkResult radv_CreateFence(
1838 VkDevice _device,
1839 const VkFenceCreateInfo* pCreateInfo,
1840 const VkAllocationCallbacks* pAllocator,
1841 VkFence* pFence)
1842 {
1843 RADV_FROM_HANDLE(radv_device, device, _device);
1844 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1845 sizeof(*fence), 8,
1846 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1847
1848 if (!fence)
1849 return VK_ERROR_OUT_OF_HOST_MEMORY;
1850
1851 memset(fence, 0, sizeof(*fence));
1852 fence->submitted = false;
1853 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1854 fence->fence = device->ws->create_fence();
1855 if (!fence->fence) {
1856 vk_free2(&device->alloc, pAllocator, fence);
1857 return VK_ERROR_OUT_OF_HOST_MEMORY;
1858 }
1859
1860 *pFence = radv_fence_to_handle(fence);
1861
1862 return VK_SUCCESS;
1863 }
1864
1865 void radv_DestroyFence(
1866 VkDevice _device,
1867 VkFence _fence,
1868 const VkAllocationCallbacks* pAllocator)
1869 {
1870 RADV_FROM_HANDLE(radv_device, device, _device);
1871 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1872
1873 if (!fence)
1874 return;
1875 device->ws->destroy_fence(fence->fence);
1876 vk_free2(&device->alloc, pAllocator, fence);
1877 }
1878
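/* Vulkan timeouts are relative nanosecond counts, while the winsys waits
 * on an absolute CLOCK_MONOTONIC deadline. The MIN2 clamp keeps the
 * addition from overflowing when the caller passes UINT64_MAX.
 */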
1879 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1880 {
1881 uint64_t current_time;
1882 struct timespec tv;
1883
1884 clock_gettime(CLOCK_MONOTONIC, &tv);
1885 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1886
1887 timeout = MIN2(UINT64_MAX - current_time, timeout);
1888
1889 return current_time + timeout;
1890 }
1891
1892 VkResult radv_WaitForFences(
1893 VkDevice _device,
1894 uint32_t fenceCount,
1895 const VkFence* pFences,
1896 VkBool32 waitAll,
1897 uint64_t timeout)
1898 {
1899 RADV_FROM_HANDLE(radv_device, device, _device);
1900 timeout = radv_get_absolute_timeout(timeout);
1901
1902 if (!waitAll && fenceCount > 1) {
1903 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1904 }
1905
1906 for (uint32_t i = 0; i < fenceCount; ++i) {
1907 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1908 bool expired = false;
1909
1910 if (fence->signalled)
1911 continue;
1912
1913 if (!fence->submitted)
1914 return VK_TIMEOUT;
1915
1916 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1917 if (!expired)
1918 return VK_TIMEOUT;
1919
1920 fence->signalled = true;
1921 }
1922
1923 return VK_SUCCESS;
1924 }
1925
1926 VkResult radv_ResetFences(VkDevice device,
1927 uint32_t fenceCount,
1928 const VkFence *pFences)
1929 {
1930 for (unsigned i = 0; i < fenceCount; ++i) {
1931 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1932 fence->submitted = fence->signalled = false;
1933 }
1934
1935 return VK_SUCCESS;
1936 }
1937
1938 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1939 {
1940 RADV_FROM_HANDLE(radv_device, device, _device);
1941 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1942
1943 if (fence->signalled)
1944 return VK_SUCCESS;
1945 if (!fence->submitted)
1946 return VK_NOT_READY;
1947
1948 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1949 return VK_NOT_READY;
1950
1951 return VK_SUCCESS;
1952 }
1953
1954
1955 // Queue semaphore functions
1956
1957 VkResult radv_CreateSemaphore(
1958 VkDevice _device,
1959 const VkSemaphoreCreateInfo* pCreateInfo,
1960 const VkAllocationCallbacks* pAllocator,
1961 VkSemaphore* pSemaphore)
1962 {
1963 RADV_FROM_HANDLE(radv_device, device, _device);
1964 struct radeon_winsys_sem *sem;
1965
1966 sem = device->ws->create_sem(device->ws);
1967 if (!sem)
1968 return VK_ERROR_OUT_OF_HOST_MEMORY;
1969
1970 *pSemaphore = (VkSemaphore)sem;
1971 return VK_SUCCESS;
1972 }
1973
1974 void radv_DestroySemaphore(
1975 VkDevice _device,
1976 VkSemaphore _semaphore,
1977 const VkAllocationCallbacks* pAllocator)
1978 {
1979 RADV_FROM_HANDLE(radv_device, device, _device);
1980 struct radeon_winsys_sem *sem;
1981 if (!_semaphore)
1982 return;
1983
1984 sem = (struct radeon_winsys_sem *)_semaphore;
1985 device->ws->destroy_sem(sem);
1986 }
1987
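/* Events are backed by a tiny CPU-visible GTT buffer: a 0 or 1 is written
 * into the mapped qword, the host side polls or updates that location
 * directly, and GPU-side sets and waits go through the command stream.
 */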
1988 VkResult radv_CreateEvent(
1989 VkDevice _device,
1990 const VkEventCreateInfo* pCreateInfo,
1991 const VkAllocationCallbacks* pAllocator,
1992 VkEvent* pEvent)
1993 {
1994 RADV_FROM_HANDLE(radv_device, device, _device);
1995 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
1996 sizeof(*event), 8,
1997 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1998
1999 if (!event)
2000 return VK_ERROR_OUT_OF_HOST_MEMORY;
2001
2002 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2003 RADEON_DOMAIN_GTT,
2004 RADEON_FLAG_CPU_ACCESS);
2005 if (!event->bo) {
2006 vk_free2(&device->alloc, pAllocator, event);
2007 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2008 }
2009
2010 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
2011
2012 *pEvent = radv_event_to_handle(event);
2013
2014 return VK_SUCCESS;
2015 }
2016
2017 void radv_DestroyEvent(
2018 VkDevice _device,
2019 VkEvent _event,
2020 const VkAllocationCallbacks* pAllocator)
2021 {
2022 RADV_FROM_HANDLE(radv_device, device, _device);
2023 RADV_FROM_HANDLE(radv_event, event, _event);
2024
2025 if (!event)
2026 return;
2027 device->ws->buffer_destroy(event->bo);
2028 vk_free2(&device->alloc, pAllocator, event);
2029 }
2030
2031 VkResult radv_GetEventStatus(
2032 VkDevice _device,
2033 VkEvent _event)
2034 {
2035 RADV_FROM_HANDLE(radv_event, event, _event);
2036
2037 if (*event->map == 1)
2038 return VK_EVENT_SET;
2039 return VK_EVENT_RESET;
2040 }
2041
2042 VkResult radv_SetEvent(
2043 VkDevice _device,
2044 VkEvent _event)
2045 {
2046 RADV_FROM_HANDLE(radv_event, event, _event);
2047 *event->map = 1;
2048
2049 return VK_SUCCESS;
2050 }
2051
2052 VkResult radv_ResetEvent(
2053 VkDevice _device,
2054 VkEvent _event)
2055 {
2056 RADV_FROM_HANDLE(radv_event, event, _event);
2057 *event->map = 0;
2058
2059 return VK_SUCCESS;
2060 }
2061
2062 VkResult radv_CreateBuffer(
2063 VkDevice _device,
2064 const VkBufferCreateInfo* pCreateInfo,
2065 const VkAllocationCallbacks* pAllocator,
2066 VkBuffer* pBuffer)
2067 {
2068 RADV_FROM_HANDLE(radv_device, device, _device);
2069 struct radv_buffer *buffer;
2070
2071 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2072
2073 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2074 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2075 if (buffer == NULL)
2076 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2077
2078 buffer->size = pCreateInfo->size;
2079 buffer->usage = pCreateInfo->usage;
2080 buffer->bo = NULL;
2081 buffer->offset = 0;
2082
2083 *pBuffer = radv_buffer_to_handle(buffer);
2084
2085 return VK_SUCCESS;
2086 }
2087
2088 void radv_DestroyBuffer(
2089 VkDevice _device,
2090 VkBuffer _buffer,
2091 const VkAllocationCallbacks* pAllocator)
2092 {
2093 RADV_FROM_HANDLE(radv_device, device, _device);
2094 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2095
2096 if (!buffer)
2097 return;
2098
2099 vk_free2(&device->alloc, pAllocator, buffer);
2100 }
2101
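/* Select the per-level tiling index; stencil keeps its own table because
 * it may be tiled differently from the depth plane.
 */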
2102 static inline unsigned
2103 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2104 {
2105 if (stencil)
2106 return image->surface.stencil_tiling_index[level];
2107 else
2108 return image->surface.tiling_index[level];
2109 }
2110
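/* Precompute the CB_COLOR* register values for a color image view.
 * Surface addresses are 256-byte aligned, hence the '>> 8' when packing
 * virtual addresses into the *_BASE registers.
 */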
2111 static void
2112 radv_initialise_color_surface(struct radv_device *device,
2113 struct radv_color_buffer_info *cb,
2114 struct radv_image_view *iview)
2115 {
2116 const struct vk_format_description *desc;
2117 unsigned ntype, format, swap, endian;
2118 unsigned blend_clamp = 0, blend_bypass = 0;
2119 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2120 uint64_t va;
2121 const struct radeon_surf *surf = &iview->image->surface;
2122 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2123
2124 desc = vk_format_description(iview->vk_format);
2125
2126 memset(cb, 0, sizeof(*cb));
2127
2128 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2129 va += level_info->offset;
2130 cb->cb_color_base = va >> 8;
2131
2132 /* CMASK variables */
2133 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2134 va += iview->image->cmask.offset;
2135 cb->cb_color_cmask = va >> 8;
2136 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2137
2138 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2139 va += iview->image->dcc_offset;
2140 cb->cb_dcc_base = va >> 8;
2141
2142 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2143 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2144 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2145
2146 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2147 pitch_tile_max = level_info->nblk_x / 8 - 1;
2148 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2149 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2150
2151 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2152 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2153
2154 /* Intensity is implemented as Red, so treat it that way. */
2155 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2156 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2157
2158 if (iview->image->samples > 1) {
2159 unsigned log_samples = util_logbase2(iview->image->samples);
2160
2161 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2162 S_028C74_NUM_FRAGMENTS(log_samples);
2163 }
2164
2165 if (iview->image->fmask.size) {
2166 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2167 if (device->physical_device->rad_info.chip_class >= CIK)
2168 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2169 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2170 cb->cb_color_fmask = va >> 8;
2171 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2172 } else {
2173 /* This must be set for fast clear to work without FMASK. */
2174 if (device->physical_device->rad_info.chip_class >= CIK)
2175 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2176 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2177 cb->cb_color_fmask = cb->cb_color_base;
2178 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2179 }
2180
2181 ntype = radv_translate_color_numformat(iview->vk_format,
2182 desc,
2183 vk_format_get_first_non_void_channel(iview->vk_format));
2184 format = radv_translate_colorformat(iview->vk_format);
2185 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2186 radv_finishme("Illegal color\n");
2187 	swap = radv_translate_colorswap(iview->vk_format, false);
2188 endian = radv_colorformat_endian_swap(format);
2189
2190 /* blend clamp should be set for all NORM/SRGB types */
2191 if (ntype == V_028C70_NUMBER_UNORM ||
2192 ntype == V_028C70_NUMBER_SNORM ||
2193 ntype == V_028C70_NUMBER_SRGB)
2194 blend_clamp = 1;
2195
2196 /* set blend bypass according to docs if SINT/UINT or
2197 8/24 COLOR variants */
2198 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2199 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2200 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2201 blend_clamp = 0;
2202 blend_bypass = 1;
2203 }
2204 #if 0
2205 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2206 (format == V_028C70_COLOR_8 ||
2207 format == V_028C70_COLOR_8_8 ||
2208 format == V_028C70_COLOR_8_8_8_8))
2209 ->color_is_int8 = true;
2210 #endif
2211 cb->cb_color_info = S_028C70_FORMAT(format) |
2212 S_028C70_COMP_SWAP(swap) |
2213 S_028C70_BLEND_CLAMP(blend_clamp) |
2214 S_028C70_BLEND_BYPASS(blend_bypass) |
2215 S_028C70_SIMPLE_FLOAT(1) |
2216 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2217 ntype != V_028C70_NUMBER_SNORM &&
2218 ntype != V_028C70_NUMBER_SRGB &&
2219 format != V_028C70_COLOR_8_24 &&
2220 format != V_028C70_COLOR_24_8) |
2221 S_028C70_NUMBER_TYPE(ntype) |
2222 S_028C70_ENDIAN(endian);
2223 	if (iview->image->samples > 1 &&
2224 	    iview->image->fmask.size)
2225 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
2226
2227 if (iview->image->cmask.size &&
2228 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2229 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2230
2231 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2232 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2233
2234 if (device->physical_device->rad_info.chip_class >= VI) {
2235 unsigned max_uncompressed_block_size = 2;
2236 if (iview->image->samples > 1) {
2237 if (iview->image->surface.bpe == 1)
2238 max_uncompressed_block_size = 0;
2239 else if (iview->image->surface.bpe == 2)
2240 max_uncompressed_block_size = 1;
2241 }
2242
2243 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2244 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2245 }
2246
2247 /* This must be set for fast clear to work without FMASK. */
2248 if (!iview->image->fmask.size &&
2249 device->physical_device->rad_info.chip_class == SI) {
2250 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2251 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2252 }
2253 }
2254
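/* Precompute the DB_* register values for a depth/stencil view. The
 * polygon offset scale and format controls depend on the depth bit depth
 * of the Vulkan format.
 */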
2255 static void
2256 radv_initialise_ds_surface(struct radv_device *device,
2257 struct radv_ds_buffer_info *ds,
2258 struct radv_image_view *iview)
2259 {
2260 unsigned level = iview->base_mip;
2261 unsigned format;
2262 uint64_t va, s_offs, z_offs;
2263 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2264 memset(ds, 0, sizeof(*ds));
2265 switch (iview->vk_format) {
2266 case VK_FORMAT_D24_UNORM_S8_UINT:
2267 case VK_FORMAT_X8_D24_UNORM_PACK32:
2268 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2269 ds->offset_scale = 2.0f;
2270 break;
2271 case VK_FORMAT_D16_UNORM:
2272 case VK_FORMAT_D16_UNORM_S8_UINT:
2273 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2274 ds->offset_scale = 4.0f;
2275 break;
2276 case VK_FORMAT_D32_SFLOAT:
2277 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2278 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2279 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2280 ds->offset_scale = 1.0f;
2281 break;
2282 default:
2283 break;
2284 }
2285
2286 format = radv_translate_dbformat(iview->vk_format);
2287 if (format == V_028040_Z_INVALID) {
2288 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2289 }
2290
2291 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2292 s_offs = z_offs = va;
2293 z_offs += iview->image->surface.level[level].offset;
2294 s_offs += iview->image->surface.stencil_level[level].offset;
2295
2296 uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2297 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2298 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2299 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2300 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2301
2302 if (iview->image->samples > 1)
2303 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2304
2305 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2306 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2307 else
2308 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2309
2310 if (device->physical_device->rad_info.chip_class >= CIK) {
2311 struct radeon_info *info = &device->physical_device->rad_info;
2312 unsigned tiling_index = iview->image->surface.tiling_index[level];
2313 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2314 unsigned macro_index = iview->image->surface.macro_tile_index;
2315 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2316 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2317 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2318
2319 ds->db_depth_info |=
2320 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2321 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2322 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2323 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2324 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2325 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2326 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2327 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2328 } else {
2329 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2330 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2331 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2332 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2333 }
2334
2335 if (iview->image->htile.size && !level) {
2336 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2337 S_028040_ALLOW_EXPCLEAR(1);
2338
2339 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2340 /* Workaround: For a not yet understood reason, the
2341 * combination of MSAA, fast stencil clear and stencil
2342 * decompress messes with subsequent stencil buffer
2343 * uses. Problem was reproduced on Verde, Bonaire,
2344 * Tonga, and Carrizo.
2345 *
2346 * Disabling EXPCLEAR works around the problem.
2347 *
2348 * Check piglit's arb_texture_multisample-stencil-clear
2349 * test if you want to try changing this.
2350 */
2351 if (iview->image->samples <= 1)
2352 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2353 } else
2354 /* Use all of the htile_buffer for depth if there's no stencil. */
2355 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2356
2357 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2358 iview->image->htile.offset;
2359 ds->db_htile_data_base = va >> 8;
2360 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2361 } else {
2362 ds->db_htile_data_base = 0;
2363 ds->db_htile_surface = 0;
2364 }
2365
2366 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2367 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2368
2369 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2370 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2371 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2372 }
2373
2374 VkResult radv_CreateFramebuffer(
2375 VkDevice _device,
2376 const VkFramebufferCreateInfo* pCreateInfo,
2377 const VkAllocationCallbacks* pAllocator,
2378 VkFramebuffer* pFramebuffer)
2379 {
2380 RADV_FROM_HANDLE(radv_device, device, _device);
2381 struct radv_framebuffer *framebuffer;
2382
2383 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2384
2385 size_t size = sizeof(*framebuffer) +
2386 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2387 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2388 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2389 if (framebuffer == NULL)
2390 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2391
2392 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2393 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2394 VkImageView _iview = pCreateInfo->pAttachments[i];
2395 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2396 framebuffer->attachments[i].attachment = iview;
2397 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2398 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2399 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2400 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2401 }
2402 }
2403
2404 framebuffer->width = pCreateInfo->width;
2405 framebuffer->height = pCreateInfo->height;
2406 framebuffer->layers = pCreateInfo->layers;
2407
2408 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2409 return VK_SUCCESS;
2410 }
2411
2412 void radv_DestroyFramebuffer(
2413 VkDevice _device,
2414 VkFramebuffer _fb,
2415 const VkAllocationCallbacks* pAllocator)
2416 {
2417 RADV_FROM_HANDLE(radv_device, device, _device);
2418 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2419
2420 if (!fb)
2421 return;
2422 vk_free2(&device->alloc, pAllocator, fb);
2423 }
2424
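/* Helpers translating VkSampler enums into the SQ_TEX_* hardware
 * encodings used in the sampler descriptor words below.
 */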
2425 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2426 {
2427 switch (address_mode) {
2428 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2429 return V_008F30_SQ_TEX_WRAP;
2430 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2431 return V_008F30_SQ_TEX_MIRROR;
2432 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2433 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2434 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2435 return V_008F30_SQ_TEX_CLAMP_BORDER;
2436 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2437 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2438 default:
2439 unreachable("illegal tex wrap mode");
2440 break;
2441 }
2442 }
2443
2444 static unsigned
2445 radv_tex_compare(VkCompareOp op)
2446 {
2447 switch (op) {
2448 case VK_COMPARE_OP_NEVER:
2449 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2450 case VK_COMPARE_OP_LESS:
2451 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2452 case VK_COMPARE_OP_EQUAL:
2453 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2454 case VK_COMPARE_OP_LESS_OR_EQUAL:
2455 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2456 case VK_COMPARE_OP_GREATER:
2457 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2458 case VK_COMPARE_OP_NOT_EQUAL:
2459 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2460 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2461 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2462 case VK_COMPARE_OP_ALWAYS:
2463 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2464 default:
2465 unreachable("illegal compare mode");
2466 break;
2467 }
2468 }
2469
2470 static unsigned
2471 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2472 {
2473 switch (filter) {
2474 case VK_FILTER_NEAREST:
2475 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2476 V_008F38_SQ_TEX_XY_FILTER_POINT);
2477 case VK_FILTER_LINEAR:
2478 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2479 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2480 case VK_FILTER_CUBIC_IMG:
2481 default:
2482 		fprintf(stderr, "illegal texture filter\n");
2483 return 0;
2484 }
2485 }
2486
2487 static unsigned
2488 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2489 {
2490 switch (mode) {
2491 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2492 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2493 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2494 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2495 default:
2496 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2497 }
2498 }
2499
2500 static unsigned
2501 radv_tex_bordercolor(VkBorderColor bcolor)
2502 {
2503 switch (bcolor) {
2504 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2505 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2506 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2507 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2508 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2509 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2510 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2511 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2512 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2513 default:
2514 break;
2515 }
2516 return 0;
2517 }
2518
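/* Map a max-anisotropy value to the log2-encoded hardware ratio:
 * 0 -> 1x, 1 -> 2x, 2 -> 4x, 3 -> 8x, 4 -> 16x.
 */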
2519 static unsigned
2520 radv_tex_aniso_filter(unsigned filter)
2521 {
2522 if (filter < 2)
2523 return 0;
2524 if (filter < 4)
2525 return 1;
2526 if (filter < 8)
2527 return 2;
2528 if (filter < 16)
2529 return 3;
2530 return 4;
2531 }
2532
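/* Pack the four sampler descriptor dwords. LOD values are encoded as
 * unsigned 4.8 fixed point clamped to [0, 15]; the LOD bias is signed
 * fixed point clamped to [-16, 16].
 */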
2533 static void
2534 radv_init_sampler(struct radv_device *device,
2535 struct radv_sampler *sampler,
2536 const VkSamplerCreateInfo *pCreateInfo)
2537 {
2538 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2539 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2540 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2541 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2542
2543 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2544 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2545 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2546 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2547 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2548 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2549 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2550 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2551 S_008F30_DISABLE_CUBE_WRAP(0) |
2552 S_008F30_COMPAT_MODE(is_vi));
2553 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2554 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2555 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2556 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2557 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2558 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2559 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2560 S_008F38_MIP_POINT_PRECLAMP(1) |
2561 S_008F38_DISABLE_LSB_CEIL(1) |
2562 S_008F38_FILTER_PREC_FIX(1) |
2563 S_008F38_ANISO_OVERRIDE(is_vi));
2564 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2565 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2566 }
2567
2568 VkResult radv_CreateSampler(
2569 VkDevice _device,
2570 const VkSamplerCreateInfo* pCreateInfo,
2571 const VkAllocationCallbacks* pAllocator,
2572 VkSampler* pSampler)
2573 {
2574 RADV_FROM_HANDLE(radv_device, device, _device);
2575 struct radv_sampler *sampler;
2576
2577 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2578
2579 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2580 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2581 if (!sampler)
2582 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2583
2584 radv_init_sampler(device, sampler, pCreateInfo);
2585 *pSampler = radv_sampler_to_handle(sampler);
2586
2587 return VK_SUCCESS;
2588 }
2589
2590 void radv_DestroySampler(
2591 VkDevice _device,
2592 VkSampler _sampler,
2593 const VkAllocationCallbacks* pAllocator)
2594 {
2595 RADV_FROM_HANDLE(radv_device, device, _device);
2596 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2597
2598 if (!sampler)
2599 return;
2600 vk_free2(&device->alloc, pAllocator, sampler);
2601 }
2602
2603
2604 /* vk_icd.h does not declare this function, so we declare it here to
2605 * suppress Wmissing-prototypes.
2606 */
2607 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2608 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2609
2610 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2611 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2612 {
2613 /* For the full details on loader interface versioning, see
2614 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2615 * What follows is a condensed summary, to help you navigate the large and
2616 * confusing official doc.
2617 *
2618 * - Loader interface v0 is incompatible with later versions. We don't
2619 * support it.
2620 *
2621 * - In loader interface v1:
2622 * - The first ICD entrypoint called by the loader is
2623 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2624 * entrypoint.
2625 * - The ICD must statically expose no other Vulkan symbol unless it is
2626 * linked with -Bsymbolic.
2627 * - Each dispatchable Vulkan handle created by the ICD must be
2628 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2629 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2630 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2631 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2632 * such loader-managed surfaces.
2633 *
2634 * - Loader interface v2 differs from v1 in:
2635 * - The first ICD entrypoint called by the loader is
2636 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2637 * statically expose this entrypoint.
2638 *
2639 * - Loader interface v3 differs from v2 in:
2640 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2641 	 *      vkDestroySurfaceKHR(), and the other APIs that use VkSurfaceKHR,
2642 * because the loader no longer does so.
2643 */
2644 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2645 return VK_SUCCESS;
2646 }