radv: Flush in the initial preamble CS.
[mesa.git] / src / amd / vulkan / radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <dlfcn.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <fcntl.h>
33 #include <sys/stat.h>
34 #include "radv_private.h"
35 #include "radv_cs.h"
36 #include "util/strtod.h"
37
38 #include <xf86drm.h>
39 #include <amdgpu.h>
40 #include <amdgpu_drm.h>
41 #include "amdgpu_id.h"
42 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
43 #include "ac_llvm_util.h"
44 #include "vk_format.h"
45 #include "sid.h"
46 #include "util/debug.h"

47 struct radv_dispatch_table dtable;
48
49 static int
50 radv_get_function_timestamp(void *ptr, uint32_t* timestamp)
51 {
52 Dl_info info;
53 struct stat st;
54 if (!dladdr(ptr, &info) || !info.dli_fname) {
55 return -1;
56 }
57 if (stat(info.dli_fname, &st)) {
58 return -1;
59 }
60 *timestamp = st.st_mtim.tv_sec;
61 return 0;
62 }
63
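/* The pipeline-cache UUID built below is derived from build artifacts rather
 * than hardware state: bytes 0-3 hold the mtime of the Mesa binary, bytes 4-7
 * the mtime of the LLVM library, bytes 8-9 the GPU family, and the rest the
 * literal "radv". Any driver or compiler update therefore invalidates cached
 * pipelines. Illustrative layout (assuming little-endian):
 *
 *   | mesa mtime (4B) | llvm mtime (4B) | family (2B) | "radv\0"... |
 */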
64 static int
65 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
66 {
67 uint32_t mesa_timestamp, llvm_timestamp;
68 uint16_t f = family;
69 memset(uuid, 0, VK_UUID_SIZE);
70 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
71 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
72 return -1;
73
74 memcpy(uuid, &mesa_timestamp, 4);
75 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
76 memcpy((char*)uuid + 8, &f, 2);
77 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
78 return 0;
79 }
80
81 static const VkExtensionProperties instance_extensions[] = {
82 {
83 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
84 .specVersion = 25,
85 },
86 #ifdef VK_USE_PLATFORM_XCB_KHR
87 {
88 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
89 .specVersion = 6,
90 },
91 #endif
92 #ifdef VK_USE_PLATFORM_XLIB_KHR
93 {
94 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
95 .specVersion = 6,
96 },
97 #endif
98 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
99 {
100 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
101 .specVersion = 5,
102 },
103 #endif
104 };
105
106 static const VkExtensionProperties common_device_extensions[] = {
107 {
108 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
109 .specVersion = 1,
110 },
111 {
112 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
113 .specVersion = 1,
114 },
115 {
116 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
117 .specVersion = 68,
118 },
119 {
120 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
121 .specVersion = 1,
122 },
123 {
124 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
125 .specVersion = 1,
126 },
127 {
128 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
129 .specVersion = 1,
130 },
131 };
132
133 static VkResult
134 radv_extensions_register(struct radv_instance *instance,
135 struct radv_extensions *extensions,
136 const VkExtensionProperties *new_ext,
137 uint32_t num_ext)
138 {
139 size_t new_size;
140 VkExtensionProperties *new_ptr;
141
142 assert(new_ext && num_ext > 0);
143
144 if (!new_ext)
145 return VK_ERROR_INITIALIZATION_FAILED;
146
147 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
148 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
149 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
150
151 /* Old array continues to be valid, update nothing */
152 if (!new_ptr)
153 return VK_ERROR_OUT_OF_HOST_MEMORY;
154
155 memcpy(&new_ptr[extensions->num_ext], new_ext,
156 num_ext * sizeof(VkExtensionProperties));
157 extensions->ext_array = new_ptr;
158 extensions->num_ext += num_ext;
159
160 return VK_SUCCESS;
161 }
162
163 static void
164 radv_extensions_finish(struct radv_instance *instance,
165 struct radv_extensions *extensions)
166 {
167 assert(extensions);
168
169 if (!extensions)
170 radv_loge("Attempted to free invalid extension struct\n");
171
172 if (extensions->ext_array)
173 vk_free(&instance->alloc, extensions->ext_array);
174 }
175
176 static bool
177 is_extension_enabled(const VkExtensionProperties *extensions,
178 size_t num_ext,
179 const char *name)
180 {
181 assert(extensions && name);
182
183 for (uint32_t i = 0; i < num_ext; i++) {
184 if (strcmp(name, extensions[i].extensionName) == 0)
185 return true;
186 }
187
188 return false;
189 }
190
191 static VkResult
192 radv_physical_device_init(struct radv_physical_device *device,
193 struct radv_instance *instance,
194 const char *path)
195 {
196 VkResult result;
197 drmVersionPtr version;
198 int fd;
199
200 fd = open(path, O_RDWR | O_CLOEXEC);
201 if (fd < 0)
202 return VK_ERROR_INCOMPATIBLE_DRIVER;
203
204 version = drmGetVersion(fd);
205 if (!version) {
206 close(fd);
207 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
208 "failed to get version %s: %m", path);
209 }
210
211 if (strcmp(version->name, "amdgpu")) {
212 drmFreeVersion(version);
213 close(fd);
214 return VK_ERROR_INCOMPATIBLE_DRIVER;
215 }
216 drmFreeVersion(version);
217
218 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
219 device->instance = instance;
220 assert(strlen(path) < ARRAY_SIZE(device->path));
221 strncpy(device->path, path, ARRAY_SIZE(device->path));
222
223 device->ws = radv_amdgpu_winsys_create(fd);
224 if (!device->ws) {
225 result = VK_ERROR_INCOMPATIBLE_DRIVER;
226 goto fail;
227 }
228
229 device->local_fd = fd;
230 device->ws->query_info(device->ws, &device->rad_info);
231 result = radv_init_wsi(device);
232 if (result != VK_SUCCESS) {
233 device->ws->destroy(device->ws);
234 goto fail;
235 }
236
237 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
238 radv_finish_wsi(device);
239 device->ws->destroy(device->ws);
240 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
241 "cannot generate UUID");
242 goto fail;
243 }
244
245 result = radv_extensions_register(instance,
246 &device->extensions,
247 common_device_extensions,
248 ARRAY_SIZE(common_device_extensions));
249 if (result != VK_SUCCESS)
250 goto fail;
251
252 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
253 device->name = device->rad_info.name;
254
255 return VK_SUCCESS;
256
257 fail:
258 close(fd);
259 return result;
260 }
261
262 static void
263 radv_physical_device_finish(struct radv_physical_device *device)
264 {
265 radv_extensions_finish(device->instance, &device->extensions);
266 radv_finish_wsi(device);
267 device->ws->destroy(device->ws);
268 close(device->local_fd);
269 }
270
271
272 static void *
273 default_alloc_func(void *pUserData, size_t size, size_t align,
274 VkSystemAllocationScope allocationScope)
275 {
276 return malloc(size);
277 }
278
279 static void *
280 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
281 size_t align, VkSystemAllocationScope allocationScope)
282 {
283 return realloc(pOriginal, size);
284 }
285
286 static void
287 default_free_func(void *pUserData, void *pMemory)
288 {
289 free(pMemory);
290 }
291
292 static const VkAllocationCallbacks default_alloc = {
293 .pUserData = NULL,
294 .pfnAllocation = default_alloc_func,
295 .pfnReallocation = default_realloc_func,
296 .pfnFree = default_free_func,
297 };
298
299 static const struct debug_control radv_debug_options[] = {
300 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
301 {"nodcc", RADV_DEBUG_NO_DCC},
302 {"shaders", RADV_DEBUG_DUMP_SHADERS},
303 {"nocache", RADV_DEBUG_NO_CACHE},
304 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
305 {"nohiz", RADV_DEBUG_NO_HIZ},
306 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
307 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
308 {NULL, 0}
309 };
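/* These flags are selected at instance creation from the RADV_DEBUG
 * environment variable via parse_debug_string(), e.g. (illustrative):
 *
 *   RADV_DEBUG=nofastclears,nohiz ./app
 *
 * The exact delimiter handling is up to util/debug.c; a comma-separated
 * list of the option names above is the usual form.
 */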
310
311 VkResult radv_CreateInstance(
312 const VkInstanceCreateInfo* pCreateInfo,
313 const VkAllocationCallbacks* pAllocator,
314 VkInstance* pInstance)
315 {
316 struct radv_instance *instance;
317
318 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
319
320 uint32_t client_version;
321 if (pCreateInfo->pApplicationInfo &&
322 pCreateInfo->pApplicationInfo->apiVersion != 0) {
323 client_version = pCreateInfo->pApplicationInfo->apiVersion;
324 } else {
325 client_version = VK_MAKE_VERSION(1, 0, 0);
326 }
327
328 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
329 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
330 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
331 "Client requested version %d.%d.%d",
332 VK_VERSION_MAJOR(client_version),
333 VK_VERSION_MINOR(client_version),
334 VK_VERSION_PATCH(client_version));
335 }
336
337 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
338 if (!is_extension_enabled(instance_extensions,
339 ARRAY_SIZE(instance_extensions),
340 pCreateInfo->ppEnabledExtensionNames[i]))
341 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
342 }
343
344 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
345 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
346 if (!instance)
347 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
348
349 memset(instance, 0, sizeof(*instance));
350
351 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
352
353 if (pAllocator)
354 instance->alloc = *pAllocator;
355 else
356 instance->alloc = default_alloc;
357
358 instance->apiVersion = client_version;
359 instance->physicalDeviceCount = -1;
360
361 _mesa_locale_init();
362
363 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
364
365 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
366 radv_debug_options);
367
368 *pInstance = radv_instance_to_handle(instance);
369
370 return VK_SUCCESS;
371 }
372
373 void radv_DestroyInstance(
374 VkInstance _instance,
375 const VkAllocationCallbacks* pAllocator)
376 {
377 RADV_FROM_HANDLE(radv_instance, instance, _instance);
378
379 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
380 radv_physical_device_finish(instance->physicalDevices + i);
381 }
382
383 VG(VALGRIND_DESTROY_MEMPOOL(instance));
384
385 _mesa_locale_fini();
386
387 vk_free(&instance->alloc, instance);
388 }
389
390 VkResult radv_EnumeratePhysicalDevices(
391 VkInstance _instance,
392 uint32_t* pPhysicalDeviceCount,
393 VkPhysicalDevice* pPhysicalDevices)
394 {
395 RADV_FROM_HANDLE(radv_instance, instance, _instance);
396 VkResult result;
397
398 if (instance->physicalDeviceCount < 0) {
399 char path[20];
400 instance->physicalDeviceCount = 0;
401 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) {
402 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
403 result = radv_physical_device_init(instance->physicalDevices +
404 instance->physicalDeviceCount,
405 instance, path);
406 if (result == VK_SUCCESS)
407 ++instance->physicalDeviceCount;
408 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
409 return result;
410 }
411 }
412
413 if (!pPhysicalDevices) {
414 *pPhysicalDeviceCount = instance->physicalDeviceCount;
415 } else {
416 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
417 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
418 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
419 }
420
421 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
422 : VK_SUCCESS;
423 }
424
425 void radv_GetPhysicalDeviceFeatures(
426 VkPhysicalDevice physicalDevice,
427 VkPhysicalDeviceFeatures* pFeatures)
428 {
429 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
430
431 memset(pFeatures, 0, sizeof(*pFeatures));
432
433 *pFeatures = (VkPhysicalDeviceFeatures) {
434 .robustBufferAccess = true,
435 .fullDrawIndexUint32 = true,
436 .imageCubeArray = true,
437 .independentBlend = true,
438 .geometryShader = true,
439 .tessellationShader = false,
440 .sampleRateShading = false,
441 .dualSrcBlend = true,
442 .logicOp = true,
443 .multiDrawIndirect = true,
444 .drawIndirectFirstInstance = true,
445 .depthClamp = true,
446 .depthBiasClamp = true,
447 .fillModeNonSolid = true,
448 .depthBounds = true,
449 .wideLines = true,
450 .largePoints = true,
451 .alphaToOne = true,
452 .multiViewport = true,
453 .samplerAnisotropy = true,
454 .textureCompressionETC2 = false,
455 .textureCompressionASTC_LDR = false,
456 .textureCompressionBC = true,
457 .occlusionQueryPrecise = true,
458 .pipelineStatisticsQuery = false,
459 .vertexPipelineStoresAndAtomics = true,
460 .fragmentStoresAndAtomics = true,
461 .shaderTessellationAndGeometryPointSize = true,
462 .shaderImageGatherExtended = true,
463 .shaderStorageImageExtendedFormats = true,
464 .shaderStorageImageMultisample = false,
465 .shaderUniformBufferArrayDynamicIndexing = true,
466 .shaderSampledImageArrayDynamicIndexing = true,
467 .shaderStorageBufferArrayDynamicIndexing = true,
468 .shaderStorageImageArrayDynamicIndexing = true,
469 .shaderStorageImageReadWithoutFormat = true,
470 .shaderStorageImageWriteWithoutFormat = true,
471 .shaderClipDistance = true,
472 .shaderCullDistance = true,
473 .shaderFloat64 = true,
474 .shaderInt64 = false,
475 .shaderInt16 = false,
477 .variableMultisampleRate = false,
478 .inheritedQueries = false,
479 };
480 }
481
482 void radv_GetPhysicalDeviceFeatures2KHR(
483 VkPhysicalDevice physicalDevice,
484 VkPhysicalDeviceFeatures2KHR *pFeatures)
485 {
486 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
487 }
488
489 void radv_GetPhysicalDeviceProperties(
490 VkPhysicalDevice physicalDevice,
491 VkPhysicalDeviceProperties* pProperties)
492 {
493 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
494 VkSampleCountFlags sample_counts = 0xf;
495 VkPhysicalDeviceLimits limits = {
496 .maxImageDimension1D = (1 << 14),
497 .maxImageDimension2D = (1 << 14),
498 .maxImageDimension3D = (1 << 11),
499 .maxImageDimensionCube = (1 << 14),
500 .maxImageArrayLayers = (1 << 11),
501 .maxTexelBufferElements = 128 * 1024 * 1024,
502 .maxUniformBufferRange = UINT32_MAX,
503 .maxStorageBufferRange = UINT32_MAX,
504 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
505 .maxMemoryAllocationCount = UINT32_MAX,
506 .maxSamplerAllocationCount = 64 * 1024,
507 .bufferImageGranularity = 64, /* A cache line */
508 .sparseAddressSpaceSize = 0,
509 .maxBoundDescriptorSets = MAX_SETS,
510 .maxPerStageDescriptorSamplers = 64,
511 .maxPerStageDescriptorUniformBuffers = 64,
512 .maxPerStageDescriptorStorageBuffers = 64,
513 .maxPerStageDescriptorSampledImages = 64,
514 .maxPerStageDescriptorStorageImages = 64,
515 .maxPerStageDescriptorInputAttachments = 64,
516 .maxPerStageResources = 128,
517 .maxDescriptorSetSamplers = 256,
518 .maxDescriptorSetUniformBuffers = 256,
519 .maxDescriptorSetUniformBuffersDynamic = 256,
520 .maxDescriptorSetStorageBuffers = 256,
521 .maxDescriptorSetStorageBuffersDynamic = 256,
522 .maxDescriptorSetSampledImages = 256,
523 .maxDescriptorSetStorageImages = 256,
524 .maxDescriptorSetInputAttachments = 256,
525 .maxVertexInputAttributes = 32,
526 .maxVertexInputBindings = 32,
527 .maxVertexInputAttributeOffset = 2047,
528 .maxVertexInputBindingStride = 2048,
529 .maxVertexOutputComponents = 128,
530 .maxTessellationGenerationLevel = 0,
531 .maxTessellationPatchSize = 0,
532 .maxTessellationControlPerVertexInputComponents = 0,
533 .maxTessellationControlPerVertexOutputComponents = 0,
534 .maxTessellationControlPerPatchOutputComponents = 0,
535 .maxTessellationControlTotalOutputComponents = 0,
536 .maxTessellationEvaluationInputComponents = 0,
537 .maxTessellationEvaluationOutputComponents = 0,
538 .maxGeometryShaderInvocations = 32,
539 .maxGeometryInputComponents = 64,
540 .maxGeometryOutputComponents = 128,
541 .maxGeometryOutputVertices = 256,
542 .maxGeometryTotalOutputComponents = 1024,
543 .maxFragmentInputComponents = 128,
544 .maxFragmentOutputAttachments = 8,
545 .maxFragmentDualSrcAttachments = 1,
546 .maxFragmentCombinedOutputResources = 8,
547 .maxComputeSharedMemorySize = 32768,
548 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
549 .maxComputeWorkGroupInvocations = 2048,
550 .maxComputeWorkGroupSize = {
551 2048,
552 2048,
553 2048
554 },
555 .subPixelPrecisionBits = 4 /* FIXME */,
556 .subTexelPrecisionBits = 4 /* FIXME */,
557 .mipmapPrecisionBits = 4 /* FIXME */,
558 .maxDrawIndexedIndexValue = UINT32_MAX,
559 .maxDrawIndirectCount = UINT32_MAX,
560 .maxSamplerLodBias = 16,
561 .maxSamplerAnisotropy = 16,
562 .maxViewports = MAX_VIEWPORTS,
563 .maxViewportDimensions = { (1 << 14), (1 << 14) },
564 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
565 .viewportSubPixelBits = 13, /* We take a float? */
566 .minMemoryMapAlignment = 4096, /* A page */
567 .minTexelBufferOffsetAlignment = 1,
568 .minUniformBufferOffsetAlignment = 4,
569 .minStorageBufferOffsetAlignment = 4,
570 .minTexelOffset = -32,
571 .maxTexelOffset = 31,
572 .minTexelGatherOffset = -32,
573 .maxTexelGatherOffset = 31,
574 .minInterpolationOffset = -2,
575 .maxInterpolationOffset = 2,
576 .subPixelInterpolationOffsetBits = 8,
577 .maxFramebufferWidth = (1 << 14),
578 .maxFramebufferHeight = (1 << 14),
579 .maxFramebufferLayers = (1 << 10),
580 .framebufferColorSampleCounts = sample_counts,
581 .framebufferDepthSampleCounts = sample_counts,
582 .framebufferStencilSampleCounts = sample_counts,
583 .framebufferNoAttachmentsSampleCounts = sample_counts,
584 .maxColorAttachments = MAX_RTS,
585 .sampledImageColorSampleCounts = sample_counts,
586 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
587 .sampledImageDepthSampleCounts = sample_counts,
588 .sampledImageStencilSampleCounts = sample_counts,
589 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
590 .maxSampleMaskWords = 1,
591 .timestampComputeAndGraphics = false,
592 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, /* crystal freq is in kHz; period in ns is 1e6 / kHz */
593 .maxClipDistances = 8,
594 .maxCullDistances = 8,
595 .maxCombinedClipAndCullDistances = 8,
596 .discreteQueuePriorities = 1,
597 .pointSizeRange = { 0.125, 255.875 },
598 .lineWidthRange = { 0.0, 7.9921875 },
599 .pointSizeGranularity = (1.0 / 8.0),
600 .lineWidthGranularity = (1.0 / 128.0),
601 .strictLines = false, /* FINISHME */
602 .standardSampleLocations = true,
603 .optimalBufferCopyOffsetAlignment = 128,
604 .optimalBufferCopyRowPitchAlignment = 128,
605 .nonCoherentAtomSize = 64,
606 };
607
608 *pProperties = (VkPhysicalDeviceProperties) {
609 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
610 .driverVersion = 1,
611 .vendorID = 0x1002,
612 .deviceID = pdevice->rad_info.pci_id,
613 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
614 .limits = limits,
615 .sparseProperties = {0}, /* No sparse support yet. */
616 };
617
618 strcpy(pProperties->deviceName, pdevice->name);
619 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
620 }
621
622 void radv_GetPhysicalDeviceProperties2KHR(
623 VkPhysicalDevice physicalDevice,
624 VkPhysicalDeviceProperties2KHR *pProperties)
625 {
626 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
627 }
628
629 static void radv_get_physical_device_queue_family_properties(
630 struct radv_physical_device* pdevice,
631 uint32_t* pCount,
632 VkQueueFamilyProperties** pQueueFamilyProperties)
633 {
634 int num_queue_families = 1;
635 int idx;
636 if (pdevice->rad_info.compute_rings > 0 &&
637 pdevice->rad_info.chip_class >= CIK &&
638 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
639 num_queue_families++;
640
641 if (pQueueFamilyProperties == NULL) {
642 *pCount = num_queue_families;
643 return;
644 }
645
646 if (!*pCount)
647 return;
648
649 idx = 0;
650 if (*pCount >= 1) {
651 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
652 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
653 VK_QUEUE_COMPUTE_BIT |
654 VK_QUEUE_TRANSFER_BIT,
655 .queueCount = 1,
656 .timestampValidBits = 64,
657 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
658 };
659 idx++;
660 }
661
662 if (pdevice->rad_info.compute_rings > 0 &&
663 pdevice->rad_info.chip_class >= CIK &&
664 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
665 if (*pCount > idx) {
666 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
667 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
668 .queueCount = pdevice->rad_info.compute_rings,
669 .timestampValidBits = 64,
670 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
671 };
672 idx++;
673 }
674 }
675 *pCount = idx;
676 }
677
678 void radv_GetPhysicalDeviceQueueFamilyProperties(
679 VkPhysicalDevice physicalDevice,
680 uint32_t* pCount,
681 VkQueueFamilyProperties* pQueueFamilyProperties)
682 {
683 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
684 if (!pQueueFamilyProperties) {
685 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
686 return;
687 }
688 VkQueueFamilyProperties *properties[] = {
689 pQueueFamilyProperties + 0,
690 pQueueFamilyProperties + 1,
691 pQueueFamilyProperties + 2,
692 };
693 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
694 assert(*pCount <= 3);
695 }
696
697 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
698 VkPhysicalDevice physicalDevice,
699 uint32_t* pCount,
700 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
701 {
702 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
703 if (!pQueueFamilyProperties) {
704 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
705 return;
706 }
707 VkQueueFamilyProperties *properties[] = {
708 &pQueueFamilyProperties[0].queueFamilyProperties,
709 &pQueueFamilyProperties[1].queueFamilyProperties,
710 &pQueueFamilyProperties[2].queueFamilyProperties,
711 };
712 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
713 assert(*pCount <= 3);
714 }
715
716 void radv_GetPhysicalDeviceMemoryProperties(
717 VkPhysicalDevice physicalDevice,
718 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
719 {
720 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
721
722 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
723
724 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
725 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
726 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
727 .heapIndex = RADV_MEM_HEAP_VRAM,
728 };
729 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
730 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
731 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
732 .heapIndex = RADV_MEM_HEAP_GTT,
733 };
734 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
735 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
736 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
737 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
738 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
739 };
740 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
741 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
742 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
743 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
744 .heapIndex = RADV_MEM_HEAP_GTT,
745 };
746
747 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
748
749 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
750 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
751 .size = physical_device->rad_info.vram_size -
752 physical_device->rad_info.visible_vram_size,
753 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
754 };
755 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
756 .size = physical_device->rad_info.visible_vram_size,
757 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
758 };
759 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
760 .size = physical_device->rad_info.gart_size,
761 .flags = 0,
762 };
763 }
764
765 void radv_GetPhysicalDeviceMemoryProperties2KHR(
766 VkPhysicalDevice physicalDevice,
767 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
768 {
769 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
770 &pMemoryProperties->memoryProperties);
771 }
772
773 static VkResult
774 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
775 int queue_family_index, int idx)
776 {
777 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
778 queue->device = device;
779 queue->queue_family_index = queue_family_index;
780 queue->queue_idx = idx;
781
782 queue->hw_ctx = device->ws->ctx_create(device->ws);
783 if (!queue->hw_ctx)
784 return VK_ERROR_OUT_OF_HOST_MEMORY;
785
786 return VK_SUCCESS;
787 }
788
789 static void
790 radv_queue_finish(struct radv_queue *queue)
791 {
792 if (queue->hw_ctx)
793 queue->device->ws->ctx_destroy(queue->hw_ctx);
794
795 if (queue->initial_preamble_cs)
796 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
797 if (queue->continue_preamble_cs)
798 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
799 if (queue->descriptor_bo)
800 queue->device->ws->buffer_destroy(queue->descriptor_bo);
801 if (queue->scratch_bo)
802 queue->device->ws->buffer_destroy(queue->scratch_bo);
803 if (queue->esgs_ring_bo)
804 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
805 if (queue->gsvs_ring_bo)
806 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
807 if (queue->compute_scratch_bo)
808 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
809 }
810
811 static void
812 radv_device_init_gs_info(struct radv_device *device)
813 {
814 switch (device->physical_device->rad_info.family) {
815 case CHIP_OLAND:
816 case CHIP_HAINAN:
817 case CHIP_KAVERI:
818 case CHIP_KABINI:
819 case CHIP_MULLINS:
820 case CHIP_ICELAND:
821 case CHIP_CARRIZO:
822 case CHIP_STONEY:
823 device->gs_table_depth = 16;
824 return;
825 case CHIP_TAHITI:
826 case CHIP_PITCAIRN:
827 case CHIP_VERDE:
828 case CHIP_BONAIRE:
829 case CHIP_HAWAII:
830 case CHIP_TONGA:
831 case CHIP_FIJI:
832 case CHIP_POLARIS10:
833 case CHIP_POLARIS11:
834 device->gs_table_depth = 32;
835 return;
836 default:
837 unreachable("unknown GPU");
838 }
839 }
840
841 VkResult radv_CreateDevice(
842 VkPhysicalDevice physicalDevice,
843 const VkDeviceCreateInfo* pCreateInfo,
844 const VkAllocationCallbacks* pAllocator,
845 VkDevice* pDevice)
846 {
847 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
848 VkResult result;
849 struct radv_device *device;
850
851 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
852 if (!is_extension_enabled(physical_device->extensions.ext_array,
853 physical_device->extensions.num_ext,
854 pCreateInfo->ppEnabledExtensionNames[i]))
855 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
856 }
857
858 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
859 sizeof(*device), 8,
860 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
861 if (!device)
862 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
863
864 memset(device, 0, sizeof(*device));
865
866 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
867 device->instance = physical_device->instance;
868 device->physical_device = physical_device;
869
870 device->debug_flags = device->instance->debug_flags;
871
872 device->ws = physical_device->ws;
873 if (pAllocator)
874 device->alloc = *pAllocator;
875 else
876 device->alloc = physical_device->instance->alloc;
877
878 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
879 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
880 uint32_t qfi = queue_create->queueFamilyIndex;
881
882 device->queues[qfi] = vk_alloc(&device->alloc,
883 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
884 if (!device->queues[qfi]) {
885 result = VK_ERROR_OUT_OF_HOST_MEMORY;
886 goto fail;
887 }
888
889 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
890
891 device->queue_count[qfi] = queue_create->queueCount;
892
893 for (unsigned q = 0; q < queue_create->queueCount; q++) {
894 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
895 if (result != VK_SUCCESS)
896 goto fail;
897 }
898 }
899
900 #if HAVE_LLVM < 0x0400
901 device->llvm_supports_spill = false;
902 #else
903 device->llvm_supports_spill = true;
904 #endif
905
906 /* The maximum number of scratch waves. Scratch space isn't divided
907 * evenly between CUs. The number is only a function of the number of CUs.
908 * We can decrease the constant to decrease the scratch buffer size.
909 *
910 * device->scratch_waves must be >= the maximum possible size of
911 * 1 threadgroup, so that the hw doesn't hang from being unable
912 * to start any.
913 *
914 * The recommended value is 4 per CU at most. Higher numbers don't
915 * bring much benefit, but they still occupy chip resources (think
916 * async compute). I've seen ~2% performance difference between 4 and 32.
917 */
918 uint32_t max_threads_per_block = 2048;
919 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
920 max_threads_per_block / 64);
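/* Worked example (illustrative): on a GPU with 36 compute units this gives
 * MAX2(32 * 36, 2048 / 64) = MAX2(1152, 32) = 1152 scratch waves; the
 * max_threads_per_block / 64 term is just a floor guaranteeing one full
 * 2048-thread group can always be started. */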
921
922 radv_device_init_gs_info(device);
923
924 result = radv_device_init_meta(device);
925 if (result != VK_SUCCESS)
926 goto fail;
927
928 radv_device_init_msaa(device);
929
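/* Pre-build two small command streams per queue family: empty_cs, submitted
 * when a fence or semaphore must be signalled without any user work, and
 * flush_cs, which flushes and invalidates the shader caches ahead of user
 * command buffers. The GFX variant of empty_cs also emits CONTEXT_CONTROL
 * to enable register state load/shadow. */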
930 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
931 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
932 switch (family) {
933 case RADV_QUEUE_GENERAL:
934 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
935 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
936 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
937 break;
938 case RADV_QUEUE_COMPUTE:
939 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
940 radeon_emit(device->empty_cs[family], 0);
941 break;
942 }
943 device->ws->cs_finalize(device->empty_cs[family]);
944
945 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
946 switch (family) {
947 case RADV_QUEUE_GENERAL:
948 case RADV_QUEUE_COMPUTE:
949 si_cs_emit_cache_flush(device->flush_cs[family],
950 device->physical_device->rad_info.chip_class,
951 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
952 RADV_CMD_FLAG_INV_ICACHE |
953 RADV_CMD_FLAG_INV_SMEM_L1 |
954 RADV_CMD_FLAG_INV_VMEM_L1 |
955 RADV_CMD_FLAG_INV_GLOBAL_L2);
956 break;
957 }
958 device->ws->cs_finalize(device->flush_cs[family]);
959 }
960
961 if (getenv("RADV_TRACE_FILE")) {
962 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
963 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
964 if (!device->trace_bo) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
965 goto fail;
}
966 
967 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
968 if (!device->trace_id_ptr) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
969 goto fail;
}
970 }
971
972 if (device->physical_device->rad_info.chip_class >= CIK)
973 cik_create_gfx_config(device);
974
975 *pDevice = radv_device_to_handle(device);
976 return VK_SUCCESS;
977
978 fail:
979 if (device->trace_bo)
980 device->ws->buffer_destroy(device->trace_bo);
981
982 if (device->gfx_init)
983 device->ws->buffer_destroy(device->gfx_init);
984
985 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
986 for (unsigned q = 0; q < device->queue_count[i]; q++)
987 radv_queue_finish(&device->queues[i][q]);
988 if (device->queue_count[i])
989 vk_free(&device->alloc, device->queues[i]);
990 }
991
992 vk_free(&device->alloc, device);
993 return result;
994 }
995
996 void radv_DestroyDevice(
997 VkDevice _device,
998 const VkAllocationCallbacks* pAllocator)
999 {
1000 RADV_FROM_HANDLE(radv_device, device, _device);
1001
1002 if (device->trace_bo)
1003 device->ws->buffer_destroy(device->trace_bo);
1004
1005 if (device->gfx_init)
1006 device->ws->buffer_destroy(device->gfx_init);
1007
1008 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1009 for (unsigned q = 0; q < device->queue_count[i]; q++)
1010 radv_queue_finish(&device->queues[i][q]);
1011 if (device->queue_count[i])
1012 vk_free(&device->alloc, device->queues[i]);
1013 if (device->empty_cs[i])
1014 device->ws->cs_destroy(device->empty_cs[i]);
1015 if (device->flush_cs[i])
1016 device->ws->cs_destroy(device->flush_cs[i]);
1017 }
1018 radv_device_finish_meta(device);
1019
1020 vk_free(&device->alloc, device);
1021 }
1022
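/* The enumeration entry points below follow the standard Vulkan two-call
 * idiom: a NULL pProperties queries the count; otherwise the output count is
 * clamped, that many entries are copied, and VK_INCOMPLETE is returned if
 * the caller's array was too small. */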
1023 VkResult radv_EnumerateInstanceExtensionProperties(
1024 const char* pLayerName,
1025 uint32_t* pPropertyCount,
1026 VkExtensionProperties* pProperties)
1027 {
1028 if (pProperties == NULL) {
1029 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1030 return VK_SUCCESS;
1031 }
1032
1033 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1034 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1035
1036 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1037 return VK_INCOMPLETE;
1038
1039 return VK_SUCCESS;
1040 }
1041
1042 VkResult radv_EnumerateDeviceExtensionProperties(
1043 VkPhysicalDevice physicalDevice,
1044 const char* pLayerName,
1045 uint32_t* pPropertyCount,
1046 VkExtensionProperties* pProperties)
1047 {
1048 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1049
1050 if (pProperties == NULL) {
1051 *pPropertyCount = pdevice->extensions.num_ext;
1052 return VK_SUCCESS;
1053 }
1054
1055 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1056 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1057
1058 if (*pPropertyCount < pdevice->extensions.num_ext)
1059 return VK_INCOMPLETE;
1060
1061 return VK_SUCCESS;
1062 }
1063
1064 VkResult radv_EnumerateInstanceLayerProperties(
1065 uint32_t* pPropertyCount,
1066 VkLayerProperties* pProperties)
1067 {
1068 if (pProperties == NULL) {
1069 *pPropertyCount = 0;
1070 return VK_SUCCESS;
1071 }
1072
1073 /* None supported at this time */
1074 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1075 }
1076
1077 VkResult radv_EnumerateDeviceLayerProperties(
1078 VkPhysicalDevice physicalDevice,
1079 uint32_t* pPropertyCount,
1080 VkLayerProperties* pProperties)
1081 {
1082 if (pProperties == NULL) {
1083 *pPropertyCount = 0;
1084 return VK_SUCCESS;
1085 }
1086
1087 /* None supported at this time */
1088 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1089 }
1090
1091 void radv_GetDeviceQueue(
1092 VkDevice _device,
1093 uint32_t queueFamilyIndex,
1094 uint32_t queueIndex,
1095 VkQueue* pQueue)
1096 {
1097 RADV_FROM_HANDLE(radv_device, device, _device);
1098
1099 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1100 }
1101
1102 static void radv_dump_trace(struct radv_device *device,
1103 struct radeon_winsys_cs *cs)
1104 {
1105 const char *filename = getenv("RADV_TRACE_FILE");
1106 FILE *f = fopen(filename, "w");
1107 if (!f) {
1108 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
1109 return;
1110 }
1111
1112 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1113 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1114 fclose(f);
1115 }
1116
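/* Each ring is described to the shaders by a 4-dword V# buffer descriptor:
 * dword 0 holds the ring VA, dword 1 the VA high bits plus stride/swizzle,
 * dword 2 num_records (the ring size), and dword 3 the dst_sel/format/
 * element-size/index-stride/add-tid fields. map[0..3] is the scratch rsrc
 * written by the caller; the ring descriptors start at map[4]. */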
1117 static void
1118 fill_geom_rings(struct radv_queue *queue,
1119 uint32_t *map,
1120 uint32_t esgs_ring_size,
1121 struct radeon_winsys_bo *esgs_ring_bo,
1122 uint32_t gsvs_ring_size,
1123 struct radeon_winsys_bo *gsvs_ring_bo)
1124 {
1125 uint64_t esgs_va = 0, gsvs_va = 0;
1126 uint32_t *desc = &map[4];
1127
1128 if (esgs_ring_bo)
1129 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1130 if (gsvs_ring_bo)
1131 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1132
1133 /* stride 0, num records - size, add tid, swizzle, elsize4,
1134 index stride 64 */
1135 desc[0] = esgs_va;
1136 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1137 S_008F04_STRIDE(0) |
1138 S_008F04_SWIZZLE_ENABLE(true);
1139 desc[2] = esgs_ring_size;
1140 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1141 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1142 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1143 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1144 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1145 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1146 S_008F0C_ELEMENT_SIZE(1) |
1147 S_008F0C_INDEX_STRIDE(3) |
1148 S_008F0C_ADD_TID_ENABLE(true);
1149
1150 desc += 4;
1151 /* GS entry for ES->GS ring */
1152 /* stride 0, num records - size, elsize0,
1153 index stride 0 */
1154 desc[0] = esgs_va;
1155 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1156 S_008F04_STRIDE(0) |
1157 S_008F04_SWIZZLE_ENABLE(false);
1158 desc[2] = esgs_ring_size;
1159 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1160 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1161 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1162 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1163 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1164 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1165 S_008F0C_ELEMENT_SIZE(0) |
1166 S_008F0C_INDEX_STRIDE(0) |
1167 S_008F0C_ADD_TID_ENABLE(false);
1168
1169 desc += 4;
1170 /* VS entry for GS->VS ring */
1171 /* stride 0, num records - size, elsize0,
1172 index stride 0 */
1173 desc[0] = gsvs_va;
1174 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1175 S_008F04_STRIDE(0) |
1176 S_008F04_SWIZZLE_ENABLE(false);
1177 desc[2] = gsvs_ring_size;
1178 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1179 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1180 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1181 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1182 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1183 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1184 S_008F0C_ELEMENT_SIZE(0) |
1185 S_008F0C_INDEX_STRIDE(0) |
1186 S_008F0C_ADD_TID_ENABLE(false);
1187 desc += 4;
1188
1189 /* stride gsvs_itemsize, num records 64
1190 elsize 4, index stride 16 */
1191 /* shader will patch stride and desc[2] */
1192 desc[0] = gsvs_va;
1193 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1194 S_008F04_STRIDE(0) |
1195 S_008F04_SWIZZLE_ENABLE(true);
1196 desc[2] = 0;
1197 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1198 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1199 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1200 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1201 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1202 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1203 S_008F0C_ELEMENT_SIZE(1) |
1204 S_008F0C_INDEX_STRIDE(1) |
1205 S_008F0C_ADD_TID_ENABLE(true);
1206 }
1207
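/* Preambles are cached on the queue and only rebuilt when a command buffer
 * needs more scratch or ring space than currently allocated; sizes never
 * shrink. Two variants are produced: the initial preamble, which also
 * flushes and invalidates the caches (the subject of this commit), and the
 * continue preamble, used when chaining subsequent IBs, which must not
 * re-flush. */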
1208 static VkResult
1209 radv_get_preamble_cs(struct radv_queue *queue,
1210 uint32_t scratch_size,
1211 uint32_t compute_scratch_size,
1212 uint32_t esgs_ring_size,
1213 uint32_t gsvs_ring_size,
1214 struct radeon_winsys_cs **initial_preamble_cs,
1215 struct radeon_winsys_cs **continue_preamble_cs)
1216 {
1217 struct radeon_winsys_bo *scratch_bo = NULL;
1218 struct radeon_winsys_bo *descriptor_bo = NULL;
1219 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1220 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1221 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1222 struct radeon_winsys_cs *dest_cs[2] = {0};
1223
1224 if (scratch_size <= queue->scratch_size &&
1225 compute_scratch_size <= queue->compute_scratch_size &&
1226 esgs_ring_size <= queue->esgs_ring_size &&
1227 gsvs_ring_size <= queue->gsvs_ring_size &&
1228 queue->initial_preamble_cs) {
1229 *initial_preamble_cs = queue->initial_preamble_cs;
1230 *continue_preamble_cs = queue->continue_preamble_cs;
1231 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1232 *continue_preamble_cs = NULL;
1233 return VK_SUCCESS;
1234 }
1235
1236 if (scratch_size > queue->scratch_size) {
1237 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1238 scratch_size,
1239 4096,
1240 RADEON_DOMAIN_VRAM,
1241 RADEON_FLAG_NO_CPU_ACCESS);
1242 if (!scratch_bo)
1243 goto fail;
1244 } else
1245 scratch_bo = queue->scratch_bo;
1246
1247 if (compute_scratch_size > queue->compute_scratch_size) {
1248 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1249 compute_scratch_size,
1250 4096,
1251 RADEON_DOMAIN_VRAM,
1252 RADEON_FLAG_NO_CPU_ACCESS);
1253 if (!compute_scratch_bo)
1254 goto fail;
1255
1256 } else
1257 compute_scratch_bo = queue->compute_scratch_bo;
1258
1259 if (esgs_ring_size > queue->esgs_ring_size) {
1260 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1261 esgs_ring_size,
1262 4096,
1263 RADEON_DOMAIN_VRAM,
1264 RADEON_FLAG_NO_CPU_ACCESS);
1265 if (!esgs_ring_bo)
1266 goto fail;
1267 } else {
1268 esgs_ring_bo = queue->esgs_ring_bo;
1269 esgs_ring_size = queue->esgs_ring_size;
1270 }
1271
1272 if (gsvs_ring_size > queue->gsvs_ring_size) {
1273 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1274 gsvs_ring_size,
1275 4096,
1276 RADEON_DOMAIN_VRAM,
1277 RADEON_FLAG_NO_CPU_ACCESS);
1278 if (!gsvs_ring_bo)
1279 goto fail;
1280 } else {
1281 gsvs_ring_bo = queue->gsvs_ring_bo;
1282 gsvs_ring_size = queue->gsvs_ring_size;
1283 }
1284
1285 if (scratch_bo != queue->scratch_bo ||
1286 esgs_ring_bo != queue->esgs_ring_bo ||
1287 gsvs_ring_bo != queue->gsvs_ring_bo) {
1288 uint32_t size = 0;
1289 if (gsvs_ring_bo || esgs_ring_bo)
1290 size = 80; /* 2 dword + 2 padding + 4 dword * 4 */
1291 else if (scratch_bo)
1292 size = 8; /* 2 dword */
1293
1294 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1295 size,
1296 4096,
1297 RADEON_DOMAIN_VRAM,
1298 RADEON_FLAG_CPU_ACCESS);
1299 if (!descriptor_bo)
1300 goto fail;
1301 } else
1302 descriptor_bo = queue->descriptor_bo;
1303
1304 for (int i = 0; i < 2; ++i) {
1305 struct radeon_winsys_cs *cs = NULL;
1306 cs = queue->device->ws->cs_create(queue->device->ws,
1307 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1308 if (!cs)
1309 goto fail;
1310
1311 dest_cs[i] = cs;
1312
1313 if (scratch_bo)
1314 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1315
1316 if (esgs_ring_bo)
1317 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1318
1319 if (gsvs_ring_bo)
1320 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1321
1322 if (descriptor_bo)
1323 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1324
1325 if (descriptor_bo != queue->descriptor_bo) {
1326 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1327
1328 if (scratch_bo) {
1329 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1330 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1331 S_008F04_SWIZZLE_ENABLE(1);
1332 map[0] = scratch_va;
1333 map[1] = rsrc1;
1334 }
1335
1336 if (esgs_ring_bo || gsvs_ring_bo)
1337 fill_geom_rings(queue, map, esgs_ring_size, esgs_ring_bo, gsvs_ring_size, gsvs_ring_bo);
1338
1339 queue->device->ws->buffer_unmap(descriptor_bo);
1340 }
1341
1342 if (esgs_ring_bo || gsvs_ring_bo) {
1343 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1344 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1345 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1346 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1347
1348 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1349 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1350 radeon_emit(cs, esgs_ring_size >> 8);
1351 radeon_emit(cs, gsvs_ring_size >> 8);
1352 } else {
1353 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1354 radeon_emit(cs, esgs_ring_size >> 8);
1355 radeon_emit(cs, gsvs_ring_size >> 8);
1356 }
1357 }
1358
1359 if (descriptor_bo) {
1360 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1361 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1362 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1363 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1364 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1365 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1366
1367 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1368
1369 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1370 radeon_set_sh_reg_seq(cs, regs[i], 2);
1371 radeon_emit(cs, va);
1372 radeon_emit(cs, va >> 32);
1373 }
1374 }
1375
1376 if (compute_scratch_bo) {
1377 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1378 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1379 S_008F04_SWIZZLE_ENABLE(1);
1380
1381 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1382
1383 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1384 radeon_emit(cs, scratch_va);
1385 radeon_emit(cs, rsrc1);
1386 }
1387
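/* Only the first pass (i == 0), the initial preamble, gets the cache
 * flush/invalidate; the continue preamble is replayed between chained
 * submissions, where flushing again would be redundant. */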
1388 if (!i) {
1389 si_cs_emit_cache_flush(cs,
1390 queue->device->physical_device->rad_info.chip_class,
1391 queue->queue_family_index == RING_COMPUTE &&
1392 queue->device->physical_device->rad_info.chip_class >= CIK,
1393 RADV_CMD_FLAG_INV_ICACHE |
1394 RADV_CMD_FLAG_INV_SMEM_L1 |
1395 RADV_CMD_FLAG_INV_VMEM_L1 |
1396 RADV_CMD_FLAG_INV_GLOBAL_L2);
1397 }
1398
1399 if (!queue->device->ws->cs_finalize(cs))
1400 goto fail;
1401 }
1402
1403 if (queue->initial_preamble_cs)
1404 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1405
1406 if (queue->continue_preamble_cs)
1407 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1408
1409 queue->initial_preamble_cs = dest_cs[0];
1410 queue->continue_preamble_cs = dest_cs[1];
1411
1412 if (scratch_bo != queue->scratch_bo) {
1413 if (queue->scratch_bo)
1414 queue->device->ws->buffer_destroy(queue->scratch_bo);
1415 queue->scratch_bo = scratch_bo;
1416 queue->scratch_size = scratch_size;
1417 }
1418
1419 if (compute_scratch_bo != queue->compute_scratch_bo) {
1420 if (queue->compute_scratch_bo)
1421 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1422 queue->compute_scratch_bo = compute_scratch_bo;
1423 queue->compute_scratch_size = compute_scratch_size;
1424 }
1425
1426 if (esgs_ring_bo != queue->esgs_ring_bo) {
1427 if (queue->esgs_ring_bo)
1428 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1429 queue->esgs_ring_bo = esgs_ring_bo;
1430 queue->esgs_ring_size = esgs_ring_size;
1431 }
1432
1433 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1434 if (queue->gsvs_ring_bo)
1435 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1436 queue->gsvs_ring_bo = gsvs_ring_bo;
1437 queue->gsvs_ring_size = gsvs_ring_size;
1438 }
1439
1440 if (descriptor_bo != queue->descriptor_bo) {
1441 if (queue->descriptor_bo)
1442 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1443
1444 queue->descriptor_bo = descriptor_bo;
1445 }
1446
1447 *initial_preamble_cs = queue->initial_preamble_cs;
1448 *continue_preamble_cs = queue->continue_preamble_cs;
1449 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1450 *continue_preamble_cs = NULL;
1451 return VK_SUCCESS;
1452 fail:
1453 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1454 if (dest_cs[i])
1455 queue->device->ws->cs_destroy(dest_cs[i]);
1456 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1457 queue->device->ws->buffer_destroy(descriptor_bo);
1458 if (scratch_bo && scratch_bo != queue->scratch_bo)
1459 queue->device->ws->buffer_destroy(scratch_bo);
1460 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1461 queue->device->ws->buffer_destroy(compute_scratch_bo);
1462 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1463 queue->device->ws->buffer_destroy(esgs_ring_bo);
1464 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1465 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1466 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1467 }
1468
1469 VkResult radv_QueueSubmit(
1470 VkQueue _queue,
1471 uint32_t submitCount,
1472 const VkSubmitInfo* pSubmits,
1473 VkFence _fence)
1474 {
1475 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1476 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1477 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1478 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1479 int ret;
1480 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1481 uint32_t scratch_size = 0;
1482 uint32_t compute_scratch_size = 0;
1483 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1484 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1485 VkResult result;
1486 bool fence_emitted = false;
1487
1488 /* Do this first so failing to allocate scratch buffers can't result in
1489 * partially executed submissions. */
1490 for (uint32_t i = 0; i < submitCount; i++) {
1491 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1492 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1493 pSubmits[i].pCommandBuffers[j]);
1494
1495 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1496 compute_scratch_size = MAX2(compute_scratch_size,
1497 cmd_buffer->compute_scratch_size_needed);
1498 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1499 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1500 }
1501 }
1502
1503 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1504 esgs_ring_size, gsvs_ring_size,
1505 &initial_preamble_cs, &continue_preamble_cs);
1506 if (result != VK_SUCCESS)
1507 return result;
1508
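/* Submissions are chunked: max_cs_submission forces one CS per submit when
 * tracing so a hang can be pinned to a single IB. Wait semaphores are only
 * attached to the first chunk (b) and signal semaphores to the last (e),
 * preserving VkSubmitInfo semantics across the split. */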
1509 for (uint32_t i = 0; i < submitCount; i++) {
1510 struct radeon_winsys_cs **cs_array;
1511 bool has_flush = !submitCount;
1512 bool can_patch = !has_flush;
1513 uint32_t advance;
1514
1515 if (!pSubmits[i].commandBufferCount) {
1516 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1517 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1518 &queue->device->empty_cs[queue->queue_family_index],
1519 1, NULL, NULL,
1520 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1521 pSubmits[i].waitSemaphoreCount,
1522 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1523 pSubmits[i].signalSemaphoreCount,
1524 false, base_fence);
1525 if (ret) {
1526 radv_loge("failed to submit CS %d\n", i);
1527 abort();
1528 }
1529 fence_emitted = true;
1530 }
1531 continue;
1532 }
1533
1534 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1535 (pSubmits[i].commandBufferCount + has_flush));
if (!cs_array)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1536 
1537 if (has_flush)
1538 cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
1539
1540 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1541 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1542 pSubmits[i].pCommandBuffers[j]);
1543 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1544
1545 cs_array[j + has_flush] = cmd_buffer->cs;
1546 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1547 can_patch = false;
1548 }
1549
1550 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + has_flush; j += advance) {
1551 advance = MIN2(max_cs_submission,
1552 pSubmits[i].commandBufferCount + has_flush - j);
1553 bool b = j == 0;
1554 bool e = j + advance == pSubmits[i].commandBufferCount + has_flush;
1555
1556 if (queue->device->trace_bo)
1557 *queue->device->trace_id_ptr = 0;
1558
1559 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1560 advance, initial_preamble_cs, continue_preamble_cs,
1561 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1562 b ? pSubmits[i].waitSemaphoreCount : 0,
1563 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1564 e ? pSubmits[i].signalSemaphoreCount : 0,
1565 can_patch, base_fence);
1566
1567 if (ret) {
1568 radv_loge("failed to submit CS %d\n", i);
1569 abort();
1570 }
1571 fence_emitted = true;
1572 if (queue->device->trace_bo) {
1573 bool success = queue->device->ws->ctx_wait_idle(
1574 queue->hw_ctx,
1575 radv_queue_family_to_ring(
1576 queue->queue_family_index),
1577 queue->queue_idx);
1578
1579 if (!success) { /* Hang */
1580 radv_dump_trace(queue->device, cs_array[j]);
1581 abort();
1582 }
1583 }
1584 }
1585 free(cs_array);
1586 }
1587
1588 if (fence) {
1589 if (!fence_emitted)
1590 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1591 &queue->device->empty_cs[queue->queue_family_index],
1592 1, NULL, NULL, NULL, 0, NULL, 0,
1593 false, base_fence);
1594
1595 fence->submitted = true;
1596 }
1597
1598 return VK_SUCCESS;
1599 }
1600
1601 VkResult radv_QueueWaitIdle(
1602 VkQueue _queue)
1603 {
1604 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1605
1606 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1607 radv_queue_family_to_ring(queue->queue_family_index),
1608 queue->queue_idx);
1609 return VK_SUCCESS;
1610 }
1611
1612 VkResult radv_DeviceWaitIdle(
1613 VkDevice _device)
1614 {
1615 RADV_FROM_HANDLE(radv_device, device, _device);
1616
1617 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1618 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1619 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
1620 }
1621 }
1622 return VK_SUCCESS;
1623 }
1624
1625 PFN_vkVoidFunction radv_GetInstanceProcAddr(
1626 VkInstance instance,
1627 const char* pName)
1628 {
1629 return radv_lookup_entrypoint(pName);
1630 }
1631
1632 /* The loader wants us to expose a second GetInstanceProcAddr function
1633 * to work around certain LD_PRELOAD issues seen in apps.
1634 */
1635 PUBLIC
1636 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1637 VkInstance instance,
1638 const char* pName);
1639
1640 PUBLIC
1641 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1642 VkInstance instance,
1643 const char* pName)
1644 {
1645 return radv_GetInstanceProcAddr(instance, pName);
1646 }
1647
1648 PFN_vkVoidFunction radv_GetDeviceProcAddr(
1649 VkDevice device,
1650 const char* pName)
1651 {
1652 return radv_lookup_entrypoint(pName);
1653 }
1654
1655 VkResult radv_AllocateMemory(
1656 VkDevice _device,
1657 const VkMemoryAllocateInfo* pAllocateInfo,
1658 const VkAllocationCallbacks* pAllocator,
1659 VkDeviceMemory* pMem)
1660 {
1661 RADV_FROM_HANDLE(radv_device, device, _device);
1662 struct radv_device_memory *mem;
1663 VkResult result;
1664 enum radeon_bo_domain domain;
1665 uint32_t flags = 0;
1666 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1667
1668 if (pAllocateInfo->allocationSize == 0) {
1669 /* Apparently, this is allowed */
1670 *pMem = VK_NULL_HANDLE;
1671 return VK_SUCCESS;
1672 }
1673
1674 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1675 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1676 if (mem == NULL)
1677 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1678
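/* Memory-type to winsys mapping (cf. radv_GetPhysicalDeviceMemoryProperties):
 * GTT_WRITE_COMBINE and GTT_CACHED allocate from RADEON_DOMAIN_GTT,
 * everything else from VRAM; only the pure VRAM type is NO_CPU_ACCESS,
 * the others get CPU_ACCESS, and write-combined GTT additionally gets
 * RADEON_FLAG_GTT_WC. */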
1679 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1680 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
1681 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
1682 domain = RADEON_DOMAIN_GTT;
1683 else
1684 domain = RADEON_DOMAIN_VRAM;
1685
1686 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
1687 flags |= RADEON_FLAG_NO_CPU_ACCESS;
1688 else
1689 flags |= RADEON_FLAG_CPU_ACCESS;
1690
1691 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
1692 flags |= RADEON_FLAG_GTT_WC;
1693
1694 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
1695 domain, flags);
1696
1697 if (!mem->bo) {
1698 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
1699 goto fail;
1700 }
1701 mem->type_index = pAllocateInfo->memoryTypeIndex;
1702
1703 *pMem = radv_device_memory_to_handle(mem);
1704
1705 return VK_SUCCESS;
1706
1707 fail:
1708 vk_free2(&device->alloc, pAllocator, mem);
1709
1710 return result;
1711 }
1712
1713 void radv_FreeMemory(
1714 VkDevice _device,
1715 VkDeviceMemory _mem,
1716 const VkAllocationCallbacks* pAllocator)
1717 {
1718 RADV_FROM_HANDLE(radv_device, device, _device);
1719 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
1720
1721 if (mem == NULL)
1722 return;
1723
1724 device->ws->buffer_destroy(mem->bo);
1725 mem->bo = NULL;
1726
1727 vk_free2(&device->alloc, pAllocator, mem);
1728 }
1729
1730 VkResult radv_MapMemory(
1731 VkDevice _device,
1732 VkDeviceMemory _memory,
1733 VkDeviceSize offset,
1734 VkDeviceSize size,
1735 VkMemoryMapFlags flags,
1736 void** ppData)
1737 {
1738 RADV_FROM_HANDLE(radv_device, device, _device);
1739 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1740
1741 if (mem == NULL) {
1742 *ppData = NULL;
1743 return VK_SUCCESS;
1744 }
1745
1746 *ppData = device->ws->buffer_map(mem->bo);
1747 if (*ppData) {
1748 *ppData += offset;
1749 return VK_SUCCESS;
1750 }
1751
1752 return VK_ERROR_MEMORY_MAP_FAILED;
1753 }
1754
1755 void radv_UnmapMemory(
1756 VkDevice _device,
1757 VkDeviceMemory _memory)
1758 {
1759 RADV_FROM_HANDLE(radv_device, device, _device);
1760 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1761
1762 if (mem == NULL)
1763 return;
1764
1765 device->ws->buffer_unmap(mem->bo);
1766 }
1767
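/*
 * Every host-visible memory type we expose is also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops.
 */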
1768 VkResult radv_FlushMappedMemoryRanges(
1769 VkDevice _device,
1770 uint32_t memoryRangeCount,
1771 const VkMappedMemoryRange* pMemoryRanges)
1772 {
1773 return VK_SUCCESS;
1774 }
1775
1776 VkResult radv_InvalidateMappedMemoryRanges(
1777 VkDevice _device,
1778 uint32_t memoryRangeCount,
1779 const VkMappedMemoryRange* pMemoryRanges)
1780 {
1781 return VK_SUCCESS;
1782 }
1783
1784 void radv_GetBufferMemoryRequirements(
1785 VkDevice device,
1786 VkBuffer _buffer,
1787 VkMemoryRequirements* pMemoryRequirements)
1788 {
1789 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1790
1791 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1792
1793 pMemoryRequirements->size = buffer->size;
1794 pMemoryRequirements->alignment = 16;
1795 }
1796
1797 void radv_GetImageMemoryRequirements(
1798 VkDevice device,
1799 VkImage _image,
1800 VkMemoryRequirements* pMemoryRequirements)
1801 {
1802 RADV_FROM_HANDLE(radv_image, image, _image);
1803
1804 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
1805
1806 pMemoryRequirements->size = image->size;
1807 pMemoryRequirements->alignment = image->alignment;
1808 }
1809
1810 void radv_GetImageSparseMemoryRequirements(
1811 VkDevice device,
1812 VkImage image,
1813 uint32_t* pSparseMemoryRequirementCount,
1814 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1815 {
1816 stub();
1817 }
1818
1819 void radv_GetDeviceMemoryCommitment(
1820 VkDevice device,
1821 VkDeviceMemory memory,
1822 VkDeviceSize* pCommittedMemoryInBytes)
1823 {
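/* No lazily-allocated memory types are exposed, so there is no
 * commitment to report beyond the allocation itself. */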
1824 *pCommittedMemoryInBytes = 0;
1825 }
1826
1827 VkResult radv_BindBufferMemory(
1828 VkDevice device,
1829 VkBuffer _buffer,
1830 VkDeviceMemory _memory,
1831 VkDeviceSize memoryOffset)
1832 {
1833 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1834 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
1835
1836 if (mem) {
1837 buffer->bo = mem->bo;
1838 buffer->offset = memoryOffset;
1839 } else {
1840 buffer->bo = NULL;
1841 buffer->offset = 0;
1842 }
1843
1844 return VK_SUCCESS;
1845 }
1846
1847 VkResult radv_BindImageMemory(
1848 VkDevice device,
1849 VkImage _image,
1850 VkDeviceMemory _memory,
1851 VkDeviceSize memoryOffset)
1852 {
1853 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
1854 RADV_FROM_HANDLE(radv_image, image, _image);
1855
1856 if (mem) {
1857 image->bo = mem->bo;
1858 image->offset = memoryOffset;
1859 } else {
1860 image->bo = NULL;
1861 image->offset = 0;
1862 }
1863
1864 return VK_SUCCESS;
1865 }
1866
1867 VkResult radv_QueueBindSparse(
1868 VkQueue queue,
1869 uint32_t bindInfoCount,
1870 const VkBindSparseInfo* pBindInfo,
1871 VkFence fence)
1872 {
1873 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1874 }
1875
1876 VkResult radv_CreateFence(
1877 VkDevice _device,
1878 const VkFenceCreateInfo* pCreateInfo,
1879 const VkAllocationCallbacks* pAllocator,
1880 VkFence* pFence)
1881 {
1882 RADV_FROM_HANDLE(radv_device, device, _device);
1883 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
1884 sizeof(*fence), 8,
1885 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1886
1887 if (!fence)
1888 return VK_ERROR_OUT_OF_HOST_MEMORY;
1889
1890 memset(fence, 0, sizeof(*fence));
1891 fence->submitted = false;
1892 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
1893 fence->fence = device->ws->create_fence();
1894 if (!fence->fence) {
1895 vk_free2(&device->alloc, pAllocator, fence);
1896 return VK_ERROR_OUT_OF_HOST_MEMORY;
1897 }
1898
1899 *pFence = radv_fence_to_handle(fence);
1900
1901 return VK_SUCCESS;
1902 }
1903
1904 void radv_DestroyFence(
1905 VkDevice _device,
1906 VkFence _fence,
1907 const VkAllocationCallbacks* pAllocator)
1908 {
1909 RADV_FROM_HANDLE(radv_device, device, _device);
1910 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1911
1912 if (!fence)
1913 return;
1914 device->ws->destroy_fence(fence->fence);
1915 vk_free2(&device->alloc, pAllocator, fence);
1916 }
1917
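/*
 * Convert a relative timeout in nanoseconds into an absolute
 * CLOCK_MONOTONIC time, clamping so the addition cannot wrap when the
 * caller passes UINT64_MAX to mean "wait forever".
 */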
1918 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
1919 {
1920 uint64_t current_time;
1921 struct timespec tv;
1922
1923 clock_gettime(CLOCK_MONOTONIC, &tv);
1924 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
1925
1926 timeout = MIN2(UINT64_MAX - current_time, timeout);
1927
1928 return current_time + timeout;
1929 }
1930
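/*
 * The timeout is turned into an absolute deadline once up front, so a
 * single time budget is shared across all fences rather than applied
 * per fence. waitAll = false is not honoured yet: every fence is
 * waited on in order.
 */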
1931 VkResult radv_WaitForFences(
1932 VkDevice _device,
1933 uint32_t fenceCount,
1934 const VkFence* pFences,
1935 VkBool32 waitAll,
1936 uint64_t timeout)
1937 {
1938 RADV_FROM_HANDLE(radv_device, device, _device);
1939 timeout = radv_get_absolute_timeout(timeout);
1940
1941 if (!waitAll && fenceCount > 1) {
1942 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
1943 }
1944
1945 for (uint32_t i = 0; i < fenceCount; ++i) {
1946 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1947 bool expired = false;
1948
1949 if (fence->signalled)
1950 continue;
1951
1952 if (!fence->submitted)
1953 return VK_TIMEOUT;
1954
1955 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
1956 if (!expired)
1957 return VK_TIMEOUT;
1958
1959 fence->signalled = true;
1960 }
1961
1962 return VK_SUCCESS;
1963 }
1964
1965 VkResult radv_ResetFences(VkDevice device,
1966 uint32_t fenceCount,
1967 const VkFence *pFences)
1968 {
1969 for (unsigned i = 0; i < fenceCount; ++i) {
1970 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
1971 fence->submitted = fence->signalled = false;
1972 }
1973
1974 return VK_SUCCESS;
1975 }
1976
1977 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
1978 {
1979 RADV_FROM_HANDLE(radv_device, device, _device);
1980 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1981
1982 if (fence->signalled)
1983 return VK_SUCCESS;
1984 if (!fence->submitted)
1985 return VK_NOT_READY;
1986
1987 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
1988 return VK_NOT_READY;
1989
1990 return VK_SUCCESS;
1991 }
1992
1993
1994 // Queue semaphore functions
1995
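/*
 * A VkSemaphore is simply the winsys semaphore pointer cast to the
 * handle type, so there is no driver-side object to allocate or free.
 */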
1996 VkResult radv_CreateSemaphore(
1997 VkDevice _device,
1998 const VkSemaphoreCreateInfo* pCreateInfo,
1999 const VkAllocationCallbacks* pAllocator,
2000 VkSemaphore* pSemaphore)
2001 {
2002 RADV_FROM_HANDLE(radv_device, device, _device);
2003 struct radeon_winsys_sem *sem;
2004
2005 sem = device->ws->create_sem(device->ws);
2006 if (!sem)
2007 return VK_ERROR_OUT_OF_HOST_MEMORY;
2008
2009 *pSemaphore = (VkSemaphore)sem;
2010 return VK_SUCCESS;
2011 }
2012
2013 void radv_DestroySemaphore(
2014 VkDevice _device,
2015 VkSemaphore _semaphore,
2016 const VkAllocationCallbacks* pAllocator)
2017 {
2018 RADV_FROM_HANDLE(radv_device, device, _device);
2019 struct radeon_winsys_sem *sem;
2020 if (!_semaphore)
2021 return;
2022
2023 sem = (struct radeon_winsys_sem *)_semaphore;
2024 device->ws->destroy_sem(sem);
2025 }
2026
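/*
 * A VkEvent is backed by an 8-byte CPU-visible GTT buffer that stays
 * mapped for the event's lifetime: the GPU writes the dword from the
 * command stream, while the host reads and writes the mapping directly
 * (see radv_GetEventStatus/radv_SetEvent below).
 */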
2027 VkResult radv_CreateEvent(
2028 VkDevice _device,
2029 const VkEventCreateInfo* pCreateInfo,
2030 const VkAllocationCallbacks* pAllocator,
2031 VkEvent* pEvent)
2032 {
2033 RADV_FROM_HANDLE(radv_device, device, _device);
2034 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2035 sizeof(*event), 8,
2036 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2037
2038 if (!event)
2039 return VK_ERROR_OUT_OF_HOST_MEMORY;
2040
2041 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2042 RADEON_DOMAIN_GTT,
2043 RADEON_FLAG_CPU_ACCESS);
2044 if (!event->bo) {
2045 vk_free2(&device->alloc, pAllocator, event);
2046 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2047 }
2048
2049 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
if (!event->map) {
device->ws->buffer_destroy(event->bo);
vk_free2(&device->alloc, pAllocator, event);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
2050
2051 *pEvent = radv_event_to_handle(event);
2052
2053 return VK_SUCCESS;
2054 }
2055
2056 void radv_DestroyEvent(
2057 VkDevice _device,
2058 VkEvent _event,
2059 const VkAllocationCallbacks* pAllocator)
2060 {
2061 RADV_FROM_HANDLE(radv_device, device, _device);
2062 RADV_FROM_HANDLE(radv_event, event, _event);
2063
2064 if (!event)
2065 return;
2066 device->ws->buffer_destroy(event->bo);
2067 vk_free2(&device->alloc, pAllocator, event);
2068 }
2069
2070 VkResult radv_GetEventStatus(
2071 VkDevice _device,
2072 VkEvent _event)
2073 {
2074 RADV_FROM_HANDLE(radv_event, event, _event);
2075
2076 if (*event->map == 1)
2077 return VK_EVENT_SET;
2078 return VK_EVENT_RESET;
2079 }
2080
2081 VkResult radv_SetEvent(
2082 VkDevice _device,
2083 VkEvent _event)
2084 {
2085 RADV_FROM_HANDLE(radv_event, event, _event);
2086 *event->map = 1;
2087
2088 return VK_SUCCESS;
2089 }
2090
2091 VkResult radv_ResetEvent(
2092 VkDevice _device,
2093 VkEvent _event)
2094 {
2095 RADV_FROM_HANDLE(radv_event, event, _event);
2096 *event->map = 0;
2097
2098 return VK_SUCCESS;
2099 }
2100
2101 VkResult radv_CreateBuffer(
2102 VkDevice _device,
2103 const VkBufferCreateInfo* pCreateInfo,
2104 const VkAllocationCallbacks* pAllocator,
2105 VkBuffer* pBuffer)
2106 {
2107 RADV_FROM_HANDLE(radv_device, device, _device);
2108 struct radv_buffer *buffer;
2109
2110 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2111
2112 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2113 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2114 if (buffer == NULL)
2115 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2116
2117 buffer->size = pCreateInfo->size;
2118 buffer->usage = pCreateInfo->usage;
2119 buffer->bo = NULL;
2120 buffer->offset = 0;
2121
2122 *pBuffer = radv_buffer_to_handle(buffer);
2123
2124 return VK_SUCCESS;
2125 }
2126
2127 void radv_DestroyBuffer(
2128 VkDevice _device,
2129 VkBuffer _buffer,
2130 const VkAllocationCallbacks* pAllocator)
2131 {
2132 RADV_FROM_HANDLE(radv_device, device, _device);
2133 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2134
2135 if (!buffer)
2136 return;
2137
2138 vk_free2(&device->alloc, pAllocator, buffer);
2139 }
2140
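/* Look up the tile mode index for a mip level from the precomputed surface layout. */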
2141 static inline unsigned
2142 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2143 {
2144 if (stencil)
2145 return image->surface.stencil_tiling_index[level];
2146 else
2147 return image->surface.tiling_index[level];
2148 }
2149
2150 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2151 {
2152 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2153 }
2154
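/*
 * Fill in the CB_* register state for rendering to a color image view:
 * the base/CMASK/FMASK/DCC addresses (256-byte aligned, hence the >> 8
 * shifts), the pitch/slice tile maxima, and the format/swap/blend
 * fields of CB_COLOR_INFO.
 */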
2155 static void
2156 radv_initialise_color_surface(struct radv_device *device,
2157 struct radv_color_buffer_info *cb,
2158 struct radv_image_view *iview)
2159 {
2160 const struct vk_format_description *desc;
2161 unsigned ntype, format, swap, endian;
2162 unsigned blend_clamp = 0, blend_bypass = 0;
2163 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2164 uint64_t va;
2165 const struct radeon_surf *surf = &iview->image->surface;
2166 const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
2167
2168 desc = vk_format_description(iview->vk_format);
2169
2170 memset(cb, 0, sizeof(*cb));
2171
2172 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2173 va += level_info->offset;
2174 cb->cb_color_base = va >> 8;
2175
2176 /* CMASK variables */
2177 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2178 va += iview->image->cmask.offset;
2179 cb->cb_color_cmask = va >> 8;
2180 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2181
2182 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2183 va += iview->image->dcc_offset;
2184 cb->cb_dcc_base = va >> 8;
2185
2186 uint32_t max_slice = radv_surface_layer_count(iview);
2187 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2188 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2189
2190 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2191 pitch_tile_max = level_info->nblk_x / 8 - 1;
2192 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2193 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2194
2195 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2196 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2197
2198 /* Intensity is implemented as Red, so treat it that way. */
2199 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
2200 S_028C74_TILE_MODE_INDEX(tile_mode_index);
2201
2202 if (iview->image->samples > 1) {
2203 unsigned log_samples = util_logbase2(iview->image->samples);
2204
2205 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2206 S_028C74_NUM_FRAGMENTS(log_samples);
2207 }
2208
2209 if (iview->image->fmask.size) {
2210 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2211 if (device->physical_device->rad_info.chip_class >= CIK)
2212 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2213 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2214 cb->cb_color_fmask = va >> 8;
2215 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2216 } else {
2217 /* This must be set for fast clear to work without FMASK. */
2218 if (device->physical_device->rad_info.chip_class >= CIK)
2219 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2220 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2221 cb->cb_color_fmask = cb->cb_color_base;
2222 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2223 }
2224
2225 ntype = radv_translate_color_numformat(iview->vk_format,
2226 desc,
2227 vk_format_get_first_non_void_channel(iview->vk_format));
2228 format = radv_translate_colorformat(iview->vk_format);
2229 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2230 radv_finishme("Illegal color"); /* radv_finishme appends its own newline */
2231 swap = radv_translate_colorswap(iview->vk_format, false);
2232 endian = radv_colorformat_endian_swap(format);
2233
2234 /* blend clamp should be set for all NORM/SRGB types */
2235 if (ntype == V_028C70_NUMBER_UNORM ||
2236 ntype == V_028C70_NUMBER_SNORM ||
2237 ntype == V_028C70_NUMBER_SRGB)
2238 blend_clamp = 1;
2239
2240 /* Per the hw docs, blend bypass must be enabled (and blend clamp
2241 disabled) for SINT/UINT and the 8_24/24_8 COLOR variants. */
2242 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2243 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2244 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2245 blend_clamp = 0;
2246 blend_bypass = 1;
2247 }
2248 #if 0
2249 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
2250 (format == V_028C70_COLOR_8 ||
2251 format == V_028C70_COLOR_8_8 ||
2252 format == V_028C70_COLOR_8_8_8_8))
2253 cb->color_is_int8 = true; /* disabled: radv has no color_is_int8 field yet (cf. radeonsi) */
2254 #endif
2255 cb->cb_color_info = S_028C70_FORMAT(format) |
2256 S_028C70_COMP_SWAP(swap) |
2257 S_028C70_BLEND_CLAMP(blend_clamp) |
2258 S_028C70_BLEND_BYPASS(blend_bypass) |
2259 S_028C70_SIMPLE_FLOAT(1) |
2260 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2261 ntype != V_028C70_NUMBER_SNORM &&
2262 ntype != V_028C70_NUMBER_SRGB &&
2263 format != V_028C70_COLOR_8_24 &&
2264 format != V_028C70_COLOR_24_8) |
2265 S_028C70_NUMBER_TYPE(ntype) |
2266 S_028C70_ENDIAN(endian);
2267 if (iview->image->samples > 1 &&
2268 iview->image->fmask.size)
2269 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2270
2271 if (iview->image->cmask.size &&
2272 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2273 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2274
2275 if (iview->image->surface.dcc_size && level_info->dcc_enabled)
2276 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2277
2278 if (device->physical_device->rad_info.chip_class >= VI) {
2279 unsigned max_uncompressed_block_size = 2;
2280 if (iview->image->samples > 1) {
2281 if (iview->image->surface.bpe == 1)
2282 max_uncompressed_block_size = 0;
2283 else if (iview->image->surface.bpe == 2)
2284 max_uncompressed_block_size = 1;
2285 }
2286
2287 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2288 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2289 }
2290
2291 /* This must be set for fast clear to work without FMASK. */
2292 if (!iview->image->fmask.size &&
2293 device->physical_device->rad_info.chip_class == SI) {
2294 unsigned bankh = util_logbase2(iview->image->surface.bankh);
2295 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2296 }
2297 }
2298
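/*
 * Fill in the DB_* register state for a depth/stencil image view: Z and
 * stencil base addresses, the tiling setup (tile mode indices on SI,
 * explicit tiling fields on CIK+), and the HTILE state when the image
 * has a compressed depth buffer.
 */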
2299 static void
2300 radv_initialise_ds_surface(struct radv_device *device,
2301 struct radv_ds_buffer_info *ds,
2302 struct radv_image_view *iview)
2303 {
2304 unsigned level = iview->base_mip;
2305 unsigned format;
2306 uint64_t va, s_offs, z_offs;
2307 const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
2308 memset(ds, 0, sizeof(*ds));
2309 switch (iview->vk_format) {
2310 case VK_FORMAT_D24_UNORM_S8_UINT:
2311 case VK_FORMAT_X8_D24_UNORM_PACK32:
2312 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2313 ds->offset_scale = 2.0f;
2314 break;
2315 case VK_FORMAT_D16_UNORM:
2316 case VK_FORMAT_D16_UNORM_S8_UINT:
2317 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2318 ds->offset_scale = 4.0f;
2319 break;
2320 case VK_FORMAT_D32_SFLOAT:
2321 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2322 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2323 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2324 ds->offset_scale = 1.0f;
2325 break;
2326 default:
2327 break;
2328 }
2329
2330 format = radv_translate_dbformat(iview->vk_format);
2331 if (format == V_028040_Z_INVALID) {
2332 fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
2333 }
2334
2335 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2336 s_offs = z_offs = va;
2337 z_offs += iview->image->surface.level[level].offset;
2338 s_offs += iview->image->surface.stencil_level[level].offset;
2339
2340 uint32_t max_slice = radv_surface_layer_count(iview);
2341 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2342 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2343 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2344 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2345
2346 if (iview->image->samples > 1)
2347 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
2348
2349 if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
2350 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
2351 else
2352 ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
2353
2354 if (device->physical_device->rad_info.chip_class >= CIK) {
2355 struct radeon_info *info = &device->physical_device->rad_info;
2356 unsigned tiling_index = iview->image->surface.tiling_index[level];
2357 unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
2358 unsigned macro_index = iview->image->surface.macro_tile_index;
2359 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
2360 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
2361 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
2362
2363 ds->db_depth_info |=
2364 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
2365 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
2366 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
2367 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
2368 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
2369 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
2370 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
2371 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
2372 } else {
2373 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
2374 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
2375 tile_mode_index = si_tile_mode_index(iview->image, level, true);
2376 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
2377 }
2378
2379 if (iview->image->htile.size && !level) {
2380 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
2381 S_028040_ALLOW_EXPCLEAR(1);
2382
2383 if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
2384 /* Workaround: For a not yet understood reason, the
2385 * combination of MSAA, fast stencil clear and stencil
2386 * decompress messes with subsequent stencil buffer
2387 * uses. Problem was reproduced on Verde, Bonaire,
2388 * Tonga, and Carrizo.
2389 *
2390 * Disabling EXPCLEAR works around the problem.
2391 *
2392 * Check piglit's arb_texture_multisample-stencil-clear
2393 * test if you want to try changing this.
2394 */
2395 if (iview->image->samples <= 1)
2396 ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
2397 } else
2398 /* Use all of the htile_buffer for depth if there's no stencil. */
2399 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2400
2401 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2402 iview->image->htile.offset;
2403 ds->db_htile_data_base = va >> 8;
2404 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
2405 } else {
2406 ds->db_htile_data_base = 0;
2407 ds->db_htile_surface = 0;
2408 }
2409
2410 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
2411 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
2412
2413 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
2414 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
2415 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
2416 }
2417
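/*
 * A framebuffer precomputes the color and depth/stencil surface state
 * for each attachment and clamps its dimensions to the smallest
 * attachment, so binding it at render-pass begin time is cheap.
 */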
2418 VkResult radv_CreateFramebuffer(
2419 VkDevice _device,
2420 const VkFramebufferCreateInfo* pCreateInfo,
2421 const VkAllocationCallbacks* pAllocator,
2422 VkFramebuffer* pFramebuffer)
2423 {
2424 RADV_FROM_HANDLE(radv_device, device, _device);
2425 struct radv_framebuffer *framebuffer;
2426
2427 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2428
2429 size_t size = sizeof(*framebuffer) +
2430 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
2431 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2432 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2433 if (framebuffer == NULL)
2434 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2435
2436 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2437 framebuffer->width = pCreateInfo->width;
2438 framebuffer->height = pCreateInfo->height;
2439 framebuffer->layers = pCreateInfo->layers;
2440 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2441 VkImageView _iview = pCreateInfo->pAttachments[i];
2442 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
2443 framebuffer->attachments[i].attachment = iview;
2444 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
2445 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
2446 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2447 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
2448 }
2449 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
2450 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
2451 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
2452 }
2453
2454 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
2455 return VK_SUCCESS;
2456 }
2457
2458 void radv_DestroyFramebuffer(
2459 VkDevice _device,
2460 VkFramebuffer _fb,
2461 const VkAllocationCallbacks* pAllocator)
2462 {
2463 RADV_FROM_HANDLE(radv_device, device, _device);
2464 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
2465
2466 if (!fb)
2467 return;
2468 vk_free2(&device->alloc, pAllocator, fb);
2469 }
2470
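/* Translation helpers from Vulkan sampler enums to SQ_IMG_SAMP register fields. */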
2471 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
2472 {
2473 switch (address_mode) {
2474 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
2475 return V_008F30_SQ_TEX_WRAP;
2476 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
2477 return V_008F30_SQ_TEX_MIRROR;
2478 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
2479 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
2480 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
2481 return V_008F30_SQ_TEX_CLAMP_BORDER;
2482 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
2483 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
2484 default:
2485 unreachable("illegal tex wrap mode");
2486 break;
2487 }
2488 }
2489
2490 static unsigned
2491 radv_tex_compare(VkCompareOp op)
2492 {
2493 switch (op) {
2494 case VK_COMPARE_OP_NEVER:
2495 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
2496 case VK_COMPARE_OP_LESS:
2497 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
2498 case VK_COMPARE_OP_EQUAL:
2499 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
2500 case VK_COMPARE_OP_LESS_OR_EQUAL:
2501 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
2502 case VK_COMPARE_OP_GREATER:
2503 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
2504 case VK_COMPARE_OP_NOT_EQUAL:
2505 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
2506 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2507 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
2508 case VK_COMPARE_OP_ALWAYS:
2509 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
2510 default:
2511 unreachable("illegal compare mode");
2512 break;
2513 }
2514 }
2515
2516 static unsigned
2517 radv_tex_filter(VkFilter filter, unsigned max_aniso)
2518 {
2519 switch (filter) {
2520 case VK_FILTER_NEAREST:
2521 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
2522 V_008F38_SQ_TEX_XY_FILTER_POINT);
2523 case VK_FILTER_LINEAR:
2524 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
2525 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
2526 case VK_FILTER_CUBIC_IMG:
2527 default:
2528 fprintf(stderr, "illegal texture filter\n");
2529 return 0;
2530 }
2531 }
2532
2533 static unsigned
2534 radv_tex_mipfilter(VkSamplerMipmapMode mode)
2535 {
2536 switch (mode) {
2537 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
2538 return V_008F38_SQ_TEX_Z_FILTER_POINT;
2539 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
2540 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
2541 default:
2542 return V_008F38_SQ_TEX_Z_FILTER_NONE;
2543 }
2544 }
2545
2546 static unsigned
2547 radv_tex_bordercolor(VkBorderColor bcolor)
2548 {
2549 switch (bcolor) {
2550 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2551 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2552 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
2553 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2554 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2555 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
2556 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2557 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2558 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
2559 default:
2560 break;
2561 }
2562 return 0;
2563 }
2564
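/*
 * Map a maxAnisotropy value onto the log2-style ratio field used by the
 * hardware: 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4.
 */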
2565 static unsigned
2566 radv_tex_aniso_filter(unsigned filter)
2567 {
2568 if (filter < 2)
2569 return 0;
2570 if (filter < 4)
2571 return 1;
2572 if (filter < 8)
2573 return 2;
2574 if (filter < 16)
2575 return 3;
2576 return 4;
2577 }
2578
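/*
 * Pack the four SQ_IMG_SAMP state dwords. LOD values are converted to
 * fixed point with 8 fractional bits via S_FIXED, and on VI+ the
 * COMPAT_MODE/ANISO_OVERRIDE bits keep anisotropic filtering behaviour
 * compatible with earlier chips.
 */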
2579 static void
2580 radv_init_sampler(struct radv_device *device,
2581 struct radv_sampler *sampler,
2582 const VkSamplerCreateInfo *pCreateInfo)
2583 {
2584 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
2585 (uint32_t) pCreateInfo->maxAnisotropy : 0;
2586 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
2587 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
2588
2589 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
2590 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
2591 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
2592 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
2593 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
2594 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
2595 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
2596 S_008F30_ANISO_BIAS(max_aniso_ratio) |
2597 S_008F30_DISABLE_CUBE_WRAP(0) |
2598 S_008F30_COMPAT_MODE(is_vi));
2599 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
2600 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
2601 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
2602 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
2603 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
2604 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
2605 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
2606 S_008F38_MIP_POINT_PRECLAMP(1) |
2607 S_008F38_DISABLE_LSB_CEIL(1) |
2608 S_008F38_FILTER_PREC_FIX(1) |
2609 S_008F38_ANISO_OVERRIDE(is_vi));
2610 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
2611 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
2612 }
2613
2614 VkResult radv_CreateSampler(
2615 VkDevice _device,
2616 const VkSamplerCreateInfo* pCreateInfo,
2617 const VkAllocationCallbacks* pAllocator,
2618 VkSampler* pSampler)
2619 {
2620 RADV_FROM_HANDLE(radv_device, device, _device);
2621 struct radv_sampler *sampler;
2622
2623 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2624
2625 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
2626 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2627 if (!sampler)
2628 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2629
2630 radv_init_sampler(device, sampler, pCreateInfo);
2631 *pSampler = radv_sampler_to_handle(sampler);
2632
2633 return VK_SUCCESS;
2634 }
2635
2636 void radv_DestroySampler(
2637 VkDevice _device,
2638 VkSampler _sampler,
2639 const VkAllocationCallbacks* pAllocator)
2640 {
2641 RADV_FROM_HANDLE(radv_device, device, _device);
2642 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
2643
2644 if (!sampler)
2645 return;
2646 vk_free2(&device->alloc, pAllocator, sampler);
2647 }
2648
2649
2650 /* vk_icd.h does not declare this function, so we declare it here to
2651 * suppress -Wmissing-prototypes.
2652 */
2653 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2654 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2655
2656 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2657 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2658 {
2659 /* For the full details on loader interface versioning, see
2660 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2661 * What follows is a condensed summary, to help you navigate the large and
2662 * confusing official doc.
2663 *
2664 * - Loader interface v0 is incompatible with later versions. We don't
2665 * support it.
2666 *
2667 * - In loader interface v1:
2668 * - The first ICD entrypoint called by the loader is
2669 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2670 * entrypoint.
2671 * - The ICD must statically expose no other Vulkan symbol unless it is
2672 * linked with -Bsymbolic.
2673 * - Each dispatchable Vulkan handle created by the ICD must be
2674 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2675 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2676 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2677 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2678 * such loader-managed surfaces.
2679 *
2680 * - Loader interface v2 differs from v1 in:
2681 * - The first ICD entrypoint called by the loader is
2682 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2683 * statically expose this entrypoint.
2684 *
2685 * - Loader interface v3 differs from v2 in:
2686 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2687 * vkDestroySurfaceKHR(), and any other API which uses VkSurfaceKHR,
2688 * because the loader no longer does so.
2689 */
2690 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2691 return VK_SUCCESS;
2692 }