src/amd/vulkan/radv_device.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdbool.h>
29 #include <string.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include "radv_private.h"
33 #include "radv_cs.h"
34 #include "util/disk_cache.h"
35 #include "util/strtod.h"
36 #include "vk_util.h"
37 #include <xf86drm.h>
38 #include <amdgpu.h>
39 #include <amdgpu_drm.h>
40 #include "amdgpu_id.h"
41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
42 #include "ac_llvm_util.h"
43 #include "vk_format.h"
44 #include "sid.h"
45 #include "gfx9d.h"
46 #include "util/debug.h"
47
48 static int
49 radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
50 {
51 uint32_t mesa_timestamp, llvm_timestamp;
52 uint16_t f = family;
53 memset(uuid, 0, VK_UUID_SIZE);
54 if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
55 !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
56 return -1;
57
58 memcpy(uuid, &mesa_timestamp, 4);
59 memcpy((char*)uuid + 4, &llvm_timestamp, 4);
60 memcpy((char*)uuid + 8, &f, 2);
61 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
62 return 0;
63 }
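
/* For reference, the UUID bytes produced above are laid out as:
 *
 *   bytes  0-3   Mesa build timestamp (from the disk-cache helper)
 *   bytes  4-7   LLVM build timestamp
 *   bytes  8-9   radeon_family as a uint16_t
 *   bytes 10-15  the literal "radv" (NUL-padded by the memset)
 *
 * A minimal sketch of a consumer that splits the UUID back into its parts;
 * radv_print_cache_uuid is a hypothetical helper, not part of this file:
 */
#if 0
static void
radv_print_cache_uuid(const uint8_t uuid[VK_UUID_SIZE])
{
	uint32_t mesa_ts, llvm_ts;
	uint16_t family;

	memcpy(&mesa_ts, uuid, 4);
	memcpy(&llvm_ts, uuid + 4, 4);
	memcpy(&family, uuid + 8, 2);
	fprintf(stderr, "mesa=%u llvm=%u family=%u tag=%.4s\n",
	        mesa_ts, llvm_ts, family, (const char *)uuid + 10);
}
#endif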
64
65 static void
66 radv_get_device_uuid(drmDevicePtr device, void *uuid) {
67 memset(uuid, 0, VK_UUID_SIZE);
68 memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
69 memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
70 memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
71 memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
72 }
73
74 static const VkExtensionProperties instance_extensions[] = {
75 {
76 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
77 .specVersion = 25,
78 },
79 #ifdef VK_USE_PLATFORM_XCB_KHR
80 {
81 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
82 .specVersion = 6,
83 },
84 #endif
85 #ifdef VK_USE_PLATFORM_XLIB_KHR
86 {
87 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
88 .specVersion = 6,
89 },
90 #endif
91 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
92 {
93 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
94 .specVersion = 5,
95 },
96 #endif
97 {
98 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
99 .specVersion = 1,
100 },
101 };
102
103 static const VkExtensionProperties common_device_extensions[] = {
104 {
105 .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
106 .specVersion = 1,
107 },
108 {
109 .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
110 .specVersion = 1,
111 },
112 {
113 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
114 .specVersion = 1,
115 },
116 {
117 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
118 .specVersion = 1,
119 },
120 {
121 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
122 .specVersion = 1,
123 },
124 {
125 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
126 .specVersion = 68,
127 },
128 {
129 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME,
130 .specVersion = 1,
131 },
132 {
133 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
134 .specVersion = 1,
135 },
136 {
137 .extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
138 .specVersion = 1,
139 },
140 {
141 .extensionName = VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,
142 .specVersion = 1,
143 },
144 {
145 .extensionName = VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,
146 .specVersion = 1,
147 },
148 };
149
150 static VkResult
151 radv_extensions_register(struct radv_instance *instance,
152 struct radv_extensions *extensions,
153 const VkExtensionProperties *new_ext,
154 uint32_t num_ext)
155 {
156 size_t new_size;
157 VkExtensionProperties *new_ptr;
158
159 assert(new_ext && num_ext > 0);
160
161 if (!new_ext)
162 return VK_ERROR_INITIALIZATION_FAILED;
163
164 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties);
165 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array,
166 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
167
168 /* Old array continues to be valid, update nothing */
169 if (!new_ptr)
170 return VK_ERROR_OUT_OF_HOST_MEMORY;
171
172 memcpy(&new_ptr[extensions->num_ext], new_ext,
173 num_ext * sizeof(VkExtensionProperties));
174 extensions->ext_array = new_ptr;
175 extensions->num_ext += num_ext;
176
177 return VK_SUCCESS;
178 }
179
180 static void
181 radv_extensions_finish(struct radv_instance *instance,
182 struct radv_extensions *extensions)
183 {
184 assert(extensions);
185 if (!extensions) {
186 radv_loge("Attempted to free invalid extension struct\n");
187 return;
188 }
189 if (extensions->ext_array)
190 vk_free(&instance->alloc, extensions->ext_array);
191 }
192
193 static bool
194 is_extension_enabled(const VkExtensionProperties *extensions,
195 size_t num_ext,
196 const char *name)
197 {
198 assert(extensions && name);
199
200 for (uint32_t i = 0; i < num_ext; i++) {
201 if (strcmp(name, extensions[i].extensionName) == 0)
202 return true;
203 }
204
205 return false;
206 }
207
208 static const char *
209 get_chip_name(enum radeon_family family)
210 {
211 switch (family) {
212 case CHIP_TAHITI: return "AMD RADV TAHITI";
213 case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
214 case CHIP_VERDE: return "AMD RADV CAPE VERDE";
215 case CHIP_OLAND: return "AMD RADV OLAND";
216 case CHIP_HAINAN: return "AMD RADV HAINAN";
217 case CHIP_BONAIRE: return "AMD RADV BONAIRE";
218 case CHIP_KAVERI: return "AMD RADV KAVERI";
219 case CHIP_KABINI: return "AMD RADV KABINI";
220 case CHIP_HAWAII: return "AMD RADV HAWAII";
221 case CHIP_MULLINS: return "AMD RADV MULLINS";
222 case CHIP_TONGA: return "AMD RADV TONGA";
223 case CHIP_ICELAND: return "AMD RADV ICELAND";
224 case CHIP_CARRIZO: return "AMD RADV CARRIZO";
225 case CHIP_FIJI: return "AMD RADV FIJI";
226 case CHIP_POLARIS10: return "AMD RADV POLARIS10";
227 case CHIP_POLARIS11: return "AMD RADV POLARIS11";
228 case CHIP_POLARIS12: return "AMD RADV POLARIS12";
229 case CHIP_STONEY: return "AMD RADV STONEY";
230 case CHIP_VEGA10: return "AMD RADV VEGA";
231 case CHIP_RAVEN: return "AMD RADV RAVEN";
232 default: return "AMD RADV unknown";
233 }
234 }
235
236 static VkResult
237 radv_physical_device_init(struct radv_physical_device *device,
238 struct radv_instance *instance,
239 drmDevicePtr drm_device)
240 {
241 const char *path = drm_device->nodes[DRM_NODE_RENDER];
242 VkResult result;
243 drmVersionPtr version;
244 int fd;
245
246 fd = open(path, O_RDWR | O_CLOEXEC);
247 if (fd < 0)
248 return VK_ERROR_INCOMPATIBLE_DRIVER;
249
250 version = drmGetVersion(fd);
251 if (!version) {
252 close(fd);
253 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
254 "failed to get version %s: %m", path);
255 }
256
257 if (strcmp(version->name, "amdgpu")) {
258 drmFreeVersion(version);
259 close(fd);
260 return VK_ERROR_INCOMPATIBLE_DRIVER;
261 }
262 drmFreeVersion(version);
263
264 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
265 device->instance = instance;
266 assert(strlen(path) < ARRAY_SIZE(device->path));
267 strncpy(device->path, path, ARRAY_SIZE(device->path));
268
269 device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
270 instance->perftest_flags);
271 if (!device->ws) {
272 result = VK_ERROR_INCOMPATIBLE_DRIVER;
273 goto fail;
274 }
275
276 device->local_fd = fd;
277 device->ws->query_info(device->ws, &device->rad_info);
278 result = radv_init_wsi(device);
279 if (result != VK_SUCCESS) {
280 device->ws->destroy(device->ws);
281 goto fail;
282 }
283
284 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
285 radv_finish_wsi(device);
286 device->ws->destroy(device->ws);
287 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
288 "cannot generate UUID");
289 goto fail;
290 }
291
292 result = radv_extensions_register(instance,
293 &device->extensions,
294 common_device_extensions,
295 ARRAY_SIZE(common_device_extensions));
296 if (result != VK_SUCCESS)
297 goto fail;
298
299 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
300 device->name = get_chip_name(device->rad_info.family);
301
302 radv_get_device_uuid(drm_device, device->device_uuid);
303
304 if (device->rad_info.family == CHIP_STONEY ||
305 device->rad_info.chip_class >= GFX9) {
306 device->has_rbplus = true;
307 device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
308 }
309
310 return VK_SUCCESS;
311
312 fail:
313 close(fd);
314 return result;
315 }
316
317 static void
318 radv_physical_device_finish(struct radv_physical_device *device)
319 {
320 radv_extensions_finish(device->instance, &device->extensions);
321 radv_finish_wsi(device);
322 device->ws->destroy(device->ws);
323 close(device->local_fd);
324 }
325
326 static void *
327 default_alloc_func(void *pUserData, size_t size, size_t align,
328 VkSystemAllocationScope allocationScope)
329 {
330 return malloc(size);
331 }
332
333 static void *
334 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
335 size_t align, VkSystemAllocationScope allocationScope)
336 {
337 return realloc(pOriginal, size);
338 }
339
340 static void
341 default_free_func(void *pUserData, void *pMemory)
342 {
343 free(pMemory);
344 }
345
346 static const VkAllocationCallbacks default_alloc = {
347 .pUserData = NULL,
348 .pfnAllocation = default_alloc_func,
349 .pfnReallocation = default_realloc_func,
350 .pfnFree = default_free_func,
351 };
352
353 static const struct debug_control radv_debug_options[] = {
354 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
355 {"nodcc", RADV_DEBUG_NO_DCC},
356 {"shaders", RADV_DEBUG_DUMP_SHADERS},
357 {"nocache", RADV_DEBUG_NO_CACHE},
358 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
359 {"nohiz", RADV_DEBUG_NO_HIZ},
360 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
361 {"unsafemath", RADV_DEBUG_UNSAFE_MATH},
362 {"allbos", RADV_DEBUG_ALL_BOS},
363 {"noibs", RADV_DEBUG_NO_IBS},
364 {NULL, 0}
365 };
366
367 static const struct debug_control radv_perftest_options[] = {
368 {"batchchain", RADV_PERFTEST_BATCHCHAIN},
369 {"sisched", RADV_PERFTEST_SISCHED},
370 {NULL, 0}
371 };
372
373 VkResult radv_CreateInstance(
374 const VkInstanceCreateInfo* pCreateInfo,
375 const VkAllocationCallbacks* pAllocator,
376 VkInstance* pInstance)
377 {
378 struct radv_instance *instance;
379
380 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
381
382 uint32_t client_version;
383 if (pCreateInfo->pApplicationInfo &&
384 pCreateInfo->pApplicationInfo->apiVersion != 0) {
385 client_version = pCreateInfo->pApplicationInfo->apiVersion;
386 } else {
387 client_version = VK_MAKE_VERSION(1, 0, 0);
388 }
389
390 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
391 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
392 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
393 "Client requested version %d.%d.%d",
394 VK_VERSION_MAJOR(client_version),
395 VK_VERSION_MINOR(client_version),
396 VK_VERSION_PATCH(client_version));
397 }
398
399 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
400 if (!is_extension_enabled(instance_extensions,
401 ARRAY_SIZE(instance_extensions),
402 pCreateInfo->ppEnabledExtensionNames[i]))
403 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
404 }
405
406 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
407 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
408 if (!instance)
409 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
410
411 memset(instance, 0, sizeof(*instance));
412
413 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
414
415 if (pAllocator)
416 instance->alloc = *pAllocator;
417 else
418 instance->alloc = default_alloc;
419
420 instance->apiVersion = client_version;
421 instance->physicalDeviceCount = -1;
422
423 _mesa_locale_init();
424
425 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
426
427 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
428 radv_debug_options);
429
430 instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
431 radv_perftest_options);
432
433 *pInstance = radv_instance_to_handle(instance);
434
435 return VK_SUCCESS;
436 }
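
/* Note on the version check above: radv only advertises Vulkan 1.0, so a
 * pApplicationInfo->apiVersion outside 1.0.x is rejected with
 * VK_ERROR_INCOMPATIBLE_DRIVER. A minimal sketch of the application side
 * (illustrative fragment, not part of this file):
 */
#if 0
	VkApplicationInfo app_info = {
		.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
		.apiVersion = VK_MAKE_VERSION(1, 0, 0),
	};
	VkInstanceCreateInfo create_info = {
		.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
		.pApplicationInfo = &app_info,
	};
	VkInstance inst;
	VkResult res = vkCreateInstance(&create_info, NULL, &inst);
#endif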
437
438 void radv_DestroyInstance(
439 VkInstance _instance,
440 const VkAllocationCallbacks* pAllocator)
441 {
442 RADV_FROM_HANDLE(radv_instance, instance, _instance);
443
444 if (!instance)
445 return;
446
447 for (int i = 0; i < instance->physicalDeviceCount; ++i) {
448 radv_physical_device_finish(instance->physicalDevices + i);
449 }
450
451 VG(VALGRIND_DESTROY_MEMPOOL(instance));
452
453 _mesa_locale_fini();
454
455 vk_free(&instance->alloc, instance);
456 }
457
458 static VkResult
459 radv_enumerate_devices(struct radv_instance *instance)
460 {
461 /* TODO: Check for more devices? */
462 drmDevicePtr devices[8];
463 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
464 int max_devices;
465
466 instance->physicalDeviceCount = 0;
467
468 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
469 if (max_devices < 1)
470 return VK_ERROR_INCOMPATIBLE_DRIVER;
471
472 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
473 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
474 devices[i]->bustype == DRM_BUS_PCI &&
475 devices[i]->deviceinfo.pci->vendor_id == 0x1002) {
476
477 result = radv_physical_device_init(instance->physicalDevices +
478 instance->physicalDeviceCount,
479 instance,
480 devices[i]);
481 if (result == VK_SUCCESS)
482 ++instance->physicalDeviceCount;
483 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
484 break;
485 }
486 }
487 drmFreeDevices(devices, max_devices);
488
489 return result;
490 }
491
492 VkResult radv_EnumeratePhysicalDevices(
493 VkInstance _instance,
494 uint32_t* pPhysicalDeviceCount,
495 VkPhysicalDevice* pPhysicalDevices)
496 {
497 RADV_FROM_HANDLE(radv_instance, instance, _instance);
498 VkResult result;
499
500 if (instance->physicalDeviceCount < 0) {
501 result = radv_enumerate_devices(instance);
502 if (result != VK_SUCCESS &&
503 result != VK_ERROR_INCOMPATIBLE_DRIVER)
504 return result;
505 }
506
507 if (!pPhysicalDevices) {
508 *pPhysicalDeviceCount = instance->physicalDeviceCount;
509 } else {
510 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
511 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
512 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
513 }
514
515 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
516 : VK_SUCCESS;
517 }
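
/* This follows the standard Vulkan two-call idiom: a first call with
 * pPhysicalDevices == NULL returns only the count, a second call fills the
 * array, and VK_INCOMPLETE signals that the caller's array was too small.
 * Illustrative app-side fragment (names are placeholders):
 */
#if 0
	uint32_t count = 0;
	vkEnumeratePhysicalDevices(instance, &count, NULL);

	VkPhysicalDevice *devs = malloc(count * sizeof(*devs));
	VkResult res = vkEnumeratePhysicalDevices(instance, &count, devs);
	/* res == VK_INCOMPLETE only if count shrank between the two calls */
#endif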
518
519 void radv_GetPhysicalDeviceFeatures(
520 VkPhysicalDevice physicalDevice,
521 VkPhysicalDeviceFeatures* pFeatures)
522 {
523 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
524 bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
525 memset(pFeatures, 0, sizeof(*pFeatures));
526
527 *pFeatures = (VkPhysicalDeviceFeatures) {
528 .robustBufferAccess = true,
529 .fullDrawIndexUint32 = true,
530 .imageCubeArray = true,
531 .independentBlend = true,
532 .geometryShader = !is_gfx9,
533 .tessellationShader = !is_gfx9,
534 .sampleRateShading = false,
535 .dualSrcBlend = true,
536 .logicOp = true,
537 .multiDrawIndirect = true,
538 .drawIndirectFirstInstance = true,
539 .depthClamp = true,
540 .depthBiasClamp = true,
541 .fillModeNonSolid = true,
542 .depthBounds = true,
543 .wideLines = true,
544 .largePoints = true,
545 .alphaToOne = true,
546 .multiViewport = true,
547 .samplerAnisotropy = true,
548 .textureCompressionETC2 = false,
549 .textureCompressionASTC_LDR = false,
550 .textureCompressionBC = true,
551 .occlusionQueryPrecise = true,
552 .pipelineStatisticsQuery = true,
553 .vertexPipelineStoresAndAtomics = true,
554 .fragmentStoresAndAtomics = true,
555 .shaderTessellationAndGeometryPointSize = true,
556 .shaderImageGatherExtended = true,
557 .shaderStorageImageExtendedFormats = true,
558 .shaderStorageImageMultisample = false,
559 .shaderUniformBufferArrayDynamicIndexing = true,
560 .shaderSampledImageArrayDynamicIndexing = true,
561 .shaderStorageBufferArrayDynamicIndexing = true,
562 .shaderStorageImageArrayDynamicIndexing = true,
563 .shaderStorageImageReadWithoutFormat = true,
564 .shaderStorageImageWriteWithoutFormat = true,
565 .shaderClipDistance = true,
566 .shaderCullDistance = true,
567 .shaderFloat64 = true,
568 .shaderInt64 = true,
569 .shaderInt16 = false,
570 .sparseBinding = true,
571 .variableMultisampleRate = true,
572 .inheritedQueries = true,
573 };
574 }
575
576 void radv_GetPhysicalDeviceFeatures2KHR(
577 VkPhysicalDevice physicalDevice,
578 VkPhysicalDeviceFeatures2KHR *pFeatures)
579 {
580 radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
581 }
582
583 void radv_GetPhysicalDeviceProperties(
584 VkPhysicalDevice physicalDevice,
585 VkPhysicalDeviceProperties* pProperties)
586 {
587 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
588 VkSampleCountFlags sample_counts = 0xf;
589
590 /* make sure that the entire descriptor set is addressable with a signed
591 * 32-bit int. So the sum of all limits scaled by descriptor size has to
592 * be at most 2 GiB. A combined image+sampler object counts as one of
593 * each. This limit is for the pipeline layout, not for the set layout, but
594 * there is no set limit, so we just set a pipeline limit. I don't think
595 * any app is going to hit this soon. */
596 size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
597 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
598 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
599 32 /* sampler, largest when combined with image */ +
600 64 /* sampled image */ +
601 64 /* storage image */);
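
	/* Worked through: the divisor above is 32 + 32 + 32 + 64 + 64 = 224
	 * bytes per descriptor slot, so assuming MAX_DYNAMIC_BUFFERS is 16
	 * this evaluates to (2^31 - 256) / 224, roughly 9.6 million
	 * descriptors per limit. */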
602
603 VkPhysicalDeviceLimits limits = {
604 .maxImageDimension1D = (1 << 14),
605 .maxImageDimension2D = (1 << 14),
606 .maxImageDimension3D = (1 << 11),
607 .maxImageDimensionCube = (1 << 14),
608 .maxImageArrayLayers = (1 << 11),
609 .maxTexelBufferElements = 128 * 1024 * 1024,
610 .maxUniformBufferRange = UINT32_MAX,
611 .maxStorageBufferRange = UINT32_MAX,
612 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
613 .maxMemoryAllocationCount = UINT32_MAX,
614 .maxSamplerAllocationCount = 64 * 1024,
615 .bufferImageGranularity = 64, /* A cache line */
616 .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */
617 .maxBoundDescriptorSets = MAX_SETS,
618 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
619 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
620 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
621 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
622 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
623 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
624 .maxPerStageResources = max_descriptor_set_size,
625 .maxDescriptorSetSamplers = max_descriptor_set_size,
626 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
627 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
628 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
629 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
630 .maxDescriptorSetSampledImages = max_descriptor_set_size,
631 .maxDescriptorSetStorageImages = max_descriptor_set_size,
632 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
633 .maxVertexInputAttributes = 32,
634 .maxVertexInputBindings = 32,
635 .maxVertexInputAttributeOffset = 2047,
636 .maxVertexInputBindingStride = 2048,
637 .maxVertexOutputComponents = 128,
638 .maxTessellationGenerationLevel = 64,
639 .maxTessellationPatchSize = 32,
640 .maxTessellationControlPerVertexInputComponents = 128,
641 .maxTessellationControlPerVertexOutputComponents = 128,
642 .maxTessellationControlPerPatchOutputComponents = 120,
643 .maxTessellationControlTotalOutputComponents = 4096,
644 .maxTessellationEvaluationInputComponents = 128,
645 .maxTessellationEvaluationOutputComponents = 128,
646 .maxGeometryShaderInvocations = 127,
647 .maxGeometryInputComponents = 64,
648 .maxGeometryOutputComponents = 128,
649 .maxGeometryOutputVertices = 256,
650 .maxGeometryTotalOutputComponents = 1024,
651 .maxFragmentInputComponents = 128,
652 .maxFragmentOutputAttachments = 8,
653 .maxFragmentDualSrcAttachments = 1,
654 .maxFragmentCombinedOutputResources = 8,
655 .maxComputeSharedMemorySize = 32768,
656 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
657 .maxComputeWorkGroupInvocations = 2048,
658 .maxComputeWorkGroupSize = {
659 2048,
660 2048,
661 2048
662 },
663 .subPixelPrecisionBits = 4 /* FIXME */,
664 .subTexelPrecisionBits = 4 /* FIXME */,
665 .mipmapPrecisionBits = 4 /* FIXME */,
666 .maxDrawIndexedIndexValue = UINT32_MAX,
667 .maxDrawIndirectCount = UINT32_MAX,
668 .maxSamplerLodBias = 16,
669 .maxSamplerAnisotropy = 16,
670 .maxViewports = MAX_VIEWPORTS,
671 .maxViewportDimensions = { (1 << 14), (1 << 14) },
672 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
673 .viewportSubPixelBits = 13, /* We take a float? */
674 .minMemoryMapAlignment = 4096, /* A page */
675 .minTexelBufferOffsetAlignment = 1,
676 .minUniformBufferOffsetAlignment = 4,
677 .minStorageBufferOffsetAlignment = 4,
678 .minTexelOffset = -32,
679 .maxTexelOffset = 31,
680 .minTexelGatherOffset = -32,
681 .maxTexelGatherOffset = 31,
682 .minInterpolationOffset = -2,
683 .maxInterpolationOffset = 2,
684 .subPixelInterpolationOffsetBits = 8,
685 .maxFramebufferWidth = (1 << 14),
686 .maxFramebufferHeight = (1 << 14),
687 .maxFramebufferLayers = (1 << 10),
688 .framebufferColorSampleCounts = sample_counts,
689 .framebufferDepthSampleCounts = sample_counts,
690 .framebufferStencilSampleCounts = sample_counts,
691 .framebufferNoAttachmentsSampleCounts = sample_counts,
692 .maxColorAttachments = MAX_RTS,
693 .sampledImageColorSampleCounts = sample_counts,
694 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
695 .sampledImageDepthSampleCounts = sample_counts,
696 .sampledImageStencilSampleCounts = sample_counts,
697 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
698 .maxSampleMaskWords = 1,
699 .timestampComputeAndGraphics = true,
700 .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
701 .maxClipDistances = 8,
702 .maxCullDistances = 8,
703 .maxCombinedClipAndCullDistances = 8,
704 .discreteQueuePriorities = 1,
705 .pointSizeRange = { 0.125, 255.875 },
706 .lineWidthRange = { 0.0, 7.9921875 },
707 .pointSizeGranularity = (1.0 / 8.0),
708 .lineWidthGranularity = (1.0 / 128.0),
709 .strictLines = false, /* FINISHME */
710 .standardSampleLocations = true,
711 .optimalBufferCopyOffsetAlignment = 128,
712 .optimalBufferCopyRowPitchAlignment = 128,
713 .nonCoherentAtomSize = 64,
714 };
715
716 *pProperties = (VkPhysicalDeviceProperties) {
717 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
718 .driverVersion = vk_get_driver_version(),
719 .vendorID = 0x1002,
720 .deviceID = pdevice->rad_info.pci_id,
721 .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
722 .limits = limits,
723 .sparseProperties = {0},
724 };
725
726 strcpy(pProperties->deviceName, pdevice->name);
727 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
728 }
729
730 void radv_GetPhysicalDeviceProperties2KHR(
731 VkPhysicalDevice physicalDevice,
732 VkPhysicalDeviceProperties2KHR *pProperties)
733 {
734 radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
735
736 vk_foreach_struct(ext, pProperties->pNext) {
737 switch (ext->sType) {
738 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
739 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
740 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
741 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
742 break;
743 }
744 default:
745 break;
746 }
747 }
748 }
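
/* The pNext walk above fills in extension structs the application chains
 * onto VkPhysicalDeviceProperties2KHR. Illustrative app-side query for the
 * push-descriptor limit (fragment, not part of this file):
 */
#if 0
	VkPhysicalDevicePushDescriptorPropertiesKHR push_props = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR,
	};
	VkPhysicalDeviceProperties2KHR props2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
		.pNext = &push_props,
	};
	vkGetPhysicalDeviceProperties2KHR(physical_device, &props2);
	/* push_props.maxPushDescriptors now holds MAX_PUSH_DESCRIPTORS */
#endif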
749
750 static void radv_get_physical_device_queue_family_properties(
751 struct radv_physical_device* pdevice,
752 uint32_t* pCount,
753 VkQueueFamilyProperties** pQueueFamilyProperties)
754 {
755 int num_queue_families = 1;
756 int idx;
757 if (pdevice->rad_info.num_compute_rings > 0 &&
758 pdevice->rad_info.chip_class >= CIK &&
759 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
760 num_queue_families++;
761
762 if (pQueueFamilyProperties == NULL) {
763 *pCount = num_queue_families;
764 return;
765 }
766
767 if (!*pCount)
768 return;
769
770 idx = 0;
771 if (*pCount >= 1) {
772 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
773 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
774 VK_QUEUE_COMPUTE_BIT |
775 VK_QUEUE_TRANSFER_BIT |
776 VK_QUEUE_SPARSE_BINDING_BIT,
777 .queueCount = 1,
778 .timestampValidBits = 64,
779 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
780 };
781 idx++;
782 }
783
784 if (pdevice->rad_info.num_compute_rings > 0 &&
785 pdevice->rad_info.chip_class >= CIK &&
786 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
787 if (*pCount > idx) {
788 *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
789 .queueFlags = VK_QUEUE_COMPUTE_BIT |
790 VK_QUEUE_TRANSFER_BIT |
791 VK_QUEUE_SPARSE_BINDING_BIT,
792 .queueCount = pdevice->rad_info.num_compute_rings,
793 .timestampValidBits = 64,
794 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
795 };
796 idx++;
797 }
798 }
799 *pCount = idx;
800 }
801
802 void radv_GetPhysicalDeviceQueueFamilyProperties(
803 VkPhysicalDevice physicalDevice,
804 uint32_t* pCount,
805 VkQueueFamilyProperties* pQueueFamilyProperties)
806 {
807 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
808 if (!pQueueFamilyProperties) {
809 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
810 return;
811 }
812 VkQueueFamilyProperties *properties[] = {
813 pQueueFamilyProperties + 0,
814 pQueueFamilyProperties + 1,
815 pQueueFamilyProperties + 2,
816 };
817 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
818 assert(*pCount <= 3);
819 }
820
821 void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
822 VkPhysicalDevice physicalDevice,
823 uint32_t* pCount,
824 VkQueueFamilyProperties2KHR *pQueueFamilyProperties)
825 {
826 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
827 if (!pQueueFamilyProperties) {
828 radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
829 return;
830 }
831 VkQueueFamilyProperties *properties[] = {
832 &pQueueFamilyProperties[0].queueFamilyProperties,
833 &pQueueFamilyProperties[1].queueFamilyProperties,
834 &pQueueFamilyProperties[2].queueFamilyProperties,
835 };
836 radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
837 assert(*pCount <= 3);
838 }
839
840 void radv_GetPhysicalDeviceMemoryProperties(
841 VkPhysicalDevice physicalDevice,
842 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
843 {
844 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
845
846 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
847
848 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
849 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
850 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
851 .heapIndex = RADV_MEM_HEAP_VRAM,
852 };
853 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
854 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
855 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
856 .heapIndex = RADV_MEM_HEAP_GTT,
857 };
858 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
859 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
860 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
861 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
862 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
863 };
864 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
865 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
866 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
867 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
868 .heapIndex = RADV_MEM_HEAP_GTT,
869 };
870
871 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
872
873 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
874 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
875 .size = physical_device->rad_info.vram_size -
876 physical_device->rad_info.vram_vis_size,
877 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
878 };
879 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
880 .size = physical_device->rad_info.vram_vis_size,
881 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
882 };
883 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
884 .size = physical_device->rad_info.gart_size,
885 .flags = 0,
886 };
887 }
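
/* Applications pick one of the four types exposed above by masking a
 * resource's memoryTypeBits against required property flags. A minimal
 * sketch of that selection loop (find_memory_type is a hypothetical helper,
 * not part of this file):
 */
#if 0
static int32_t
find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
                 uint32_t type_bits, VkMemoryPropertyFlags required)
{
	for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
		if ((type_bits & (1u << i)) &&
		    (props->memoryTypes[i].propertyFlags & required) == required)
			return i;
	}
	return -1; /* no compatible type */
}
#endif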
888
889 void radv_GetPhysicalDeviceMemoryProperties2KHR(
890 VkPhysicalDevice physicalDevice,
891 VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties)
892 {
893 radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
894 &pMemoryProperties->memoryProperties);
895 }
896
897 static VkResult
898 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
899 int queue_family_index, int idx)
900 {
901 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
902 queue->device = device;
903 queue->queue_family_index = queue_family_index;
904 queue->queue_idx = idx;
905
906 queue->hw_ctx = device->ws->ctx_create(device->ws);
907 if (!queue->hw_ctx)
908 return VK_ERROR_OUT_OF_HOST_MEMORY;
909
910 return VK_SUCCESS;
911 }
912
913 static void
914 radv_queue_finish(struct radv_queue *queue)
915 {
916 if (queue->hw_ctx)
917 queue->device->ws->ctx_destroy(queue->hw_ctx);
918
919 if (queue->initial_preamble_cs)
920 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
921 if (queue->continue_preamble_cs)
922 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
923 if (queue->descriptor_bo)
924 queue->device->ws->buffer_destroy(queue->descriptor_bo);
925 if (queue->scratch_bo)
926 queue->device->ws->buffer_destroy(queue->scratch_bo);
927 if (queue->esgs_ring_bo)
928 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
929 if (queue->gsvs_ring_bo)
930 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
931 if (queue->tess_factor_ring_bo)
932 queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
933 if (queue->tess_offchip_ring_bo)
934 queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
935 if (queue->compute_scratch_bo)
936 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
937 }
938
939 static void
940 radv_device_init_gs_info(struct radv_device *device)
941 {
942 switch (device->physical_device->rad_info.family) {
943 case CHIP_OLAND:
944 case CHIP_HAINAN:
945 case CHIP_KAVERI:
946 case CHIP_KABINI:
947 case CHIP_MULLINS:
948 case CHIP_ICELAND:
949 case CHIP_CARRIZO:
950 case CHIP_STONEY:
951 device->gs_table_depth = 16;
952 return;
953 case CHIP_TAHITI:
954 case CHIP_PITCAIRN:
955 case CHIP_VERDE:
956 case CHIP_BONAIRE:
957 case CHIP_HAWAII:
958 case CHIP_TONGA:
959 case CHIP_FIJI:
960 case CHIP_POLARIS10:
961 case CHIP_POLARIS11:
962 case CHIP_POLARIS12:
963 case CHIP_VEGA10:
964 case CHIP_RAVEN:
965 device->gs_table_depth = 32;
966 return;
967 default:
968 unreachable("unknown GPU");
969 }
970 }
971
972 VkResult radv_CreateDevice(
973 VkPhysicalDevice physicalDevice,
974 const VkDeviceCreateInfo* pCreateInfo,
975 const VkAllocationCallbacks* pAllocator,
976 VkDevice* pDevice)
977 {
978 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
979 VkResult result;
980 struct radv_device *device;
981
982 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
983 if (!is_extension_enabled(physical_device->extensions.ext_array,
984 physical_device->extensions.num_ext,
985 pCreateInfo->ppEnabledExtensionNames[i]))
986 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
987 }
988
989 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
990 sizeof(*device), 8,
991 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
992 if (!device)
993 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
994
995 memset(device, 0, sizeof(*device));
996
997 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
998 device->instance = physical_device->instance;
999 device->physical_device = physical_device;
1000
1001 device->debug_flags = device->instance->debug_flags;
1002
1003 device->ws = physical_device->ws;
1004 if (pAllocator)
1005 device->alloc = *pAllocator;
1006 else
1007 device->alloc = physical_device->instance->alloc;
1008
1009 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1010 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
1011 uint32_t qfi = queue_create->queueFamilyIndex;
1012
1013 device->queues[qfi] = vk_alloc(&device->alloc,
1014 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1015 if (!device->queues[qfi]) {
1016 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1017 goto fail;
1018 }
1019
1020 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
1021
1022 device->queue_count[qfi] = queue_create->queueCount;
1023
1024 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1025 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q);
1026 if (result != VK_SUCCESS)
1027 goto fail;
1028 }
1029 }
1030
1031 #if HAVE_LLVM < 0x0400
1032 device->llvm_supports_spill = false;
1033 #else
1034 device->llvm_supports_spill = true;
1035 #endif
1036
1037 /* The maximum number of scratch waves. Scratch space isn't divided
1038 * evenly between CUs. The number is only a function of the number of CUs.
1039 * We can decrease the constant to decrease the scratch buffer size.
1040 *
1041 * device->scratch_waves must be >= the maximum possible size of
1042 * 1 threadgroup, so that the hw doesn't hang from being unable
1043 * to start any.
1044 *
1045 * The recommended value is 4 per CU at most. Higher numbers don't
1046 * bring much benefit, but they still occupy chip resources (think
1047 * async compute). I've seen ~2% performance difference between 4 and 32.
1048 */
1049 uint32_t max_threads_per_block = 2048;
1050 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
1051 max_threads_per_block / 64);
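
	/* e.g. a 64-CU discrete part gets MAX2(32 * 64, 2048 / 64) = 2048
	 * scratch waves; the second operand only matters for very small CU
	 * counts. */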
1052
1053 radv_device_init_gs_info(device);
1054
1055 device->tess_offchip_block_dw_size =
1056 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
1057 device->has_distributed_tess =
1058 device->physical_device->rad_info.chip_class >= VI &&
1059 device->physical_device->rad_info.max_se >= 2;
1060
1061 result = radv_device_init_meta(device);
1062 if (result != VK_SUCCESS)
1063 goto fail;
1064
1065 radv_device_init_msaa(device);
1066
1067 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
1068 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
1069 switch (family) {
1070 case RADV_QUEUE_GENERAL:
1071 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
1072 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
1073 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
1074 break;
1075 case RADV_QUEUE_COMPUTE:
1076 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
1077 radeon_emit(device->empty_cs[family], 0);
1078 break;
1079 }
1080 device->ws->cs_finalize(device->empty_cs[family]);
1081
1082 device->flush_cs[family] = device->ws->cs_create(device->ws, family);
1083 switch (family) {
1084 case RADV_QUEUE_GENERAL:
1085 case RADV_QUEUE_COMPUTE:
1086 si_cs_emit_cache_flush(device->flush_cs[family],
1087 false,
1088 device->physical_device->rad_info.chip_class,
1089 NULL, 0,
1090 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1091 RADV_CMD_FLAG_INV_ICACHE |
1092 RADV_CMD_FLAG_INV_SMEM_L1 |
1093 RADV_CMD_FLAG_INV_VMEM_L1 |
1094 RADV_CMD_FLAG_INV_GLOBAL_L2);
1095 break;
1096 }
1097 device->ws->cs_finalize(device->flush_cs[family]);
1098
1099 device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
1100 switch (family) {
1101 case RADV_QUEUE_GENERAL:
1102 case RADV_QUEUE_COMPUTE:
1103 si_cs_emit_cache_flush(device->flush_shader_cs[family],
1104 false,
1105 device->physical_device->rad_info.chip_class,
1106 NULL, 0,
1107 family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
1108 (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
1109 RADV_CMD_FLAG_INV_ICACHE |
1110 RADV_CMD_FLAG_INV_SMEM_L1 |
1111 RADV_CMD_FLAG_INV_VMEM_L1 |
1112 RADV_CMD_FLAG_INV_GLOBAL_L2);
1113 break;
1114 }
1115 device->ws->cs_finalize(device->flush_shader_cs[family]);
1116 }
1117
1118 if (getenv("RADV_TRACE_FILE")) {
1119 result = VK_ERROR_OUT_OF_HOST_MEMORY; /* for the goto fail paths below */
1120 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8,
1121 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
1122 if (!device->trace_bo)
1123 goto fail;
1124 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo);
1125 if (!device->trace_id_ptr)
1126 goto fail;
1127 }
1128
1129 if (device->physical_device->rad_info.chip_class >= CIK)
1130 cik_create_gfx_config(device);
1131
1132 VkPipelineCacheCreateInfo ci;
1133 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1134 ci.pNext = NULL;
1135 ci.flags = 0;
1136 ci.pInitialData = NULL;
1137 ci.initialDataSize = 0;
1138 VkPipelineCache pc;
1139 result = radv_CreatePipelineCache(radv_device_to_handle(device),
1140 &ci, NULL, &pc);
1141 if (result != VK_SUCCESS)
1142 goto fail;
1143
1144 device->mem_cache = radv_pipeline_cache_from_handle(pc);
1145
1146 *pDevice = radv_device_to_handle(device);
1147 return VK_SUCCESS;
1148
1149 fail:
1150 if (device->trace_bo)
1151 device->ws->buffer_destroy(device->trace_bo);
1152
1153 if (device->gfx_init)
1154 device->ws->buffer_destroy(device->gfx_init);
1155
1156 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1157 for (unsigned q = 0; q < device->queue_count[i]; q++)
1158 radv_queue_finish(&device->queues[i][q]);
1159 if (device->queue_count[i])
1160 vk_free(&device->alloc, device->queues[i]);
1161 }
1162
1163 vk_free(&device->alloc, device);
1164 return result;
1165 }
1166
1167 void radv_DestroyDevice(
1168 VkDevice _device,
1169 const VkAllocationCallbacks* pAllocator)
1170 {
1171 RADV_FROM_HANDLE(radv_device, device, _device);
1172
1173 if (!device)
1174 return;
1175
1176 if (device->trace_bo)
1177 device->ws->buffer_destroy(device->trace_bo);
1178
1179 if (device->gfx_init)
1180 device->ws->buffer_destroy(device->gfx_init);
1181
1182 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1183 for (unsigned q = 0; q < device->queue_count[i]; q++)
1184 radv_queue_finish(&device->queues[i][q]);
1185 if (device->queue_count[i])
1186 vk_free(&device->alloc, device->queues[i]);
1187 if (device->empty_cs[i])
1188 device->ws->cs_destroy(device->empty_cs[i]);
1189 if (device->flush_cs[i])
1190 device->ws->cs_destroy(device->flush_cs[i]);
1191 if (device->flush_shader_cs[i])
1192 device->ws->cs_destroy(device->flush_shader_cs[i]);
1193 }
1194 radv_device_finish_meta(device);
1195
1196 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
1197 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
1198
1199 vk_free(&device->alloc, device);
1200 }
1201
1202 VkResult radv_EnumerateInstanceExtensionProperties(
1203 const char* pLayerName,
1204 uint32_t* pPropertyCount,
1205 VkExtensionProperties* pProperties)
1206 {
1207 if (pProperties == NULL) {
1208 *pPropertyCount = ARRAY_SIZE(instance_extensions);
1209 return VK_SUCCESS;
1210 }
1211
1212 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions));
1213 typed_memcpy(pProperties, instance_extensions, *pPropertyCount);
1214
1215 if (*pPropertyCount < ARRAY_SIZE(instance_extensions))
1216 return VK_INCOMPLETE;
1217
1218 return VK_SUCCESS;
1219 }
1220
1221 VkResult radv_EnumerateDeviceExtensionProperties(
1222 VkPhysicalDevice physicalDevice,
1223 const char* pLayerName,
1224 uint32_t* pPropertyCount,
1225 VkExtensionProperties* pProperties)
1226 {
1227 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1228
1229 if (pProperties == NULL) {
1230 *pPropertyCount = pdevice->extensions.num_ext;
1231 return VK_SUCCESS;
1232 }
1233
1234 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext);
1235 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount);
1236
1237 if (*pPropertyCount < pdevice->extensions.num_ext)
1238 return VK_INCOMPLETE;
1239
1240 return VK_SUCCESS;
1241 }
1242
1243 VkResult radv_EnumerateInstanceLayerProperties(
1244 uint32_t* pPropertyCount,
1245 VkLayerProperties* pProperties)
1246 {
1247 if (pProperties == NULL) {
1248 *pPropertyCount = 0;
1249 return VK_SUCCESS;
1250 }
1251
1252 /* None supported at this time */
1253 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1254 }
1255
1256 VkResult radv_EnumerateDeviceLayerProperties(
1257 VkPhysicalDevice physicalDevice,
1258 uint32_t* pPropertyCount,
1259 VkLayerProperties* pProperties)
1260 {
1261 if (pProperties == NULL) {
1262 *pPropertyCount = 0;
1263 return VK_SUCCESS;
1264 }
1265
1266 /* None supported at this time */
1267 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1268 }
1269
1270 void radv_GetDeviceQueue(
1271 VkDevice _device,
1272 uint32_t queueFamilyIndex,
1273 uint32_t queueIndex,
1274 VkQueue* pQueue)
1275 {
1276 RADV_FROM_HANDLE(radv_device, device, _device);
1277
1278 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]);
1279 }
1280
1281 static void radv_dump_trace(struct radv_device *device,
1282 struct radeon_winsys_cs *cs)
1283 {
1284 const char *filename = getenv("RADV_TRACE_FILE");
1285 FILE *f = fopen(filename, "w");
1286 if (!f) {
1287 fprintf(stderr, "Failed to open trace file %s\n", filename);
1288 return;
1289 }
1290
1291 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1292 device->ws->cs_dump(cs, f, *device->trace_id_ptr);
1293 fclose(f);
1294 }
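
/* Enabled by running with RADV_TRACE_FILE pointing at a writable path; the
 * trace id the GPU last wrote back is intended to help identify which
 * submission was in flight when a hang occurred. */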
1295
1296 static void
1297 fill_geom_tess_rings(struct radv_queue *queue,
1298 uint32_t *map,
1299 bool add_sample_positions,
1300 uint32_t esgs_ring_size,
1301 struct radeon_winsys_bo *esgs_ring_bo,
1302 uint32_t gsvs_ring_size,
1303 struct radeon_winsys_bo *gsvs_ring_bo,
1304 uint32_t tess_factor_ring_size,
1305 struct radeon_winsys_bo *tess_factor_ring_bo,
1306 uint32_t tess_offchip_ring_size,
1307 struct radeon_winsys_bo *tess_offchip_ring_bo)
1308 {
1309 uint64_t esgs_va = 0, gsvs_va = 0;
1310 uint64_t tess_factor_va = 0, tess_offchip_va = 0;
1311 uint32_t *desc = &map[4];
1312
1313 if (esgs_ring_bo)
1314 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
1315 if (gsvs_ring_bo)
1316 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
1317 if (tess_factor_ring_bo)
1318 tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1319 if (tess_offchip_ring_bo)
1320 tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
1321
1322 /* stride 0, num records - size, add tid, swizzle, elsize4,
1323 index stride 64 */
1324 desc[0] = esgs_va;
1325 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
1326 S_008F04_STRIDE(0) |
1327 S_008F04_SWIZZLE_ENABLE(true);
1328 desc[2] = esgs_ring_size;
1329 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1330 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1331 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1332 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1333 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1334 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1335 S_008F0C_ELEMENT_SIZE(1) |
1336 S_008F0C_INDEX_STRIDE(3) |
1337 S_008F0C_ADD_TID_ENABLE(true);
1338
1339 desc += 4;
1340 /* GS entry for ES->GS ring */
1341 /* stride 0, num records - size, elsize0,
1342 index stride 0 */
1343 desc[0] = esgs_va;
1344 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
1345 S_008F04_STRIDE(0) |
1346 S_008F04_SWIZZLE_ENABLE(false);
1347 desc[2] = esgs_ring_size;
1348 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1349 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1350 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1351 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1352 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1353 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1354 S_008F0C_ELEMENT_SIZE(0) |
1355 S_008F0C_INDEX_STRIDE(0) |
1356 S_008F0C_ADD_TID_ENABLE(false);
1357
1358 desc += 4;
1359 /* VS entry for GS->VS ring */
1360 /* stride 0, num records - size, elsize0,
1361 index stride 0 */
1362 desc[0] = gsvs_va;
1363 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1364 S_008F04_STRIDE(0) |
1365 S_008F04_SWIZZLE_ENABLE(false);
1366 desc[2] = gsvs_ring_size;
1367 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1368 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1369 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1370 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1371 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1372 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1373 S_008F0C_ELEMENT_SIZE(0) |
1374 S_008F0C_INDEX_STRIDE(0) |
1375 S_008F0C_ADD_TID_ENABLE(false);
1376 desc += 4;
1377
1378 /* stride gsvs_itemsize, num records 64
1379 elsize 4, index stride 16 */
1380 /* shader will patch stride and desc[2] */
1381 desc[0] = gsvs_va;
1382 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
1383 S_008F04_STRIDE(0) |
1384 S_008F04_SWIZZLE_ENABLE(true);
1385 desc[2] = 0;
1386 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1387 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1388 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1389 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1390 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1391 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1392 S_008F0C_ELEMENT_SIZE(1) |
1393 S_008F0C_INDEX_STRIDE(1) |
1394 S_008F0C_ADD_TID_ENABLE(true);
1395 desc += 4;
1396
1397 desc[0] = tess_factor_va;
1398 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
1399 S_008F04_STRIDE(0) |
1400 S_008F04_SWIZZLE_ENABLE(false);
1401 desc[2] = tess_factor_ring_size;
1402 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1403 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1404 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1405 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1406 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1407 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1408 S_008F0C_ELEMENT_SIZE(0) |
1409 S_008F0C_INDEX_STRIDE(0) |
1410 S_008F0C_ADD_TID_ENABLE(false);
1411 desc += 4;
1412
1413 desc[0] = tess_offchip_va;
1414 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
1415 S_008F04_STRIDE(0) |
1416 S_008F04_SWIZZLE_ENABLE(false);
1417 desc[2] = tess_offchip_ring_size;
1418 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
1419 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1420 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
1421 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1422 S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1423 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
1424 S_008F0C_ELEMENT_SIZE(0) |
1425 S_008F0C_INDEX_STRIDE(0) |
1426 S_008F0C_ADD_TID_ENABLE(false);
1427 desc += 4;
1428
1429 /* add sample positions after all rings */
1430 if (add_sample_positions) {
1431 memcpy(desc, queue->device->sample_locations_1x, 8); desc += 2;
1432 memcpy(desc, queue->device->sample_locations_2x, 16); desc += 4;
1433 memcpy(desc, queue->device->sample_locations_4x, 32); desc += 8;
1434 memcpy(desc, queue->device->sample_locations_8x, 64); desc += 16;
1435 memcpy(desc, queue->device->sample_locations_16x, 128);
1436 }
1437 }
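
/* Each ring entry written above is a 4-dword AMD buffer resource descriptor
 * (a "V#"): words 0-1 hold the 48-bit base address plus stride and swizzle
 * control, word 2 the record count, and word 3 the dst_sel/format fields
 * plus element size and index stride for swizzled access. A sketch of the
 * word 0/1 packing the S_008F04_* macros produce (illustrative only; exact
 * bit placement is whatever those macros encode):
 */
#if 0
struct gcn_buffer_desc_words01 {
	uint32_t base_address_lo;       /* word 0: VA bits 0-31 */
	uint32_t base_address_hi : 16;  /* word 1: VA bits 32-47 */
	uint32_t stride          : 14;
	uint32_t cache_swizzle   : 1;
	uint32_t swizzle_enable  : 1;
};
#endif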
1440
1441 static unsigned
1442 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
1443 {
1444 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
1445 device->physical_device->rad_info.family != CHIP_CARRIZO &&
1446 device->physical_device->rad_info.family != CHIP_STONEY;
1447 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
1448 unsigned max_offchip_buffers = max_offchip_buffers_per_se *
1449 device->physical_device->rad_info.max_se;
1450 unsigned offchip_granularity;
1451 unsigned hs_offchip_param;
1452 switch (device->tess_offchip_block_dw_size) {
1453 default:
1454 assert(0);
1455 /* fall through */
1456 case 8192:
1457 offchip_granularity = V_03093C_X_8K_DWORDS;
1458 break;
1459 case 4096:
1460 offchip_granularity = V_03093C_X_4K_DWORDS;
1461 break;
1462 }
1463
1464 switch (device->physical_device->rad_info.chip_class) {
1465 case SI:
1466 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
1467 break;
1468 case CIK:
1469 case VI:
1470 case GFX9:
1471 default:
1472 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
1473 break;
1474 }
1475
1476 *max_offchip_buffers_p = max_offchip_buffers;
1477 if (device->physical_device->rad_info.chip_class >= CIK) {
1478 if (device->physical_device->rad_info.chip_class >= VI)
1479 --max_offchip_buffers;
1480 hs_offchip_param =
1481 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
1482 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
1483 } else {
1484 hs_offchip_param =
1485 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
1486 }
1487 return hs_offchip_param;
1488 }
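
/* Worked example: a 4-SE VI part has double_offchip_buffers, so
 * max_offchip_buffers = 128 * 4 = 512, clamped to 508; since VI >= CIK the
 * register value becomes S_03093C_OFFCHIP_BUFFERING(508 - 1) with the
 * 8K-dword granularity selected above. */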
1489
1490 static VkResult
1491 radv_get_preamble_cs(struct radv_queue *queue,
1492 uint32_t scratch_size,
1493 uint32_t compute_scratch_size,
1494 uint32_t esgs_ring_size,
1495 uint32_t gsvs_ring_size,
1496 bool needs_tess_rings,
1497 bool needs_sample_positions,
1498 struct radeon_winsys_cs **initial_preamble_cs,
1499 struct radeon_winsys_cs **continue_preamble_cs)
1500 {
1501 struct radeon_winsys_bo *scratch_bo = NULL;
1502 struct radeon_winsys_bo *descriptor_bo = NULL;
1503 struct radeon_winsys_bo *compute_scratch_bo = NULL;
1504 struct radeon_winsys_bo *esgs_ring_bo = NULL;
1505 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
1506 struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
1507 struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
1508 struct radeon_winsys_cs *dest_cs[2] = {0};
1509 bool add_tess_rings = false, add_sample_positions = false;
1510 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
1511 unsigned max_offchip_buffers;
1512 unsigned hs_offchip_param = 0;
1513 if (!queue->has_tess_rings) {
1514 if (needs_tess_rings)
1515 add_tess_rings = true;
1516 }
1517 if (!queue->has_sample_positions) {
1518 if (needs_sample_positions)
1519 add_sample_positions = true;
1520 }
1521 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
1522 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
1523 &max_offchip_buffers);
1524 tess_offchip_ring_size = max_offchip_buffers *
1525 queue->device->tess_offchip_block_dw_size * 4;
1526
1527 if (scratch_size <= queue->scratch_size &&
1528 compute_scratch_size <= queue->compute_scratch_size &&
1529 esgs_ring_size <= queue->esgs_ring_size &&
1530 gsvs_ring_size <= queue->gsvs_ring_size &&
1531 !add_tess_rings && !add_sample_positions &&
1532 queue->initial_preamble_cs) {
1533 *initial_preamble_cs = queue->initial_preamble_cs;
1534 *continue_preamble_cs = queue->continue_preamble_cs;
1535 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
1536 *continue_preamble_cs = NULL;
1537 return VK_SUCCESS;
1538 }
1539
1540 if (scratch_size > queue->scratch_size) {
1541 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1542 scratch_size,
1543 4096,
1544 RADEON_DOMAIN_VRAM,
1545 RADEON_FLAG_NO_CPU_ACCESS);
1546 if (!scratch_bo)
1547 goto fail;
1548 } else
1549 scratch_bo = queue->scratch_bo;
1550
1551 if (compute_scratch_size > queue->compute_scratch_size) {
1552 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
1553 compute_scratch_size,
1554 4096,
1555 RADEON_DOMAIN_VRAM,
1556 RADEON_FLAG_NO_CPU_ACCESS);
1557 if (!compute_scratch_bo)
1558 goto fail;
1559
1560 } else
1561 compute_scratch_bo = queue->compute_scratch_bo;
1562
1563 if (esgs_ring_size > queue->esgs_ring_size) {
1564 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1565 esgs_ring_size,
1566 4096,
1567 RADEON_DOMAIN_VRAM,
1568 RADEON_FLAG_NO_CPU_ACCESS);
1569 if (!esgs_ring_bo)
1570 goto fail;
1571 } else {
1572 esgs_ring_bo = queue->esgs_ring_bo;
1573 esgs_ring_size = queue->esgs_ring_size;
1574 }
1575
1576 if (gsvs_ring_size > queue->gsvs_ring_size) {
1577 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1578 gsvs_ring_size,
1579 4096,
1580 RADEON_DOMAIN_VRAM,
1581 RADEON_FLAG_NO_CPU_ACCESS);
1582 if (!gsvs_ring_bo)
1583 goto fail;
1584 } else {
1585 gsvs_ring_bo = queue->gsvs_ring_bo;
1586 gsvs_ring_size = queue->gsvs_ring_size;
1587 }
1588
1589 if (add_tess_rings) {
1590 tess_factor_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1591 tess_factor_ring_size,
1592 256,
1593 RADEON_DOMAIN_VRAM,
1594 RADEON_FLAG_NO_CPU_ACCESS);
1595 if (!tess_factor_ring_bo)
1596 goto fail;
1597 tess_offchip_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
1598 tess_offchip_ring_size,
1599 256,
1600 RADEON_DOMAIN_VRAM,
1601 RADEON_FLAG_NO_CPU_ACCESS);
1602 if (!tess_offchip_ring_bo)
1603 goto fail;
1604 } else {
1605 tess_factor_ring_bo = queue->tess_factor_ring_bo;
1606 tess_offchip_ring_bo = queue->tess_offchip_ring_bo;
1607 }
1608
1609 if (scratch_bo != queue->scratch_bo ||
1610 esgs_ring_bo != queue->esgs_ring_bo ||
1611 gsvs_ring_bo != queue->gsvs_ring_bo ||
1612 tess_factor_ring_bo != queue->tess_factor_ring_bo ||
1613 tess_offchip_ring_bo != queue->tess_offchip_ring_bo || add_sample_positions) {
1614 uint32_t size = 0;
1615 if (gsvs_ring_bo || esgs_ring_bo ||
1616 tess_factor_ring_bo || tess_offchip_ring_bo || add_sample_positions) {
1617 size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
1618 if (add_sample_positions)
1619 size += 256; /* (1+2+4+8+16) samples * 4 * 2 = 248 bytes, rounded up. */
1620 }
1621 else if (scratch_bo)
1622 size = 8; /* 2 dword */
1623
1624 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
1625 size,
1626 4096,
1627 RADEON_DOMAIN_VRAM,
1628 RADEON_FLAG_CPU_ACCESS);
1629 if (!descriptor_bo)
1630 goto fail;
1631 } else
1632 descriptor_bo = queue->descriptor_bo;
1633
1634 for(int i = 0; i < 2; ++i) {
1635 struct radeon_winsys_cs *cs = NULL;
1636 cs = queue->device->ws->cs_create(queue->device->ws,
1637 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
1638 if (!cs)
1639 goto fail;
1640
1641 dest_cs[i] = cs;
1642
1643 if (scratch_bo)
1644 queue->device->ws->cs_add_buffer(cs, scratch_bo, 8);
1645
1646 if (esgs_ring_bo)
1647 queue->device->ws->cs_add_buffer(cs, esgs_ring_bo, 8);
1648
1649 if (gsvs_ring_bo)
1650 queue->device->ws->cs_add_buffer(cs, gsvs_ring_bo, 8);
1651
1652 if (tess_factor_ring_bo)
1653 queue->device->ws->cs_add_buffer(cs, tess_factor_ring_bo, 8);
1654
1655 if (tess_offchip_ring_bo)
1656 queue->device->ws->cs_add_buffer(cs, tess_offchip_ring_bo, 8);
1657
1658 if (descriptor_bo)
1659 queue->device->ws->cs_add_buffer(cs, descriptor_bo, 8);
1660
1661 if (descriptor_bo != queue->descriptor_bo) {
1662 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
1663
1664 if (scratch_bo) {
1665 uint64_t scratch_va = queue->device->ws->buffer_get_va(scratch_bo);
1666 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1667 S_008F04_SWIZZLE_ENABLE(1);
1668 map[0] = scratch_va;
1669 map[1] = rsrc1;
1670 }
1671
1672 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo ||
1673 add_sample_positions)
1674 fill_geom_tess_rings(queue, map, add_sample_positions,
1675 esgs_ring_size, esgs_ring_bo,
1676 gsvs_ring_size, gsvs_ring_bo,
1677 tess_factor_ring_size, tess_factor_ring_bo,
1678 tess_offchip_ring_size, tess_offchip_ring_bo);
1679
1680 queue->device->ws->buffer_unmap(descriptor_bo);
1681 }
1682
1683 if (esgs_ring_bo || gsvs_ring_bo || tess_factor_ring_bo || tess_offchip_ring_bo) {
1684 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1685 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
1686 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1687 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
1688 }
1689
1690 if (esgs_ring_bo || gsvs_ring_bo) {
1691 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1692 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
1693 radeon_emit(cs, esgs_ring_size >> 8);
1694 radeon_emit(cs, gsvs_ring_size >> 8);
1695 } else {
1696 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
1697 radeon_emit(cs, esgs_ring_size >> 8);
1698 radeon_emit(cs, gsvs_ring_size >> 8);
1699 }
1700 }
1701
1702 if (tess_factor_ring_bo) {
1703 uint64_t tf_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
1704 if (queue->device->physical_device->rad_info.chip_class >= CIK) {
1705 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
1706 S_030938_SIZE(tess_factor_ring_size / 4));
1707 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
1708 tf_va >> 8);
1709 if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
1710 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
1711 tf_va >> 40);
1712 }
1713 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
1714 } else {
1715 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
1716 S_008988_SIZE(tess_factor_ring_size / 4));
1717 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
1718 tf_va >> 8);
1719 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
1720 hs_offchip_param);
1721 }
1722 }
1723
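/* Point the first two user SGPRs of every graphics stage at the
 * descriptor BO so shaders can locate the ring and scratch buffers. */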
1724 if (descriptor_bo) {
1725 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
1726 R_00B130_SPI_SHADER_USER_DATA_VS_0,
1727 R_00B230_SPI_SHADER_USER_DATA_GS_0,
1728 R_00B330_SPI_SHADER_USER_DATA_ES_0,
1729 R_00B430_SPI_SHADER_USER_DATA_HS_0,
1730 R_00B530_SPI_SHADER_USER_DATA_LS_0};
1731
1732 uint64_t va = queue->device->ws->buffer_get_va(descriptor_bo);
1733
1734 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
1735 radeon_set_sh_reg_seq(cs, regs[i], 2);
1736 radeon_emit(cs, va);
1737 radeon_emit(cs, va >> 32);
1738 }
1739 }
1740
1741 if (compute_scratch_bo) {
1742 uint64_t scratch_va = queue->device->ws->buffer_get_va(compute_scratch_bo);
1743 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
1744 S_008F04_SWIZZLE_ENABLE(1);
1745
1746 queue->device->ws->cs_add_buffer(cs, compute_scratch_bo, 8);
1747
1748 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
1749 radeon_emit(cs, scratch_va);
1750 radeon_emit(cs, rsrc1);
1751 }
1752
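/* Only the initial preamble invalidates caches; the continue preamble is
 * executed between chained IBs of the same submission, where a full
 * flush would be redundant. */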
1753 if (!i) {
1754 si_cs_emit_cache_flush(cs,
1755 false,
1756 queue->device->physical_device->rad_info.chip_class,
1757 NULL, 0,
1758 queue->queue_family_index == RING_COMPUTE &&
1759 queue->device->physical_device->rad_info.chip_class >= CIK,
1760 RADV_CMD_FLAG_INV_ICACHE |
1761 RADV_CMD_FLAG_INV_SMEM_L1 |
1762 RADV_CMD_FLAG_INV_VMEM_L1 |
1763 RADV_CMD_FLAG_INV_GLOBAL_L2);
1764 }
1765
1766 if (!queue->device->ws->cs_finalize(cs))
1767 goto fail;
1768 }
1769
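/* Swap in the new preambles and update the cached ring/scratch state,
 * destroying whatever the new buffers replaced. */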
1770 if (queue->initial_preamble_cs)
1771 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
1772
1773 if (queue->continue_preamble_cs)
1774 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
1775
1776 queue->initial_preamble_cs = dest_cs[0];
1777 queue->continue_preamble_cs = dest_cs[1];
1778
1779 if (scratch_bo != queue->scratch_bo) {
1780 if (queue->scratch_bo)
1781 queue->device->ws->buffer_destroy(queue->scratch_bo);
1782 queue->scratch_bo = scratch_bo;
1783 queue->scratch_size = scratch_size;
1784 }
1785
1786 if (compute_scratch_bo != queue->compute_scratch_bo) {
1787 if (queue->compute_scratch_bo)
1788 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
1789 queue->compute_scratch_bo = compute_scratch_bo;
1790 queue->compute_scratch_size = compute_scratch_size;
1791 }
1792
1793 if (esgs_ring_bo != queue->esgs_ring_bo) {
1794 if (queue->esgs_ring_bo)
1795 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
1796 queue->esgs_ring_bo = esgs_ring_bo;
1797 queue->esgs_ring_size = esgs_ring_size;
1798 }
1799
1800 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
1801 if (queue->gsvs_ring_bo)
1802 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
1803 queue->gsvs_ring_bo = gsvs_ring_bo;
1804 queue->gsvs_ring_size = gsvs_ring_size;
1805 }
1806
1807 if (tess_factor_ring_bo != queue->tess_factor_ring_bo) {
1808 queue->tess_factor_ring_bo = tess_factor_ring_bo;
1809 }
1810
1811 if (tess_offchip_ring_bo != queue->tess_offchip_ring_bo) {
1812 queue->tess_offchip_ring_bo = tess_offchip_ring_bo;
1813 queue->has_tess_rings = true;
1814 }
1815
1816 if (descriptor_bo != queue->descriptor_bo) {
1817 if (queue->descriptor_bo)
1818 queue->device->ws->buffer_destroy(queue->descriptor_bo);
1819
1820 queue->descriptor_bo = descriptor_bo;
1821 }
1822
1823 if (add_sample_positions)
1824 queue->has_sample_positions = true;
1825
1826 *initial_preamble_cs = queue->initial_preamble_cs;
1827 *continue_preamble_cs = queue->continue_preamble_cs;
/* Drop the continue preamble only when no ring, scratch or sample-position
 * state has to be re-emitted between chained IBs. */
1828 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size && !add_tess_rings && !add_sample_positions)
1829 *continue_preamble_cs = NULL;
1830 return VK_SUCCESS;
1831 fail:
1832 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
1833 if (dest_cs[i])
1834 queue->device->ws->cs_destroy(dest_cs[i]);
1835 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
1836 queue->device->ws->buffer_destroy(descriptor_bo);
1837 if (scratch_bo && scratch_bo != queue->scratch_bo)
1838 queue->device->ws->buffer_destroy(scratch_bo);
1839 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
1840 queue->device->ws->buffer_destroy(compute_scratch_bo);
1841 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
1842 queue->device->ws->buffer_destroy(esgs_ring_bo);
1843 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
1844 queue->device->ws->buffer_destroy(gsvs_ring_bo);
1845 if (tess_factor_ring_bo && tess_factor_ring_bo != queue->tess_factor_ring_bo)
1846 queue->device->ws->buffer_destroy(tess_factor_ring_bo);
1847 if (tess_offchip_ring_bo && tess_offchip_ring_bo != queue->tess_offchip_ring_bo)
1848 queue->device->ws->buffer_destroy(tess_offchip_ring_bo);
1849 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1850 }
1851
1852 VkResult radv_QueueSubmit(
1853 VkQueue _queue,
1854 uint32_t submitCount,
1855 const VkSubmitInfo* pSubmits,
1856 VkFence _fence)
1857 {
1858 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1859 RADV_FROM_HANDLE(radv_fence, fence, _fence);
1860 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
1861 struct radeon_winsys_ctx *ctx = queue->hw_ctx;
1862 int ret;
1863 uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
1864 uint32_t scratch_size = 0;
1865 uint32_t compute_scratch_size = 0;
1866 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
1867 struct radeon_winsys_cs *initial_preamble_cs = NULL, *continue_preamble_cs = NULL;
1868 VkResult result;
1869 bool fence_emitted = false;
1870 bool tess_rings_needed = false;
1871 bool sample_positions_needed = false;
1872
1873 /* Do this first so failing to allocate scratch buffers can't result in
1874 * partially executed submissions. */
1875 for (uint32_t i = 0; i < submitCount; i++) {
1876 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1877 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1878 pSubmits[i].pCommandBuffers[j]);
1879
1880 scratch_size = MAX2(scratch_size, cmd_buffer->scratch_size_needed);
1881 compute_scratch_size = MAX2(compute_scratch_size,
1882 cmd_buffer->compute_scratch_size_needed);
1883 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
1884 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
1885 tess_rings_needed |= cmd_buffer->tess_rings_needed;
1886 sample_positions_needed |= cmd_buffer->sample_positions_needed;
1887 }
1888 }
1889
1890 result = radv_get_preamble_cs(queue, scratch_size, compute_scratch_size,
1891 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
1892 sample_positions_needed,
1893 &initial_preamble_cs, &continue_preamble_cs);
1894 if (result != VK_SUCCESS)
1895 return result;
1896
1897 for (uint32_t i = 0; i < submitCount; i++) {
1898 struct radeon_winsys_cs **cs_array;
1899 bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
1900 bool can_patch = !do_flush;
1901 uint32_t advance;
1902
1903 if (!pSubmits[i].commandBufferCount) {
1904 if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
1905 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1906 &queue->device->empty_cs[queue->queue_family_index],
1907 1, NULL, NULL,
1908 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1909 pSubmits[i].waitSemaphoreCount,
1910 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1911 pSubmits[i].signalSemaphoreCount,
1912 false, base_fence);
1913 if (ret) {
1914 radv_loge("failed to submit CS %d\n", i);
1915 abort();
1916 }
1917 fence_emitted = true;
1918 }
1919 continue;
1920 }
1921
1922 cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
1923 (pSubmits[i].commandBufferCount + do_flush));
if (!cs_array)
return VK_ERROR_OUT_OF_HOST_MEMORY;
1924
1925 if (do_flush)
1926 cs_array[0] = pSubmits[i].waitSemaphoreCount ?
1927 queue->device->flush_shader_cs[queue->queue_family_index] :
1928 queue->device->flush_cs[queue->queue_family_index];
1929
1930 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1931 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
1932 pSubmits[i].pCommandBuffers[j]);
1933 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1934
1935 cs_array[j + do_flush] = cmd_buffer->cs;
1936 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
1937 can_patch = false;
1938 }
1939
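/* Submit in chunks of at most max_cs_submission IBs. With a trace BO the
 * chunk size is 1, so a GPU hang can be pinned to a single command
 * buffer below. */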
1940 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
1941 advance = MIN2(max_cs_submission,
1942 pSubmits[i].commandBufferCount + do_flush - j);
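/* The submit's wait semaphores apply only to the first chunk and its
 * signal semaphores only to the last. */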
1943 bool b = j == 0;
1944 bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
1945
1946 if (queue->device->trace_bo)
1947 *queue->device->trace_id_ptr = 0;
1948
1949 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
1950 advance, initial_preamble_cs, continue_preamble_cs,
1951 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
1952 b ? pSubmits[i].waitSemaphoreCount : 0,
1953 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
1954 e ? pSubmits[i].signalSemaphoreCount : 0,
1955 can_patch, base_fence);
1956
1957 if (ret) {
1958 radv_loge("failed to submit CS %d\n", i);
1959 abort();
1960 }
1961 fence_emitted = true;
1962 if (queue->device->trace_bo) {
1963 bool success = queue->device->ws->ctx_wait_idle(
1964 queue->hw_ctx,
1965 radv_queue_family_to_ring(
1966 queue->queue_family_index),
1967 queue->queue_idx);
1968
1969 if (!success) { /* Hang */
1970 radv_dump_trace(queue->device, cs_array[j]);
1971 abort();
1972 }
1973 }
1974 }
1975 free(cs_array);
1976 }
1977
1978 if (fence) {
1979 if (!fence_emitted)
1980 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
1981 &queue->device->empty_cs[queue->queue_family_index],
1982 1, NULL, NULL, NULL, 0, NULL, 0,
1983 false, base_fence);
1984
1985 fence->submitted = true;
1986 }
1987
1988 return VK_SUCCESS;
1989 }
1990
1991 VkResult radv_QueueWaitIdle(
1992 VkQueue _queue)
1993 {
1994 RADV_FROM_HANDLE(radv_queue, queue, _queue);
1995
1996 queue->device->ws->ctx_wait_idle(queue->hw_ctx,
1997 radv_queue_family_to_ring(queue->queue_family_index),
1998 queue->queue_idx);
1999 return VK_SUCCESS;
2000 }
2001
2002 VkResult radv_DeviceWaitIdle(
2003 VkDevice _device)
2004 {
2005 RADV_FROM_HANDLE(radv_device, device, _device);
2006
2007 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2008 for (unsigned q = 0; q < device->queue_count[i]; q++) {
2009 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
2010 }
2011 }
2012 return VK_SUCCESS;
2013 }
2014
2015 PFN_vkVoidFunction radv_GetInstanceProcAddr(
2016 VkInstance instance,
2017 const char* pName)
2018 {
2019 return radv_lookup_entrypoint(pName);
2020 }
2021
2022 /* The loader wants us to expose a second GetInstanceProcAddr function
2023 * to work around certain LD_PRELOAD issues seen in apps.
2024 */
2025 PUBLIC
2026 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2027 VkInstance instance,
2028 const char* pName);
2029
2030 PUBLIC
2031 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2032 VkInstance instance,
2033 const char* pName)
2034 {
2035 return radv_GetInstanceProcAddr(instance, pName);
2036 }
2037
2038 PFN_vkVoidFunction radv_GetDeviceProcAddr(
2039 VkDevice device,
2040 const char* pName)
2041 {
2042 return radv_lookup_entrypoint(pName);
2043 }
2044
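/* Export a memory object as a shareable fd. Image metadata (tiling layout)
 * is attached to the BO first so an external consumer can interpret it. */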
2045 bool radv_get_memory_fd(struct radv_device *device,
2046 struct radv_device_memory *memory,
2047 int *pFD)
2048 {
2049 struct radeon_bo_metadata metadata;
2050
2051 if (memory->image) {
2052 radv_init_metadata(device, memory->image, &metadata);
2053 device->ws->buffer_set_metadata(memory->bo, &metadata);
2054 }
2055
2056 return device->ws->buffer_get_fd(device->ws, memory->bo,
2057 pFD);
2058 }
2059
2060 VkResult radv_AllocateMemory(
2061 VkDevice _device,
2062 const VkMemoryAllocateInfo* pAllocateInfo,
2063 const VkAllocationCallbacks* pAllocator,
2064 VkDeviceMemory* pMem)
2065 {
2066 RADV_FROM_HANDLE(radv_device, device, _device);
2067 struct radv_device_memory *mem;
2068 VkResult result;
2069 enum radeon_bo_domain domain;
2070 uint32_t flags = 0;
2071
2072 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2073
2074 if (pAllocateInfo->allocationSize == 0) {
2075 /* Apparently, this is allowed */
2076 *pMem = VK_NULL_HANDLE;
2077 return VK_SUCCESS;
2078 }
2079 const VkMemoryDedicatedAllocateInfoKHR *dedicate_info =
2080 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR);
2081
2082 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
2083 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2084 if (mem == NULL)
2085 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2086
2087 if (dedicate_info) {
2088 mem->image = radv_image_from_handle(dedicate_info->image);
2089 mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
2090 } else {
2091 mem->image = NULL;
2092 mem->buffer = NULL;
2093 }
2094
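/* Translate the radv memory type into a winsys domain and access flags:
 * the GTT types are host-visible, plain VRAM is device-local without CPU
 * access, and the write-combined GTT type additionally gets GTT_WC. */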
2095 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
2096 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
2097 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
2098 domain = RADEON_DOMAIN_GTT;
2099 else
2100 domain = RADEON_DOMAIN_VRAM;
2101
2102 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
2103 flags |= RADEON_FLAG_NO_CPU_ACCESS;
2104 else
2105 flags |= RADEON_FLAG_CPU_ACCESS;
2106
2107 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
2108 flags |= RADEON_FLAG_GTT_WC;
2109
2110 mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
2111 domain, flags);
2112
2113 if (!mem->bo) {
2114 result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
2115 goto fail;
2116 }
2117 mem->type_index = pAllocateInfo->memoryTypeIndex;
2118
2119 *pMem = radv_device_memory_to_handle(mem);
2120
2121 return VK_SUCCESS;
2122
2123 fail:
2124 vk_free2(&device->alloc, pAllocator, mem);
2125
2126 return result;
2127 }
2128
2129 void radv_FreeMemory(
2130 VkDevice _device,
2131 VkDeviceMemory _mem,
2132 const VkAllocationCallbacks* pAllocator)
2133 {
2134 RADV_FROM_HANDLE(radv_device, device, _device);
2135 RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
2136
2137 if (mem == NULL)
2138 return;
2139
2140 device->ws->buffer_destroy(mem->bo);
2141 mem->bo = NULL;
2142
2143 vk_free2(&device->alloc, pAllocator, mem);
2144 }
2145
2146 VkResult radv_MapMemory(
2147 VkDevice _device,
2148 VkDeviceMemory _memory,
2149 VkDeviceSize offset,
2150 VkDeviceSize size,
2151 VkMemoryMapFlags flags,
2152 void** ppData)
2153 {
2154 RADV_FROM_HANDLE(radv_device, device, _device);
2155 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2156
2157 if (mem == NULL) {
2158 *ppData = NULL;
2159 return VK_SUCCESS;
2160 }
2161
2162 *ppData = device->ws->buffer_map(mem->bo);
2163 if (*ppData) {
2164 *ppData = (char *)*ppData + offset; /* avoid void* arithmetic (GNU extension) */
2165 return VK_SUCCESS;
2166 }
2167
2168 return VK_ERROR_MEMORY_MAP_FAILED;
2169 }
2170
2171 void radv_UnmapMemory(
2172 VkDevice _device,
2173 VkDeviceMemory _memory)
2174 {
2175 RADV_FROM_HANDLE(radv_device, device, _device);
2176 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2177
2178 if (mem == NULL)
2179 return;
2180
2181 device->ws->buffer_unmap(mem->bo);
2182 }
2183
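/* All host-visible memory types exposed by radv are also host-coherent,
 * so flushing and invalidating mapped ranges are no-ops. */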
2184 VkResult radv_FlushMappedMemoryRanges(
2185 VkDevice _device,
2186 uint32_t memoryRangeCount,
2187 const VkMappedMemoryRange* pMemoryRanges)
2188 {
2189 return VK_SUCCESS;
2190 }
2191
2192 VkResult radv_InvalidateMappedMemoryRanges(
2193 VkDevice _device,
2194 uint32_t memoryRangeCount,
2195 const VkMappedMemoryRange* pMemoryRanges)
2196 {
2197 return VK_SUCCESS;
2198 }
2199
2200 void radv_GetBufferMemoryRequirements(
2201 VkDevice device,
2202 VkBuffer _buffer,
2203 VkMemoryRequirements* pMemoryRequirements)
2204 {
2205 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2206
2207 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2208
2209 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2210 pMemoryRequirements->alignment = 4096;
2211 else
2212 pMemoryRequirements->alignment = 16;
2213
2214 pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
2215 }
2216
2217 void radv_GetBufferMemoryRequirements2KHR(
2218 VkDevice device,
2219 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
2220 VkMemoryRequirements2KHR* pMemoryRequirements)
2221 {
2222 radv_GetBufferMemoryRequirements(device, pInfo->buffer,
2223 &pMemoryRequirements->memoryRequirements);
2224
2225 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2226 switch (ext->sType) {
2227 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2228 VkMemoryDedicatedRequirementsKHR *req =
2229 (VkMemoryDedicatedRequirementsKHR *) ext;
2230 req->requiresDedicatedAllocation = false;
2231 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2232 break;
2233 }
2234 default:
2235 break;
2236 }
2237 }
2238 }
2239
2240 void radv_GetImageMemoryRequirements(
2241 VkDevice device,
2242 VkImage _image,
2243 VkMemoryRequirements* pMemoryRequirements)
2244 {
2245 RADV_FROM_HANDLE(radv_image, image, _image);
2246
2247 pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
2248
2249 pMemoryRequirements->size = image->size;
2250 pMemoryRequirements->alignment = image->alignment;
2251 }
2252
2253 void radv_GetImageMemoryRequirements2KHR(
2254 VkDevice device,
2255 const VkImageMemoryRequirementsInfo2KHR* pInfo,
2256 VkMemoryRequirements2KHR* pMemoryRequirements)
2257 {
2258 radv_GetImageMemoryRequirements(device, pInfo->image,
2259 &pMemoryRequirements->memoryRequirements);
2260
2261 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2262 switch (ext->sType) {
2263 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
2264 VkMemoryDedicatedRequirementsKHR *req =
2265 (VkMemoryDedicatedRequirementsKHR *) ext;
2266 req->requiresDedicatedAllocation = false;
2267 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2268 break;
2269 }
2270 default:
2271 break;
2272 }
2273 }
2274 }
2275
2276 void radv_GetImageSparseMemoryRequirements(
2277 VkDevice device,
2278 VkImage image,
2279 uint32_t* pSparseMemoryRequirementCount,
2280 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
2281 {
2282 stub();
2283 }
2284
2285 void radv_GetImageSparseMemoryRequirements2KHR(
2286 VkDevice device,
2287 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
2288 uint32_t* pSparseMemoryRequirementCount,
2289 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
2290 {
2291 stub();
2292 }
2293
2294 void radv_GetDeviceMemoryCommitment(
2295 VkDevice device,
2296 VkDeviceMemory memory,
2297 VkDeviceSize* pCommittedMemoryInBytes)
2298 {
2299 *pCommittedMemoryInBytes = 0;
2300 }
2301
2302 VkResult radv_BindBufferMemory(
2303 VkDevice device,
2304 VkBuffer _buffer,
2305 VkDeviceMemory _memory,
2306 VkDeviceSize memoryOffset)
2307 {
2308 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2309 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2310
2311 if (mem) {
2312 buffer->bo = mem->bo;
2313 buffer->offset = memoryOffset;
2314 } else {
2315 buffer->bo = NULL;
2316 buffer->offset = 0;
2317 }
2318
2319 return VK_SUCCESS;
2320 }
2321
2322 VkResult radv_BindImageMemory(
2323 VkDevice device,
2324 VkImage _image,
2325 VkDeviceMemory _memory,
2326 VkDeviceSize memoryOffset)
2327 {
2328 RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
2329 RADV_FROM_HANDLE(radv_image, image, _image);
2330
2331 if (mem) {
2332 image->bo = mem->bo;
2333 image->offset = memoryOffset;
2334 } else {
2335 image->bo = NULL;
2336 image->offset = 0;
2337 }
2338
2339 return VK_SUCCESS;
2340 }
2341
2342
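/* Sparse binding: attach or detach backing memory for ranges of a virtual
 * BO. A VK_NULL_HANDLE memory object unbinds the range. */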
2343 static void
2344 radv_sparse_buffer_bind_memory(struct radv_device *device,
2345 const VkSparseBufferMemoryBindInfo *bind)
2346 {
2347 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
2348
2349 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2350 struct radv_device_memory *mem = NULL;
2351
2352 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2353 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2354
2355 device->ws->buffer_virtual_bind(buffer->bo,
2356 bind->pBinds[i].resourceOffset,
2357 bind->pBinds[i].size,
2358 mem ? mem->bo : NULL,
2359 bind->pBinds[i].memoryOffset);
2360 }
2361 }
2362
2363 static void
2364 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
2365 const VkSparseImageOpaqueMemoryBindInfo *bind)
2366 {
2367 RADV_FROM_HANDLE(radv_image, image, bind->image);
2368
2369 for (uint32_t i = 0; i < bind->bindCount; ++i) {
2370 struct radv_device_memory *mem = NULL;
2371
2372 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
2373 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
2374
2375 device->ws->buffer_virtual_bind(image->bo,
2376 bind->pBinds[i].resourceOffset,
2377 bind->pBinds[i].size,
2378 mem ? mem->bo : NULL,
2379 bind->pBinds[i].memoryOffset);
2380 }
2381 }
2382
2383 VkResult radv_QueueBindSparse(
2384 VkQueue _queue,
2385 uint32_t bindInfoCount,
2386 const VkBindSparseInfo* pBindInfo,
2387 VkFence _fence)
2388 {
2389 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2390 RADV_FROM_HANDLE(radv_queue, queue, _queue);
2391 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
2392 bool fence_emitted = false;
2393
2394 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2395 for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
2396 radv_sparse_buffer_bind_memory(queue->device,
2397 pBindInfo[i].pBufferBinds + j);
2398 }
2399
2400 for (uint32_t j = 0; j < pBindInfo[i].imageOpaqueBindCount; ++j) {
2401 radv_sparse_image_opaque_bind_memory(queue->device,
2402 pBindInfo[i].pImageOpaqueBinds + j);
2403 }
2404
2405 if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
2406 queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
2407 &queue->device->empty_cs[queue->queue_family_index],
2408 1, NULL, NULL,
2409 (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
2410 pBindInfo[i].waitSemaphoreCount,
2411 (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
2412 pBindInfo[i].signalSemaphoreCount,
2413 false, base_fence);
2414 fence_emitted = true;
2415 if (fence)
2416 fence->submitted = true;
2417 }
2418 }
2419
2420 if (fence && !fence_emitted) {
2421 fence->signalled = true;
2422 }
2423
2424 return VK_SUCCESS;
2425 }
2426
2427 VkResult radv_CreateFence(
2428 VkDevice _device,
2429 const VkFenceCreateInfo* pCreateInfo,
2430 const VkAllocationCallbacks* pAllocator,
2431 VkFence* pFence)
2432 {
2433 RADV_FROM_HANDLE(radv_device, device, _device);
2434 struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
2435 sizeof(*fence), 8,
2436 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2437
2438 if (!fence)
2439 return VK_ERROR_OUT_OF_HOST_MEMORY;
2440
2441 memset(fence, 0, sizeof(*fence));
2442 fence->submitted = false;
2443 fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
2444 fence->fence = device->ws->create_fence();
2445 if (!fence->fence) {
2446 vk_free2(&device->alloc, pAllocator, fence);
2447 return VK_ERROR_OUT_OF_HOST_MEMORY;
2448 }
2449
2450 *pFence = radv_fence_to_handle(fence);
2451
2452 return VK_SUCCESS;
2453 }
2454
2455 void radv_DestroyFence(
2456 VkDevice _device,
2457 VkFence _fence,
2458 const VkAllocationCallbacks* pAllocator)
2459 {
2460 RADV_FROM_HANDLE(radv_device, device, _device);
2461 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2462
2463 if (!fence)
2464 return;
2465 device->ws->destroy_fence(fence->fence);
2466 vk_free2(&device->alloc, pAllocator, fence);
2467 }
2468
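/* Convert a relative timeout into an absolute CLOCK_MONOTONIC deadline in
 * nanoseconds, clamping so the addition below cannot overflow. */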
2469 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
2470 {
2471 uint64_t current_time;
2472 struct timespec tv;
2473
2474 clock_gettime(CLOCK_MONOTONIC, &tv);
2475 current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
2476
2477 timeout = MIN2(UINT64_MAX - current_time, timeout);
2478
2479 return current_time + timeout;
2480 }
2481
2482 VkResult radv_WaitForFences(
2483 VkDevice _device,
2484 uint32_t fenceCount,
2485 const VkFence* pFences,
2486 VkBool32 waitAll,
2487 uint64_t timeout)
2488 {
2489 RADV_FROM_HANDLE(radv_device, device, _device);
2490 timeout = radv_get_absolute_timeout(timeout);
2491
2492 if (!waitAll && fenceCount > 1) {
2493 fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
2494 }
2495
2496 for (uint32_t i = 0; i < fenceCount; ++i) {
2497 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2498 bool expired = false;
2499
2500 if (fence->signalled)
2501 continue;
2502
2503 if (!fence->submitted)
2504 return VK_TIMEOUT;
2505
2506 expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
2507 if (!expired)
2508 return VK_TIMEOUT;
2509
2510 fence->signalled = true;
2511 }
2512
2513 return VK_SUCCESS;
2514 }
2515
2516 VkResult radv_ResetFences(VkDevice device,
2517 uint32_t fenceCount,
2518 const VkFence *pFences)
2519 {
2520 for (unsigned i = 0; i < fenceCount; ++i) {
2521 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
2522 fence->submitted = fence->signalled = false;
2523 }
2524
2525 return VK_SUCCESS;
2526 }
2527
2528 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
2529 {
2530 RADV_FROM_HANDLE(radv_device, device, _device);
2531 RADV_FROM_HANDLE(radv_fence, fence, _fence);
2532
2533 if (fence->signalled)
2534 return VK_SUCCESS;
2535 if (!fence->submitted)
2536 return VK_NOT_READY;
2537
2538 if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
2539 return VK_NOT_READY;
2540
2541 return VK_SUCCESS;
2542 }
2543
2544
2545 /* Queue semaphore functions */
2546
2547 VkResult radv_CreateSemaphore(
2548 VkDevice _device,
2549 const VkSemaphoreCreateInfo* pCreateInfo,
2550 const VkAllocationCallbacks* pAllocator,
2551 VkSemaphore* pSemaphore)
2552 {
2553 RADV_FROM_HANDLE(radv_device, device, _device);
2554 struct radeon_winsys_sem *sem;
2555
2556 sem = device->ws->create_sem(device->ws);
2557 if (!sem)
2558 return VK_ERROR_OUT_OF_HOST_MEMORY;
2559
2560 *pSemaphore = radeon_winsys_sem_to_handle(sem);
2561 return VK_SUCCESS;
2562 }
2563
2564 void radv_DestroySemaphore(
2565 VkDevice _device,
2566 VkSemaphore _semaphore,
2567 const VkAllocationCallbacks* pAllocator)
2568 {
2569 RADV_FROM_HANDLE(radv_device, device, _device);
2570 RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
2571 if (!_semaphore)
2572 return;
2573
2574 device->ws->destroy_sem(sem);
2575 }
2576
2577 VkResult radv_CreateEvent(
2578 VkDevice _device,
2579 const VkEventCreateInfo* pCreateInfo,
2580 const VkAllocationCallbacks* pAllocator,
2581 VkEvent* pEvent)
2582 {
2583 RADV_FROM_HANDLE(radv_device, device, _device);
2584 struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
2585 sizeof(*event), 8,
2586 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2587
2588 if (!event)
2589 return VK_ERROR_OUT_OF_HOST_MEMORY;
2590
2591 event->bo = device->ws->buffer_create(device->ws, 8, 8,
2592 RADEON_DOMAIN_GTT,
2593 RADEON_FLAG_CPU_ACCESS);
2594 if (!event->bo) {
2595 vk_free2(&device->alloc, pAllocator, event);
2596 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2597 }
2598
2599 event->map = (uint64_t*)device->ws->buffer_map(event->bo);
if (!event->map) {
device->ws->buffer_destroy(event->bo);
vk_free2(&device->alloc, pAllocator, event);
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
2600
2601 *pEvent = radv_event_to_handle(event);
2602
2603 return VK_SUCCESS;
2604 }
2605
2606 void radv_DestroyEvent(
2607 VkDevice _device,
2608 VkEvent _event,
2609 const VkAllocationCallbacks* pAllocator)
2610 {
2611 RADV_FROM_HANDLE(radv_device, device, _device);
2612 RADV_FROM_HANDLE(radv_event, event, _event);
2613
2614 if (!event)
2615 return;
2616 device->ws->buffer_destroy(event->bo);
2617 vk_free2(&device->alloc, pAllocator, event);
2618 }
2619
2620 VkResult radv_GetEventStatus(
2621 VkDevice _device,
2622 VkEvent _event)
2623 {
2624 RADV_FROM_HANDLE(radv_event, event, _event);
2625
2626 if (*event->map == 1)
2627 return VK_EVENT_SET;
2628 return VK_EVENT_RESET;
2629 }
2630
2631 VkResult radv_SetEvent(
2632 VkDevice _device,
2633 VkEvent _event)
2634 {
2635 RADV_FROM_HANDLE(radv_event, event, _event);
2636 *event->map = 1;
2637
2638 return VK_SUCCESS;
2639 }
2640
2641 VkResult radv_ResetEvent(
2642 VkDevice _device,
2643 VkEvent _event)
2644 {
2645 RADV_FROM_HANDLE(radv_event, event, _event);
2646 *event->map = 0;
2647
2648 return VK_SUCCESS;
2649 }
2650
2651 VkResult radv_CreateBuffer(
2652 VkDevice _device,
2653 const VkBufferCreateInfo* pCreateInfo,
2654 const VkAllocationCallbacks* pAllocator,
2655 VkBuffer* pBuffer)
2656 {
2657 RADV_FROM_HANDLE(radv_device, device, _device);
2658 struct radv_buffer *buffer;
2659
2660 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2661
2662 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2663 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2664 if (buffer == NULL)
2665 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2666
2667 buffer->size = pCreateInfo->size;
2668 buffer->usage = pCreateInfo->usage;
2669 buffer->bo = NULL;
2670 buffer->offset = 0;
2671 buffer->flags = pCreateInfo->flags;
2672
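/* Sparse buffers get a virtual BO up front; ordinary buffers receive their
 * backing store later, in vkBindBufferMemory(). */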
2673 if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
2674 buffer->bo = device->ws->buffer_create(device->ws,
2675 align64(buffer->size, 4096),
2676 4096, 0, RADEON_FLAG_VIRTUAL);
2677 if (!buffer->bo) {
2678 vk_free2(&device->alloc, pAllocator, buffer);
2679 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
2680 }
2681 }
2682
2683 *pBuffer = radv_buffer_to_handle(buffer);
2684
2685 return VK_SUCCESS;
2686 }
2687
2688 void radv_DestroyBuffer(
2689 VkDevice _device,
2690 VkBuffer _buffer,
2691 const VkAllocationCallbacks* pAllocator)
2692 {
2693 RADV_FROM_HANDLE(radv_device, device, _device);
2694 RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
2695
2696 if (!buffer)
2697 return;
2698
2699 if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
2700 device->ws->buffer_destroy(buffer->bo);
2701
2702 vk_free2(&device->alloc, pAllocator, buffer);
2703 }
2704
2705 static inline unsigned
2706 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
2707 {
2708 if (stencil)
2709 return image->surface.u.legacy.stencil_tiling_index[level];
2710 else
2711 return image->surface.u.legacy.tiling_index[level];
2712 }
2713
2714 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
2715 {
2716 return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count;
2717 }
2718
2719 static void
2720 radv_initialise_color_surface(struct radv_device *device,
2721 struct radv_color_buffer_info *cb,
2722 struct radv_image_view *iview)
2723 {
2724 const struct vk_format_description *desc;
2725 unsigned ntype, format, swap, endian;
2726 unsigned blend_clamp = 0, blend_bypass = 0;
2727 uint64_t va;
2728 const struct radeon_surf *surf = &iview->image->surface;
2729
2730 desc = vk_format_description(iview->vk_format);
2731
2732 memset(cb, 0, sizeof(*cb));
2733
2734 /* Intensity is implemented as Red, so treat it that way. */
2735 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
2736
2737 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2738
2739 if (device->physical_device->rad_info.chip_class >= GFX9) {
2740 struct gfx9_surf_meta_flags meta;
2741 if (iview->image->dcc_offset)
2742 meta = iview->image->surface.u.gfx9.dcc;
2743 else
2744 meta = iview->image->surface.u.gfx9.cmask;
2745
2746 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2747 S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
2748 S_028C74_RB_ALIGNED(meta.rb_aligned) |
2749 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
2750
2751 va += iview->image->surface.u.gfx9.surf_offset >> 8;
2752 } else {
2753 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
2754 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
2755
2756 va += level_info->offset;
2757
2758 pitch_tile_max = level_info->nblk_x / 8 - 1;
2759 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
2760 tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
2761
2762 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
2763 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
2764 cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
2765
2766 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
2767 cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
2768
2769 if (iview->image->fmask.size) {
2770 if (device->physical_device->rad_info.chip_class >= CIK)
2771 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
2772 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
2773 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
2774 } else {
2775 /* This must be set for fast clear to work without FMASK. */
2776 if (device->physical_device->rad_info.chip_class >= CIK)
2777 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
2778 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
2779 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
2780 }
2781 }
2782
2783 cb->cb_color_base = va >> 8;
2784
2785 /* CMASK variables */
2786 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2787 va += iview->image->cmask.offset;
2788 cb->cb_color_cmask = va >> 8;
2789
2790 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2791 va += iview->image->dcc_offset;
2792 cb->cb_dcc_base = va >> 8;
2793
2794 uint32_t max_slice = radv_surface_layer_count(iview);
2795 cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
2796 S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
2797
2798 if (iview->image->info.samples > 1) {
2799 unsigned log_samples = util_logbase2(iview->image->info.samples);
2800
2801 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
2802 S_028C74_NUM_FRAGMENTS(log_samples);
2803 }
2804
2805 if (iview->image->fmask.size) {
2806 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
2807 cb->cb_color_fmask = va >> 8;
2808 } else {
2809 cb->cb_color_fmask = cb->cb_color_base;
2810 }
2811
2812 ntype = radv_translate_color_numformat(iview->vk_format,
2813 desc,
2814 vk_format_get_first_non_void_channel(iview->vk_format));
2815 format = radv_translate_colorformat(iview->vk_format);
2816 if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
2817 radv_finishme("Illegal color\n");
2818 swap = radv_translate_colorswap(iview->vk_format, FALSE);
2819 endian = radv_colorformat_endian_swap(format);
2820
2821 /* blend clamp should be set for all NORM/SRGB types */
2822 if (ntype == V_028C70_NUMBER_UNORM ||
2823 ntype == V_028C70_NUMBER_SNORM ||
2824 ntype == V_028C70_NUMBER_SRGB)
2825 blend_clamp = 1;
2826
2827 /* set blend bypass according to docs if SINT/UINT or
2828 8/24 COLOR variants */
2829 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
2830 format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
2831 format == V_028C70_COLOR_X24_8_32_FLOAT) {
2832 blend_clamp = 0;
2833 blend_bypass = 1;
2834 }
2842 cb->cb_color_info = S_028C70_FORMAT(format) |
2843 S_028C70_COMP_SWAP(swap) |
2844 S_028C70_BLEND_CLAMP(blend_clamp) |
2845 S_028C70_BLEND_BYPASS(blend_bypass) |
2846 S_028C70_SIMPLE_FLOAT(1) |
2847 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
2848 ntype != V_028C70_NUMBER_SNORM &&
2849 ntype != V_028C70_NUMBER_SRGB &&
2850 format != V_028C70_COLOR_8_24 &&
2851 format != V_028C70_COLOR_24_8) |
2852 S_028C70_NUMBER_TYPE(ntype) |
2853 S_028C70_ENDIAN(endian);
2854 if (iview->image->info.samples > 1 &&
2855 iview->image->fmask.size)
2856 cb->cb_color_info |= S_028C70_COMPRESSION(1);
2857
2858 if (iview->image->cmask.size &&
2859 !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
2860 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
2861
2862 if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
2863 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
2864
2865 if (device->physical_device->rad_info.chip_class >= VI) {
2866 unsigned max_uncompressed_block_size = 2;
2867 if (iview->image->info.samples > 1) {
2868 if (iview->image->surface.bpe == 1)
2869 max_uncompressed_block_size = 0;
2870 else if (iview->image->surface.bpe == 2)
2871 max_uncompressed_block_size = 1;
2872 }
2873
2874 cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
2875 S_028C78_INDEPENDENT_64B_BLOCKS(1);
2876 }
2877
2878 /* This must be set for fast clear to work without FMASK. */
2879 if (!iview->image->fmask.size &&
2880 device->physical_device->rad_info.chip_class == SI) {
2881 unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
2882 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
2883 }
2884
2885 if (device->physical_device->rad_info.chip_class >= GFX9) {
2886 uint32_t max_slice = radv_surface_layer_count(iview);
2887 unsigned mip0_depth = iview->base_layer + max_slice - 1;
2888
2889 cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
2890 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
2891 S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
2892 cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
2893 S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
2894 S_028C68_MAX_MIP(iview->image->info.levels);
2895
2896 cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
2897
2898 }
2899 }
2900
2901 static void
2902 radv_initialise_ds_surface(struct radv_device *device,
2903 struct radv_ds_buffer_info *ds,
2904 struct radv_image_view *iview)
2905 {
2906 unsigned level = iview->base_mip;
2907 unsigned format, stencil_format;
2908 uint64_t va, s_offs, z_offs;
2909 bool stencil_only = false;
2910 memset(ds, 0, sizeof(*ds));
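/* Pick the polygon-offset (depth-bias) scale matching the precision of the
 * depth format; float formats also set the float-format flag. */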
2911 switch (iview->image->vk_format) {
2912 case VK_FORMAT_D24_UNORM_S8_UINT:
2913 case VK_FORMAT_X8_D24_UNORM_PACK32:
2914 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
2915 ds->offset_scale = 2.0f;
2916 break;
2917 case VK_FORMAT_D16_UNORM:
2918 case VK_FORMAT_D16_UNORM_S8_UINT:
2919 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
2920 ds->offset_scale = 4.0f;
2921 break;
2922 case VK_FORMAT_D32_SFLOAT:
2923 case VK_FORMAT_D32_SFLOAT_S8_UINT:
2924 ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
2925 S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
2926 ds->offset_scale = 1.0f;
2927 break;
2928 case VK_FORMAT_S8_UINT:
2929 stencil_only = true;
2930 break;
2931 default:
2932 break;
2933 }
2934
2935 format = radv_translate_dbformat(iview->image->vk_format);
2936 stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
2937 V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
2938
2939 uint32_t max_slice = radv_surface_layer_count(iview);
2940 ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
2941 S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
2942
2943 ds->db_htile_data_base = 0;
2944 ds->db_htile_surface = 0;
2945
2946 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
2947 s_offs = z_offs = va;
2948
2949 if (device->physical_device->rad_info.chip_class >= GFX9) {
2950 assert(iview->image->surface.u.gfx9.surf_offset == 0);
2951 s_offs += iview->image->surface.u.gfx9.stencil_offset;
2952
2953 ds->db_z_info = S_028038_FORMAT(format) |
2954 S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
2955 S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
2956 S_028038_MAXMIP(iview->image->info.levels - 1);
2957 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
2958 S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
2959
2960 ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
2961 ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
2962 ds->db_depth_view |= S_028008_MIPID(level);
2963
2964 ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
2965 S_02801C_Y_MAX(iview->image->info.height - 1);
2966
2967 /* Only use HTILE for the first level. */
2968 if (iview->image->surface.htile_size && !level) {
2969 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
2970
2971 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
2972 /* Use all of the htile_buffer for depth if there's no stencil. */
2973 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
2974 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
2975 iview->image->htile_offset;
2976 ds->db_htile_data_base = va >> 8;
2977 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
2978 S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
2979 S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
2980 }
2981 } else {
2982 const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
2983
2984 if (stencil_only)
2985 level_info = &iview->image->surface.u.legacy.stencil_level[level];
2986
2987 z_offs += iview->image->surface.u.legacy.level[level].offset;
2988 s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
2989
2990 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
2991 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
2992 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
2993
2994 if (iview->image->info.samples > 1)
2995 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
2996
2997 if (device->physical_device->rad_info.chip_class >= CIK) {
2998 struct radeon_info *info = &device->physical_device->rad_info;
2999 unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
3000 unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
3001 unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
3002 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
3003 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
3004 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
3005
3006 if (stencil_only)
3007 tile_mode = stencil_tile_mode;
3008
3009 ds->db_depth_info |=
3010 S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
3011 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
3012 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
3013 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
3014 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
3015 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
3016 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
3017 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
3018 } else {
3019 unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
3020 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
3021 tile_mode_index = si_tile_mode_index(iview->image, level, true);
3022 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
3023 }
3024
3025 ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
3026 S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
3027 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
3028
3029 if (iview->image->surface.htile_size && !level) {
3030 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
3031
3032 if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
3033 /* Use all of the htile_buffer for depth if there's no stencil. */
3034 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
3035
3036 va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
3037 iview->image->htile_offset;
3038 ds->db_htile_data_base = va >> 8;
3039 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
3040 }
3041 }
3042
3043 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
3044 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
3045 }
3046
3047 VkResult radv_CreateFramebuffer(
3048 VkDevice _device,
3049 const VkFramebufferCreateInfo* pCreateInfo,
3050 const VkAllocationCallbacks* pAllocator,
3051 VkFramebuffer* pFramebuffer)
3052 {
3053 RADV_FROM_HANDLE(radv_device, device, _device);
3054 struct radv_framebuffer *framebuffer;
3055
3056 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3057
3058 size_t size = sizeof(*framebuffer) +
3059 sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
3060 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
3061 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3062 if (framebuffer == NULL)
3063 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3064
3065 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3066 framebuffer->width = pCreateInfo->width;
3067 framebuffer->height = pCreateInfo->height;
3068 framebuffer->layers = pCreateInfo->layers;
3069 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3070 VkImageView _iview = pCreateInfo->pAttachments[i];
3071 struct radv_image_view *iview = radv_image_view_from_handle(_iview);
3072 framebuffer->attachments[i].attachment = iview;
3073 if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
3074 radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
3075 } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
3076 radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
3077 }
3078 framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
3079 framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
3080 framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview));
3081 }
3082
3083 *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
3084 return VK_SUCCESS;
3085 }
3086
3087 void radv_DestroyFramebuffer(
3088 VkDevice _device,
3089 VkFramebuffer _fb,
3090 const VkAllocationCallbacks* pAllocator)
3091 {
3092 RADV_FROM_HANDLE(radv_device, device, _device);
3093 RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
3094
3095 if (!fb)
3096 return;
3097 vk_free2(&device->alloc, pAllocator, fb);
3098 }
3099
3100 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
3101 {
3102 switch (address_mode) {
3103 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3104 return V_008F30_SQ_TEX_WRAP;
3105 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3106 return V_008F30_SQ_TEX_MIRROR;
3107 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3108 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
3109 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3110 return V_008F30_SQ_TEX_CLAMP_BORDER;
3111 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3112 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
3113 default:
3114 unreachable("illegal tex wrap mode");
3115 break;
3116 }
3117 }
3118
3119 static unsigned
3120 radv_tex_compare(VkCompareOp op)
3121 {
3122 switch (op) {
3123 case VK_COMPARE_OP_NEVER:
3124 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
3125 case VK_COMPARE_OP_LESS:
3126 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
3127 case VK_COMPARE_OP_EQUAL:
3128 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
3129 case VK_COMPARE_OP_LESS_OR_EQUAL:
3130 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
3131 case VK_COMPARE_OP_GREATER:
3132 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
3133 case VK_COMPARE_OP_NOT_EQUAL:
3134 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
3135 case VK_COMPARE_OP_GREATER_OR_EQUAL:
3136 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
3137 case VK_COMPARE_OP_ALWAYS:
3138 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
3139 default:
3140 unreachable("illegal compare mode");
3141 break;
3142 }
3143 }
3144
3145 static unsigned
3146 radv_tex_filter(VkFilter filter, unsigned max_aniso)
3147 {
3148 switch (filter) {
3149 case VK_FILTER_NEAREST:
3150 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
3151 V_008F38_SQ_TEX_XY_FILTER_POINT);
3152 case VK_FILTER_LINEAR:
3153 return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
3154 V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
3155 case VK_FILTER_CUBIC_IMG:
3156 default:
3157 fprintf(stderr, "illegal texture filter\n");
3158 return 0;
3159 }
3160 }
3161
3162 static unsigned
3163 radv_tex_mipfilter(VkSamplerMipmapMode mode)
3164 {
3165 switch (mode) {
3166 case VK_SAMPLER_MIPMAP_MODE_NEAREST:
3167 return V_008F38_SQ_TEX_Z_FILTER_POINT;
3168 case VK_SAMPLER_MIPMAP_MODE_LINEAR:
3169 return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
3170 default:
3171 return V_008F38_SQ_TEX_Z_FILTER_NONE;
3172 }
3173 }
3174
3175 static unsigned
3176 radv_tex_bordercolor(VkBorderColor bcolor)
3177 {
3178 switch (bcolor) {
3179 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
3180 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
3181 return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
3182 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
3183 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
3184 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
3185 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
3186 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
3187 return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
3188 default:
3189 break;
3190 }
3191 return 0;
3192 }
3193
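/* Encode maxAnisotropy as the log2 ratio the hardware expects:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4. */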
3194 static unsigned
3195 radv_tex_aniso_filter(unsigned filter)
3196 {
3197 if (filter < 2)
3198 return 0;
3199 if (filter < 4)
3200 return 1;
3201 if (filter < 8)
3202 return 2;
3203 if (filter < 16)
3204 return 3;
3205 return 4;
3206 }
3207
3208 static void
3209 radv_init_sampler(struct radv_device *device,
3210 struct radv_sampler *sampler,
3211 const VkSamplerCreateInfo *pCreateInfo)
3212 {
3213 uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
3214 (uint32_t) pCreateInfo->maxAnisotropy : 0;
3215 uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
3216 bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
3217
3218 sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
3219 S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
3220 S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
3221 S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
3222 S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
3223 S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
3224 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
3225 S_008F30_ANISO_BIAS(max_aniso_ratio) |
3226 S_008F30_DISABLE_CUBE_WRAP(0) |
3227 S_008F30_COMPAT_MODE(is_vi));
3228 sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
3229 S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
3230 S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
3231 sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
3232 S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
3233 S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
3234 S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
3235 S_008F38_MIP_POINT_PRECLAMP(0) |
3236 S_008F38_DISABLE_LSB_CEIL(1) |
3237 S_008F38_FILTER_PREC_FIX(1) |
3238 S_008F38_ANISO_OVERRIDE(is_vi));
3239 sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
3240 S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
3241 }
3242
3243 VkResult radv_CreateSampler(
3244 VkDevice _device,
3245 const VkSamplerCreateInfo* pCreateInfo,
3246 const VkAllocationCallbacks* pAllocator,
3247 VkSampler* pSampler)
3248 {
3249 RADV_FROM_HANDLE(radv_device, device, _device);
3250 struct radv_sampler *sampler;
3251
3252 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
3253
3254 sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
3255 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3256 if (!sampler)
3257 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3258
3259 radv_init_sampler(device, sampler, pCreateInfo);
3260 *pSampler = radv_sampler_to_handle(sampler);
3261
3262 return VK_SUCCESS;
3263 }
3264
3265 void radv_DestroySampler(
3266 VkDevice _device,
3267 VkSampler _sampler,
3268 const VkAllocationCallbacks* pAllocator)
3269 {
3270 RADV_FROM_HANDLE(radv_device, device, _device);
3271 RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
3272
3273 if (!sampler)
3274 return;
3275 vk_free2(&device->alloc, pAllocator, sampler);
3276 }
3277
3278 /* vk_icd.h does not declare this function, so we declare it here to
3279 * suppress Wmissing-prototypes.
3280 */
3281 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3282 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
3283
3284 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
3285 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
3286 {
3287 /* For the full details on loader interface versioning, see
3288 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
3289 * What follows is a condensed summary, to help you navigate the large and
3290 * confusing official doc.
3291 *
3292 * - Loader interface v0 is incompatible with later versions. We don't
3293 * support it.
3294 *
3295 * - In loader interface v1:
3296 * - The first ICD entrypoint called by the loader is
3297 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
3298 * entrypoint.
3299 * - The ICD must statically expose no other Vulkan symbol unless it is
3300 * linked with -Bsymbolic.
3301 * - Each dispatchable Vulkan handle created by the ICD must be
3302 * a pointer to a struct whose first member is VK_LOADER_DATA. The
3303 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
3304 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
3305 * vkDestroySurfaceKHR(). The ICD must be capable of working with
3306 * such loader-managed surfaces.
3307 *
3308 * - Loader interface v2 differs from v1 in:
3309 * - The first ICD entrypoint called by the loader is
3310 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
3311 * statically expose this entrypoint.
3312 *
3313 * - Loader interface v3 differs from v2 in:
3314 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
3315 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
3316 * because the loader no longer does so.
3317 */
3318 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
3319 return VK_SUCCESS;
3320 }