anv: Implement VK_EXT_external_memory_dma_buf
[mesa.git] / src / intel / vulkan / anv_device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <sys/mman.h>
28 #include <sys/sysinfo.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <xf86drm.h>
32
33 #include "anv_private.h"
34 #include "util/strtod.h"
35 #include "util/debug.h"
36 #include "util/build_id.h"
37 #include "util/mesa-sha1.h"
38 #include "vk_util.h"
39
40 #include "genxml/gen7_pack.h"
41
42 static void
43 compiler_debug_log(void *data, const char *fmt, ...)
44 { }
45
46 static void
47 compiler_perf_log(void *data, const char *fmt, ...)
48 {
49 va_list args;
50 va_start(args, fmt);
51
52 if (unlikely(INTEL_DEBUG & DEBUG_PERF))
53 intel_logd_v(fmt, args);
54
55 va_end(args);
56 }
57
58 static VkResult
59 anv_compute_heap_size(int fd, uint64_t *heap_size)
60 {
61 uint64_t gtt_size;
62 if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
63 &gtt_size) == -1) {
64 /* If, for whatever reason, we can't actually get the GTT size from the
65 * kernel (too old?) fall back to the aperture size.
66 */
67 anv_perf_warn(NULL, NULL,
68 "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
69
70 if (anv_gem_get_aperture(fd, &gtt_size) == -1) {
71 return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED,
72 "failed to get aperture size: %m");
73 }
74 }
75
76 /* Query the total ram from the system */
77 struct sysinfo info;
78 sysinfo(&info);
79
80 uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
81
82 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
83 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
84 */
85 uint64_t available_ram;
86 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
87 available_ram = total_ram / 2;
88 else
89 available_ram = total_ram * 3 / 4;
90
91 /* We also want to leave some padding for things we allocate in the driver,
92 * so don't go over 3/4 of the GTT either.
93 */
94 uint64_t available_gtt = gtt_size * 3 / 4;
95
96 *heap_size = MIN2(available_ram, available_gtt);
97
98 return VK_SUCCESS;
99 }
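/* A worked example of the sizing above, with purely illustrative numbers:
 * on a system with 16 GiB of RAM and a 256 GiB GTT, available_ram is
 * 16 GiB * 3 / 4 = 12 GiB and available_gtt is 256 GiB * 3 / 4 = 192 GiB,
 * so the advertised heap size ends up as MIN2(12 GiB, 192 GiB) = 12 GiB.
 */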
100
101 static VkResult
102 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
103 {
104 /* The kernel query only tells us whether or not the kernel supports the
105 * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the
106 * hardware has actual 48bit address support.
107 */
108 device->supports_48bit_addresses =
109 (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd);
110
111 uint64_t heap_size;
112 VkResult result = anv_compute_heap_size(fd, &heap_size);
113 if (result != VK_SUCCESS)
114 return result;
115
116 if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) {
117 /* When running with an overridden PCI ID, we may get a GTT size from
118 * the kernel that is greater than 2 GiB but the execbuf check for 48bit
119 * address support can still fail. Just clamp the address space size to
120 * 2 GiB if we don't have 48-bit support.
121 */
122 intel_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
123 "not support for 48-bit addresses",
124 __FILE__, __LINE__);
125 heap_size = 2ull << 30;
126 }
127
128 if (heap_size <= 3ull * (1ull << 30)) {
129 /* In this case, everything fits nicely into the 32-bit address space,
130 * so there's no need for supporting 48bit addresses on client-allocated
131 * memory objects.
132 */
133 device->memory.heap_count = 1;
134 device->memory.heaps[0] = (struct anv_memory_heap) {
135 .size = heap_size,
136 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
137 .supports_48bit_addresses = false,
138 };
139 } else {
140 /* Not everything will fit nicely into a 32-bit address space. In this
141 * case we need a 64-bit heap. Advertise a small 32-bit heap and a
142 * larger 48-bit heap. If we're in this case, then we have a total heap
143 * size larger than 3GiB which most likely means they have 8 GiB of
144 * video memory and so carving off 1 GiB for the 32-bit heap should be
145 * reasonable.
146 */
147 const uint64_t heap_size_32bit = 1ull << 30;
148 const uint64_t heap_size_48bit = heap_size - heap_size_32bit;
149
150 assert(device->supports_48bit_addresses);
151
152 device->memory.heap_count = 2;
153 device->memory.heaps[0] = (struct anv_memory_heap) {
154 .size = heap_size_48bit,
155 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
156 .supports_48bit_addresses = true,
157 };
158 device->memory.heaps[1] = (struct anv_memory_heap) {
159 .size = heap_size_32bit,
160 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
161 .supports_48bit_addresses = false,
162 };
163 }
164
165 uint32_t type_count = 0;
166 for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) {
167 uint32_t valid_buffer_usage = ~0;
168
169 /* There appears to be a hardware issue in the VF cache where it only
170 * considers the bottom 32 bits of memory addresses. If you happen to
171 * have two vertex buffers which get placed exactly 4 GiB apart and use
172 * them in back-to-back draw calls, you can get collisions. In order to
173 * solve this problem, we require vertex and index buffers be bound to
174 * memory allocated out of the 32-bit heap.
175 */
176 if (device->memory.heaps[heap].supports_48bit_addresses) {
177 valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
178 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
179 }
180
181 if (device->info.has_llc) {
182 /* Big core GPUs share LLC with the CPU and thus one memory type can be
183 * both cached and coherent at the same time.
184 */
185 device->memory.types[type_count++] = (struct anv_memory_type) {
186 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
187 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
188 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
189 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
190 .heapIndex = heap,
191 .valid_buffer_usage = valid_buffer_usage,
192 };
193 } else {
194 /* The spec requires that we expose a host-visible, coherent memory
195 * type, but Atom GPUs don't share LLC. Thus we offer two memory types
196 * to give the application a choice: cached but not coherent, or
197 * coherent but uncached (write-combined).
198 */
199 device->memory.types[type_count++] = (struct anv_memory_type) {
200 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
201 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
202 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
203 .heapIndex = heap,
204 .valid_buffer_usage = valid_buffer_usage,
205 };
206 device->memory.types[type_count++] = (struct anv_memory_type) {
207 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
208 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
209 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
210 .heapIndex = heap,
211 .valid_buffer_usage = valid_buffer_usage,
212 };
213 }
214 }
215 device->memory.type_count = type_count;
216
217 return VK_SUCCESS;
218 }
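/* To illustrate the layout this produces, assuming a hypothetical big-core
 * part with LLC and a 12 GiB total heap size:
 *
 *    heap 0: 11 GiB, 48-bit capable -> one cached + coherent memory type
 *    heap 1:  1 GiB, 32-bit only    -> one cached + coherent memory type
 *
 * Only the type backed by heap 1 advertises index/vertex buffer usage,
 * because of the VF cache restriction described above.
 */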
219
220 static VkResult
221 anv_physical_device_init_uuids(struct anv_physical_device *device)
222 {
223 const struct build_id_note *note =
224 build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
225 if (!note) {
226 return vk_errorf(device->instance, device,
227 VK_ERROR_INITIALIZATION_FAILED,
228 "Failed to find build-id");
229 }
230
231 unsigned build_id_len = build_id_length(note);
232 if (build_id_len < 20) {
233 return vk_errorf(device->instance, device,
234 VK_ERROR_INITIALIZATION_FAILED,
235 "build-id too short. It needs to be a SHA");
236 }
237
238 struct mesa_sha1 sha1_ctx;
239 uint8_t sha1[20];
240 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
241
242 /* The pipeline cache UUID is used for determining when a pipeline cache is
243 * invalid. It needs both a driver build and the PCI ID of the device.
244 */
245 _mesa_sha1_init(&sha1_ctx);
246 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
247 _mesa_sha1_update(&sha1_ctx, &device->chipset_id,
248 sizeof(device->chipset_id));
249 _mesa_sha1_final(&sha1_ctx, sha1);
250 memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
251
252 /* The driver UUID is used for determining sharability of images and memory
253 * between two Vulkan instances in separate processes. People who want to
254 * share memory need to also check the device UUID (below) so all this
255 * needs to be is the build-id.
256 */
257 memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE);
258
259 /* The device UUID uniquely identifies the given device within the machine.
260 * Since we never have more than one device, this doesn't need to be a real
261 * UUID. However, on the off-chance that someone tries to use this to
262 * cache pre-tiled images or something of the like, we use the PCI ID and
263 * some bits of ISL info to ensure that this is safe.
264 */
265 _mesa_sha1_init(&sha1_ctx);
266 _mesa_sha1_update(&sha1_ctx, &device->chipset_id,
267 sizeof(device->chipset_id));
268 _mesa_sha1_update(&sha1_ctx, &device->isl_dev.has_bit6_swizzling,
269 sizeof(device->isl_dev.has_bit6_swizzling));
270 _mesa_sha1_final(&sha1_ctx, sha1);
271 memcpy(device->device_uuid, sha1, VK_UUID_SIZE);
272
273 return VK_SUCCESS;
274 }
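/* One way a client might use the pipeline cache UUID when deciding whether
 * an on-disk cache blob is still usable (a sketch; cached_blob_uuid and the
 * helpers are illustrative, not part of this driver):
 *
 *    VkPhysicalDeviceProperties props;
 *    vkGetPhysicalDeviceProperties(physical_device, &props);
 *    if (memcmp(props.pipelineCacheUUID, cached_blob_uuid, VK_UUID_SIZE) == 0)
 *       reuse_cached_blob();    // same driver build and PCI ID
 *    else
 *       discard_cached_blob();  // stale: driver or device changed
 */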
275
276 static VkResult
277 anv_physical_device_init(struct anv_physical_device *device,
278 struct anv_instance *instance,
279 const char *path)
280 {
281 VkResult result;
282 int fd;
283
284 brw_process_intel_debug_variable();
285
286 fd = open(path, O_RDWR | O_CLOEXEC);
287 if (fd < 0)
288 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
289
290 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
291 device->instance = instance;
292
293 assert(strlen(path) < ARRAY_SIZE(device->path));
294 strncpy(device->path, path, ARRAY_SIZE(device->path));
295
296 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
297 if (!device->chipset_id) {
298 result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
299 goto fail;
300 }
301
302 device->name = gen_get_device_name(device->chipset_id);
303 if (!gen_get_device_info(device->chipset_id, &device->info)) {
304 result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
305 goto fail;
306 }
307
308 if (device->info.is_haswell) {
309 intel_logw("Haswell Vulkan support is incomplete");
310 } else if (device->info.gen == 7 && !device->info.is_baytrail) {
311 intel_logw("Ivy Bridge Vulkan support is incomplete");
312 } else if (device->info.gen == 7 && device->info.is_baytrail) {
313 intel_logw("Bay Trail Vulkan support is incomplete");
314 } else if (device->info.gen >= 8 && device->info.gen <= 9) {
315 /* Broadwell, Cherryview, Skylake, Broxton, Kabylake, and Coffeelake are
316 * as fully supported as anything */
317 } else if (device->info.gen == 10) {
318 intel_logw("Cannonlake Vulkan support is alpha");
319 } else {
320 result = vk_errorf(device->instance, device,
321 VK_ERROR_INCOMPATIBLE_DRIVER,
322 "Vulkan not yet supported on %s", device->name);
323 goto fail;
324 }
325
326 device->cmd_parser_version = -1;
327 if (device->info.gen == 7) {
328 device->cmd_parser_version =
329 anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
330 if (device->cmd_parser_version == -1) {
331 result = vk_errorf(device->instance, device,
332 VK_ERROR_INITIALIZATION_FAILED,
333 "failed to get command parser version");
334 goto fail;
335 }
336 }
337
338 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
339 result = vk_errorf(device->instance, device,
340 VK_ERROR_INITIALIZATION_FAILED,
341 "kernel missing gem wait");
342 goto fail;
343 }
344
345 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
346 result = vk_errorf(device->instance, device,
347 VK_ERROR_INITIALIZATION_FAILED,
348 "kernel missing execbuf2");
349 goto fail;
350 }
351
352 if (!device->info.has_llc &&
353 anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
354 result = vk_errorf(device->instance, device,
355 VK_ERROR_INITIALIZATION_FAILED,
356 "kernel missing wc mmap");
357 goto fail;
358 }
359
360 result = anv_physical_device_init_heaps(device, fd);
361 if (result != VK_SUCCESS)
362 goto fail;
363
364 device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
365 device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
366 device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
367 device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY);
368 device->has_syncobj_wait = device->has_syncobj &&
369 anv_gem_supports_syncobj_wait(fd);
370
371 bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
372
373 /* GENs prior to 8 do not support EU/Subslice info */
374 if (device->info.gen >= 8) {
375 device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
376 device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
377
378 /* Without this information, we cannot get the right Braswell
379 * brandstrings, and we have to use conservative numbers for GPGPU on
380 * many platforms, but otherwise, things will just work.
381 */
382 if (device->subslice_total < 1 || device->eu_total < 1) {
383 intel_logw("Kernel 4.1 required to properly query GPU properties");
384 }
385 } else if (device->info.gen == 7) {
386 device->subslice_total = 1 << (device->info.gt - 1);
387 }
388
389 if (device->info.is_cherryview &&
390 device->subslice_total > 0 && device->eu_total > 0) {
391 /* Logical CS threads = EUs per subslice * num threads per EU */
392 uint32_t max_cs_threads =
393 device->eu_total / device->subslice_total * device->info.num_thread_per_eu;
394
395 /* Fuse configurations may give more threads than expected, never less. */
396 if (max_cs_threads > device->info.max_cs_threads)
397 device->info.max_cs_threads = max_cs_threads;
398 }
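/* As a hypothetical example of the computation above: with eu_total = 16,
 * subslice_total = 2 and 7 threads per EU, max_cs_threads becomes
 * 16 / 2 * 7 = 56, which only replaces the devinfo value if it is larger.
 */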
399
400 device->compiler = brw_compiler_create(NULL, &device->info);
401 if (device->compiler == NULL) {
402 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
403 goto fail;
404 }
405 device->compiler->shader_debug_log = compiler_debug_log;
406 device->compiler->shader_perf_log = compiler_perf_log;
407 device->compiler->supports_pull_constants = false;
408
409 isl_device_init(&device->isl_dev, &device->info, swizzled);
410
411 result = anv_physical_device_init_uuids(device);
412 if (result != VK_SUCCESS)
413 goto fail;
414
415 result = anv_init_wsi(device);
416 if (result != VK_SUCCESS) {
417 ralloc_free(device->compiler);
418 goto fail;
419 }
420
421 device->local_fd = fd;
422 return VK_SUCCESS;
423
424 fail:
425 close(fd);
426 return result;
427 }
428
429 static void
430 anv_physical_device_finish(struct anv_physical_device *device)
431 {
432 anv_finish_wsi(device);
433 ralloc_free(device->compiler);
434 close(device->local_fd);
435 }
436
437 static void *
438 default_alloc_func(void *pUserData, size_t size, size_t align,
439 VkSystemAllocationScope allocationScope)
440 {
441 return malloc(size);
442 }
443
444 static void *
445 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
446 size_t align, VkSystemAllocationScope allocationScope)
447 {
448 return realloc(pOriginal, size);
449 }
450
451 static void
452 default_free_func(void *pUserData, void *pMemory)
453 {
454 free(pMemory);
455 }
456
457 static const VkAllocationCallbacks default_alloc = {
458 .pUserData = NULL,
459 .pfnAllocation = default_alloc_func,
460 .pfnReallocation = default_realloc_func,
461 .pfnFree = default_free_func,
462 };
463
464 VkResult anv_CreateInstance(
465 const VkInstanceCreateInfo* pCreateInfo,
466 const VkAllocationCallbacks* pAllocator,
467 VkInstance* pInstance)
468 {
469 struct anv_instance *instance;
470
471 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
472
473 /* Check if user passed a debug report callback to be used during
474 * Create/Destroy of instance.
475 */
476 const VkDebugReportCallbackCreateInfoEXT *ctor_cb =
477 vk_find_struct_const(pCreateInfo->pNext,
478 DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT);
479
480 uint32_t client_version;
481 if (pCreateInfo->pApplicationInfo &&
482 pCreateInfo->pApplicationInfo->apiVersion != 0) {
483 client_version = pCreateInfo->pApplicationInfo->apiVersion;
484 } else {
485 client_version = VK_MAKE_VERSION(1, 0, 0);
486 }
487
488 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
489 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
490
491 if (ctor_cb && ctor_cb->flags & VK_DEBUG_REPORT_ERROR_BIT_EXT)
492 ctor_cb->pfnCallback(VK_DEBUG_REPORT_ERROR_BIT_EXT,
493 VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT,
494 VK_NULL_HANDLE, /* No handle available yet. */
495 __LINE__,
496 0,
497 "anv",
498 "incompatible driver version",
499 ctor_cb->pUserData);
500
501 return vk_errorf(NULL, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
502 "Client requested version %d.%d.%d",
503 VK_VERSION_MAJOR(client_version),
504 VK_VERSION_MINOR(client_version),
505 VK_VERSION_PATCH(client_version));
506 }
507
508 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
509 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
510 if (!anv_instance_extension_supported(ext_name))
511 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
512 }
513
514 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
515 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
516 if (!instance)
517 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
518
519 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
520
521 if (pAllocator)
522 instance->alloc = *pAllocator;
523 else
524 instance->alloc = default_alloc;
525
526 instance->apiVersion = client_version;
527 instance->physicalDeviceCount = -1;
528
529 if (pthread_mutex_init(&instance->callbacks_mutex, NULL) != 0) {
530 vk_free2(&default_alloc, pAllocator, instance);
531 return vk_error(VK_ERROR_INITIALIZATION_FAILED);
532 }
533
534 list_inithead(&instance->callbacks);
535
536 /* Store report debug callback to be used during DestroyInstance. */
537 if (ctor_cb) {
538 instance->destroy_debug_cb.flags = ctor_cb->flags;
539 instance->destroy_debug_cb.callback = ctor_cb->pfnCallback;
540 instance->destroy_debug_cb.data = ctor_cb->pUserData;
541 }
542
543 _mesa_locale_init();
544
545 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
546
547 *pInstance = anv_instance_to_handle(instance);
548
549 return VK_SUCCESS;
550 }
551
552 void anv_DestroyInstance(
553 VkInstance _instance,
554 const VkAllocationCallbacks* pAllocator)
555 {
556 ANV_FROM_HANDLE(anv_instance, instance, _instance);
557
558 if (!instance)
559 return;
560
561 if (instance->physicalDeviceCount > 0) {
562 /* We support at most one physical device. */
563 assert(instance->physicalDeviceCount == 1);
564 anv_physical_device_finish(&instance->physicalDevice);
565 }
566
567 VG(VALGRIND_DESTROY_MEMPOOL(instance));
568
569 pthread_mutex_destroy(&instance->callbacks_mutex);
570
571 _mesa_locale_fini();
572
573 vk_free(&instance->alloc, instance);
574 }
575
576 static VkResult
577 anv_enumerate_devices(struct anv_instance *instance)
578 {
579 /* TODO: Check for more devices? */
580 drmDevicePtr devices[8];
581 VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
582 int max_devices;
583
584 instance->physicalDeviceCount = 0;
585
586 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
587 if (max_devices < 1)
588 return VK_ERROR_INCOMPATIBLE_DRIVER;
589
590 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
591 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
592 devices[i]->bustype == DRM_BUS_PCI &&
593 devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
594
595 result = anv_physical_device_init(&instance->physicalDevice,
596 instance,
597 devices[i]->nodes[DRM_NODE_RENDER]);
598 if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
599 break;
600 }
601 }
602 drmFreeDevices(devices, max_devices);
603
604 if (result == VK_SUCCESS)
605 instance->physicalDeviceCount = 1;
606
607 return result;
608 }
609
610
611 VkResult anv_EnumeratePhysicalDevices(
612 VkInstance _instance,
613 uint32_t* pPhysicalDeviceCount,
614 VkPhysicalDevice* pPhysicalDevices)
615 {
616 ANV_FROM_HANDLE(anv_instance, instance, _instance);
617 VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
618 VkResult result;
619
620 if (instance->physicalDeviceCount < 0) {
621 result = anv_enumerate_devices(instance);
622 if (result != VK_SUCCESS &&
623 result != VK_ERROR_INCOMPATIBLE_DRIVER)
624 return result;
625 }
626
627 if (instance->physicalDeviceCount > 0) {
628 assert(instance->physicalDeviceCount == 1);
629 vk_outarray_append(&out, i) {
630 *i = anv_physical_device_to_handle(&instance->physicalDevice);
631 }
632 }
633
634 return vk_outarray_status(&out);
635 }
636
637 void anv_GetPhysicalDeviceFeatures(
638 VkPhysicalDevice physicalDevice,
639 VkPhysicalDeviceFeatures* pFeatures)
640 {
641 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
642
643 *pFeatures = (VkPhysicalDeviceFeatures) {
644 .robustBufferAccess = true,
645 .fullDrawIndexUint32 = true,
646 .imageCubeArray = true,
647 .independentBlend = true,
648 .geometryShader = true,
649 .tessellationShader = true,
650 .sampleRateShading = true,
651 .dualSrcBlend = true,
652 .logicOp = true,
653 .multiDrawIndirect = true,
654 .drawIndirectFirstInstance = true,
655 .depthClamp = true,
656 .depthBiasClamp = true,
657 .fillModeNonSolid = true,
658 .depthBounds = false,
659 .wideLines = true,
660 .largePoints = true,
661 .alphaToOne = true,
662 .multiViewport = true,
663 .samplerAnisotropy = true,
664 .textureCompressionETC2 = pdevice->info.gen >= 8 ||
665 pdevice->info.is_baytrail,
666 .textureCompressionASTC_LDR = pdevice->info.gen >= 9, /* FINISHME CHV */
667 .textureCompressionBC = true,
668 .occlusionQueryPrecise = true,
669 .pipelineStatisticsQuery = true,
670 .fragmentStoresAndAtomics = true,
671 .shaderTessellationAndGeometryPointSize = true,
672 .shaderImageGatherExtended = true,
673 .shaderStorageImageExtendedFormats = true,
674 .shaderStorageImageMultisample = false,
675 .shaderStorageImageReadWithoutFormat = false,
676 .shaderStorageImageWriteWithoutFormat = true,
677 .shaderUniformBufferArrayDynamicIndexing = true,
678 .shaderSampledImageArrayDynamicIndexing = true,
679 .shaderStorageBufferArrayDynamicIndexing = true,
680 .shaderStorageImageArrayDynamicIndexing = true,
681 .shaderClipDistance = true,
682 .shaderCullDistance = true,
683 .shaderFloat64 = pdevice->info.gen >= 8,
684 .shaderInt64 = pdevice->info.gen >= 8,
685 .shaderInt16 = false,
686 .shaderResourceMinLod = false,
687 .variableMultisampleRate = false,
688 .inheritedQueries = true,
689 };
690
691 /* We can't do image stores in vec4 shaders */
692 pFeatures->vertexPipelineStoresAndAtomics =
693 pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
694 pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
695 }
696
697 void anv_GetPhysicalDeviceFeatures2KHR(
698 VkPhysicalDevice physicalDevice,
699 VkPhysicalDeviceFeatures2KHR* pFeatures)
700 {
701 anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
702
703 vk_foreach_struct(ext, pFeatures->pNext) {
704 switch (ext->sType) {
705 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
706 VkPhysicalDeviceMultiviewFeaturesKHX *features =
707 (VkPhysicalDeviceMultiviewFeaturesKHX *)ext;
708 features->multiview = true;
709 features->multiviewGeometryShader = true;
710 features->multiviewTessellationShader = true;
711 break;
712 }
713
714 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
715 VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
716 features->variablePointersStorageBuffer = true;
717 features->variablePointers = false;
718 break;
719 }
720
721 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES_KHR: {
722 VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *features =
723 (VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *) ext;
724 features->samplerYcbcrConversion = true;
725 break;
726 }
727
728 default:
729 anv_debug_ignored_stype(ext->sType);
730 break;
731 }
732 }
733 }
734
735 void anv_GetPhysicalDeviceProperties(
736 VkPhysicalDevice physicalDevice,
737 VkPhysicalDeviceProperties* pProperties)
738 {
739 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
740 const struct gen_device_info *devinfo = &pdevice->info;
741
742 /* See assertions made when programming the buffer surface state. */
743 const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
744 (1ul << 30) : (1ul << 27);
745
746 const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
747 128 : 16;
748
749 VkSampleCountFlags sample_counts =
750 isl_device_get_sample_counts(&pdevice->isl_dev);
751
752 VkPhysicalDeviceLimits limits = {
753 .maxImageDimension1D = (1 << 14),
754 .maxImageDimension2D = (1 << 14),
755 .maxImageDimension3D = (1 << 11),
756 .maxImageDimensionCube = (1 << 14),
757 .maxImageArrayLayers = (1 << 11),
758 .maxTexelBufferElements = 128 * 1024 * 1024,
759 .maxUniformBufferRange = (1ul << 27),
760 .maxStorageBufferRange = max_raw_buffer_sz,
761 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
762 .maxMemoryAllocationCount = UINT32_MAX,
763 .maxSamplerAllocationCount = 64 * 1024,
764 .bufferImageGranularity = 64, /* A cache line */
765 .sparseAddressSpaceSize = 0,
766 .maxBoundDescriptorSets = MAX_SETS,
767 .maxPerStageDescriptorSamplers = max_samplers,
768 .maxPerStageDescriptorUniformBuffers = 64,
769 .maxPerStageDescriptorStorageBuffers = 64,
770 .maxPerStageDescriptorSampledImages = max_samplers,
771 .maxPerStageDescriptorStorageImages = 64,
772 .maxPerStageDescriptorInputAttachments = 64,
773 .maxPerStageResources = 250,
774 .maxDescriptorSetSamplers = 256,
775 .maxDescriptorSetUniformBuffers = 256,
776 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
777 .maxDescriptorSetStorageBuffers = 256,
778 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
779 .maxDescriptorSetSampledImages = 256,
780 .maxDescriptorSetStorageImages = 256,
781 .maxDescriptorSetInputAttachments = 256,
782 .maxVertexInputAttributes = MAX_VBS,
783 .maxVertexInputBindings = MAX_VBS,
784 .maxVertexInputAttributeOffset = 2047,
785 .maxVertexInputBindingStride = 2048,
786 .maxVertexOutputComponents = 128,
787 .maxTessellationGenerationLevel = 64,
788 .maxTessellationPatchSize = 32,
789 .maxTessellationControlPerVertexInputComponents = 128,
790 .maxTessellationControlPerVertexOutputComponents = 128,
791 .maxTessellationControlPerPatchOutputComponents = 128,
792 .maxTessellationControlTotalOutputComponents = 2048,
793 .maxTessellationEvaluationInputComponents = 128,
794 .maxTessellationEvaluationOutputComponents = 128,
795 .maxGeometryShaderInvocations = 32,
796 .maxGeometryInputComponents = 64,
797 .maxGeometryOutputComponents = 128,
798 .maxGeometryOutputVertices = 256,
799 .maxGeometryTotalOutputComponents = 1024,
800 .maxFragmentInputComponents = 128,
801 .maxFragmentOutputAttachments = 8,
802 .maxFragmentDualSrcAttachments = 1,
803 .maxFragmentCombinedOutputResources = 8,
804 .maxComputeSharedMemorySize = 32768,
805 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
806 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
807 .maxComputeWorkGroupSize = {
808 16 * devinfo->max_cs_threads,
809 16 * devinfo->max_cs_threads,
810 16 * devinfo->max_cs_threads,
811 },
812 .subPixelPrecisionBits = 4 /* FIXME */,
813 .subTexelPrecisionBits = 4 /* FIXME */,
814 .mipmapPrecisionBits = 4 /* FIXME */,
815 .maxDrawIndexedIndexValue = UINT32_MAX,
816 .maxDrawIndirectCount = UINT32_MAX,
817 .maxSamplerLodBias = 16,
818 .maxSamplerAnisotropy = 16,
819 .maxViewports = MAX_VIEWPORTS,
820 .maxViewportDimensions = { (1 << 14), (1 << 14) },
821 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
822 .viewportSubPixelBits = 13, /* We take a float? */
823 .minMemoryMapAlignment = 4096, /* A page */
824 .minTexelBufferOffsetAlignment = 1,
825 .minUniformBufferOffsetAlignment = 16,
826 .minStorageBufferOffsetAlignment = 4,
827 .minTexelOffset = -8,
828 .maxTexelOffset = 7,
829 .minTexelGatherOffset = -32,
830 .maxTexelGatherOffset = 31,
831 .minInterpolationOffset = -0.5,
832 .maxInterpolationOffset = 0.4375,
833 .subPixelInterpolationOffsetBits = 4,
834 .maxFramebufferWidth = (1 << 14),
835 .maxFramebufferHeight = (1 << 14),
836 .maxFramebufferLayers = (1 << 11),
837 .framebufferColorSampleCounts = sample_counts,
838 .framebufferDepthSampleCounts = sample_counts,
839 .framebufferStencilSampleCounts = sample_counts,
840 .framebufferNoAttachmentsSampleCounts = sample_counts,
841 .maxColorAttachments = MAX_RTS,
842 .sampledImageColorSampleCounts = sample_counts,
843 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
844 .sampledImageDepthSampleCounts = sample_counts,
845 .sampledImageStencilSampleCounts = sample_counts,
846 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
847 .maxSampleMaskWords = 1,
848 .timestampComputeAndGraphics = false,
849 .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
850 .maxClipDistances = 8,
851 .maxCullDistances = 8,
852 .maxCombinedClipAndCullDistances = 8,
853 .discreteQueuePriorities = 1,
854 .pointSizeRange = { 0.125, 255.875 },
855 .lineWidthRange = { 0.0, 7.9921875 },
856 .pointSizeGranularity = (1.0 / 8.0),
857 .lineWidthGranularity = (1.0 / 128.0),
858 .strictLines = false, /* FINISHME */
859 .standardSampleLocations = true,
860 .optimalBufferCopyOffsetAlignment = 128,
861 .optimalBufferCopyRowPitchAlignment = 128,
862 .nonCoherentAtomSize = 64,
863 };
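/* As an example of the timestampPeriod formula above: a hypothetical
 * 12 MHz timestamp_frequency gives 1000000000.0 / 12000000 = 83.3 ns per
 * timestamp tick.
 */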
864
865 *pProperties = (VkPhysicalDeviceProperties) {
866 .apiVersion = anv_physical_device_api_version(pdevice),
867 .driverVersion = vk_get_driver_version(),
868 .vendorID = 0x8086,
869 .deviceID = pdevice->chipset_id,
870 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
871 .limits = limits,
872 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
873 };
874
875 snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
876 "%s", pdevice->name);
877 memcpy(pProperties->pipelineCacheUUID,
878 pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
879 }
880
881 void anv_GetPhysicalDeviceProperties2KHR(
882 VkPhysicalDevice physicalDevice,
883 VkPhysicalDeviceProperties2KHR* pProperties)
884 {
885 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
886
887 anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
888
889 vk_foreach_struct(ext, pProperties->pNext) {
890 switch (ext->sType) {
891 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
892 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
893 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
894
895 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
896 break;
897 }
898
899 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
900 VkPhysicalDeviceIDPropertiesKHR *id_props =
901 (VkPhysicalDeviceIDPropertiesKHR *)ext;
902 memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
903 memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
904 /* The LUID is for Windows. */
905 id_props->deviceLUIDValid = false;
906 break;
907 }
908
909 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
910 VkPhysicalDeviceMultiviewPropertiesKHX *properties =
911 (VkPhysicalDeviceMultiviewPropertiesKHX *)ext;
912 properties->maxMultiviewViewCount = 16;
913 properties->maxMultiviewInstanceIndex = UINT32_MAX / 16;
914 break;
915 }
916
917 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
918 VkPhysicalDevicePointClippingPropertiesKHR *properties =
919 (VkPhysicalDevicePointClippingPropertiesKHR *) ext;
920 properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
921 anv_finishme("Implement pop-free point clipping");
922 break;
923 }
924
925 default:
926 anv_debug_ignored_stype(ext->sType);
927 break;
928 }
929 }
930 }
931
932 /* We support exactly one queue family. */
933 static const VkQueueFamilyProperties
934 anv_queue_family_properties = {
935 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
936 VK_QUEUE_COMPUTE_BIT |
937 VK_QUEUE_TRANSFER_BIT,
938 .queueCount = 1,
939 .timestampValidBits = 36, /* XXX: Real value here */
940 .minImageTransferGranularity = { 1, 1, 1 },
941 };
942
943 void anv_GetPhysicalDeviceQueueFamilyProperties(
944 VkPhysicalDevice physicalDevice,
945 uint32_t* pCount,
946 VkQueueFamilyProperties* pQueueFamilyProperties)
947 {
948 VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
949
950 vk_outarray_append(&out, p) {
951 *p = anv_queue_family_properties;
952 }
953 }
954
955 void anv_GetPhysicalDeviceQueueFamilyProperties2KHR(
956 VkPhysicalDevice physicalDevice,
957 uint32_t* pQueueFamilyPropertyCount,
958 VkQueueFamilyProperties2KHR* pQueueFamilyProperties)
959 {
960
961 VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
962
963 vk_outarray_append(&out, p) {
964 p->queueFamilyProperties = anv_queue_family_properties;
965
966 vk_foreach_struct(s, p->pNext) {
967 anv_debug_ignored_stype(s->sType);
968 }
969 }
970 }
971
972 void anv_GetPhysicalDeviceMemoryProperties(
973 VkPhysicalDevice physicalDevice,
974 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
975 {
976 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
977
978 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
979 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
980 pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
981 .propertyFlags = physical_device->memory.types[i].propertyFlags,
982 .heapIndex = physical_device->memory.types[i].heapIndex,
983 };
984 }
985
986 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
987 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
988 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
989 .size = physical_device->memory.heaps[i].size,
990 .flags = physical_device->memory.heaps[i].flags,
991 };
992 }
993 }
994
995 void anv_GetPhysicalDeviceMemoryProperties2KHR(
996 VkPhysicalDevice physicalDevice,
997 VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties)
998 {
999 anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1000 &pMemoryProperties->memoryProperties);
1001
1002 vk_foreach_struct(ext, pMemoryProperties->pNext) {
1003 switch (ext->sType) {
1004 default:
1005 anv_debug_ignored_stype(ext->sType);
1006 break;
1007 }
1008 }
1009 }
1010
1011 PFN_vkVoidFunction anv_GetInstanceProcAddr(
1012 VkInstance instance,
1013 const char* pName)
1014 {
1015 return anv_lookup_entrypoint(NULL, pName);
1016 }
1017
1018 /* With version 1+ of the loader interface the ICD should expose
1019 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
1020 */
1021 PUBLIC
1022 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1023 VkInstance instance,
1024 const char* pName);
1025
1026 PUBLIC
1027 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
1028 VkInstance instance,
1029 const char* pName)
1030 {
1031 return anv_GetInstanceProcAddr(instance, pName);
1032 }
1033
1034 PFN_vkVoidFunction anv_GetDeviceProcAddr(
1035 VkDevice _device,
1036 const char* pName)
1037 {
1038 ANV_FROM_HANDLE(anv_device, device, _device);
1039 return anv_lookup_entrypoint(&device->info, pName);
1040 }
1041
1042 static void
1043 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
1044 {
1045 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1046 queue->device = device;
1047 queue->pool = &device->surface_state_pool;
1048 }
1049
1050 static void
1051 anv_queue_finish(struct anv_queue *queue)
1052 {
1053 }
1054
1055 static struct anv_state
1056 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
1057 {
1058 struct anv_state state;
1059
1060 state = anv_state_pool_alloc(pool, size, align);
1061 memcpy(state.map, p, size);
1062
1063 anv_state_flush(pool->block_pool.device, state);
1064
1065 return state;
1066 }
1067
1068 struct gen8_border_color {
1069 union {
1070 float float32[4];
1071 uint32_t uint32[4];
1072 };
1073 /* Pad out to 64 bytes */
1074 uint32_t _pad[12];
1075 };
1076
1077 static void
1078 anv_device_init_border_colors(struct anv_device *device)
1079 {
1080 static const struct gen8_border_color border_colors[] = {
1081 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
1082 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
1083 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
1084 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
1085 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
1086 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
1087 };
1088
1089 device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
1090 sizeof(border_colors), 64,
1091 border_colors);
1092 }
1093
1094 static void
1095 anv_device_init_trivial_batch(struct anv_device *device)
1096 {
1097 anv_bo_init_new(&device->trivial_batch_bo, device, 4096);
1098
1099 if (device->instance->physicalDevice.has_exec_async)
1100 device->trivial_batch_bo.flags |= EXEC_OBJECT_ASYNC;
1101
1102 void *map = anv_gem_mmap(device, device->trivial_batch_bo.gem_handle,
1103 0, 4096, 0);
1104
1105 struct anv_batch batch = {
1106 .start = map,
1107 .next = map,
1108 .end = map + 4096,
1109 };
1110
1111 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1112 anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1113
1114 if (!device->info.has_llc)
1115 gen_clflush_range(map, batch.next - map);
1116
1117 anv_gem_munmap(map, device->trivial_batch_bo.size);
1118 }
1119
1120 VkResult anv_CreateDevice(
1121 VkPhysicalDevice physicalDevice,
1122 const VkDeviceCreateInfo* pCreateInfo,
1123 const VkAllocationCallbacks* pAllocator,
1124 VkDevice* pDevice)
1125 {
1126 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
1127 VkResult result;
1128 struct anv_device *device;
1129
1130 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1131
1132 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
1133 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
1134 if (!anv_physical_device_extension_supported(physical_device, ext_name))
1135 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
1136 }
1137
1138 /* Check enabled features */
1139 if (pCreateInfo->pEnabledFeatures) {
1140 VkPhysicalDeviceFeatures supported_features;
1141 anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
1142 VkBool32 *supported_feature = (VkBool32 *)&supported_features;
1143 VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
1144 unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
1145 for (uint32_t i = 0; i < num_features; i++) {
1146 if (enabled_feature[i] && !supported_feature[i])
1147 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
1148 }
1149 }
1150
1151 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1152 sizeof(*device), 8,
1153 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1154 if (!device)
1155 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1156
1157 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1158 device->instance = physical_device->instance;
1159 device->chipset_id = physical_device->chipset_id;
1160 device->lost = false;
1161
1162 if (pAllocator)
1163 device->alloc = *pAllocator;
1164 else
1165 device->alloc = physical_device->instance->alloc;
1166
1167 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
1168 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
1169 if (device->fd == -1) {
1170 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1171 goto fail_device;
1172 }
1173
1174 device->context_id = anv_gem_create_context(device);
1175 if (device->context_id == -1) {
1176 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1177 goto fail_fd;
1178 }
1179
1180 device->info = physical_device->info;
1181 device->isl_dev = physical_device->isl_dev;
1182
1183 /* On Broadwell and later, we can use batch chaining to more efficiently
1184 * implement growing command buffers. On Gen7 (including Haswell), the kernel
1185 * command parser gets in the way and we have to fall back to growing
1186 * the batch.
1187 */
1188 device->can_chain_batches = device->info.gen >= 8;
1189
1190 device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
1191 pCreateInfo->pEnabledFeatures->robustBufferAccess;
1192
1193 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
1194 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1195 goto fail_context_id;
1196 }
1197
1198 pthread_condattr_t condattr;
1199 if (pthread_condattr_init(&condattr) != 0) {
1200 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1201 goto fail_mutex;
1202 }
1203 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
1204 pthread_condattr_destroy(&condattr);
1205 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1206 goto fail_mutex;
1207 }
1208 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
1209 pthread_condattr_destroy(&condattr);
1210 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1211 goto fail_mutex;
1212 }
1213 pthread_condattr_destroy(&condattr);
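/* The monotonic clock matters because relative Vulkan timeouts get turned
 * into absolute deadlines for pthread_cond_timedwait(). Roughly (a sketch,
 * with timeout_ns as an illustrative variable, not this driver's code):
 *
 *    struct timespec abstime;
 *    clock_gettime(CLOCK_MONOTONIC, &abstime);
 *    abstime.tv_nsec += timeout_ns % 1000000000;
 *    abstime.tv_sec += timeout_ns / 1000000000 + abstime.tv_nsec / 1000000000;
 *    abstime.tv_nsec %= 1000000000;
 *    pthread_cond_timedwait(&device->queue_submit, &device->mutex, &abstime);
 *
 * CLOCK_REALTIME would make such waits jump around under NTP adjustments.
 */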
1214
1215 uint64_t bo_flags =
1216 (physical_device->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
1217 (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
1218 (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0);
1219
1220 anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);
1221
1222 result = anv_bo_cache_init(&device->bo_cache);
1223 if (result != VK_SUCCESS)
1224 goto fail_batch_bo_pool;
1225
1226 /* For the state pools we explicitly disable 48bit. */
1227 bo_flags = physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0;
1228
1229 result = anv_state_pool_init(&device->dynamic_state_pool, device, 16384,
1230 bo_flags);
1231 if (result != VK_SUCCESS)
1232 goto fail_bo_cache;
1233
1234 result = anv_state_pool_init(&device->instruction_state_pool, device, 16384,
1235 bo_flags |
1236 (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0));
1237 if (result != VK_SUCCESS)
1238 goto fail_dynamic_state_pool;
1239
1240 result = anv_state_pool_init(&device->surface_state_pool, device, 4096,
1241 bo_flags);
1242 if (result != VK_SUCCESS)
1243 goto fail_instruction_state_pool;
1244
1245 result = anv_bo_init_new(&device->workaround_bo, device, 1024);
1246 if (result != VK_SUCCESS)
1247 goto fail_surface_state_pool;
1248
1249 anv_device_init_trivial_batch(device);
1250
1251 anv_scratch_pool_init(device, &device->scratch_pool);
1252
1253 anv_queue_init(device, &device->queue);
1254
1255 switch (device->info.gen) {
1256 case 7:
1257 if (!device->info.is_haswell)
1258 result = gen7_init_device_state(device);
1259 else
1260 result = gen75_init_device_state(device);
1261 break;
1262 case 8:
1263 result = gen8_init_device_state(device);
1264 break;
1265 case 9:
1266 result = gen9_init_device_state(device);
1267 break;
1268 case 10:
1269 result = gen10_init_device_state(device);
1270 break;
1271 default:
1272 /* Shouldn't get here as we don't create physical devices for any other
1273 * gens. */
1274 unreachable("unhandled gen");
1275 }
1276 if (result != VK_SUCCESS)
1277 goto fail_workaround_bo;
1278
1279 anv_device_init_blorp(device);
1280
1281 anv_device_init_border_colors(device);
1282
1283 *pDevice = anv_device_to_handle(device);
1284
1285 return VK_SUCCESS;
1286
1287 fail_workaround_bo:
1288 anv_queue_finish(&device->queue);
1289 anv_scratch_pool_finish(device, &device->scratch_pool);
1290 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
1291 anv_gem_close(device, device->workaround_bo.gem_handle);
1292 fail_surface_state_pool:
1293 anv_state_pool_finish(&device->surface_state_pool);
1294 fail_instruction_state_pool:
1295 anv_state_pool_finish(&device->instruction_state_pool);
1296 fail_dynamic_state_pool:
1297 anv_state_pool_finish(&device->dynamic_state_pool);
1298 fail_bo_cache:
1299 anv_bo_cache_finish(&device->bo_cache);
1300 fail_batch_bo_pool:
1301 anv_bo_pool_finish(&device->batch_bo_pool);
1302 pthread_cond_destroy(&device->queue_submit);
1303 fail_mutex:
1304 pthread_mutex_destroy(&device->mutex);
1305 fail_context_id:
1306 anv_gem_destroy_context(device, device->context_id);
1307 fail_fd:
1308 close(device->fd);
1309 fail_device:
1310 vk_free(&device->alloc, device);
1311
1312 return result;
1313 }
1314
1315 void anv_DestroyDevice(
1316 VkDevice _device,
1317 const VkAllocationCallbacks* pAllocator)
1318 {
1319 ANV_FROM_HANDLE(anv_device, device, _device);
1320
1321 if (!device)
1322 return;
1323
1324 anv_device_finish_blorp(device);
1325
1326 anv_queue_finish(&device->queue);
1327
1328 #ifdef HAVE_VALGRIND
1329 /* We only need to free these to prevent valgrind errors. The backing
1330 * BO will go away in a couple of lines so we don't actually leak.
1331 */
1332 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1333 #endif
1334
1335 anv_scratch_pool_finish(device, &device->scratch_pool);
1336
1337 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
1338 anv_gem_close(device, device->workaround_bo.gem_handle);
1339
1340 anv_gem_close(device, device->trivial_batch_bo.gem_handle);
1341
1342 anv_state_pool_finish(&device->surface_state_pool);
1343 anv_state_pool_finish(&device->instruction_state_pool);
1344 anv_state_pool_finish(&device->dynamic_state_pool);
1345
1346 anv_bo_cache_finish(&device->bo_cache);
1347
1348 anv_bo_pool_finish(&device->batch_bo_pool);
1349
1350 pthread_cond_destroy(&device->queue_submit);
1351 pthread_mutex_destroy(&device->mutex);
1352
1353 anv_gem_destroy_context(device, device->context_id);
1354
1355 close(device->fd);
1356
1357 vk_free(&device->alloc, device);
1358 }
1359
1360 VkResult anv_EnumerateInstanceLayerProperties(
1361 uint32_t* pPropertyCount,
1362 VkLayerProperties* pProperties)
1363 {
1364 if (pProperties == NULL) {
1365 *pPropertyCount = 0;
1366 return VK_SUCCESS;
1367 }
1368
1369 /* None supported at this time */
1370 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1371 }
1372
1373 VkResult anv_EnumerateDeviceLayerProperties(
1374 VkPhysicalDevice physicalDevice,
1375 uint32_t* pPropertyCount,
1376 VkLayerProperties* pProperties)
1377 {
1378 if (pProperties == NULL) {
1379 *pPropertyCount = 0;
1380 return VK_SUCCESS;
1381 }
1382
1383 /* None supported at this time */
1384 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1385 }
1386
1387 void anv_GetDeviceQueue(
1388 VkDevice _device,
1389 uint32_t queueNodeIndex,
1390 uint32_t queueIndex,
1391 VkQueue* pQueue)
1392 {
1393 ANV_FROM_HANDLE(anv_device, device, _device);
1394
1395 assert(queueIndex == 0);
1396
1397 *pQueue = anv_queue_to_handle(&device->queue);
1398 }
1399
1400 VkResult
1401 anv_device_query_status(struct anv_device *device)
1402 {
1403 /* This isn't likely as most of the callers of this function already check
1404 * for it. However, it doesn't hurt to check and it potentially lets us
1405 * avoid an ioctl.
1406 */
1407 if (unlikely(device->lost))
1408 return VK_ERROR_DEVICE_LOST;
1409
1410 uint32_t active, pending;
1411 int ret = anv_gem_gpu_get_reset_stats(device, &active, &pending);
1412 if (ret == -1) {
1413 /* We don't know the real error. */
1414 device->lost = true;
1415 return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1416 "get_reset_stats failed: %m");
1417 }
1418
1419 if (active) {
1420 device->lost = true;
1421 return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1422 "GPU hung on one of our command buffers");
1423 } else if (pending) {
1424 device->lost = true;
1425 return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1426 "GPU hung with commands in-flight");
1427 }
1428
1429 return VK_SUCCESS;
1430 }
1431
1432 VkResult
1433 anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
1434 {
1435 /* Note: This only returns whether or not the BO is in use by an i915 GPU.
1436 * Other usages of the BO (such as on different hardware) will not be
1437 * flagged as "busy" by this ioctl. Use with care.
1438 */
1439 int ret = anv_gem_busy(device, bo->gem_handle);
1440 if (ret == 1) {
1441 return VK_NOT_READY;
1442 } else if (ret == -1) {
1443 /* We don't know the real error. */
1444 device->lost = true;
1445 return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1446 "gem wait failed: %m");
1447 }
1448
1449 /* Query for device status after the busy call. If the BO we're checking
1450 * got caught in a GPU hang we don't want to return VK_SUCCESS to the
1451 * client because it clearly doesn't have valid data. Yes, this most
1452 * likely means an ioctl, but we just did an ioctl to query the busy status
1453 * so it's no great loss.
1454 */
1455 return anv_device_query_status(device);
1456 }
1457
1458 VkResult
1459 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1460 int64_t timeout)
1461 {
1462 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
1463 if (ret == -1 && errno == ETIME) {
1464 return VK_TIMEOUT;
1465 } else if (ret == -1) {
1466 /* We don't know the real error. */
1467 device->lost = true;
1468 return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST,
1469 "gem wait failed: %m");
1470 }
1471
1472 /* Query for device status after the wait. If the BO we're waiting on got
1473 * caught in a GPU hang we don't want to return VK_SUCCESS to the client
1474 * because it clearly doesn't have valid data. Yes, this most likely means
1475 * an ioctl, but we just did an ioctl to wait so it's no great loss.
1476 */
1477 return anv_device_query_status(device);
1478 }
1479
1480 VkResult anv_DeviceWaitIdle(
1481 VkDevice _device)
1482 {
1483 ANV_FROM_HANDLE(anv_device, device, _device);
1484 if (unlikely(device->lost))
1485 return VK_ERROR_DEVICE_LOST;
1486
1487 struct anv_batch batch;
1488
1489 uint32_t cmds[8];
1490 batch.start = batch.next = cmds;
1491 batch.end = (void *) cmds + sizeof(cmds);
1492
1493 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1494 anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1495
1496 return anv_device_submit_simple_batch(device, &batch);
1497 }
1498
1499 VkResult
1500 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1501 {
1502 uint32_t gem_handle = anv_gem_create(device, size);
1503 if (!gem_handle)
1504 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1505
1506 anv_bo_init(bo, gem_handle, size);
1507
1508 return VK_SUCCESS;
1509 }
1510
1511 VkResult anv_AllocateMemory(
1512 VkDevice _device,
1513 const VkMemoryAllocateInfo* pAllocateInfo,
1514 const VkAllocationCallbacks* pAllocator,
1515 VkDeviceMemory* pMem)
1516 {
1517 ANV_FROM_HANDLE(anv_device, device, _device);
1518 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1519 struct anv_device_memory *mem;
1520 VkResult result = VK_SUCCESS;
1521
1522 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1523
1524 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
1525 assert(pAllocateInfo->allocationSize > 0);
1526
1527 /* The kernel relocation API has a limitation of a 32-bit delta value
1528 * applied to the address before it is written which, in spite of it being
1529 * unsigned, is treated as signed. Because of the way that this maps to
1530 * the Vulkan API, we cannot handle an offset into a buffer that does not
1531 * fit into a signed 32 bits. The only mechanism we have for dealing with
1532 * this at the moment is to limit all VkDeviceMemory objects to a maximum
1533 * of 2GB each. The Vulkan spec allows us to do this:
1534 *
1535 * "Some platforms may have a limit on the maximum size of a single
1536 * allocation. For example, certain systems may fail to create
1537 * allocations with a size greater than or equal to 4GB. Such a limit is
1538 * implementation-dependent, and if such a failure occurs then the error
1539 * VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
1540 *
1541 * We don't use vk_error here because it's not an error so much as an
1542 * indication to the application that the allocation is too large.
1543 */
1544 if (pAllocateInfo->allocationSize > (1ull << 31))
1545 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1546
1547 /* FINISHME: Fail if allocation request exceeds heap size. */
1548
1549 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1550 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1551 if (mem == NULL)
1552 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1553
1554 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
1555 mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
1556 mem->map = NULL;
1557 mem->map_size = 0;
1558
1559 const VkImportMemoryFdInfoKHR *fd_info =
1560 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
1561
1562 /* The Vulkan spec permits handleType to be 0, in which case the struct is
1563 * ignored.
1564 */
1565 if (fd_info && fd_info->handleType) {
1566 /* At the moment, we support only the below handle types. */
1567 assert(fd_info->handleType ==
1568 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
1569 fd_info->handleType ==
1570 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1571
1572 result = anv_bo_cache_import(device, &device->bo_cache,
1573 fd_info->fd, &mem->bo);
1574 if (result != VK_SUCCESS)
1575 goto fail;
1576
1577 VkDeviceSize aligned_alloc_size =
1578 align_u64(pAllocateInfo->allocationSize, 4096);
1579
1580 /* For security purposes, we reject importing the bo if it's smaller
1581 * than the requested allocation size. This prevents a malicious client
1582 * from passing a buffer to a trusted client, lying about the size, and
1583 * telling the trusted client to try and texture from an image that goes
1584 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
1585 * in the trusted client. The trusted client can protect itself against
1586 * this sort of attack but only if it can trust the buffer size.
1587 */
1588 if (mem->bo->size < aligned_alloc_size) {
1589 result = vk_errorf(device->instance, device,
1590 VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
1591 "aligned allocationSize too large for "
1592 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: "
1593 "%"PRIu64"B > %"PRIu64"B",
1594 aligned_alloc_size, mem->bo->size);
1595 anv_bo_cache_release(device, &device->bo_cache, mem->bo);
1596 goto fail;
1597 }
1598
1599 /* From the Vulkan spec:
1600 *
1601 * "Importing memory from a file descriptor transfers ownership of
1602 * the file descriptor from the application to the Vulkan
1603 * implementation. The application must not perform any operations on
1604 * the file descriptor after a successful import."
1605 *
1606 * If the import fails, we leave the file descriptor open.
1607 */
1608 close(fd_info->fd);
1609 } else {
1610 result = anv_bo_cache_alloc(device, &device->bo_cache,
1611 pAllocateInfo->allocationSize,
1612 &mem->bo);
1613 if (result != VK_SUCCESS)
1614 goto fail;
1615 }
1616
1617 assert(mem->type->heapIndex < pdevice->memory.heap_count);
1618 if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses)
1619 mem->bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
1620
1621 if (pdevice->has_exec_async)
1622 mem->bo->flags |= EXEC_OBJECT_ASYNC;
1623
1624 *pMem = anv_device_memory_to_handle(mem);
1625
1626 return VK_SUCCESS;
1627
1628 fail:
1629 vk_free2(&device->alloc, pAllocator, mem);
1630
1631 return result;
1632 }
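/* For reference, a minimal application-side sketch of the import path above
 * (dma_buf_fd, size and mem_type_index are illustrative placeholders):
 *
 *    VkImportMemoryFdInfoKHR import_info = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *       .fd = dma_buf_fd,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &import_info,
 *       .allocationSize = size,
 *       .memoryTypeIndex = mem_type_index,
 *    };
 *    VkDeviceMemory memory;
 *    vkAllocateMemory(device, &alloc_info, NULL, &memory);
 *
 * On success the fd belongs to the implementation and must not be touched
 * by the application again, as quoted from the spec above.
 */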
1633
1634 VkResult anv_GetMemoryFdKHR(
1635 VkDevice device_h,
1636 const VkMemoryGetFdInfoKHR* pGetFdInfo,
1637 int* pFd)
1638 {
1639 ANV_FROM_HANDLE(anv_device, dev, device_h);
1640 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
1641
1642 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1643
1644 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
1645 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1646
1647 return anv_bo_cache_export(dev, &dev->bo_cache, mem->bo, pFd);
1648 }
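/* A matching export sketch (the handle type could equally be
 * VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; names are illustrative):
 *
 *    VkMemoryGetFdInfoKHR get_fd_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd;
 *    vkGetMemoryFdKHR(device, &get_fd_info, &fd);
 *
 * The returned fd is owned by the application, which may pass it to another
 * process or API and is responsible for eventually closing it.
 */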
1649
1650 VkResult anv_GetMemoryFdPropertiesKHR(
1651 VkDevice _device,
1652 VkExternalMemoryHandleTypeFlagBitsKHR handleType,
1653 int fd,
1654 VkMemoryFdPropertiesKHR* pMemoryFdProperties)
1655 {
1656 ANV_FROM_HANDLE(anv_device, device, _device);
1657 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1658
1659 switch (handleType) {
1660    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
1661 /* dma-buf can be imported as any memory type */
1662 pMemoryFdProperties->memoryTypeBits =
1663 (1 << pdevice->memory.type_count) - 1;
1664 return VK_SUCCESS;
1665
1666 default:
1667 /* The valid usage section for this function says:
1668 *
1669 * "handleType must not be one of the handle types defined as
1670 * opaque."
1671 *
1672 * So opaque handle types fall into the default "unsupported" case.
1673 */
1674 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
1675 }
1676 }
1677
1678 void anv_FreeMemory(
1679 VkDevice _device,
1680 VkDeviceMemory _mem,
1681 const VkAllocationCallbacks* pAllocator)
1682 {
1683 ANV_FROM_HANDLE(anv_device, device, _device);
1684 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1685
1686 if (mem == NULL)
1687 return;
1688
1689 if (mem->map)
1690 anv_UnmapMemory(_device, _mem);
1691
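   /* Drop our reference on the BO.  The BO cache destroys the GEM handle
    * only once the last reference to that handle has been released.
    */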
1692 anv_bo_cache_release(device, &device->bo_cache, mem->bo);
1693
1694 vk_free2(&device->alloc, pAllocator, mem);
1695 }
1696
1697 VkResult anv_MapMemory(
1698 VkDevice _device,
1699 VkDeviceMemory _memory,
1700 VkDeviceSize offset,
1701 VkDeviceSize size,
1702 VkMemoryMapFlags flags,
1703 void** ppData)
1704 {
1705 ANV_FROM_HANDLE(anv_device, device, _device);
1706 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1707
1708 if (mem == NULL) {
1709 *ppData = NULL;
1710 return VK_SUCCESS;
1711 }
1712
1713 if (size == VK_WHOLE_SIZE)
1714 size = mem->bo->size - offset;
1715
1716    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1717     *
1718     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater
1719     *    than 0
1720     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
1721     *    equal to the size of the memory minus offset
1722     */
1723 assert(size > 0);
1724 assert(offset + size <= mem->bo->size);
1725
1726 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1727 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1728 * at a time is valid. We could just mmap up front and return an offset
1729 * pointer here, but that may exhaust virtual memory on 32 bit
1730 * userspace. */
1731
1732 uint32_t gem_flags = 0;
1733
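   /* On non-LLC hardware the CPU caches are not coherent with the GPU, so
    * host-coherent memory types are mapped write-combined instead of cached.
    */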
1734 if (!device->info.has_llc &&
1735 (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
1736 gem_flags |= I915_MMAP_WC;
1737
1738 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
1739 uint64_t map_offset = offset & ~4095ull;
1740 assert(offset >= map_offset);
1741 uint64_t map_size = (offset + size) - map_offset;
1742
1743 /* Let's map whole pages */
1744 map_size = align_u64(map_size, 4096);
1745
1746 void *map = anv_gem_mmap(device, mem->bo->gem_handle,
1747 map_offset, map_size, gem_flags);
1748 if (map == MAP_FAILED)
1749 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
1750
1751 mem->map = map;
1752 mem->map_size = map_size;
1753
1754 *ppData = mem->map + (offset - map_offset);
1755
1756 return VK_SUCCESS;
1757 }
1758
1759 void anv_UnmapMemory(
1760 VkDevice _device,
1761 VkDeviceMemory _memory)
1762 {
1763 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1764
1765 if (mem == NULL)
1766 return;
1767
1768 anv_gem_munmap(mem->map, mem->map_size);
1769
1770 mem->map = NULL;
1771 mem->map_size = 0;
1772 }
1773
1774 static void
1775 clflush_mapped_ranges(struct anv_device *device,
1776 uint32_t count,
1777 const VkMappedMemoryRange *ranges)
1778 {
1779 for (uint32_t i = 0; i < count; i++) {
1780 ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
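      /* If the range begins beyond the currently mapped region, there is
       * nothing to flush for it.
       */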
1781 if (ranges[i].offset >= mem->map_size)
1782 continue;
1783
1784 gen_clflush_range(mem->map + ranges[i].offset,
1785 MIN2(ranges[i].size, mem->map_size - ranges[i].offset));
1786 }
1787 }
1788
1789 VkResult anv_FlushMappedMemoryRanges(
1790 VkDevice _device,
1791 uint32_t memoryRangeCount,
1792 const VkMappedMemoryRange* pMemoryRanges)
1793 {
1794 ANV_FROM_HANDLE(anv_device, device, _device);
1795
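   /* On LLC platforms the CPU caches are coherent with the GPU, so there is
    * nothing to flush.
    */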
1796 if (device->info.has_llc)
1797 return VK_SUCCESS;
1798
1799 /* Make sure the writes we're flushing have landed. */
1800 __builtin_ia32_mfence();
1801
1802 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1803
1804 return VK_SUCCESS;
1805 }
1806
1807 VkResult anv_InvalidateMappedMemoryRanges(
1808 VkDevice _device,
1809 uint32_t memoryRangeCount,
1810 const VkMappedMemoryRange* pMemoryRanges)
1811 {
1812 ANV_FROM_HANDLE(anv_device, device, _device);
1813
1814 if (device->info.has_llc)
1815 return VK_SUCCESS;
1816
1817 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1818
1819 /* Make sure no reads get moved up above the invalidate. */
1820 __builtin_ia32_mfence();
1821
1822 return VK_SUCCESS;
1823 }
1824
1825 void anv_GetBufferMemoryRequirements(
1826 VkDevice _device,
1827 VkBuffer _buffer,
1828 VkMemoryRequirements* pMemoryRequirements)
1829 {
1830 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1831 ANV_FROM_HANDLE(anv_device, device, _device);
1832 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1833
1834 /* The Vulkan spec (git aaed022) says:
1835 *
1836 * memoryTypeBits is a bitfield and contains one bit set for every
1837 * supported memory type for the resource. The bit `1<<i` is set if and
1838 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1839 * structure for the physical device is supported.
1840 */
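   /* Additionally, restrict the set to memory types whose supported buffer
    * usage covers this buffer's usage flags.
    */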
1841 uint32_t memory_types = 0;
1842 for (uint32_t i = 0; i < pdevice->memory.type_count; i++) {
1843 uint32_t valid_usage = pdevice->memory.types[i].valid_buffer_usage;
1844 if ((valid_usage & buffer->usage) == buffer->usage)
1845 memory_types |= (1u << i);
1846 }
1847
1848 pMemoryRequirements->size = buffer->size;
1849 pMemoryRequirements->alignment = 16;
1850 pMemoryRequirements->memoryTypeBits = memory_types;
1851 }
1852
1853 void anv_GetBufferMemoryRequirements2KHR(
1854 VkDevice _device,
1855 const VkBufferMemoryRequirementsInfo2KHR* pInfo,
1856 VkMemoryRequirements2KHR* pMemoryRequirements)
1857 {
1858 anv_GetBufferMemoryRequirements(_device, pInfo->buffer,
1859 &pMemoryRequirements->memoryRequirements);
1860
1861 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
1862 switch (ext->sType) {
1863 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
1864 VkMemoryDedicatedRequirementsKHR *requirements = (void *)ext;
1865 requirements->prefersDedicatedAllocation = VK_FALSE;
1866 requirements->requiresDedicatedAllocation = VK_FALSE;
1867 break;
1868 }
1869
1870 default:
1871 anv_debug_ignored_stype(ext->sType);
1872 break;
1873 }
1874 }
1875 }
1876
1877 void anv_GetImageMemoryRequirements(
1878 VkDevice _device,
1879 VkImage _image,
1880 VkMemoryRequirements* pMemoryRequirements)
1881 {
1882 ANV_FROM_HANDLE(anv_image, image, _image);
1883 ANV_FROM_HANDLE(anv_device, device, _device);
1884 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1885
1886 /* The Vulkan spec (git aaed022) says:
1887 *
1888 * memoryTypeBits is a bitfield and contains one bit set for every
1889 * supported memory type for the resource. The bit `1<<i` is set if and
1890 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1891 * structure for the physical device is supported.
1892 *
1893 * All types are currently supported for images.
1894 */
1895 uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1;
1896
1897 pMemoryRequirements->size = image->size;
1898 pMemoryRequirements->alignment = image->alignment;
1899 pMemoryRequirements->memoryTypeBits = memory_types;
1900 }
1901
1902 void anv_GetImageMemoryRequirements2KHR(
1903 VkDevice _device,
1904 const VkImageMemoryRequirementsInfo2KHR* pInfo,
1905 VkMemoryRequirements2KHR* pMemoryRequirements)
1906 {
1907 anv_GetImageMemoryRequirements(_device, pInfo->image,
1908 &pMemoryRequirements->memoryRequirements);
1909
1910 vk_foreach_struct_const(ext, pInfo->pNext) {
1911 switch (ext->sType) {
1912 case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO_KHR: {
1913 ANV_FROM_HANDLE(anv_image, image, pInfo->image);
1914 ANV_FROM_HANDLE(anv_device, device, _device);
1915 struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1916 const VkImagePlaneMemoryRequirementsInfoKHR *plane_reqs =
1917 (const VkImagePlaneMemoryRequirementsInfoKHR *) ext;
1918 uint32_t plane = anv_image_aspect_to_plane(image->aspects,
1919 plane_reqs->planeAspect);
1920
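         /* Per-plane requirements are reported relative to the plane's own
          * binding, which is expected to start at offset 0 (disjoint images
          * bind each plane separately).
          */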
1921 assert(image->planes[plane].offset == 0);
1922
1923 /* The Vulkan spec (git aaed022) says:
1924 *
1925 * memoryTypeBits is a bitfield and contains one bit set for every
1926 * supported memory type for the resource. The bit `1<<i` is set
1927 * if and only if the memory type `i` in the
1928 * VkPhysicalDeviceMemoryProperties structure for the physical
1929 * device is supported.
1930 *
1931 * All types are currently supported for images.
1932 */
1933 pMemoryRequirements->memoryRequirements.memoryTypeBits =
1934 (1ull << pdevice->memory.type_count) - 1;
1935
1936 pMemoryRequirements->memoryRequirements.size = image->planes[plane].size;
1937 pMemoryRequirements->memoryRequirements.alignment =
1938 image->planes[plane].alignment;
1939 break;
1940 }
1941
1942 default:
1943 anv_debug_ignored_stype(ext->sType);
1944 break;
1945 }
1946 }
1947
1948 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
1949 switch (ext->sType) {
1950 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: {
1951 VkMemoryDedicatedRequirementsKHR *requirements = (void *)ext;
1952 requirements->prefersDedicatedAllocation = VK_FALSE;
1953 requirements->requiresDedicatedAllocation = VK_FALSE;
1954 break;
1955 }
1956
1957 default:
1958 anv_debug_ignored_stype(ext->sType);
1959 break;
1960 }
1961 }
1962 }
1963
1964 void anv_GetImageSparseMemoryRequirements(
1965 VkDevice device,
1966 VkImage image,
1967 uint32_t* pSparseMemoryRequirementCount,
1968 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1969 {
1970 *pSparseMemoryRequirementCount = 0;
1971 }
1972
1973 void anv_GetImageSparseMemoryRequirements2KHR(
1974 VkDevice device,
1975 const VkImageSparseMemoryRequirementsInfo2KHR* pInfo,
1976 uint32_t* pSparseMemoryRequirementCount,
1977 VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements)
1978 {
1979 *pSparseMemoryRequirementCount = 0;
1980 }
1981
1982 void anv_GetDeviceMemoryCommitment(
1983 VkDevice device,
1984 VkDeviceMemory memory,
1985 VkDeviceSize* pCommittedMemoryInBytes)
1986 {
1987 *pCommittedMemoryInBytes = 0;
1988 }
1989
1990 static void
1991 anv_bind_buffer_memory(const VkBindBufferMemoryInfoKHR *pBindInfo)
1992 {
1993 ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
1994 ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
1995
1996 assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR);
1997
1998 if (mem) {
1999 assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage);
2000 buffer->bo = mem->bo;
2001 buffer->offset = pBindInfo->memoryOffset;
2002 } else {
2003 buffer->bo = NULL;
2004 buffer->offset = 0;
2005 }
2006 }
2007
2008 VkResult anv_BindBufferMemory(
2009 VkDevice device,
2010 VkBuffer buffer,
2011 VkDeviceMemory memory,
2012 VkDeviceSize memoryOffset)
2013 {
2014 anv_bind_buffer_memory(
2015 &(VkBindBufferMemoryInfoKHR) {
2016 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
2017 .buffer = buffer,
2018 .memory = memory,
2019 .memoryOffset = memoryOffset,
2020 });
2021
2022 return VK_SUCCESS;
2023 }
2024
2025 VkResult anv_BindBufferMemory2KHR(
2026 VkDevice device,
2027 uint32_t bindInfoCount,
2028 const VkBindBufferMemoryInfoKHR* pBindInfos)
2029 {
2030 for (uint32_t i = 0; i < bindInfoCount; i++)
2031 anv_bind_buffer_memory(&pBindInfos[i]);
2032
2033 return VK_SUCCESS;
2034 }
2035
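/* Sparse resources are not supported; any sparse binding request fails with
 * VK_ERROR_FEATURE_NOT_PRESENT (or reports a lost device first).
 */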
2036 VkResult anv_QueueBindSparse(
2037 VkQueue _queue,
2038 uint32_t bindInfoCount,
2039 const VkBindSparseInfo* pBindInfo,
2040 VkFence fence)
2041 {
2042 ANV_FROM_HANDLE(anv_queue, queue, _queue);
2043 if (unlikely(queue->device->lost))
2044 return VK_ERROR_DEVICE_LOST;
2045
2046 return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
2047 }
2048
2049 // Event functions
2050
2051 VkResult anv_CreateEvent(
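/* Events are backed by a small piece of state allocated from the dynamic
 * state pool so that both the host and GPU-submitted commands can read and
 * update the event status.
 */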
2052 VkDevice _device,
2053 const VkEventCreateInfo* pCreateInfo,
2054 const VkAllocationCallbacks* pAllocator,
2055 VkEvent* pEvent)
2056 {
2057 ANV_FROM_HANDLE(anv_device, device, _device);
2058 struct anv_state state;
2059 struct anv_event *event;
2060
2061 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
2062
2063 state = anv_state_pool_alloc(&device->dynamic_state_pool,
2064 sizeof(*event), 8);
2065 event = state.map;
2066 event->state = state;
2067 event->semaphore = VK_EVENT_RESET;
2068
2069 if (!device->info.has_llc) {
2070 /* Make sure the writes we're flushing have landed. */
2071 __builtin_ia32_mfence();
2072 __builtin_ia32_clflush(event);
2073 }
2074
2075 *pEvent = anv_event_to_handle(event);
2076
2077 return VK_SUCCESS;
2078 }
2079
2080 void anv_DestroyEvent(
2081 VkDevice _device,
2082 VkEvent _event,
2083 const VkAllocationCallbacks* pAllocator)
2084 {
2085 ANV_FROM_HANDLE(anv_device, device, _device);
2086 ANV_FROM_HANDLE(anv_event, event, _event);
2087
2088 if (!event)
2089 return;
2090
2091 anv_state_pool_free(&device->dynamic_state_pool, event->state);
2092 }
2093
2094 VkResult anv_GetEventStatus(
2095 VkDevice _device,
2096 VkEvent _event)
2097 {
2098 ANV_FROM_HANDLE(anv_device, device, _device);
2099 ANV_FROM_HANDLE(anv_event, event, _event);
2100
2101 if (unlikely(device->lost))
2102 return VK_ERROR_DEVICE_LOST;
2103
2104 if (!device->info.has_llc) {
2105 /* Invalidate read cache before reading event written by GPU. */
2106 __builtin_ia32_clflush(event);
2107 __builtin_ia32_mfence();
2108
2109 }
2110
2111 return event->semaphore;
2112 }
2113
2114 VkResult anv_SetEvent(
2115 VkDevice _device,
2116 VkEvent _event)
2117 {
2118 ANV_FROM_HANDLE(anv_device, device, _device);
2119 ANV_FROM_HANDLE(anv_event, event, _event);
2120
2121 event->semaphore = VK_EVENT_SET;
2122
2123 if (!device->info.has_llc) {
2124 /* Make sure the writes we're flushing have landed. */
2125 __builtin_ia32_mfence();
2126 __builtin_ia32_clflush(event);
2127 }
2128
2129 return VK_SUCCESS;
2130 }
2131
2132 VkResult anv_ResetEvent(
2133 VkDevice _device,
2134 VkEvent _event)
2135 {
2136 ANV_FROM_HANDLE(anv_device, device, _device);
2137 ANV_FROM_HANDLE(anv_event, event, _event);
2138
2139 event->semaphore = VK_EVENT_RESET;
2140
2141 if (!device->info.has_llc) {
2142 /* Make sure the writes we're flushing have landed. */
2143 __builtin_ia32_mfence();
2144 __builtin_ia32_clflush(event);
2145 }
2146
2147 return VK_SUCCESS;
2148 }
2149
2150 // Buffer functions
2151
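/* Buffers are created without any backing storage; buffer->bo remains NULL
 * until the application binds memory via vkBindBufferMemory().
 */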
2152 VkResult anv_CreateBuffer(
2153 VkDevice _device,
2154 const VkBufferCreateInfo* pCreateInfo,
2155 const VkAllocationCallbacks* pAllocator,
2156 VkBuffer* pBuffer)
2157 {
2158 ANV_FROM_HANDLE(anv_device, device, _device);
2159 struct anv_buffer *buffer;
2160
2161 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2162
2163 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2164 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2165 if (buffer == NULL)
2166 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2167
2168 buffer->size = pCreateInfo->size;
2169 buffer->usage = pCreateInfo->usage;
2170 buffer->bo = NULL;
2171 buffer->offset = 0;
2172
2173 *pBuffer = anv_buffer_to_handle(buffer);
2174
2175 return VK_SUCCESS;
2176 }
2177
2178 void anv_DestroyBuffer(
2179 VkDevice _device,
2180 VkBuffer _buffer,
2181 const VkAllocationCallbacks* pAllocator)
2182 {
2183 ANV_FROM_HANDLE(anv_device, device, _device);
2184 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
2185
2186 if (!buffer)
2187 return;
2188
2189 vk_free2(&device->alloc, pAllocator, buffer);
2190 }
2191
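/* Fill out a buffer surface state at the given anv_state and flush it from
 * the CPU cache on platforms without LLC so the GPU sees the update.
 */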
2192 void
2193 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
2194 enum isl_format format,
2195 uint32_t offset, uint32_t range, uint32_t stride)
2196 {
2197 isl_buffer_fill_state(&device->isl_dev, state.map,
2198 .address = offset,
2199 .mocs = device->default_mocs,
2200 .size = range,
2201 .format = format,
2202 .stride = stride);
2203
2204 anv_state_flush(device, state);
2205 }
2206
2207 void anv_DestroySampler(
2208 VkDevice _device,
2209 VkSampler _sampler,
2210 const VkAllocationCallbacks* pAllocator)
2211 {
2212 ANV_FROM_HANDLE(anv_device, device, _device);
2213 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
2214
2215 if (!sampler)
2216 return;
2217
2218 vk_free2(&device->alloc, pAllocator, sampler);
2219 }
2220
2221 VkResult anv_CreateFramebuffer(
2222 VkDevice _device,
2223 const VkFramebufferCreateInfo* pCreateInfo,
2224 const VkAllocationCallbacks* pAllocator,
2225 VkFramebuffer* pFramebuffer)
2226 {
2227 ANV_FROM_HANDLE(anv_device, device, _device);
2228 struct anv_framebuffer *framebuffer;
2229
2230 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2231
2232 size_t size = sizeof(*framebuffer) +
2233 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
2234 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2235 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2236 if (framebuffer == NULL)
2237 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2238
2239 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2240 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2241 VkImageView _iview = pCreateInfo->pAttachments[i];
2242 framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
2243 }
2244
2245 framebuffer->width = pCreateInfo->width;
2246 framebuffer->height = pCreateInfo->height;
2247 framebuffer->layers = pCreateInfo->layers;
2248
2249 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
2250
2251 return VK_SUCCESS;
2252 }
2253
2254 void anv_DestroyFramebuffer(
2255 VkDevice _device,
2256 VkFramebuffer _fb,
2257 const VkAllocationCallbacks* pAllocator)
2258 {
2259 ANV_FROM_HANDLE(anv_device, device, _device);
2260 ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
2261
2262 if (!fb)
2263 return;
2264
2265 vk_free2(&device->alloc, pAllocator, fb);
2266 }
2267
2268 /* vk_icd.h does not declare this function, so we declare it here to
2269 * suppress Wmissing-prototypes.
2270 */
2271 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2272 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
2273
2274 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2275 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
2276 {
2277 /* For the full details on loader interface versioning, see
2278 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2279 * What follows is a condensed summary, to help you navigate the large and
2280 * confusing official doc.
2281 *
2282 * - Loader interface v0 is incompatible with later versions. We don't
2283 * support it.
2284 *
2285 * - In loader interface v1:
2286 * - The first ICD entrypoint called by the loader is
2287 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2288 * entrypoint.
2289 * - The ICD must statically expose no other Vulkan symbol unless it is
2290 * linked with -Bsymbolic.
2291 * - Each dispatchable Vulkan handle created by the ICD must be
2292 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2293 * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
2294 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2295 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2296 * such loader-managed surfaces.
2297 *
2298 * - Loader interface v2 differs from v1 in:
2299 * - The first ICD entrypoint called by the loader is
2300 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2301 * statically expose this entrypoint.
2302 *
2303 * - Loader interface v3 differs from v2 in:
2304 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2305 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
2306     *      vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
2307 */
2308 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2309 return VK_SUCCESS;
2310 }