anv: add VK_KHR_push_descriptor support
[mesa.git] src/intel/vulkan/anv_device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <sys/mman.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30
31 #include "anv_private.h"
32 #include "util/strtod.h"
33 #include "util/debug.h"
34 #include "util/build_id.h"
35 #include "util/vk_util.h"
36
37 #include "genxml/gen7_pack.h"
38
39 static void
40 compiler_debug_log(void *data, const char *fmt, ...)
41 { }
42
43 static void
44 compiler_perf_log(void *data, const char *fmt, ...)
45 {
46 va_list args;
47 va_start(args, fmt);
48
49 if (unlikely(INTEL_DEBUG & DEBUG_PERF))
50 vfprintf(stderr, fmt, args);
51
52 va_end(args);
53 }
54
55 static bool
56 anv_device_get_cache_uuid(void *uuid)
57 {
58 const struct build_id_note *note = build_id_find_nhdr("libvulkan_intel.so");
59 if (!note)
60 return false;
61
62 unsigned len = build_id_length(note);
63 if (len < VK_UUID_SIZE)
64 return false;
65
66 memcpy(uuid, build_id_data(note), VK_UUID_SIZE);
67 return true;
68 }
69
70 static VkResult
71 anv_physical_device_init(struct anv_physical_device *device,
72 struct anv_instance *instance,
73 const char *path)
74 {
75 VkResult result;
76 int fd;
77
78 fd = open(path, O_RDWR | O_CLOEXEC);
79 if (fd < 0)
80 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
81
82 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
83 device->instance = instance;
84
85 assert(strlen(path) < ARRAY_SIZE(device->path));
86 strncpy(device->path, path, ARRAY_SIZE(device->path));
87
88 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
89 if (!device->chipset_id) {
90 result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
91 goto fail;
92 }
93
94 device->name = gen_get_device_name(device->chipset_id);
95 if (!gen_get_device_info(device->chipset_id, &device->info)) {
96 result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
97 goto fail;
98 }
99
100 if (device->info.is_haswell) {
101 fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
102 } else if (device->info.gen == 7 && !device->info.is_baytrail) {
103 fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
104 } else if (device->info.gen == 7 && device->info.is_baytrail) {
105 fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
106 } else if (device->info.gen >= 8) {
107       /* Broadwell, Cherryview, Skylake, Broxton, and Kabylake are as fully
108        * supported as anything. */
109 } else {
110 result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
111 "Vulkan not yet supported on %s", device->name);
112 goto fail;
113 }
114
115 device->cmd_parser_version = -1;
116 if (device->info.gen == 7) {
117 device->cmd_parser_version =
118 anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
119 if (device->cmd_parser_version == -1) {
120 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
121 "failed to get command parser version");
122 goto fail;
123 }
124 }
125
126 if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
127 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
128 "failed to get aperture size: %m");
129 goto fail;
130 }
131
132 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
133 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
134 "kernel missing gem wait");
135 goto fail;
136 }
137
138 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
139 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
140 "kernel missing execbuf2");
141 goto fail;
142 }
143
144 if (!device->info.has_llc &&
145 anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
146 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
147 "kernel missing wc mmap");
148 goto fail;
149 }
150
151 if (!anv_device_get_cache_uuid(device->uuid)) {
152 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
153 "cannot generate UUID");
154 goto fail;
155 }
156 bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
157
158 /* GENs prior to 8 do not support EU/Subslice info */
159 if (device->info.gen >= 8) {
160 device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
161 device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
162
163 /* Without this information, we cannot get the right Braswell
164        * brand strings, and we have to use conservative numbers for GPGPU on
165 * many platforms, but otherwise, things will just work.
166 */
167 if (device->subslice_total < 1 || device->eu_total < 1) {
168 fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
169 " query GPU properties.\n");
170 }
171 } else if (device->info.gen == 7) {
172 device->subslice_total = 1 << (device->info.gt - 1);
173 }
174
175 if (device->info.is_cherryview &&
176 device->subslice_total > 0 && device->eu_total > 0) {
177 /* Logical CS threads = EUs per subslice * 7 threads per EU */
178 uint32_t max_cs_threads = device->eu_total / device->subslice_total * 7;
179
180 /* Fuse configurations may give more threads than expected, never less. */
181 if (max_cs_threads > device->info.max_cs_threads)
182 device->info.max_cs_threads = max_cs_threads;
183 }
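
      /* Worked example (editor's note, hypothetical fuse values): a Cherryview
       * part reporting 16 EUs across 2 enabled subslices would compute
       * 16 / 2 * 7 = 56 logical CS threads, and max_cs_threads is only bumped
       * when that exceeds the table value from gen_device_info.
       */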
184
185 brw_process_intel_debug_variable();
186
187 device->compiler = brw_compiler_create(NULL, &device->info);
188 if (device->compiler == NULL) {
189 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
190 goto fail;
191 }
192 device->compiler->shader_debug_log = compiler_debug_log;
193 device->compiler->shader_perf_log = compiler_perf_log;
194
195 result = anv_init_wsi(device);
196 if (result != VK_SUCCESS) {
197 ralloc_free(device->compiler);
198 goto fail;
199 }
200
201 isl_device_init(&device->isl_dev, &device->info, swizzled);
202
203 device->local_fd = fd;
204 return VK_SUCCESS;
205
206 fail:
207 close(fd);
208 return result;
209 }
210
211 static void
212 anv_physical_device_finish(struct anv_physical_device *device)
213 {
214 anv_finish_wsi(device);
215 ralloc_free(device->compiler);
216 close(device->local_fd);
217 }
218
219 static const VkExtensionProperties global_extensions[] = {
220 {
221 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
222 .specVersion = 25,
223 },
224 #ifdef VK_USE_PLATFORM_XCB_KHR
225 {
226 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
227 .specVersion = 6,
228 },
229 #endif
230 #ifdef VK_USE_PLATFORM_XLIB_KHR
231 {
232 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
233 .specVersion = 6,
234 },
235 #endif
236 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
237 {
238 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
239 .specVersion = 5,
240 },
241 #endif
242 {
243 .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
244 .specVersion = 1,
245 },
246 };
247
248 static const VkExtensionProperties device_extensions[] = {
249 {
250 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
251 .specVersion = 68,
252 },
253 {
254 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
255 .specVersion = 1,
256 },
257 {
258 .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME,
259 .specVersion = 1,
260 },
261 {
262 .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME,
263 .specVersion = 1,
264 },
265 {
266 .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
267 .specVersion = 1,
268 }
269 };
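
/* Editor's sketch (not driver code): with VK_KHR_push_descriptor enabled at
 * device creation and a set layout created with
 * VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, an application can
 * write descriptors straight into a command buffer instead of allocating and
 * updating a descriptor set.  The cmd, pipeline_layout and buffer_info names
 * below are hypothetical:
 *
 *    VkWriteDescriptorSet write = {
 *       .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
 *       .dstBinding = 0,
 *       .descriptorCount = 1,
 *       .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 *       .pBufferInfo = &buffer_info,
 *    };
 *    vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
 *                              pipeline_layout, 0, 1, &write);
 */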
270
271 static void *
272 default_alloc_func(void *pUserData, size_t size, size_t align,
273 VkSystemAllocationScope allocationScope)
274 {
275 return malloc(size);
276 }
277
278 static void *
279 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
280 size_t align, VkSystemAllocationScope allocationScope)
281 {
282 return realloc(pOriginal, size);
283 }
284
285 static void
286 default_free_func(void *pUserData, void *pMemory)
287 {
288 free(pMemory);
289 }
290
291 static const VkAllocationCallbacks default_alloc = {
292 .pUserData = NULL,
293 .pfnAllocation = default_alloc_func,
294 .pfnReallocation = default_realloc_func,
295 .pfnFree = default_free_func,
296 };
297
298 VkResult anv_CreateInstance(
299 const VkInstanceCreateInfo* pCreateInfo,
300 const VkAllocationCallbacks* pAllocator,
301 VkInstance* pInstance)
302 {
303 struct anv_instance *instance;
304
305 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
306
307 uint32_t client_version;
308 if (pCreateInfo->pApplicationInfo &&
309 pCreateInfo->pApplicationInfo->apiVersion != 0) {
310 client_version = pCreateInfo->pApplicationInfo->apiVersion;
311 } else {
312 client_version = VK_MAKE_VERSION(1, 0, 0);
313 }
314
315 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
316 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
317 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
318 "Client requested version %d.%d.%d",
319 VK_VERSION_MAJOR(client_version),
320 VK_VERSION_MINOR(client_version),
321 VK_VERSION_PATCH(client_version));
322 }
323
324 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
325 bool found = false;
326 for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
327 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
328 global_extensions[j].extensionName) == 0) {
329 found = true;
330 break;
331 }
332 }
333 if (!found)
334 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
335 }
336
337 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
338 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
339 if (!instance)
340 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
341
342 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
343
344 if (pAllocator)
345 instance->alloc = *pAllocator;
346 else
347 instance->alloc = default_alloc;
348
349 instance->apiVersion = client_version;
350 instance->physicalDeviceCount = -1;
351
352 _mesa_locale_init();
353
354 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
355
356 *pInstance = anv_instance_to_handle(instance);
357
358 return VK_SUCCESS;
359 }
360
361 void anv_DestroyInstance(
362 VkInstance _instance,
363 const VkAllocationCallbacks* pAllocator)
364 {
365 ANV_FROM_HANDLE(anv_instance, instance, _instance);
366
367 if (!instance)
368 return;
369
370 if (instance->physicalDeviceCount > 0) {
371 /* We support at most one physical device. */
372 assert(instance->physicalDeviceCount == 1);
373 anv_physical_device_finish(&instance->physicalDevice);
374 }
375
376 VG(VALGRIND_DESTROY_MEMPOOL(instance));
377
378 _mesa_locale_fini();
379
380 vk_free(&instance->alloc, instance);
381 }
382
383 VkResult anv_EnumeratePhysicalDevices(
384 VkInstance _instance,
385 uint32_t* pPhysicalDeviceCount,
386 VkPhysicalDevice* pPhysicalDevices)
387 {
388 ANV_FROM_HANDLE(anv_instance, instance, _instance);
389 VkResult result;
390
391 if (instance->physicalDeviceCount < 0) {
392 char path[20];
393 for (unsigned i = 0; i < 8; i++) {
394 snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
395 result = anv_physical_device_init(&instance->physicalDevice,
396 instance, path);
397 if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
398 break;
399 }
400
401 if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
402 instance->physicalDeviceCount = 0;
403 } else if (result == VK_SUCCESS) {
404 instance->physicalDeviceCount = 1;
405 } else {
406 return result;
407 }
408 }
409
410 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
411 * otherwise it's an inout parameter.
412 *
413 * The Vulkan spec (git aaed022) says:
414 *
415 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
416 * that is initialized with the number of devices the application is
417        *     prepared to receive handles to. pname:pPhysicalDevices is a pointer to
418 * an array of at least this many VkPhysicalDevice handles [...].
419 *
420 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
421 * overwrites the contents of the variable pointed to by
422        *    pPhysicalDeviceCount with the number of physical devices in the
423 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
424 * pPhysicalDeviceCount with the number of physical handles written to
425 * pPhysicalDevices.
426 */
427 if (!pPhysicalDevices) {
428 *pPhysicalDeviceCount = instance->physicalDeviceCount;
429 } else if (*pPhysicalDeviceCount >= 1) {
430 pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
431 *pPhysicalDeviceCount = 1;
432 } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
433 return VK_INCOMPLETE;
434 } else {
435 *pPhysicalDeviceCount = 0;
436 }
437
438 return VK_SUCCESS;
439 }
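
/* Editor's sketch of the two-call pattern described in the comment above, as
 * seen from an application (names are illustrative):
 *
 *    uint32_t count = 0;
 *    vkEnumeratePhysicalDevices(instance, &count, NULL);
 *    VkPhysicalDevice *devices = malloc(count * sizeof(*devices));
 *    vkEnumeratePhysicalDevices(instance, &count, devices);
 *
 * With this driver count comes back as 0 or 1, since at most one physical
 * device is exposed.
 */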
440
441 void anv_GetPhysicalDeviceFeatures(
442 VkPhysicalDevice physicalDevice,
443 VkPhysicalDeviceFeatures* pFeatures)
444 {
445 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
446
447 *pFeatures = (VkPhysicalDeviceFeatures) {
448 .robustBufferAccess = true,
449 .fullDrawIndexUint32 = true,
450 .imageCubeArray = true,
451 .independentBlend = true,
452 .geometryShader = true,
453 .tessellationShader = true,
454 .sampleRateShading = true,
455 .dualSrcBlend = true,
456 .logicOp = true,
457 .multiDrawIndirect = false,
458 .drawIndirectFirstInstance = true,
459 .depthClamp = true,
460 .depthBiasClamp = true,
461 .fillModeNonSolid = true,
462 .depthBounds = false,
463 .wideLines = true,
464 .largePoints = true,
465 .alphaToOne = true,
466 .multiViewport = true,
467 .samplerAnisotropy = true,
468 .textureCompressionETC2 = pdevice->info.gen >= 8 ||
469 pdevice->info.is_baytrail,
470 .textureCompressionASTC_LDR = pdevice->info.gen >= 9, /* FINISHME CHV */
471 .textureCompressionBC = true,
472 .occlusionQueryPrecise = true,
473 .pipelineStatisticsQuery = false,
474 .fragmentStoresAndAtomics = true,
475 .shaderTessellationAndGeometryPointSize = true,
476 .shaderImageGatherExtended = true,
477 .shaderStorageImageExtendedFormats = true,
478 .shaderStorageImageMultisample = false,
479 .shaderStorageImageReadWithoutFormat = false,
480 .shaderStorageImageWriteWithoutFormat = true,
481 .shaderUniformBufferArrayDynamicIndexing = true,
482 .shaderSampledImageArrayDynamicIndexing = true,
483 .shaderStorageBufferArrayDynamicIndexing = true,
484 .shaderStorageImageArrayDynamicIndexing = true,
485 .shaderClipDistance = true,
486 .shaderCullDistance = true,
487 .shaderFloat64 = pdevice->info.gen >= 8,
488 .shaderInt64 = false,
489 .shaderInt16 = false,
490 .shaderResourceMinLod = false,
491 .variableMultisampleRate = false,
492 .inheritedQueries = false,
493 };
494
495 /* We can't do image stores in vec4 shaders */
496 pFeatures->vertexPipelineStoresAndAtomics =
497 pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
498 pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
499 }
500
501 void anv_GetPhysicalDeviceFeatures2KHR(
502 VkPhysicalDevice physicalDevice,
503 VkPhysicalDeviceFeatures2KHR* pFeatures)
504 {
505 anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
506
507 vk_foreach_struct(ext, pFeatures->pNext) {
508 switch (ext->sType) {
517 default:
518 anv_debug_ignored_stype(ext->sType);
519 break;
520 }
521 }
522 }
523
524 void anv_GetPhysicalDeviceProperties(
525 VkPhysicalDevice physicalDevice,
526 VkPhysicalDeviceProperties* pProperties)
527 {
528 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
529 const struct gen_device_info *devinfo = &pdevice->info;
530
531 const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
532
533 /* See assertions made when programming the buffer surface state. */
534 const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
535 (1ul << 30) : (1ul << 27);
536
537 VkSampleCountFlags sample_counts =
538 isl_device_get_sample_counts(&pdevice->isl_dev);
539
540 VkPhysicalDeviceLimits limits = {
541 .maxImageDimension1D = (1 << 14),
542 .maxImageDimension2D = (1 << 14),
543 .maxImageDimension3D = (1 << 11),
544 .maxImageDimensionCube = (1 << 14),
545 .maxImageArrayLayers = (1 << 11),
546 .maxTexelBufferElements = 128 * 1024 * 1024,
547 .maxUniformBufferRange = (1ul << 27),
548 .maxStorageBufferRange = max_raw_buffer_sz,
549 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
550 .maxMemoryAllocationCount = UINT32_MAX,
551 .maxSamplerAllocationCount = 64 * 1024,
552 .bufferImageGranularity = 64, /* A cache line */
553 .sparseAddressSpaceSize = 0,
554 .maxBoundDescriptorSets = MAX_SETS,
555 .maxPerStageDescriptorSamplers = 64,
556 .maxPerStageDescriptorUniformBuffers = 64,
557 .maxPerStageDescriptorStorageBuffers = 64,
558 .maxPerStageDescriptorSampledImages = 64,
559 .maxPerStageDescriptorStorageImages = 64,
560 .maxPerStageDescriptorInputAttachments = 64,
561 .maxPerStageResources = 128,
562 .maxDescriptorSetSamplers = 256,
563 .maxDescriptorSetUniformBuffers = 256,
564 .maxDescriptorSetUniformBuffersDynamic = 256,
565 .maxDescriptorSetStorageBuffers = 256,
566 .maxDescriptorSetStorageBuffersDynamic = 256,
567 .maxDescriptorSetSampledImages = 256,
568 .maxDescriptorSetStorageImages = 256,
569 .maxDescriptorSetInputAttachments = 256,
570 .maxVertexInputAttributes = MAX_VBS,
571 .maxVertexInputBindings = MAX_VBS,
572 .maxVertexInputAttributeOffset = 2047,
573 .maxVertexInputBindingStride = 2048,
574 .maxVertexOutputComponents = 128,
575 .maxTessellationGenerationLevel = 64,
576 .maxTessellationPatchSize = 32,
577 .maxTessellationControlPerVertexInputComponents = 128,
578 .maxTessellationControlPerVertexOutputComponents = 128,
579 .maxTessellationControlPerPatchOutputComponents = 128,
580 .maxTessellationControlTotalOutputComponents = 2048,
581 .maxTessellationEvaluationInputComponents = 128,
582 .maxTessellationEvaluationOutputComponents = 128,
583 .maxGeometryShaderInvocations = 32,
584 .maxGeometryInputComponents = 64,
585 .maxGeometryOutputComponents = 128,
586 .maxGeometryOutputVertices = 256,
587 .maxGeometryTotalOutputComponents = 1024,
588 .maxFragmentInputComponents = 128,
589 .maxFragmentOutputAttachments = 8,
590 .maxFragmentDualSrcAttachments = 1,
591 .maxFragmentCombinedOutputResources = 8,
592 .maxComputeSharedMemorySize = 32768,
593 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
594 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
595 .maxComputeWorkGroupSize = {
596 16 * devinfo->max_cs_threads,
597 16 * devinfo->max_cs_threads,
598 16 * devinfo->max_cs_threads,
599 },
600 .subPixelPrecisionBits = 4 /* FIXME */,
601 .subTexelPrecisionBits = 4 /* FIXME */,
602 .mipmapPrecisionBits = 4 /* FIXME */,
603 .maxDrawIndexedIndexValue = UINT32_MAX,
604 .maxDrawIndirectCount = UINT32_MAX,
605 .maxSamplerLodBias = 16,
606 .maxSamplerAnisotropy = 16,
607 .maxViewports = MAX_VIEWPORTS,
608 .maxViewportDimensions = { (1 << 14), (1 << 14) },
609 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
610 .viewportSubPixelBits = 13, /* We take a float? */
611 .minMemoryMapAlignment = 4096, /* A page */
612 .minTexelBufferOffsetAlignment = 1,
613 .minUniformBufferOffsetAlignment = 16,
614 .minStorageBufferOffsetAlignment = 4,
615 .minTexelOffset = -8,
616 .maxTexelOffset = 7,
617 .minTexelGatherOffset = -32,
618 .maxTexelGatherOffset = 31,
619 .minInterpolationOffset = -0.5,
620 .maxInterpolationOffset = 0.4375,
621 .subPixelInterpolationOffsetBits = 4,
622 .maxFramebufferWidth = (1 << 14),
623 .maxFramebufferHeight = (1 << 14),
624 .maxFramebufferLayers = (1 << 11),
625 .framebufferColorSampleCounts = sample_counts,
626 .framebufferDepthSampleCounts = sample_counts,
627 .framebufferStencilSampleCounts = sample_counts,
628 .framebufferNoAttachmentsSampleCounts = sample_counts,
629 .maxColorAttachments = MAX_RTS,
630 .sampledImageColorSampleCounts = sample_counts,
631 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
632 .sampledImageDepthSampleCounts = sample_counts,
633 .sampledImageStencilSampleCounts = sample_counts,
634 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
635 .maxSampleMaskWords = 1,
636 .timestampComputeAndGraphics = false,
637 .timestampPeriod = time_stamp_base,
638 .maxClipDistances = 8,
639 .maxCullDistances = 8,
640 .maxCombinedClipAndCullDistances = 8,
641 .discreteQueuePriorities = 1,
642 .pointSizeRange = { 0.125, 255.875 },
643 .lineWidthRange = { 0.0, 7.9921875 },
644 .pointSizeGranularity = (1.0 / 8.0),
645 .lineWidthGranularity = (1.0 / 128.0),
646 .strictLines = false, /* FINISHME */
647 .standardSampleLocations = true,
648 .optimalBufferCopyOffsetAlignment = 128,
649 .optimalBufferCopyRowPitchAlignment = 128,
650 .nonCoherentAtomSize = 64,
651 };
652
653 *pProperties = (VkPhysicalDeviceProperties) {
654 .apiVersion = VK_MAKE_VERSION(1, 0, 42),
655 .driverVersion = 1,
656 .vendorID = 0x8086,
657 .deviceID = pdevice->chipset_id,
658 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
659 .limits = limits,
660 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
661 };
662
663 strcpy(pProperties->deviceName, pdevice->name);
664 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
665 }
666
667 void anv_GetPhysicalDeviceProperties2KHR(
668 VkPhysicalDevice physicalDevice,
669 VkPhysicalDeviceProperties2KHR* pProperties)
670 {
671 anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
672
673 vk_foreach_struct(ext, pProperties->pNext) {
674 switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;

         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }

675          default:
676 anv_debug_ignored_stype(ext->sType);
677 break;
678 }
679 }
680 }
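
/* Editor's sketch (illustrative, not driver code): an application queries the
 * VK_KHR_push_descriptor limit by chaining the KHR struct into this call's
 * pNext list:
 *
 *    VkPhysicalDevicePushDescriptorPropertiesKHR push_props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR,
 *    };
 *    VkPhysicalDeviceProperties2KHR props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
 *       .pNext = &push_props,
 *    };
 *    vkGetPhysicalDeviceProperties2KHR(physicalDevice, &props2);
 *
 * after which push_props.maxPushDescriptors holds MAX_PUSH_DESCRIPTORS.
 */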
681
682 static void
683 anv_get_queue_family_properties(struct anv_physical_device *phys_dev,
684 VkQueueFamilyProperties *props)
685 {
686 *props = (VkQueueFamilyProperties) {
687 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
688 VK_QUEUE_COMPUTE_BIT |
689 VK_QUEUE_TRANSFER_BIT,
690 .queueCount = 1,
691 .timestampValidBits = 36, /* XXX: Real value here */
692 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
693 };
694 }
695
696 void anv_GetPhysicalDeviceQueueFamilyProperties(
697 VkPhysicalDevice physicalDevice,
698 uint32_t* pCount,
699 VkQueueFamilyProperties* pQueueFamilyProperties)
700 {
701 ANV_FROM_HANDLE(anv_physical_device, phys_dev, physicalDevice);
702
703 if (pQueueFamilyProperties == NULL) {
704 *pCount = 1;
705 return;
706 }
707
708 /* The spec implicitly allows the incoming count to be 0. From the Vulkan
709 * 1.0.38 spec, Section 4.1 Physical Devices:
710 *
711 * If the value referenced by pQueueFamilyPropertyCount is not 0 [then
712 * do stuff].
713 */
714 if (*pCount == 0)
715 return;
716
717 *pCount = 1;
718 anv_get_queue_family_properties(phys_dev, pQueueFamilyProperties);
719 }
720
721 void anv_GetPhysicalDeviceQueueFamilyProperties2KHR(
722 VkPhysicalDevice physicalDevice,
723 uint32_t* pQueueFamilyPropertyCount,
724 VkQueueFamilyProperties2KHR* pQueueFamilyProperties)
725 {
726
727 ANV_FROM_HANDLE(anv_physical_device, phys_dev, physicalDevice);
728
729 if (pQueueFamilyProperties == NULL) {
730 *pQueueFamilyPropertyCount = 1;
731 return;
732 }
733
734 /* The spec implicitly allows the incoming count to be 0. From the Vulkan
735 * 1.0.38 spec, Section 4.1 Physical Devices:
736 *
737 * If the value referenced by pQueueFamilyPropertyCount is not 0 [then
738 * do stuff].
739 */
740 if (*pQueueFamilyPropertyCount == 0)
741 return;
742
743    /* We support exactly one queue family, so we only need to traverse the first
744 * array element's pNext chain.
745 */
746 *pQueueFamilyPropertyCount = 1;
747 anv_get_queue_family_properties(phys_dev,
748 &pQueueFamilyProperties->queueFamilyProperties);
749
750 vk_foreach_struct(ext, pQueueFamilyProperties->pNext) {
751 switch (ext->sType) {
752 default:
753 anv_debug_ignored_stype(ext->sType);
754 break;
755 }
756 }
757 }
758
759 void anv_GetPhysicalDeviceMemoryProperties(
760 VkPhysicalDevice physicalDevice,
761 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
762 {
763 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
764 VkDeviceSize heap_size;
765
766 /* Reserve some wiggle room for the driver by exposing only 75% of the
767 * aperture to the heap.
768 */
769 heap_size = 3 * physical_device->aperture_size / 4;
770
771 if (physical_device->info.has_llc) {
772 /* Big core GPUs share LLC with the CPU and thus one memory type can be
773 * both cached and coherent at the same time.
774 */
775 pMemoryProperties->memoryTypeCount = 1;
776 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
777 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
778 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
779 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
780 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
781 .heapIndex = 0,
782 };
783 } else {
784 /* The spec requires that we expose a host-visible, coherent memory
785 * type, but Atom GPUs don't share LLC. Thus we offer two memory types
786        * to give the application a choice: cached but not coherent, or
787        * coherent but uncached (write-combined).
788 */
789 pMemoryProperties->memoryTypeCount = 2;
790 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
791 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
792 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
793 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
794 .heapIndex = 0,
795 };
796 pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
797 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
798 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
799 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
800 .heapIndex = 0,
801 };
802 }
803
804 pMemoryProperties->memoryHeapCount = 1;
805 pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
806 .size = heap_size,
807 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
808 };
809 }
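
/* Editor's sketch (illustrative helper, not part of the driver): the usual way
 * an application picks one of the types exposed above, given a resource's
 * memoryTypeBits and the property flags it requires:
 *
 *    static uint32_t
 *    choose_memory_type(const VkPhysicalDeviceMemoryProperties *props,
 *                       uint32_t type_bits, VkMemoryPropertyFlags required)
 *    {
 *       for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
 *          if ((type_bits & (1u << i)) &&
 *              (props->memoryTypes[i].propertyFlags & required) == required)
 *             return i;
 *       }
 *       return UINT32_MAX;
 *    }
 *
 * On LLC parts only type 0 exists; on non-LLC parts requiring HOST_COHERENT
 * selects type 0 and requiring HOST_CACHED selects type 1.
 */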
810
811 void anv_GetPhysicalDeviceMemoryProperties2KHR(
812 VkPhysicalDevice physicalDevice,
813 VkPhysicalDeviceMemoryProperties2KHR* pMemoryProperties)
814 {
815 anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
816 &pMemoryProperties->memoryProperties);
817
818 vk_foreach_struct(ext, pMemoryProperties->pNext) {
819 switch (ext->sType) {
820 default:
821 anv_debug_ignored_stype(ext->sType);
822 break;
823 }
824 }
825 }
826
827 PFN_vkVoidFunction anv_GetInstanceProcAddr(
828 VkInstance instance,
829 const char* pName)
830 {
831 return anv_lookup_entrypoint(NULL, pName);
832 }
833
834 /* With version 1+ of the loader interface the ICD should expose
835 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
836 */
837 PUBLIC
838 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
839 VkInstance instance,
840 const char* pName);
841
842 PUBLIC
843 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
844 VkInstance instance,
845 const char* pName)
846 {
847 return anv_GetInstanceProcAddr(instance, pName);
848 }
849
850 PFN_vkVoidFunction anv_GetDeviceProcAddr(
851 VkDevice _device,
852 const char* pName)
853 {
854 ANV_FROM_HANDLE(anv_device, device, _device);
855 return anv_lookup_entrypoint(&device->info, pName);
856 }
857
858 static void
859 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
860 {
861 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
862 queue->device = device;
863 queue->pool = &device->surface_state_pool;
864 }
865
866 static void
867 anv_queue_finish(struct anv_queue *queue)
868 {
869 }
870
871 static struct anv_state
872 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
873 {
874 struct anv_state state;
875
876 state = anv_state_pool_alloc(pool, size, align);
877 memcpy(state.map, p, size);
878
879 anv_state_flush(pool->block_pool->device, state);
880
881 return state;
882 }
883
884 struct gen8_border_color {
885 union {
886 float float32[4];
887 uint32_t uint32[4];
888 };
889 /* Pad out to 64 bytes */
890 uint32_t _pad[12];
891 };
892
893 static void
894 anv_device_init_border_colors(struct anv_device *device)
895 {
896 static const struct gen8_border_color border_colors[] = {
897 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
898 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
899 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
900 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
901 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
902 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
903 };
904
905 device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
906 sizeof(border_colors), 64,
907 border_colors);
908 }
909
910 VkResult
911 anv_device_submit_simple_batch(struct anv_device *device,
912 struct anv_batch *batch)
913 {
914 struct drm_i915_gem_execbuffer2 execbuf;
915 struct drm_i915_gem_exec_object2 exec2_objects[1];
916 struct anv_bo bo, *exec_bos[1];
917 VkResult result = VK_SUCCESS;
918 uint32_t size;
919 int64_t timeout;
920 int ret;
921
922 /* Kernel driver requires 8 byte aligned batch length */
923 size = align_u32(batch->next - batch->start, 8);
924 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
925 if (result != VK_SUCCESS)
926 return result;
927
928 memcpy(bo.map, batch->start, size);
929 if (!device->info.has_llc)
930 anv_flush_range(bo.map, size);
931
932 exec_bos[0] = &bo;
933 exec2_objects[0].handle = bo.gem_handle;
934 exec2_objects[0].relocation_count = 0;
935 exec2_objects[0].relocs_ptr = 0;
936 exec2_objects[0].alignment = 0;
937 exec2_objects[0].offset = bo.offset;
938 exec2_objects[0].flags = 0;
939 exec2_objects[0].rsvd1 = 0;
940 exec2_objects[0].rsvd2 = 0;
941
942 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
943 execbuf.buffer_count = 1;
944 execbuf.batch_start_offset = 0;
945 execbuf.batch_len = size;
946 execbuf.cliprects_ptr = 0;
947 execbuf.num_cliprects = 0;
948 execbuf.DR1 = 0;
949 execbuf.DR4 = 0;
950
951 execbuf.flags =
952 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
953 execbuf.rsvd1 = device->context_id;
954 execbuf.rsvd2 = 0;
955
956 result = anv_device_execbuf(device, &execbuf, exec_bos);
957 if (result != VK_SUCCESS)
958 goto fail;
959
960 timeout = INT64_MAX;
961 ret = anv_gem_wait(device, bo.gem_handle, &timeout);
962 if (ret != 0) {
963 /* We don't know the real error. */
964 result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
965 goto fail;
966 }
967
968 fail:
969 anv_bo_pool_free(&device->batch_bo_pool, &bo);
970
971 return result;
972 }
973
974 VkResult anv_CreateDevice(
975 VkPhysicalDevice physicalDevice,
976 const VkDeviceCreateInfo* pCreateInfo,
977 const VkAllocationCallbacks* pAllocator,
978 VkDevice* pDevice)
979 {
980 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
981 VkResult result;
982 struct anv_device *device;
983
984 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
985
986 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
987 bool found = false;
988 for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
989 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
990 device_extensions[j].extensionName) == 0) {
991 found = true;
992 break;
993 }
994 }
995 if (!found)
996 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
997 }
998
999 device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
1000 sizeof(*device), 8,
1001 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1002 if (!device)
1003 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1004
1005 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
1006 device->instance = physical_device->instance;
1007 device->chipset_id = physical_device->chipset_id;
1008
1009 if (pAllocator)
1010 device->alloc = *pAllocator;
1011 else
1012 device->alloc = physical_device->instance->alloc;
1013
1014 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
1015 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
1016 if (device->fd == -1) {
1017 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1018 goto fail_device;
1019 }
1020
1021 device->context_id = anv_gem_create_context(device);
1022 if (device->context_id == -1) {
1023 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
1024 goto fail_fd;
1025 }
1026
1027 device->info = physical_device->info;
1028 device->isl_dev = physical_device->isl_dev;
1029
1030 /* On Broadwell and later, we can use batch chaining to more efficiently
1031     * implement growing command buffers.  Prior to Gen8, the kernel
1032 * command parser gets in the way and we have to fall back to growing
1033 * the batch.
1034 */
1035 device->can_chain_batches = device->info.gen >= 8;
1036
1037 device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
1038 pCreateInfo->pEnabledFeatures->robustBufferAccess;
1039
1040 pthread_mutex_init(&device->mutex, NULL);
1041
1042 pthread_condattr_t condattr;
1043 pthread_condattr_init(&condattr);
1044 pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
1045    pthread_cond_init(&device->queue_submit, &condattr);
1046 pthread_condattr_destroy(&condattr);
1047
1048 anv_bo_pool_init(&device->batch_bo_pool, device);
1049
1050 anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
1051
1052 anv_state_pool_init(&device->dynamic_state_pool,
1053 &device->dynamic_state_block_pool);
1054
1055 anv_block_pool_init(&device->instruction_block_pool, device, 1024 * 1024);
1056 anv_state_pool_init(&device->instruction_state_pool,
1057 &device->instruction_block_pool);
1058
1059 anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
1060
1061 anv_state_pool_init(&device->surface_state_pool,
1062 &device->surface_state_block_pool);
1063
1064 anv_bo_init_new(&device->workaround_bo, device, 1024);
1065
1066 anv_scratch_pool_init(device, &device->scratch_pool);
1067
1068 anv_queue_init(device, &device->queue);
1069
1070 switch (device->info.gen) {
1071 case 7:
1072 if (!device->info.is_haswell)
1073 result = gen7_init_device_state(device);
1074 else
1075 result = gen75_init_device_state(device);
1076 break;
1077 case 8:
1078 result = gen8_init_device_state(device);
1079 break;
1080 case 9:
1081 result = gen9_init_device_state(device);
1082 break;
1083 default:
1084 /* Shouldn't get here as we don't create physical devices for any other
1085 * gens. */
1086 unreachable("unhandled gen");
1087 }
1088 if (result != VK_SUCCESS)
1089 goto fail_fd;
1090
1091 anv_device_init_blorp(device);
1092
1093 anv_device_init_border_colors(device);
1094
1095 *pDevice = anv_device_to_handle(device);
1096
1097 return VK_SUCCESS;
1098
1099 fail_fd:
1100 close(device->fd);
1101 fail_device:
1102 vk_free(&device->alloc, device);
1103
1104 return result;
1105 }
1106
1107 void anv_DestroyDevice(
1108 VkDevice _device,
1109 const VkAllocationCallbacks* pAllocator)
1110 {
1111 ANV_FROM_HANDLE(anv_device, device, _device);
1112
1113 if (!device)
1114 return;
1115
1116 anv_device_finish_blorp(device);
1117
1118 anv_queue_finish(&device->queue);
1119
1120 #ifdef HAVE_VALGRIND
1121 /* We only need to free these to prevent valgrind errors. The backing
1122 * BO will go away in a couple of lines so we don't actually leak.
1123 */
1124 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1125 #endif
1126
1127 anv_scratch_pool_finish(device, &device->scratch_pool);
1128
1129 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
1130 anv_gem_close(device, device->workaround_bo.gem_handle);
1131
1132 anv_state_pool_finish(&device->surface_state_pool);
1133 anv_block_pool_finish(&device->surface_state_block_pool);
1134 anv_state_pool_finish(&device->instruction_state_pool);
1135 anv_block_pool_finish(&device->instruction_block_pool);
1136 anv_state_pool_finish(&device->dynamic_state_pool);
1137 anv_block_pool_finish(&device->dynamic_state_block_pool);
1138
1139 anv_bo_pool_finish(&device->batch_bo_pool);
1140
1141 pthread_cond_destroy(&device->queue_submit);
1142 pthread_mutex_destroy(&device->mutex);
1143
1144 anv_gem_destroy_context(device, device->context_id);
1145
1146 close(device->fd);
1147
1148 vk_free(&device->alloc, device);
1149 }
1150
1151 VkResult anv_EnumerateInstanceExtensionProperties(
1152 const char* pLayerName,
1153 uint32_t* pPropertyCount,
1154 VkExtensionProperties* pProperties)
1155 {
1156 if (pProperties == NULL) {
1157 *pPropertyCount = ARRAY_SIZE(global_extensions);
1158 return VK_SUCCESS;
1159 }
1160
1161 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
1162 typed_memcpy(pProperties, global_extensions, *pPropertyCount);
1163
1164 if (*pPropertyCount < ARRAY_SIZE(global_extensions))
1165 return VK_INCOMPLETE;
1166
1167 return VK_SUCCESS;
1168 }
1169
1170 VkResult anv_EnumerateDeviceExtensionProperties(
1171 VkPhysicalDevice physicalDevice,
1172 const char* pLayerName,
1173 uint32_t* pPropertyCount,
1174 VkExtensionProperties* pProperties)
1175 {
1176 if (pProperties == NULL) {
1177 *pPropertyCount = ARRAY_SIZE(device_extensions);
1178 return VK_SUCCESS;
1179 }
1180
1181 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
1182 typed_memcpy(pProperties, device_extensions, *pPropertyCount);
1183
1184 if (*pPropertyCount < ARRAY_SIZE(device_extensions))
1185 return VK_INCOMPLETE;
1186
1187 return VK_SUCCESS;
1188 }
1189
1190 VkResult anv_EnumerateInstanceLayerProperties(
1191 uint32_t* pPropertyCount,
1192 VkLayerProperties* pProperties)
1193 {
1194 if (pProperties == NULL) {
1195 *pPropertyCount = 0;
1196 return VK_SUCCESS;
1197 }
1198
1199 /* None supported at this time */
1200 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1201 }
1202
1203 VkResult anv_EnumerateDeviceLayerProperties(
1204 VkPhysicalDevice physicalDevice,
1205 uint32_t* pPropertyCount,
1206 VkLayerProperties* pProperties)
1207 {
1208 if (pProperties == NULL) {
1209 *pPropertyCount = 0;
1210 return VK_SUCCESS;
1211 }
1212
1213 /* None supported at this time */
1214 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1215 }
1216
1217 void anv_GetDeviceQueue(
1218 VkDevice _device,
1219 uint32_t queueNodeIndex,
1220 uint32_t queueIndex,
1221 VkQueue* pQueue)
1222 {
1223 ANV_FROM_HANDLE(anv_device, device, _device);
1224
1225 assert(queueIndex == 0);
1226
1227 *pQueue = anv_queue_to_handle(&device->queue);
1228 }
1229
1230 VkResult
1231 anv_device_execbuf(struct anv_device *device,
1232 struct drm_i915_gem_execbuffer2 *execbuf,
1233 struct anv_bo **execbuf_bos)
1234 {
1235 int ret = anv_gem_execbuffer(device, execbuf);
1236 if (ret != 0) {
1237 /* We don't know the real error. */
1238 return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
1239 }
1240
1241 struct drm_i915_gem_exec_object2 *objects =
1242 (void *)(uintptr_t)execbuf->buffers_ptr;
1243 for (uint32_t k = 0; k < execbuf->buffer_count; k++)
1244 execbuf_bos[k]->offset = objects[k].offset;
1245
1246 return VK_SUCCESS;
1247 }
1248
1249 VkResult anv_QueueSubmit(
1250 VkQueue _queue,
1251 uint32_t submitCount,
1252 const VkSubmitInfo* pSubmits,
1253 VkFence _fence)
1254 {
1255 ANV_FROM_HANDLE(anv_queue, queue, _queue);
1256 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1257 struct anv_device *device = queue->device;
1258 VkResult result = VK_SUCCESS;
1259
1260 /* We lock around QueueSubmit for three main reasons:
1261 *
1262 * 1) When a block pool is resized, we create a new gem handle with a
1263 * different size and, in the case of surface states, possibly a
1264 * different center offset but we re-use the same anv_bo struct when
1265 * we do so. If this happens in the middle of setting up an execbuf,
1266 * we could end up with our list of BOs out of sync with our list of
1267 * gem handles.
1268 *
1269 * 2) The algorithm we use for building the list of unique buffers isn't
1270     *    thread-safe. While the client is supposed to synchronize around
1271 * QueueSubmit, this would be extremely difficult to debug if it ever
1272 * came up in the wild due to a broken app. It's better to play it
1273 * safe and just lock around QueueSubmit.
1274 *
1275 * 3) The anv_cmd_buffer_execbuf function may perform relocations in
1276 * userspace. Due to the fact that the surface state buffer is shared
1277 * between batches, we can't afford to have that happen from multiple
1278 * threads at the same time. Even though the user is supposed to
1279 * ensure this doesn't happen, we play it safe as in (2) above.
1280 *
1281     * Since the only other things that ever take the device lock, such as block
1282     * pool resize, only rarely happen, this will almost never be contended, so
1283 * taking a lock isn't really an expensive operation in this case.
1284 */
1285 pthread_mutex_lock(&device->mutex);
1286
1287 for (uint32_t i = 0; i < submitCount; i++) {
1288 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1289 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
1290 pSubmits[i].pCommandBuffers[j]);
1291 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1292
1293 result = anv_cmd_buffer_execbuf(device, cmd_buffer);
1294 if (result != VK_SUCCESS)
1295 goto out;
1296 }
1297 }
1298
1299 if (fence) {
1300 struct anv_bo *fence_bo = &fence->bo;
1301 result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
1302 if (result != VK_SUCCESS)
1303 goto out;
1304
1305 /* Update the fence and wake up any waiters */
1306 assert(fence->state == ANV_FENCE_STATE_RESET);
1307 fence->state = ANV_FENCE_STATE_SUBMITTED;
1308 pthread_cond_broadcast(&device->queue_submit);
1309 }
1310
1311 out:
1312 pthread_mutex_unlock(&device->mutex);
1313
1314 return result;
1315 }
1316
1317 VkResult anv_QueueWaitIdle(
1318 VkQueue _queue)
1319 {
1320 ANV_FROM_HANDLE(anv_queue, queue, _queue);
1321
1322 return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
1323 }
1324
1325 VkResult anv_DeviceWaitIdle(
1326 VkDevice _device)
1327 {
1328 ANV_FROM_HANDLE(anv_device, device, _device);
1329 struct anv_batch batch;
1330
1331 uint32_t cmds[8];
1332 batch.start = batch.next = cmds;
1333 batch.end = (void *) cmds + sizeof(cmds);
1334
1335 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1336 anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1337
1338 return anv_device_submit_simple_batch(device, &batch);
1339 }
1340
1341 VkResult
1342 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1343 {
1344 uint32_t gem_handle = anv_gem_create(device, size);
1345 if (!gem_handle)
1346 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1347
1348 anv_bo_init(bo, gem_handle, size);
1349
1350 return VK_SUCCESS;
1351 }
1352
1353 VkResult anv_AllocateMemory(
1354 VkDevice _device,
1355 const VkMemoryAllocateInfo* pAllocateInfo,
1356 const VkAllocationCallbacks* pAllocator,
1357 VkDeviceMemory* pMem)
1358 {
1359 ANV_FROM_HANDLE(anv_device, device, _device);
1360 struct anv_device_memory *mem;
1361 VkResult result;
1362
1363 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1364
1365 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
1366 assert(pAllocateInfo->allocationSize > 0);
1367
1368 /* We support exactly one memory heap. */
1369 assert(pAllocateInfo->memoryTypeIndex == 0 ||
1370 (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
1371
1372 /* FINISHME: Fail if allocation request exceeds heap size. */
1373
1374 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1375 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1376 if (mem == NULL)
1377 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1378
1379 /* The kernel is going to give us whole pages anyway */
1380 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1381
1382 result = anv_bo_init_new(&mem->bo, device, alloc_size);
1383 if (result != VK_SUCCESS)
1384 goto fail;
1385
1386 mem->type_index = pAllocateInfo->memoryTypeIndex;
1387
1388 mem->map = NULL;
1389 mem->map_size = 0;
1390
1391 *pMem = anv_device_memory_to_handle(mem);
1392
1393 return VK_SUCCESS;
1394
1395 fail:
1396 vk_free2(&device->alloc, pAllocator, mem);
1397
1398 return result;
1399 }
1400
1401 void anv_FreeMemory(
1402 VkDevice _device,
1403 VkDeviceMemory _mem,
1404 const VkAllocationCallbacks* pAllocator)
1405 {
1406 ANV_FROM_HANDLE(anv_device, device, _device);
1407 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1408
1409 if (mem == NULL)
1410 return;
1411
1412 if (mem->map)
1413 anv_UnmapMemory(_device, _mem);
1414
1415 if (mem->bo.map)
1416 anv_gem_munmap(mem->bo.map, mem->bo.size);
1417
1418 if (mem->bo.gem_handle != 0)
1419 anv_gem_close(device, mem->bo.gem_handle);
1420
1421 vk_free2(&device->alloc, pAllocator, mem);
1422 }
1423
1424 VkResult anv_MapMemory(
1425 VkDevice _device,
1426 VkDeviceMemory _memory,
1427 VkDeviceSize offset,
1428 VkDeviceSize size,
1429 VkMemoryMapFlags flags,
1430 void** ppData)
1431 {
1432 ANV_FROM_HANDLE(anv_device, device, _device);
1433 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1434
1435 if (mem == NULL) {
1436 *ppData = NULL;
1437 return VK_SUCCESS;
1438 }
1439
1440 if (size == VK_WHOLE_SIZE)
1441 size = mem->bo.size - offset;
1442
1443 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1444 *
1445     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
1447 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
1448 * equal to the size of the memory minus offset
1449 */
1450 assert(size > 0);
1451 assert(offset + size <= mem->bo.size);
1452
1453 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1454 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1455 * at a time is valid. We could just mmap up front and return an offset
1456 * pointer here, but that may exhaust virtual memory on 32 bit
1457 * userspace. */
1458
1459 uint32_t gem_flags = 0;
1460 if (!device->info.has_llc && mem->type_index == 0)
1461 gem_flags |= I915_MMAP_WC;
1462
1463 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
1464 uint64_t map_offset = offset & ~4095ull;
1465 assert(offset >= map_offset);
1466 uint64_t map_size = (offset + size) - map_offset;
1467
1468 /* Let's map whole pages */
1469 map_size = align_u64(map_size, 4096);
1470
1471 void *map = anv_gem_mmap(device, mem->bo.gem_handle,
1472 map_offset, map_size, gem_flags);
1473 if (map == MAP_FAILED)
1474 return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
1475
1476 mem->map = map;
1477 mem->map_size = map_size;
1478
1479 *ppData = mem->map + (offset - map_offset);
1480
1481 return VK_SUCCESS;
1482 }
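
/* Worked example (editor's note, hypothetical values): MapMemory with
 * offset = 5000 and size = 100 rounds map_offset down to 4096, computes
 * map_size = (5000 + 100) - 4096 = 1004 and pads it to 4096, then returns
 * mem->map + (5000 - 4096), i.e. a pointer 904 bytes into the mapping.
 */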
1483
1484 void anv_UnmapMemory(
1485 VkDevice _device,
1486 VkDeviceMemory _memory)
1487 {
1488 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1489
1490 if (mem == NULL)
1491 return;
1492
1493 anv_gem_munmap(mem->map, mem->map_size);
1494
1495 mem->map = NULL;
1496 mem->map_size = 0;
1497 }
1498
1499 static void
1500 clflush_mapped_ranges(struct anv_device *device,
1501 uint32_t count,
1502 const VkMappedMemoryRange *ranges)
1503 {
1504 for (uint32_t i = 0; i < count; i++) {
1505 ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
1506 if (ranges[i].offset >= mem->map_size)
1507 continue;
1508
1509 anv_clflush_range(mem->map + ranges[i].offset,
1510 MIN2(ranges[i].size, mem->map_size - ranges[i].offset));
1511 }
1512 }
1513
1514 VkResult anv_FlushMappedMemoryRanges(
1515 VkDevice _device,
1516 uint32_t memoryRangeCount,
1517 const VkMappedMemoryRange* pMemoryRanges)
1518 {
1519 ANV_FROM_HANDLE(anv_device, device, _device);
1520
1521 if (device->info.has_llc)
1522 return VK_SUCCESS;
1523
1524 /* Make sure the writes we're flushing have landed. */
1525 __builtin_ia32_mfence();
1526
1527 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1528
1529 return VK_SUCCESS;
1530 }
1531
1532 VkResult anv_InvalidateMappedMemoryRanges(
1533 VkDevice _device,
1534 uint32_t memoryRangeCount,
1535 const VkMappedMemoryRange* pMemoryRanges)
1536 {
1537 ANV_FROM_HANDLE(anv_device, device, _device);
1538
1539 if (device->info.has_llc)
1540 return VK_SUCCESS;
1541
1542 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1543
1544 /* Make sure no reads get moved up above the invalidate. */
1545 __builtin_ia32_mfence();
1546
1547 return VK_SUCCESS;
1548 }
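
/* Editor's sketch (illustrative, not driver code): on the non-LLC path an
 * application that has written through a host-visible, non-coherent mapping
 * makes the writes visible to the GPU with:
 *
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(device, 1, &range);
 *
 * which lands in the clflush loop above.  On LLC parts both entry points are
 * no-ops.
 */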
1549
1550 void anv_GetBufferMemoryRequirements(
1551 VkDevice _device,
1552 VkBuffer _buffer,
1553 VkMemoryRequirements* pMemoryRequirements)
1554 {
1555 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1556 ANV_FROM_HANDLE(anv_device, device, _device);
1557
1558 /* The Vulkan spec (git aaed022) says:
1559 *
1560 * memoryTypeBits is a bitfield and contains one bit set for every
1561 * supported memory type for the resource. The bit `1<<i` is set if and
1562 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1563 * structure for the physical device is supported.
1564 *
1565 * We support exactly one memory type on LLC, two on non-LLC.
1566 */
1567 pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
1568
1569 pMemoryRequirements->size = buffer->size;
1570 pMemoryRequirements->alignment = 16;
1571 }
1572
1573 void anv_GetImageMemoryRequirements(
1574 VkDevice _device,
1575 VkImage _image,
1576 VkMemoryRequirements* pMemoryRequirements)
1577 {
1578 ANV_FROM_HANDLE(anv_image, image, _image);
1579 ANV_FROM_HANDLE(anv_device, device, _device);
1580
1581 /* The Vulkan spec (git aaed022) says:
1582 *
1583 * memoryTypeBits is a bitfield and contains one bit set for every
1584 * supported memory type for the resource. The bit `1<<i` is set if and
1585 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1586 * structure for the physical device is supported.
1587 *
1588 * We support exactly one memory type on LLC, two on non-LLC.
1589 */
1590 pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
1591
1592 pMemoryRequirements->size = image->size;
1593 pMemoryRequirements->alignment = image->alignment;
1594 }
1595
1596 void anv_GetImageSparseMemoryRequirements(
1597 VkDevice device,
1598 VkImage image,
1599 uint32_t* pSparseMemoryRequirementCount,
1600 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1601 {
1602 stub();
1603 }
1604
1605 void anv_GetDeviceMemoryCommitment(
1606 VkDevice device,
1607 VkDeviceMemory memory,
1608 VkDeviceSize* pCommittedMemoryInBytes)
1609 {
1610 *pCommittedMemoryInBytes = 0;
1611 }
1612
1613 VkResult anv_BindBufferMemory(
1614 VkDevice device,
1615 VkBuffer _buffer,
1616 VkDeviceMemory _memory,
1617 VkDeviceSize memoryOffset)
1618 {
1619 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1620 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1621
1622 if (mem) {
1623 buffer->bo = &mem->bo;
1624 buffer->offset = memoryOffset;
1625 } else {
1626 buffer->bo = NULL;
1627 buffer->offset = 0;
1628 }
1629
1630 return VK_SUCCESS;
1631 }
1632
1633 VkResult anv_QueueBindSparse(
1634 VkQueue queue,
1635 uint32_t bindInfoCount,
1636 const VkBindSparseInfo* pBindInfo,
1637 VkFence fence)
1638 {
1639 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1640 }
1641
1642 VkResult anv_CreateFence(
1643 VkDevice _device,
1644 const VkFenceCreateInfo* pCreateInfo,
1645 const VkAllocationCallbacks* pAllocator,
1646 VkFence* pFence)
1647 {
1648 ANV_FROM_HANDLE(anv_device, device, _device);
1649 struct anv_bo fence_bo;
1650 struct anv_fence *fence;
1651 struct anv_batch batch;
1652 VkResult result;
1653
1654 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1655
1656 result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
1657 if (result != VK_SUCCESS)
1658 return result;
1659
1660 /* Fences are small. Just store the CPU data structure in the BO. */
1661 fence = fence_bo.map;
1662 fence->bo = fence_bo;
1663
1664 /* Place the batch after the CPU data but on its own cache line. */
1665 const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
1666 batch.next = batch.start = fence->bo.map + batch_offset;
1667 batch.end = fence->bo.map + fence->bo.size;
1668 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1669 anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1670
1671 if (!device->info.has_llc) {
1672 assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
1673 assert(batch.next - batch.start <= CACHELINE_SIZE);
1674 __builtin_ia32_mfence();
1675 __builtin_ia32_clflush(batch.start);
1676 }
1677
1678 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1679 fence->exec2_objects[0].relocation_count = 0;
1680 fence->exec2_objects[0].relocs_ptr = 0;
1681 fence->exec2_objects[0].alignment = 0;
1682 fence->exec2_objects[0].offset = fence->bo.offset;
1683 fence->exec2_objects[0].flags = 0;
1684 fence->exec2_objects[0].rsvd1 = 0;
1685 fence->exec2_objects[0].rsvd2 = 0;
1686
1687 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1688 fence->execbuf.buffer_count = 1;
1689 fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
1690 fence->execbuf.batch_len = batch.next - batch.start;
1691 fence->execbuf.cliprects_ptr = 0;
1692 fence->execbuf.num_cliprects = 0;
1693 fence->execbuf.DR1 = 0;
1694 fence->execbuf.DR4 = 0;
1695
1696 fence->execbuf.flags =
1697 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1698 fence->execbuf.rsvd1 = device->context_id;
1699 fence->execbuf.rsvd2 = 0;
1700
1701 if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
1702 fence->state = ANV_FENCE_STATE_SIGNALED;
1703 } else {
1704 fence->state = ANV_FENCE_STATE_RESET;
1705 }
1706
1707 *pFence = anv_fence_to_handle(fence);
1708
1709 return VK_SUCCESS;
1710 }
1711
1712 void anv_DestroyFence(
1713 VkDevice _device,
1714 VkFence _fence,
1715 const VkAllocationCallbacks* pAllocator)
1716 {
1717 ANV_FROM_HANDLE(anv_device, device, _device);
1718 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1719
1720 if (!fence)
1721 return;
1722
1723 assert(fence->bo.map == fence);
1724 anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
1725 }
1726
1727 VkResult anv_ResetFences(
1728 VkDevice _device,
1729 uint32_t fenceCount,
1730 const VkFence* pFences)
1731 {
1732 for (uint32_t i = 0; i < fenceCount; i++) {
1733 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1734 fence->state = ANV_FENCE_STATE_RESET;
1735 }
1736
1737 return VK_SUCCESS;
1738 }
1739
1740 VkResult anv_GetFenceStatus(
1741 VkDevice _device,
1742 VkFence _fence)
1743 {
1744 ANV_FROM_HANDLE(anv_device, device, _device);
1745 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1746 int64_t t = 0;
1747 int ret;
1748
1749 switch (fence->state) {
1750 case ANV_FENCE_STATE_RESET:
1751 /* If it hasn't even been sent off to the GPU yet, it's not ready */
1752 return VK_NOT_READY;
1753
1754 case ANV_FENCE_STATE_SIGNALED:
1755 /* It's been signaled, return success */
1756 return VK_SUCCESS;
1757
1758 case ANV_FENCE_STATE_SUBMITTED:
1759 /* It's been submitted to the GPU but we don't know if it's done yet. */
1760 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1761 if (ret == 0) {
1762 fence->state = ANV_FENCE_STATE_SIGNALED;
1763 return VK_SUCCESS;
1764 } else {
1765 return VK_NOT_READY;
1766 }
1767 default:
1768 unreachable("Invalid fence status");
1769 }
1770 }
1771
1772 #define NSEC_PER_SEC 1000000000
1773 #define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
1774
1775 VkResult anv_WaitForFences(
1776 VkDevice _device,
1777 uint32_t fenceCount,
1778 const VkFence* pFences,
1779 VkBool32 waitAll,
1780 uint64_t _timeout)
1781 {
1782 ANV_FROM_HANDLE(anv_device, device, _device);
1783 int ret;
1784
1785 /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
1786     * to block indefinitely for timeouts <= 0.  Unfortunately, this was broken
1787 * for a couple of kernel releases. Since there's no way to know
1788 * whether or not the kernel we're using is one of the broken ones, the
1789 * best we can do is to clamp the timeout to INT64_MAX. This limits the
1790 * maximum timeout from 584 years to 292 years - likely not a big deal.
1791 */
1792 int64_t timeout = MIN2(_timeout, INT64_MAX);
1793
1794 uint32_t pending_fences = fenceCount;
1795 while (pending_fences) {
1796 pending_fences = 0;
1797 bool signaled_fences = false;
1798 for (uint32_t i = 0; i < fenceCount; i++) {
1799 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1800 switch (fence->state) {
1801 case ANV_FENCE_STATE_RESET:
1802 /* This fence hasn't been submitted yet, we'll catch it the next
1803 * time around. Yes, this may mean we dead-loop but, short of
1804 * lots of locking and a condition variable, there's not much that
1805 * we can do about that.
1806 */
1807 pending_fences++;
1808 continue;
1809
1810 case ANV_FENCE_STATE_SIGNALED:
1811 /* This fence is not pending. If waitAll isn't set, we can return
1812 * early. Otherwise, we have to keep going.
1813 */
1814 if (!waitAll)
1815 return VK_SUCCESS;
1816 continue;
1817
1818 case ANV_FENCE_STATE_SUBMITTED:
1819 /* These are the fences we really care about. Go ahead and wait
1820 * on them until we hit a timeout.
1821 */
1822 ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
1823 if (ret == -1 && errno == ETIME) {
1824 return VK_TIMEOUT;
1825 } else if (ret == -1) {
1826 /* We don't know the real error. */
1827 return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
1828 } else {
1829 fence->state = ANV_FENCE_STATE_SIGNALED;
1830 signaled_fences = true;
1831 if (!waitAll)
1832 return VK_SUCCESS;
1833 continue;
1834 }
1835 }
1836 }
1837
1838 if (pending_fences && !signaled_fences) {
1839 /* If we've hit this then someone decided to vkWaitForFences before
1840 * they've actually submitted any of them to a queue. This is a
1841 * fairly pessimal case, so it's ok to lock here and use a standard
1842 * pthreads condition variable.
1843 */
1844 pthread_mutex_lock(&device->mutex);
1845
1846 /* It's possible that some of the fences have changed state since the
1847 * last time we checked. Now that we have the lock, check for
1848 * pending fences again and don't wait if it's changed.
1849 */
1850 uint32_t now_pending_fences = 0;
1851 for (uint32_t i = 0; i < fenceCount; i++) {
1852 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1853 if (fence->state == ANV_FENCE_STATE_RESET)
1854 now_pending_fences++;
1855 }
1856 assert(now_pending_fences <= pending_fences);
1857
1858 if (now_pending_fences == pending_fences) {
1859 struct timespec before;
1860 clock_gettime(CLOCK_MONOTONIC, &before);
1861
1862 uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
1863 uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
1864 (timeout / NSEC_PER_SEC);
1865 abs_nsec %= NSEC_PER_SEC;
1866
1867 /* Avoid roll-over in tv_sec on 32-bit systems if the user-provided
1868 * timeout is UINT64_MAX.
1869 */
1870 struct timespec abstime;
1871 abstime.tv_nsec = abs_nsec;
1872 abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
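/* Illustrative numbers for the clamp above: with a 32-bit tv_sec,
 * INT_TYPE_MAX(abstime.tv_sec) == (1ull << 31) - 1 == INT32_MAX, so an
 * abs_sec derived from a near-UINT64_MAX timeout is pinned to INT32_MAX
 * instead of wrapping to a negative tv_sec.
 */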
1873
1874 ret = pthread_cond_timedwait(&device->queue_submit,
1875 &device->mutex, &abstime);
1876 assert(ret != EINVAL);
1877
1878 struct timespec after;
1879 clock_gettime(CLOCK_MONOTONIC, &after);
1880 uint64_t time_elapsed =
1881 ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
1882 ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
1883
1884 if (time_elapsed >= timeout) {
1885 pthread_mutex_unlock(&device->mutex);
1886 return VK_TIMEOUT;
1887 }
1888
1889 timeout -= time_elapsed;
1890 }
1891
1892 pthread_mutex_unlock(&device->mutex);
1893 }
1894 }
1895
1896 return VK_SUCCESS;
1897 }
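
/* Application-side sketch (illustrative only): waiting for all fences with a
 * one-second timeout expressed in nanoseconds.
 *
 *    VkResult res = vkWaitForFences(device, fenceCount, pFences,
 *                                   VK_TRUE, 1000000000ull);
 *    if (res == VK_TIMEOUT) {
 *       // not all of the requested fences signaled within one second
 *    }
 */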
1898
1899 // Queue semaphore functions
1900
1901 VkResult anv_CreateSemaphore(
1902 VkDevice device,
1903 const VkSemaphoreCreateInfo* pCreateInfo,
1904 const VkAllocationCallbacks* pAllocator,
1905 VkSemaphore* pSemaphore)
1906 {
1907 /* The DRM execbuffer ioctl always executes in-order, even between different
1908 * rings. As such, there's nothing to do for the user space semaphore.
1909 */
1910
1911 *pSemaphore = (VkSemaphore)1;
1912
1913 return VK_SUCCESS;
1914 }
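
/* Note (reasoning sketch, not additional driver behavior): a vkQueueSubmit()
 * that waits on or signals these semaphores still behaves correctly because
 * the kernel executes batches in submission order; the dummy (VkSemaphore)1
 * returned above only needs to be distinguishable from VK_NULL_HANDLE.
 */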
1915
1916 void anv_DestroySemaphore(
1917 VkDevice device,
1918 VkSemaphore semaphore,
1919 const VkAllocationCallbacks* pAllocator)
1920 {
1921 }
1922
1923 // Event functions
1924
1925 VkResult anv_CreateEvent(
1926 VkDevice _device,
1927 const VkEventCreateInfo* pCreateInfo,
1928 const VkAllocationCallbacks* pAllocator,
1929 VkEvent* pEvent)
1930 {
1931 ANV_FROM_HANDLE(anv_device, device, _device);
1932 struct anv_state state;
1933 struct anv_event *event;
1934
1935 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
1936
1937 state = anv_state_pool_alloc(&device->dynamic_state_pool,
1938 sizeof(*event), 8);
1939 event = state.map;
1940 event->state = state;
1941 event->semaphore = VK_EVENT_RESET;
1942
1943 if (!device->info.has_llc) {
1944 /* Make sure the writes we're flushing have landed. */
1945 __builtin_ia32_mfence();
1946 __builtin_ia32_clflush(event);
1947 }
1948
1949 *pEvent = anv_event_to_handle(event);
1950
1951 return VK_SUCCESS;
1952 }
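
/* Note: the event's status word lives in dynamic state pool memory that both
 * the CPU (vkSetEvent/vkResetEvent below) and the GPU (command-buffer event
 * operations) can write, which is why non-LLC parts need the explicit
 * mfence/clflush pairs around every access.
 */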
1953
1954 void anv_DestroyEvent(
1955 VkDevice _device,
1956 VkEvent _event,
1957 const VkAllocationCallbacks* pAllocator)
1958 {
1959 ANV_FROM_HANDLE(anv_device, device, _device);
1960 ANV_FROM_HANDLE(anv_event, event, _event);
1961
1962 if (!event)
1963 return;
1964
1965 anv_state_pool_free(&device->dynamic_state_pool, event->state);
1966 }
1967
1968 VkResult anv_GetEventStatus(
1969 VkDevice _device,
1970 VkEvent _event)
1971 {
1972 ANV_FROM_HANDLE(anv_device, device, _device);
1973 ANV_FROM_HANDLE(anv_event, event, _event);
1974
1975 if (!device->info.has_llc) {
1976 /* Invalidate read cache before reading event written by GPU. */
1977 __builtin_ia32_clflush(event);
1978 __builtin_ia32_mfence();
1980 }
1981
1982 return event->semaphore;
1983 }
1984
1985 VkResult anv_SetEvent(
1986 VkDevice _device,
1987 VkEvent _event)
1988 {
1989 ANV_FROM_HANDLE(anv_device, device, _device);
1990 ANV_FROM_HANDLE(anv_event, event, _event);
1991
1992 event->semaphore = VK_EVENT_SET;
1993
1994 if (!device->info.has_llc) {
1995 /* Make sure the writes we're flushing have landed. */
1996 __builtin_ia32_mfence();
1997 __builtin_ia32_clflush(event);
1998 }
1999
2000 return VK_SUCCESS;
2001 }
2002
2003 VkResult anv_ResetEvent(
2004 VkDevice _device,
2005 VkEvent _event)
2006 {
2007 ANV_FROM_HANDLE(anv_device, device, _device);
2008 ANV_FROM_HANDLE(anv_event, event, _event);
2009
2010 event->semaphore = VK_EVENT_RESET;
2011
2012 if (!device->info.has_llc) {
2013 /* Make sure the writes we're flushing have landed. */
2014 __builtin_ia32_mfence();
2015 __builtin_ia32_clflush(event);
2016 }
2017
2018 return VK_SUCCESS;
2019 }
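
/* The event entrypoints above share the same non-LLC flush pattern; a
 * hypothetical helper (sketch only, not present in this file) would capture
 * it:
 *
 *    static void
 *    anv_event_state_flush(struct anv_device *device, struct anv_event *event)
 *    {
 *       if (!device->info.has_llc) {
 *          __builtin_ia32_mfence();
 *          __builtin_ia32_clflush(event);
 *       }
 *    }
 */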
2020
2021 // Buffer functions
2022
2023 VkResult anv_CreateBuffer(
2024 VkDevice _device,
2025 const VkBufferCreateInfo* pCreateInfo,
2026 const VkAllocationCallbacks* pAllocator,
2027 VkBuffer* pBuffer)
2028 {
2029 ANV_FROM_HANDLE(anv_device, device, _device);
2030 struct anv_buffer *buffer;
2031
2032 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2033
2034 buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
2035 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2036 if (buffer == NULL)
2037 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2038
2039 buffer->size = pCreateInfo->size;
2040 buffer->usage = pCreateInfo->usage;
2041 buffer->bo = NULL;
2042 buffer->offset = 0;
2043
2044 *pBuffer = anv_buffer_to_handle(buffer);
2045
2046 return VK_SUCCESS;
2047 }
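
/* Application-side sketch (illustrative only); note that no GPU memory is
 * bound here, so buffer->bo stays NULL until vkBindBufferMemory() runs.
 *
 *    VkBufferCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
 *       .size = 4096,
 *       .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
 *       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
 *    };
 *    VkBuffer buffer;
 *    vkCreateBuffer(device, &info, NULL, &buffer);
 */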
2048
2049 void anv_DestroyBuffer(
2050 VkDevice _device,
2051 VkBuffer _buffer,
2052 const VkAllocationCallbacks* pAllocator)
2053 {
2054 ANV_FROM_HANDLE(anv_device, device, _device);
2055 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
2056
2057 if (!buffer)
2058 return;
2059
2060 vk_free2(&device->alloc, pAllocator, buffer);
2061 }
2062
2063 void
2064 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
2065 enum isl_format format,
2066 uint32_t offset, uint32_t range, uint32_t stride)
2067 {
2068 isl_buffer_fill_state(&device->isl_dev, state.map,
2069 .address = offset,
2070 .mocs = device->default_mocs,
2071 .size = range,
2072 .format = format,
2073 .stride = stride);
2074
2075 anv_state_flush(device, state);
2076 }
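
/* Call sketch (hypothetical values, for illustration): filling surface state
 * for a 256-byte buffer view at offset 0 with no per-element stride.
 *
 *    anv_fill_buffer_surface_state(device, state,
 *                                  ISL_FORMAT_R32G32B32A32_FLOAT,
 *                                  0, 256, 0);   // offset, range, stride
 */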
2077
2078 void anv_DestroySampler(
2079 VkDevice _device,
2080 VkSampler _sampler,
2081 const VkAllocationCallbacks* pAllocator)
2082 {
2083 ANV_FROM_HANDLE(anv_device, device, _device);
2084 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
2085
2086 if (!sampler)
2087 return;
2088
2089 vk_free2(&device->alloc, pAllocator, sampler);
2090 }
2091
2092 VkResult anv_CreateFramebuffer(
2093 VkDevice _device,
2094 const VkFramebufferCreateInfo* pCreateInfo,
2095 const VkAllocationCallbacks* pAllocator,
2096 VkFramebuffer* pFramebuffer)
2097 {
2098 ANV_FROM_HANDLE(anv_device, device, _device);
2099 struct anv_framebuffer *framebuffer;
2100
2101 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2102
2103 size_t size = sizeof(*framebuffer) +
2104 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
2105 framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2106 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2107 if (framebuffer == NULL)
2108 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2109
2110 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2111 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2112 VkImageView _iview = pCreateInfo->pAttachments[i];
2113 framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
2114 }
2115
2116 framebuffer->width = pCreateInfo->width;
2117 framebuffer->height = pCreateInfo->height;
2118 framebuffer->layers = pCreateInfo->layers;
2119
2120 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
2121
2122 return VK_SUCCESS;
2123 }
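
/* Note: the image-view attachment array is allocated inline after the
 * framebuffer struct (see the size computation above), so the single
 * vk_free2() in anv_DestroyFramebuffer() below releases everything.
 */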
2124
2125 void anv_DestroyFramebuffer(
2126 VkDevice _device,
2127 VkFramebuffer _fb,
2128 const VkAllocationCallbacks* pAllocator)
2129 {
2130 ANV_FROM_HANDLE(anv_device, device, _device);
2131 ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
2132
2133 if (!fb)
2134 return;
2135
2136 vk_free2(&device->alloc, pAllocator, fb);
2137 }
2138
2139 /* vk_icd.h does not declare this function, so we declare it here to
2140 * suppress -Wmissing-prototypes.
2141 */
2142 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2143 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
2144
2145 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2146 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
2147 {
2148 /* For the full details on loader interface versioning, see
2149 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2150 * What follows is a condensed summary, to help you navigate the large and
2151 * confusing official doc.
2152 *
2153 * - Loader interface v0 is incompatible with later versions. We don't
2154 * support it.
2155 *
2156 * - In loader interface v1:
2157 * - The first ICD entrypoint called by the loader is
2158 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2159 * entrypoint.
2160 * - The ICD must statically expose no other Vulkan symbol unless it is
2161 * linked with -Bsymbolic.
2162 * - Each dispatchable Vulkan handle created by the ICD must be
2163 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2164 * ICD must initialize VK_LOADER_DATA.loaderMagic to ICD_LOADER_MAGIC.
2165 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2166 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2167 * such loader-managed surfaces.
2168 *
2169 * - Loader interface v2 differs from v1 in:
2170 * - The first ICD entrypoint called by the loader is
2171 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2172 * statically expose this entrypoint.
2173 *
2174 * - Loader interface v3 differs from v2 in:
2175 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2176 * vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
2177 * because the loader no longer does so.
2178 */
2179 *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
2180 return VK_SUCCESS;
2181 }
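
/* Worked example of the negotiation above (illustrative): a loader that
 * passes in *pSupportedVersion == 4 gets MIN2(4u, 3u) == 3 written back and
 * follows the v3 contract; a loader that passes in 1 keeps 1, so the v1
 * contract applies.
 */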