anv: clflush is only ordered against mfence
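clflush is not ordered with respect to ordinary loads and stores, only against
mfence. On non-LLC platforms the driver writes batches, fences, and events
through cacheable CPU mappings and pushes them out with clflush, so every such
path below pairs the clflush loop with an mfence: before the flush so the
pending writes have landed, and after an invalidate so later reads don't see
stale cache lines.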
[mesa.git] / src / vulkan / anv_device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31 #include "mesa/main/git_sha1.h"
32 #include "util/strtod.h"
33 #include "util/debug.h"
34
35 #include "gen7_pack.h"
36
37 struct anv_dispatch_table dtable;
38
39 static void
40 compiler_debug_log(void *data, const char *fmt, ...)
41 { }
42
43 static void
44 compiler_perf_log(void *data, const char *fmt, ...)
45 {
46 va_list args;
47 va_start(args, fmt);
48
49 if (unlikely(INTEL_DEBUG & DEBUG_PERF))
50 vfprintf(stderr, fmt, args);
51
52 va_end(args);
53 }
54
55 static VkResult
56 anv_physical_device_init(struct anv_physical_device *device,
57 struct anv_instance *instance,
58 const char *path)
59 {
60 VkResult result;
61 int fd;
62
63 fd = open(path, O_RDWR | O_CLOEXEC);
64 if (fd < 0)
65 return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
66 "failed to open %s: %m", path);
67
68 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
69 device->instance = instance;
70 device->path = path;
71
72 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
73 if (!device->chipset_id) {
74 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
75 "failed to get chipset id: %m");
76 goto fail;
77 }
78
79 device->name = brw_get_device_name(device->chipset_id);
80 device->info = brw_get_device_info(device->chipset_id);
81 if (!device->info) {
82 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
83 "failed to get device info");
84 goto fail;
85 }
86
87 if (device->info->is_haswell) {
88 fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
89 } else if (device->info->gen == 7 && !device->info->is_baytrail) {
90 fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
91 } else if (device->info->gen == 7 && device->info->is_baytrail) {
92 fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
93 } else if (device->info->gen >= 8) {
 94      /* Broadwell, Cherryview, Skylake, Broxton, and Kaby Lake are as fully
 95       * supported as anything */
96 } else {
97 result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
98 "Vulkan not yet supported on %s", device->name);
99 goto fail;
100 }
101
102 if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
103 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
104 "failed to get aperture size: %m");
105 goto fail;
106 }
107
108 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
109 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
110 "kernel missing gem wait");
111 goto fail;
112 }
113
114 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
115 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
116 "kernel missing execbuf2");
117 goto fail;
118 }
119
120 if (!device->info->has_llc &&
121 anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
122 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
123 "kernel missing wc mmap");
124 goto fail;
125 }
126
127 bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
128
129 close(fd);
130
131 brw_process_intel_debug_variable();
132
133 device->compiler = brw_compiler_create(NULL, device->info);
134 if (device->compiler == NULL) {
135 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
136 goto fail;
137 }
138 device->compiler->shader_debug_log = compiler_debug_log;
139 device->compiler->shader_perf_log = compiler_perf_log;
140
 141    /* isl needs to know about bit6 swizzling, which we queried from the kernel above */
142 isl_device_init(&device->isl_dev, device->info, swizzled);
143
144 return VK_SUCCESS;
145
146 fail:
147 close(fd);
148 return result;
149 }
150
151 static void
152 anv_physical_device_finish(struct anv_physical_device *device)
153 {
154 ralloc_free(device->compiler);
155 }
156
157 static const VkExtensionProperties global_extensions[] = {
158 {
159 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
160 .specVersion = 24,
161 },
162 {
163 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
164 .specVersion = 5,
165 },
166 #ifdef HAVE_WAYLAND_PLATFORM
167 {
168 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
169 .specVersion = 4,
170 },
171 #endif
172 };
173
174 static const VkExtensionProperties device_extensions[] = {
175 {
176 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
177 .specVersion = 67,
178 },
179 };
180
181 static void *
182 default_alloc_func(void *pUserData, size_t size, size_t align,
183 VkSystemAllocationScope allocationScope)
184 {
185 return malloc(size);
186 }
187
188 static void *
189 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
190 size_t align, VkSystemAllocationScope allocationScope)
191 {
192 return realloc(pOriginal, size);
193 }
194
195 static void
196 default_free_func(void *pUserData, void *pMemory)
197 {
198 free(pMemory);
199 }
200
201 static const VkAllocationCallbacks default_alloc = {
202 .pUserData = NULL,
203 .pfnAllocation = default_alloc_func,
204 .pfnReallocation = default_realloc_func,
205 .pfnFree = default_free_func,
206 };
207
208 VkResult anv_CreateInstance(
209 const VkInstanceCreateInfo* pCreateInfo,
210 const VkAllocationCallbacks* pAllocator,
211 VkInstance* pInstance)
212 {
213 struct anv_instance *instance;
214
215 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
216
217 uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion;
218 if (VK_MAKE_VERSION(1, 0, 0) < client_version ||
219 client_version > VK_MAKE_VERSION(1, 0, 2)) {
220 return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
221 }
222
223 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
224 bool found = false;
225 for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
226 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
227 global_extensions[j].extensionName) == 0) {
228 found = true;
229 break;
230 }
231 }
232 if (!found)
233 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
234 }
235
236 instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
237 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
238 if (!instance)
239 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
240
241 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
242
243 if (pAllocator)
244 instance->alloc = *pAllocator;
245 else
246 instance->alloc = default_alloc;
247
248 instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion;
249 instance->physicalDeviceCount = -1;
250
251 _mesa_locale_init();
252
253 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
254
255 anv_init_wsi(instance);
256
257 *pInstance = anv_instance_to_handle(instance);
258
259 return VK_SUCCESS;
260 }
261
262 void anv_DestroyInstance(
263 VkInstance _instance,
264 const VkAllocationCallbacks* pAllocator)
265 {
266 ANV_FROM_HANDLE(anv_instance, instance, _instance);
267
268 if (instance->physicalDeviceCount > 0) {
269 /* We support at most one physical device. */
270 assert(instance->physicalDeviceCount == 1);
271 anv_physical_device_finish(&instance->physicalDevice);
272 }
273
274 anv_finish_wsi(instance);
275
276 VG(VALGRIND_DESTROY_MEMPOOL(instance));
277
278 _mesa_locale_fini();
279
280 anv_free(&instance->alloc, instance);
281 }
282
283 VkResult anv_EnumeratePhysicalDevices(
284 VkInstance _instance,
285 uint32_t* pPhysicalDeviceCount,
286 VkPhysicalDevice* pPhysicalDevices)
287 {
288 ANV_FROM_HANDLE(anv_instance, instance, _instance);
289 VkResult result;
290
291 if (instance->physicalDeviceCount < 0) {
292 result = anv_physical_device_init(&instance->physicalDevice,
293 instance, "/dev/dri/renderD128");
294 if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
295 instance->physicalDeviceCount = 0;
296 } else if (result == VK_SUCCESS) {
297 instance->physicalDeviceCount = 1;
298 } else {
299 return result;
300 }
301 }
302
303 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
304 * otherwise it's an inout parameter.
305 *
306 * The Vulkan spec (git aaed022) says:
307 *
308 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
309 * that is initialized with the number of devices the application is
310 * prepared to receive handles to. pname:pPhysicalDevices is pointer to
311 * an array of at least this many VkPhysicalDevice handles [...].
312 *
313 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
314 * overwrites the contents of the variable pointed to by
 315    *    pPhysicalDeviceCount with the number of physical devices in the
316 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
317 * pPhysicalDeviceCount with the number of physical handles written to
318 * pPhysicalDevices.
319 */
320 if (!pPhysicalDevices) {
321 *pPhysicalDeviceCount = instance->physicalDeviceCount;
322 } else if (*pPhysicalDeviceCount >= 1) {
323 pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
324 *pPhysicalDeviceCount = 1;
325 } else {
326 *pPhysicalDeviceCount = 0;
327 }
328
329 return VK_SUCCESS;
330 }
331
332 void anv_GetPhysicalDeviceFeatures(
333 VkPhysicalDevice physicalDevice,
334 VkPhysicalDeviceFeatures* pFeatures)
335 {
336 anv_finishme("Get correct values for PhysicalDeviceFeatures");
337
338 *pFeatures = (VkPhysicalDeviceFeatures) {
339 .robustBufferAccess = false,
340 .fullDrawIndexUint32 = false,
341 .imageCubeArray = false,
342 .independentBlend = false,
343 .geometryShader = true,
344 .tessellationShader = false,
345 .sampleRateShading = false,
346 .dualSrcBlend = true,
347 .logicOp = true,
348 .multiDrawIndirect = false,
349 .drawIndirectFirstInstance = false,
350 .depthClamp = false,
351 .depthBiasClamp = false,
352 .fillModeNonSolid = true,
353 .depthBounds = false,
354 .wideLines = true,
355 .largePoints = true,
356 .alphaToOne = true,
357 .multiViewport = true,
358 .samplerAnisotropy = false, /* FINISHME */
359 .textureCompressionETC2 = true,
360 .textureCompressionASTC_LDR = true,
361 .textureCompressionBC = true,
362 .occlusionQueryPrecise = false, /* FINISHME */
363 .pipelineStatisticsQuery = true,
364 .vertexPipelineStoresAndAtomics = false,
365 .fragmentStoresAndAtomics = true,
366 .shaderTessellationAndGeometryPointSize = true,
367 .shaderImageGatherExtended = true,
368 .shaderStorageImageExtendedFormats = false,
369 .shaderStorageImageMultisample = false,
370 .shaderUniformBufferArrayDynamicIndexing = true,
371 .shaderSampledImageArrayDynamicIndexing = false,
372 .shaderStorageBufferArrayDynamicIndexing = false,
373 .shaderStorageImageArrayDynamicIndexing = false,
374 .shaderStorageImageReadWithoutFormat = false,
375 .shaderStorageImageWriteWithoutFormat = true,
376 .shaderClipDistance = false,
377 .shaderCullDistance = false,
378 .shaderFloat64 = false,
379 .shaderInt64 = false,
380 .shaderInt16 = false,
382 .variableMultisampleRate = false,
383 .inheritedQueries = false,
384 };
385 }
386
387 void anv_GetPhysicalDeviceProperties(
388 VkPhysicalDevice physicalDevice,
389 VkPhysicalDeviceProperties* pProperties)
390 {
391 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
392 const struct brw_device_info *devinfo = pdevice->info;
393
394 anv_finishme("Get correct values for VkPhysicalDeviceLimits");
395
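   /* Timestamp tick period in nanoseconds: the command streamer timestamp runs
    * at roughly 12.5 MHz (80 ns per tick) before gen9 and 12 MHz (83.333 ns)
    * on gen9+, which is where the constants below come from. */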
396 const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
397
398 VkSampleCountFlags sample_counts =
399 isl_device_get_sample_counts(&pdevice->isl_dev);
400
401 VkPhysicalDeviceLimits limits = {
402 .maxImageDimension1D = (1 << 14),
403 .maxImageDimension2D = (1 << 14),
404 .maxImageDimension3D = (1 << 10),
405 .maxImageDimensionCube = (1 << 14),
406 .maxImageArrayLayers = (1 << 10),
407 .maxTexelBufferElements = 128 * 1024 * 1024,
408 .maxUniformBufferRange = UINT32_MAX,
409 .maxStorageBufferRange = UINT32_MAX,
410 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
411 .maxMemoryAllocationCount = UINT32_MAX,
412 .maxSamplerAllocationCount = 64 * 1024,
413 .bufferImageGranularity = 64, /* A cache line */
414 .sparseAddressSpaceSize = 0,
415 .maxBoundDescriptorSets = MAX_SETS,
416 .maxPerStageDescriptorSamplers = 64,
417 .maxPerStageDescriptorUniformBuffers = 64,
418 .maxPerStageDescriptorStorageBuffers = 64,
419 .maxPerStageDescriptorSampledImages = 64,
420 .maxPerStageDescriptorStorageImages = 64,
421 .maxPerStageDescriptorInputAttachments = 64,
422 .maxPerStageResources = 128,
423 .maxDescriptorSetSamplers = 256,
424 .maxDescriptorSetUniformBuffers = 256,
425 .maxDescriptorSetUniformBuffersDynamic = 256,
426 .maxDescriptorSetStorageBuffers = 256,
427 .maxDescriptorSetStorageBuffersDynamic = 256,
428 .maxDescriptorSetSampledImages = 256,
429 .maxDescriptorSetStorageImages = 256,
430 .maxDescriptorSetInputAttachments = 256,
431 .maxVertexInputAttributes = 32,
432 .maxVertexInputBindings = 32,
433 .maxVertexInputAttributeOffset = 2047,
434 .maxVertexInputBindingStride = 2048,
435 .maxVertexOutputComponents = 128,
436 .maxTessellationGenerationLevel = 0,
437 .maxTessellationPatchSize = 0,
438 .maxTessellationControlPerVertexInputComponents = 0,
439 .maxTessellationControlPerVertexOutputComponents = 0,
440 .maxTessellationControlPerPatchOutputComponents = 0,
441 .maxTessellationControlTotalOutputComponents = 0,
442 .maxTessellationEvaluationInputComponents = 0,
443 .maxTessellationEvaluationOutputComponents = 0,
444 .maxGeometryShaderInvocations = 32,
445 .maxGeometryInputComponents = 64,
446 .maxGeometryOutputComponents = 128,
447 .maxGeometryOutputVertices = 256,
448 .maxGeometryTotalOutputComponents = 1024,
449 .maxFragmentInputComponents = 128,
450 .maxFragmentOutputAttachments = 8,
451 .maxFragmentDualSrcAttachments = 2,
452 .maxFragmentCombinedOutputResources = 8,
453 .maxComputeSharedMemorySize = 32768,
454 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
455 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
456 .maxComputeWorkGroupSize = {
457 16 * devinfo->max_cs_threads,
458 16 * devinfo->max_cs_threads,
459 16 * devinfo->max_cs_threads,
460 },
461 .subPixelPrecisionBits = 4 /* FIXME */,
462 .subTexelPrecisionBits = 4 /* FIXME */,
463 .mipmapPrecisionBits = 4 /* FIXME */,
464 .maxDrawIndexedIndexValue = UINT32_MAX,
465 .maxDrawIndirectCount = UINT32_MAX,
466 .maxSamplerLodBias = 16,
467 .maxSamplerAnisotropy = 16,
468 .maxViewports = MAX_VIEWPORTS,
469 .maxViewportDimensions = { (1 << 14), (1 << 14) },
470 .viewportBoundsRange = { -16384.0, 16384.0 },
471 .viewportSubPixelBits = 13, /* We take a float? */
472 .minMemoryMapAlignment = 4096, /* A page */
473 .minTexelBufferOffsetAlignment = 1,
474 .minUniformBufferOffsetAlignment = 1,
475 .minStorageBufferOffsetAlignment = 1,
476 .minTexelOffset = -8,
477 .maxTexelOffset = 7,
478 .minTexelGatherOffset = -8,
479 .maxTexelGatherOffset = 7,
480 .minInterpolationOffset = 0, /* FIXME */
481 .maxInterpolationOffset = 0, /* FIXME */
482 .subPixelInterpolationOffsetBits = 0, /* FIXME */
483 .maxFramebufferWidth = (1 << 14),
484 .maxFramebufferHeight = (1 << 14),
485 .maxFramebufferLayers = (1 << 10),
486 .framebufferColorSampleCounts = sample_counts,
487 .framebufferDepthSampleCounts = sample_counts,
488 .framebufferStencilSampleCounts = sample_counts,
489 .framebufferNoAttachmentsSampleCounts = sample_counts,
490 .maxColorAttachments = MAX_RTS,
491 .sampledImageColorSampleCounts = sample_counts,
492 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
493 .sampledImageDepthSampleCounts = sample_counts,
494 .sampledImageStencilSampleCounts = sample_counts,
495 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
496 .maxSampleMaskWords = 1,
497 .timestampComputeAndGraphics = false,
498 .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000),
499 .maxClipDistances = 0 /* FIXME */,
500 .maxCullDistances = 0 /* FIXME */,
501 .maxCombinedClipAndCullDistances = 0 /* FIXME */,
502 .discreteQueuePriorities = 1,
503 .pointSizeRange = { 0.125, 255.875 },
504 .lineWidthRange = { 0.0, 7.9921875 },
505 .pointSizeGranularity = (1.0 / 8.0),
506 .lineWidthGranularity = (1.0 / 128.0),
507 .strictLines = false, /* FINISHME */
508 .standardSampleLocations = true,
509 .optimalBufferCopyOffsetAlignment = 128,
510 .optimalBufferCopyRowPitchAlignment = 128,
511 .nonCoherentAtomSize = 64,
512 };
513
514 *pProperties = (VkPhysicalDeviceProperties) {
515 .apiVersion = VK_MAKE_VERSION(1, 0, 2),
516 .driverVersion = 1,
517 .vendorID = 0x8086,
518 .deviceID = pdevice->chipset_id,
519 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
520 .limits = limits,
521 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
522 };
523
524 strcpy(pProperties->deviceName, pdevice->name);
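   /* MESA_GIT_SHA1 is assumed to have the form "git-<sha1>"; the + 4 skips the
    * "git-" prefix so the UUID is built from the bare hash. */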
525 snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_SIZE,
526 "anv-%s", MESA_GIT_SHA1 + 4);
527 }
528
529 void anv_GetPhysicalDeviceQueueFamilyProperties(
530 VkPhysicalDevice physicalDevice,
531 uint32_t* pCount,
532 VkQueueFamilyProperties* pQueueFamilyProperties)
533 {
534 if (pQueueFamilyProperties == NULL) {
535 *pCount = 1;
536 return;
537 }
538
539 assert(*pCount >= 1);
540
541 *pQueueFamilyProperties = (VkQueueFamilyProperties) {
542 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
543 VK_QUEUE_COMPUTE_BIT |
544 VK_QUEUE_TRANSFER_BIT,
545 .queueCount = 1,
546 .timestampValidBits = 36, /* XXX: Real value here */
547 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
548 };
549 }
550
551 void anv_GetPhysicalDeviceMemoryProperties(
552 VkPhysicalDevice physicalDevice,
553 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
554 {
555 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
556 VkDeviceSize heap_size;
557
558 /* Reserve some wiggle room for the driver by exposing only 75% of the
559 * aperture to the heap.
560 */
561 heap_size = 3 * physical_device->aperture_size / 4;
562
563 if (physical_device->info->has_llc) {
564 /* Big core GPUs share LLC with the CPU and thus one memory type can be
565 * both cached and coherent at the same time.
566 */
567 pMemoryProperties->memoryTypeCount = 1;
568 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
569 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
570 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
571 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
572 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
573 .heapIndex = 0,
574 };
575 } else {
576 /* The spec requires that we expose a host-visible, coherent memory
577 * type, but Atom GPUs don't share LLC. Thus we offer two memory types
 578       * to give the application a choice between cached but not coherent, and
 579       * coherent but uncached (write-combined).
580 */
581 pMemoryProperties->memoryTypeCount = 2;
582 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
583 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
584 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
585 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
586 .heapIndex = 0,
587 };
588 pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
589 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
590 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
591 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
592 .heapIndex = 0,
593 };
594 }
595
596 pMemoryProperties->memoryHeapCount = 1;
597 pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
598 .size = heap_size,
599 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
600 };
601 }
602
603 PFN_vkVoidFunction anv_GetInstanceProcAddr(
604 VkInstance instance,
605 const char* pName)
606 {
607 return anv_lookup_entrypoint(pName);
608 }
609
610 PFN_vkVoidFunction anv_GetDeviceProcAddr(
611 VkDevice device,
612 const char* pName)
613 {
614 return anv_lookup_entrypoint(pName);
615 }
616
617 static VkResult
618 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
619 {
620 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
621 queue->device = device;
622 queue->pool = &device->surface_state_pool;
623
624 return VK_SUCCESS;
625 }
626
627 static void
628 anv_queue_finish(struct anv_queue *queue)
629 {
630 }
631
632 static struct anv_state
633 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
634 {
635 struct anv_state state;
636
637 state = anv_state_pool_alloc(pool, size, align);
638 memcpy(state.map, p, size);
639
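   /* Without LLC the GPU does not snoop the CPU cache, so flush the data we
    * just memcpy'd out to memory before the GPU can be pointed at it. */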
640 if (!pool->block_pool->device->info.has_llc)
641 anv_state_clflush(state);
642
643 return state;
644 }
645
646 struct gen8_border_color {
647 union {
648 float float32[4];
649 uint32_t uint32[4];
650 };
651 /* Pad out to 64 bytes */
652 uint32_t _pad[12];
653 };
654
655 static void
656 anv_device_init_border_colors(struct anv_device *device)
657 {
658 static const struct gen8_border_color border_colors[] = {
659 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
660 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
661 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
662 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
663 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
664 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
665 };
666
667 device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
668 sizeof(border_colors), 64,
669 border_colors);
670 }
671
672 VkResult anv_CreateDevice(
673 VkPhysicalDevice physicalDevice,
674 const VkDeviceCreateInfo* pCreateInfo,
675 const VkAllocationCallbacks* pAllocator,
676 VkDevice* pDevice)
677 {
678 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
679 VkResult result;
680 struct anv_device *device;
681
682 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
683
684 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
685 bool found = false;
686 for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
687 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
688 device_extensions[j].extensionName) == 0) {
689 found = true;
690 break;
691 }
692 }
693 if (!found)
694 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
695 }
696
697 anv_set_dispatch_devinfo(physical_device->info);
698
699 device = anv_alloc2(&physical_device->instance->alloc, pAllocator,
700 sizeof(*device), 8,
701 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
702 if (!device)
703 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
704
705 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
706 device->instance = physical_device->instance;
707
708 if (pAllocator)
709 device->alloc = *pAllocator;
710 else
711 device->alloc = physical_device->instance->alloc;
712
713 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
714 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
715 if (device->fd == -1) {
716 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
717 goto fail_device;
718 }
719
720 device->context_id = anv_gem_create_context(device);
721 if (device->context_id == -1) {
722 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
723 goto fail_fd;
724 }
725
726 device->info = *physical_device->info;
727 device->isl_dev = physical_device->isl_dev;
728
729 pthread_mutex_init(&device->mutex, NULL);
730
731 anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE);
732
733 anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
734
735 anv_state_pool_init(&device->dynamic_state_pool,
736 &device->dynamic_state_block_pool);
737
738 anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
739 anv_pipeline_cache_init(&device->default_pipeline_cache, device);
740
741 anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
742
743 anv_state_pool_init(&device->surface_state_pool,
744 &device->surface_state_block_pool);
745
746 anv_bo_init_new(&device->workaround_bo, device, 1024);
747
748 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
749
750 anv_queue_init(device, &device->queue);
751
752 result = anv_device_init_meta(device);
753 if (result != VK_SUCCESS)
754 goto fail_fd;
755
756 anv_device_init_border_colors(device);
757
758 *pDevice = anv_device_to_handle(device);
759
760 return VK_SUCCESS;
761
762 fail_fd:
763 close(device->fd);
764 fail_device:
765 anv_free(&device->alloc, device);
766
767 return result;
768 }
769
770 void anv_DestroyDevice(
771 VkDevice _device,
772 const VkAllocationCallbacks* pAllocator)
773 {
774 ANV_FROM_HANDLE(anv_device, device, _device);
775
776 anv_queue_finish(&device->queue);
777
778 anv_device_finish_meta(device);
779
780 #ifdef HAVE_VALGRIND
781 /* We only need to free these to prevent valgrind errors. The backing
782 * BO will go away in a couple of lines so we don't actually leak.
783 */
784 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
785 #endif
786
787 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
788 anv_gem_close(device, device->workaround_bo.gem_handle);
789
790 anv_bo_pool_finish(&device->batch_bo_pool);
791 anv_state_pool_finish(&device->dynamic_state_pool);
792 anv_block_pool_finish(&device->dynamic_state_block_pool);
793 anv_block_pool_finish(&device->instruction_block_pool);
794 anv_state_pool_finish(&device->surface_state_pool);
795 anv_block_pool_finish(&device->surface_state_block_pool);
796 anv_block_pool_finish(&device->scratch_block_pool);
797
798 close(device->fd);
799
800 pthread_mutex_destroy(&device->mutex);
801
802 anv_free(&device->alloc, device);
803 }
804
805 VkResult anv_EnumerateInstanceExtensionProperties(
806 const char* pLayerName,
807 uint32_t* pPropertyCount,
808 VkExtensionProperties* pProperties)
809 {
810 if (pProperties == NULL) {
811 *pPropertyCount = ARRAY_SIZE(global_extensions);
812 return VK_SUCCESS;
813 }
814
815 assert(*pPropertyCount >= ARRAY_SIZE(global_extensions));
816
817 *pPropertyCount = ARRAY_SIZE(global_extensions);
818 memcpy(pProperties, global_extensions, sizeof(global_extensions));
819
820 return VK_SUCCESS;
821 }
822
823 VkResult anv_EnumerateDeviceExtensionProperties(
824 VkPhysicalDevice physicalDevice,
825 const char* pLayerName,
826 uint32_t* pPropertyCount,
827 VkExtensionProperties* pProperties)
828 {
829 if (pProperties == NULL) {
830 *pPropertyCount = ARRAY_SIZE(device_extensions);
831 return VK_SUCCESS;
832 }
833
834 assert(*pPropertyCount >= ARRAY_SIZE(device_extensions));
835
836 *pPropertyCount = ARRAY_SIZE(device_extensions);
837 memcpy(pProperties, device_extensions, sizeof(device_extensions));
838
839 return VK_SUCCESS;
840 }
841
842 VkResult anv_EnumerateInstanceLayerProperties(
843 uint32_t* pPropertyCount,
844 VkLayerProperties* pProperties)
845 {
846 if (pProperties == NULL) {
847 *pPropertyCount = 0;
848 return VK_SUCCESS;
849 }
850
851 /* None supported at this time */
852 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
853 }
854
855 VkResult anv_EnumerateDeviceLayerProperties(
856 VkPhysicalDevice physicalDevice,
857 uint32_t* pPropertyCount,
858 VkLayerProperties* pProperties)
859 {
860 if (pProperties == NULL) {
861 *pPropertyCount = 0;
862 return VK_SUCCESS;
863 }
864
865 /* None supported at this time */
866 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
867 }
868
869 void anv_GetDeviceQueue(
870 VkDevice _device,
871 uint32_t queueNodeIndex,
872 uint32_t queueIndex,
873 VkQueue* pQueue)
874 {
875 ANV_FROM_HANDLE(anv_device, device, _device);
876
877 assert(queueIndex == 0);
878
879 *pQueue = anv_queue_to_handle(&device->queue);
880 }
881
882 VkResult anv_QueueSubmit(
883 VkQueue _queue,
884 uint32_t submitCount,
885 const VkSubmitInfo* pSubmits,
886 VkFence _fence)
887 {
888 ANV_FROM_HANDLE(anv_queue, queue, _queue);
889 ANV_FROM_HANDLE(anv_fence, fence, _fence);
890 struct anv_device *device = queue->device;
891 int ret;
892
893 for (uint32_t i = 0; i < submitCount; i++) {
894 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
895 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
896 pSubmits[i].pCommandBuffers[j]);
897 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
898
899 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf);
900 if (ret != 0) {
901 /* We don't know the real error. */
902 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
903 "execbuf2 failed: %m");
904 }
905
906 if (fence) {
907 ret = anv_gem_execbuffer(device, &fence->execbuf);
908 if (ret != 0) {
909 /* We don't know the real error. */
910 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
911 "execbuf2 failed: %m");
912 }
913 }
914
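         /* execbuf2 writes the final GPU offset of each BO back into the exec
          * object array; mirror those into our anv_bo structs so the next
          * submission starts from up-to-date presumed offsets. */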
915 for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++)
916 cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset;
917 }
918 }
919
920 return VK_SUCCESS;
921 }
922
923 VkResult anv_QueueWaitIdle(
924 VkQueue _queue)
925 {
926 ANV_FROM_HANDLE(anv_queue, queue, _queue);
927
928 return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device));
929 }
930
931 VkResult anv_DeviceWaitIdle(
932 VkDevice _device)
933 {
934 ANV_FROM_HANDLE(anv_device, device, _device);
935 struct anv_state state;
936 struct anv_batch batch;
937 struct drm_i915_gem_execbuffer2 execbuf;
938 struct drm_i915_gem_exec_object2 exec2_objects[1];
939 struct anv_bo *bo = NULL;
940 VkResult result;
941 int64_t timeout;
942 int ret;
943
944 state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
945 bo = &device->dynamic_state_pool.block_pool->bo;
946 batch.start = batch.next = state.map;
947 batch.end = state.map + 32;
948 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
949 anv_batch_emit(&batch, GEN7_MI_NOOP);
950
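   /* The batch above was written through a cacheable CPU mapping; on non-LLC
    * parts flush it out of the cache so the GPU sees it. */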
951 if (!device->info.has_llc)
952 anv_state_clflush(state);
953
954 exec2_objects[0].handle = bo->gem_handle;
955 exec2_objects[0].relocation_count = 0;
956 exec2_objects[0].relocs_ptr = 0;
957 exec2_objects[0].alignment = 0;
958 exec2_objects[0].offset = bo->offset;
959 exec2_objects[0].flags = 0;
960 exec2_objects[0].rsvd1 = 0;
961 exec2_objects[0].rsvd2 = 0;
962
963 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
964 execbuf.buffer_count = 1;
965 execbuf.batch_start_offset = state.offset;
966 execbuf.batch_len = batch.next - state.map;
967 execbuf.cliprects_ptr = 0;
968 execbuf.num_cliprects = 0;
969 execbuf.DR1 = 0;
970 execbuf.DR4 = 0;
971
972 execbuf.flags =
973 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
974 execbuf.rsvd1 = device->context_id;
975 execbuf.rsvd2 = 0;
976
977 ret = anv_gem_execbuffer(device, &execbuf);
978 if (ret != 0) {
979 /* We don't know the real error. */
980 result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
981 goto fail;
982 }
983
984 timeout = INT64_MAX;
985 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
986 if (ret != 0) {
987 /* We don't know the real error. */
988 result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
989 goto fail;
990 }
991
992 anv_state_pool_free(&device->dynamic_state_pool, state);
993
994 return VK_SUCCESS;
995
996 fail:
997 anv_state_pool_free(&device->dynamic_state_pool, state);
998
999 return result;
1000 }
1001
1002 VkResult
1003 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1004 {
1005 bo->gem_handle = anv_gem_create(device, size);
1006 if (!bo->gem_handle)
1007 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1008
1009 bo->map = NULL;
1010 bo->index = 0;
1011 bo->offset = 0;
1012 bo->size = size;
1013
1014 return VK_SUCCESS;
1015 }
1016
1017 VkResult anv_AllocateMemory(
1018 VkDevice _device,
1019 const VkMemoryAllocateInfo* pAllocateInfo,
1020 const VkAllocationCallbacks* pAllocator,
1021 VkDeviceMemory* pMem)
1022 {
1023 ANV_FROM_HANDLE(anv_device, device, _device);
1024 struct anv_device_memory *mem;
1025 VkResult result;
1026
1027 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1028
1029 if (pAllocateInfo->allocationSize == 0) {
1030 /* Apparently, this is allowed */
1031 *pMem = VK_NULL_HANDLE;
1032 return VK_SUCCESS;
1033 }
1034
1035 /* We support exactly one memory heap. */
1036 assert(pAllocateInfo->memoryTypeIndex == 0 ||
1037 (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
1038
1039 /* FINISHME: Fail if allocation request exceeds heap size. */
1040
1041 mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1042 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1043 if (mem == NULL)
1044 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1045
1046 /* The kernel is going to give us whole pages anyway */
1047 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1048
1049 result = anv_bo_init_new(&mem->bo, device, alloc_size);
1050 if (result != VK_SUCCESS)
1051 goto fail;
1052
1053 mem->type_index = pAllocateInfo->memoryTypeIndex;
1054
1055 *pMem = anv_device_memory_to_handle(mem);
1056
1057 return VK_SUCCESS;
1058
1059 fail:
1060 anv_free2(&device->alloc, pAllocator, mem);
1061
1062 return result;
1063 }
1064
1065 void anv_FreeMemory(
1066 VkDevice _device,
1067 VkDeviceMemory _mem,
1068 const VkAllocationCallbacks* pAllocator)
1069 {
1070 ANV_FROM_HANDLE(anv_device, device, _device);
1071 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1072
1073 if (mem == NULL)
1074 return;
1075
1076 if (mem->bo.map)
1077 anv_gem_munmap(mem->bo.map, mem->bo.size);
1078
1079 if (mem->bo.gem_handle != 0)
1080 anv_gem_close(device, mem->bo.gem_handle);
1081
1082 anv_free2(&device->alloc, pAllocator, mem);
1083 }
1084
1085 VkResult anv_MapMemory(
1086 VkDevice _device,
1087 VkDeviceMemory _memory,
1088 VkDeviceSize offset,
1089 VkDeviceSize size,
1090 VkMemoryMapFlags flags,
1091 void** ppData)
1092 {
1093 ANV_FROM_HANDLE(anv_device, device, _device);
1094 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1095
1096 if (mem == NULL) {
1097 *ppData = NULL;
1098 return VK_SUCCESS;
1099 }
1100
1101 if (size == VK_WHOLE_SIZE)
1102 size = mem->bo.size - offset;
1103
1104 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1105 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1106 * at a time is valid. We could just mmap up front and return an offset
1107 * pointer here, but that may exhaust virtual memory on 32 bit
1108 * userspace. */
1109
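   /* On non-LLC parts, memory type 0 is the coherent-but-uncached type we
    * advertise in anv_GetPhysicalDeviceMemoryProperties, so map it
    * write-combined; type 1 (host-cached) gets a normal cacheable mapping. */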
1110 uint32_t gem_flags = 0;
1111 if (!device->info.has_llc && mem->type_index == 0)
1112 gem_flags |= I915_MMAP_WC;
1113
1114 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
1115 uint64_t map_offset = offset & ~4095ull;
1116 assert(offset >= map_offset);
1117 uint64_t map_size = (offset + size) - map_offset;
1118
1119 /* Let's map whole pages */
1120 map_size = align_u64(map_size, 4096);
1121
1122 mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
1123 map_offset, map_size, gem_flags);
1124 mem->map_size = map_size;
1125
1126 *ppData = mem->map + (offset - map_offset);
1127
1128 return VK_SUCCESS;
1129 }
1130
1131 void anv_UnmapMemory(
1132 VkDevice _device,
1133 VkDeviceMemory _memory)
1134 {
1135 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1136
1137 if (mem == NULL)
1138 return;
1139
1140 anv_gem_munmap(mem->map, mem->map_size);
1141 }
1142
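/* Flush every cache line touched by the given ranges. clflush is only ordered
 * against mfence, so callers must fence before this (when flushing writes) or
 * after it (when invalidating before reads). */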
1143 static void
1144 clflush_mapped_ranges(struct anv_device *device,
1145 uint32_t count,
1146 const VkMappedMemoryRange *ranges)
1147 {
1148 for (uint32_t i = 0; i < count; i++) {
1149 ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
1150 void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
1151 void *end;
1152
1153 if (ranges[i].offset + ranges[i].size > mem->map_size)
1154 end = mem->map + mem->map_size;
1155 else
1156 end = mem->map + ranges[i].offset + ranges[i].size;
1157
1158 while (p < end) {
1159 __builtin_ia32_clflush(p);
1160 p += CACHELINE_SIZE;
1161 }
1162 }
1163 }
1164
1165 VkResult anv_FlushMappedMemoryRanges(
1166 VkDevice _device,
1167 uint32_t memoryRangeCount,
1168 const VkMappedMemoryRange* pMemoryRanges)
1169 {
1170 ANV_FROM_HANDLE(anv_device, device, _device);
1171
1172 if (device->info.has_llc)
1173 return VK_SUCCESS;
1174
1175 /* Make sure the writes we're flushing have landed. */
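   /* clflush is only ordered against mfence, not against ordinary stores, so
    * fence first to guarantee the CPU writes are visible before we flush the
    * cache lines that hold them. */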
1176 __builtin_ia32_mfence();
1177
1178 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1179
1180 return VK_SUCCESS;
1181 }
1182
1183 VkResult anv_InvalidateMappedMemoryRanges(
1184 VkDevice _device,
1185 uint32_t memoryRangeCount,
1186 const VkMappedMemoryRange* pMemoryRanges)
1187 {
1188 ANV_FROM_HANDLE(anv_device, device, _device);
1189
1190 if (device->info.has_llc)
1191 return VK_SUCCESS;
1192
1193 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1194
1195 /* Make sure no reads get moved up above the invalidate. */
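   /* The clflushes above are likewise only ordered against mfence, so fence
    * before returning to keep subsequent reads from being served out of stale
    * cache lines. */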
1196 __builtin_ia32_mfence();
1197
1198 return VK_SUCCESS;
1199 }
1200
1201 void anv_GetBufferMemoryRequirements(
1202 VkDevice device,
1203 VkBuffer _buffer,
1204 VkMemoryRequirements* pMemoryRequirements)
1205 {
1206 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1207
1208 /* The Vulkan spec (git aaed022) says:
1209 *
1210 * memoryTypeBits is a bitfield and contains one bit set for every
1211 * supported memory type for the resource. The bit `1<<i` is set if and
1212 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1213 * structure for the physical device is supported.
1214 *
1215 * We support exactly one memory type.
1216 */
1217 pMemoryRequirements->memoryTypeBits = 1;
1218
1219 pMemoryRequirements->size = buffer->size;
1220 pMemoryRequirements->alignment = 16;
1221 }
1222
1223 void anv_GetImageMemoryRequirements(
1224 VkDevice device,
1225 VkImage _image,
1226 VkMemoryRequirements* pMemoryRequirements)
1227 {
1228 ANV_FROM_HANDLE(anv_image, image, _image);
1229
1230 /* The Vulkan spec (git aaed022) says:
1231 *
1232 * memoryTypeBits is a bitfield and contains one bit set for every
1233 * supported memory type for the resource. The bit `1<<i` is set if and
1234 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1235 * structure for the physical device is supported.
1236 *
1237 * We support exactly one memory type.
1238 */
1239 pMemoryRequirements->memoryTypeBits = 1;
1240
1241 pMemoryRequirements->size = image->size;
1242 pMemoryRequirements->alignment = image->alignment;
1243 }
1244
1245 void anv_GetImageSparseMemoryRequirements(
1246 VkDevice device,
1247 VkImage image,
1248 uint32_t* pSparseMemoryRequirementCount,
1249 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1250 {
1251 stub();
1252 }
1253
1254 void anv_GetDeviceMemoryCommitment(
1255 VkDevice device,
1256 VkDeviceMemory memory,
1257 VkDeviceSize* pCommittedMemoryInBytes)
1258 {
1259 *pCommittedMemoryInBytes = 0;
1260 }
1261
1262 VkResult anv_BindBufferMemory(
1263 VkDevice device,
1264 VkBuffer _buffer,
1265 VkDeviceMemory _memory,
1266 VkDeviceSize memoryOffset)
1267 {
1268 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1269 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1270
1271 if (mem) {
1272 buffer->bo = &mem->bo;
1273 buffer->offset = memoryOffset;
1274 } else {
1275 buffer->bo = NULL;
1276 buffer->offset = 0;
1277 }
1278
1279 return VK_SUCCESS;
1280 }
1281
1282 VkResult anv_BindImageMemory(
1283 VkDevice device,
1284 VkImage _image,
1285 VkDeviceMemory _memory,
1286 VkDeviceSize memoryOffset)
1287 {
1288 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1289 ANV_FROM_HANDLE(anv_image, image, _image);
1290
1291 if (mem) {
1292 image->bo = &mem->bo;
1293 image->offset = memoryOffset;
1294 } else {
1295 image->bo = NULL;
1296 image->offset = 0;
1297 }
1298
1299 return VK_SUCCESS;
1300 }
1301
1302 VkResult anv_QueueBindSparse(
1303 VkQueue queue,
1304 uint32_t bindInfoCount,
1305 const VkBindSparseInfo* pBindInfo,
1306 VkFence fence)
1307 {
1308 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1309 }
1310
1311 VkResult anv_CreateFence(
1312 VkDevice _device,
1313 const VkFenceCreateInfo* pCreateInfo,
1314 const VkAllocationCallbacks* pAllocator,
1315 VkFence* pFence)
1316 {
1317 ANV_FROM_HANDLE(anv_device, device, _device);
1318 struct anv_fence *fence;
1319 struct anv_batch batch;
1320 VkResult result;
1321
1322 const uint32_t fence_size = 128;
1323
1324 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1325
1326 fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8,
1327 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1328 if (fence == NULL)
1329 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1330
1331 result = anv_bo_init_new(&fence->bo, device, fence_size);
1332 if (result != VK_SUCCESS)
1333 goto fail;
1334
1335 fence->bo.map =
1336 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0);
1337 batch.next = batch.start = fence->bo.map;
1338 batch.end = fence->bo.map + fence->bo.size;
1339 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
1340 anv_batch_emit(&batch, GEN7_MI_NOOP);
1341
1342 if (!device->info.has_llc) {
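      /* The whole fence batch must fit in a single cache line (checked by the
       * asserts below): mfence so the batch writes land, then clflush that one
       * line so the GPU sees it. */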
1343 assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
1344 assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
1345 __builtin_ia32_mfence();
1346 __builtin_ia32_clflush(fence->bo.map);
1347 }
1348
1349 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1350 fence->exec2_objects[0].relocation_count = 0;
1351 fence->exec2_objects[0].relocs_ptr = 0;
1352 fence->exec2_objects[0].alignment = 0;
1353 fence->exec2_objects[0].offset = fence->bo.offset;
1354 fence->exec2_objects[0].flags = 0;
1355 fence->exec2_objects[0].rsvd1 = 0;
1356 fence->exec2_objects[0].rsvd2 = 0;
1357
1358 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1359 fence->execbuf.buffer_count = 1;
1360 fence->execbuf.batch_start_offset = 0;
1361 fence->execbuf.batch_len = batch.next - fence->bo.map;
1362 fence->execbuf.cliprects_ptr = 0;
1363 fence->execbuf.num_cliprects = 0;
1364 fence->execbuf.DR1 = 0;
1365 fence->execbuf.DR4 = 0;
1366
1367 fence->execbuf.flags =
1368 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1369 fence->execbuf.rsvd1 = device->context_id;
1370 fence->execbuf.rsvd2 = 0;
1371
1372 *pFence = anv_fence_to_handle(fence);
1373
1374 return VK_SUCCESS;
1375
1376 fail:
1377 anv_free2(&device->alloc, pAllocator, fence);
1378
1379 return result;
1380 }
1381
1382 void anv_DestroyFence(
1383 VkDevice _device,
1384 VkFence _fence,
1385 const VkAllocationCallbacks* pAllocator)
1386 {
1387 ANV_FROM_HANDLE(anv_device, device, _device);
1388 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1389
1390 anv_gem_munmap(fence->bo.map, fence->bo.size);
1391 anv_gem_close(device, fence->bo.gem_handle);
1392 anv_free2(&device->alloc, pAllocator, fence);
1393 }
1394
1395 VkResult anv_ResetFences(
1396 VkDevice _device,
1397 uint32_t fenceCount,
1398 const VkFence* pFences)
1399 {
1400 for (uint32_t i = 0; i < fenceCount; i++) {
1401 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1402 fence->ready = false;
1403 }
1404
1405 return VK_SUCCESS;
1406 }
1407
1408 VkResult anv_GetFenceStatus(
1409 VkDevice _device,
1410 VkFence _fence)
1411 {
1412 ANV_FROM_HANDLE(anv_device, device, _device);
1413 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1414 int64_t t = 0;
1415 int ret;
1416
1417 if (fence->ready)
1418 return VK_SUCCESS;
1419
1420 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1421 if (ret == 0) {
1422 fence->ready = true;
1423 return VK_SUCCESS;
1424 }
1425
1426 return VK_NOT_READY;
1427 }
1428
1429 VkResult anv_WaitForFences(
1430 VkDevice _device,
1431 uint32_t fenceCount,
1432 const VkFence* pFences,
1433 VkBool32 waitAll,
1434 uint64_t timeout)
1435 {
1436 ANV_FROM_HANDLE(anv_device, device, _device);
1437
1438 /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
 1439    * to block indefinitely for timeouts <= 0. Unfortunately, this was broken
1440 * for a couple of kernel releases. Since there's no way to know
1441 * whether or not the kernel we're using is one of the broken ones, the
1442 * best we can do is to clamp the timeout to INT64_MAX. This limits the
1443 * maximum timeout from 584 years to 292 years - likely not a big deal.
1444 */
1445 if (timeout > INT64_MAX)
1446 timeout = INT64_MAX;
1447
1448 int64_t t = timeout;
1449
1450 /* FIXME: handle !waitAll */
1451
1452 for (uint32_t i = 0; i < fenceCount; i++) {
1453 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1454 int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1455 if (ret == -1 && errno == ETIME) {
1456 return VK_TIMEOUT;
1457 } else if (ret == -1) {
1458 /* We don't know the real error. */
1459 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
1460 "gem wait failed: %m");
1461 }
1462 }
1463
1464 return VK_SUCCESS;
1465 }
1466
1467 // Queue semaphore functions
1468
1469 VkResult anv_CreateSemaphore(
1470 VkDevice device,
1471 const VkSemaphoreCreateInfo* pCreateInfo,
1472 const VkAllocationCallbacks* pAllocator,
1473 VkSemaphore* pSemaphore)
1474 {
 1475    /* The DRM execbuffer ioctl always executes in order, even between different
1476 * rings. As such, there's nothing to do for the user space semaphore.
1477 */
1478
1479 *pSemaphore = (VkSemaphore)1;
1480
1481 return VK_SUCCESS;
1482 }
1483
1484 void anv_DestroySemaphore(
1485 VkDevice device,
1486 VkSemaphore semaphore,
1487 const VkAllocationCallbacks* pAllocator)
1488 {
1489 }
1490
1491 // Event functions
1492
1493 VkResult anv_CreateEvent(
1494 VkDevice _device,
1495 const VkEventCreateInfo* pCreateInfo,
1496 const VkAllocationCallbacks* pAllocator,
1497 VkEvent* pEvent)
1498 {
1499 ANV_FROM_HANDLE(anv_device, device, _device);
1500 struct anv_state state;
1501 struct anv_event *event;
1502
1503 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
1504
1505 state = anv_state_pool_alloc(&device->dynamic_state_pool,
1506 sizeof(*event), 4);
1507 event = state.map;
1508 event->state = state;
1509 event->semaphore = VK_EVENT_RESET;
1510
1511 if (!device->info.has_llc) {
1512 /* Make sure the writes we're flushing have landed. */
1513 __builtin_ia32_mfence();
1514 __builtin_ia32_clflush(event);
1515 }
1516
1517 *pEvent = anv_event_to_handle(event);
1518
1519 return VK_SUCCESS;
1520 }
1521
1522 void anv_DestroyEvent(
1523 VkDevice _device,
1524 VkEvent _event,
1525 const VkAllocationCallbacks* pAllocator)
1526 {
1527 ANV_FROM_HANDLE(anv_device, device, _device);
1528 ANV_FROM_HANDLE(anv_event, event, _event);
1529
1530 anv_state_pool_free(&device->dynamic_state_pool, event->state);
1531 }
1532
1533 VkResult anv_GetEventStatus(
1534 VkDevice _device,
1535 VkEvent _event)
1536 {
1537 ANV_FROM_HANDLE(anv_device, device, _device);
1538 ANV_FROM_HANDLE(anv_event, event, _event);
1539
1540 if (!device->info.has_llc) {
1541 /* Invalidate read cache before reading event written by GPU. */
1542 __builtin_ia32_clflush(event);
1543 __builtin_ia32_mfence();
1544
1545 }
1546
1547 return event->semaphore;
1548 }
1549
1550 VkResult anv_SetEvent(
1551 VkDevice _device,
1552 VkEvent _event)
1553 {
1554 ANV_FROM_HANDLE(anv_device, device, _device);
1555 ANV_FROM_HANDLE(anv_event, event, _event);
1556
1557 event->semaphore = VK_EVENT_SET;
1558
1559 if (!device->info.has_llc) {
1560 /* Make sure the writes we're flushing have landed. */
1561 __builtin_ia32_mfence();
1562 __builtin_ia32_clflush(event);
1563 }
1564
1565 return VK_SUCCESS;
1566 }
1567
1568 VkResult anv_ResetEvent(
1569 VkDevice _device,
1570 VkEvent _event)
1571 {
1572 ANV_FROM_HANDLE(anv_device, device, _device);
1573 ANV_FROM_HANDLE(anv_event, event, _event);
1574
1575 event->semaphore = VK_EVENT_RESET;
1576
1577 if (!device->info.has_llc) {
1578 /* Make sure the writes we're flushing have landed. */
1579 __builtin_ia32_mfence();
1580 __builtin_ia32_clflush(event);
1581 }
1582
1583 return VK_SUCCESS;
1584 }
1585
1586 // Buffer functions
1587
1588 VkResult anv_CreateBuffer(
1589 VkDevice _device,
1590 const VkBufferCreateInfo* pCreateInfo,
1591 const VkAllocationCallbacks* pAllocator,
1592 VkBuffer* pBuffer)
1593 {
1594 ANV_FROM_HANDLE(anv_device, device, _device);
1595 struct anv_buffer *buffer;
1596
1597 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1598
1599 buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1600 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1601 if (buffer == NULL)
1602 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1603
1604 buffer->size = pCreateInfo->size;
1605 buffer->usage = pCreateInfo->usage;
1606 buffer->bo = NULL;
1607 buffer->offset = 0;
1608
1609 *pBuffer = anv_buffer_to_handle(buffer);
1610
1611 return VK_SUCCESS;
1612 }
1613
1614 void anv_DestroyBuffer(
1615 VkDevice _device,
1616 VkBuffer _buffer,
1617 const VkAllocationCallbacks* pAllocator)
1618 {
1619 ANV_FROM_HANDLE(anv_device, device, _device);
1620 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1621
1622 anv_free2(&device->alloc, pAllocator, buffer);
1623 }
1624
1625 void
1626 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
1627 enum isl_format format,
1628 uint32_t offset, uint32_t range, uint32_t stride)
1629 {
1630 switch (device->info.gen) {
1631 case 7:
1632 if (device->info.is_haswell)
1633 gen75_fill_buffer_surface_state(state.map, format, offset, range,
1634 stride);
1635 else
1636 gen7_fill_buffer_surface_state(state.map, format, offset, range,
1637 stride);
1638 break;
1639 case 8:
1640 gen8_fill_buffer_surface_state(state.map, format, offset, range, stride);
1641 break;
1642 case 9:
1643 gen9_fill_buffer_surface_state(state.map, format, offset, range, stride);
1644 break;
1645 default:
1646 unreachable("unsupported gen\n");
1647 }
1648
1649 if (!device->info.has_llc)
1650 anv_state_clflush(state);
1651 }
1652
1653 void anv_DestroySampler(
1654 VkDevice _device,
1655 VkSampler _sampler,
1656 const VkAllocationCallbacks* pAllocator)
1657 {
1658 ANV_FROM_HANDLE(anv_device, device, _device);
1659 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
1660
1661 anv_free2(&device->alloc, pAllocator, sampler);
1662 }
1663
1664 VkResult anv_CreateFramebuffer(
1665 VkDevice _device,
1666 const VkFramebufferCreateInfo* pCreateInfo,
1667 const VkAllocationCallbacks* pAllocator,
1668 VkFramebuffer* pFramebuffer)
1669 {
1670 ANV_FROM_HANDLE(anv_device, device, _device);
1671 struct anv_framebuffer *framebuffer;
1672
1673 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
1674
1675 size_t size = sizeof(*framebuffer) +
1676 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
1677 framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8,
1678 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1679 if (framebuffer == NULL)
1680 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1681
1682 framebuffer->attachment_count = pCreateInfo->attachmentCount;
1683 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
1684 VkImageView _iview = pCreateInfo->pAttachments[i];
1685 framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
1686 }
1687
1688 framebuffer->width = pCreateInfo->width;
1689 framebuffer->height = pCreateInfo->height;
1690 framebuffer->layers = pCreateInfo->layers;
1691
1692 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
1693
1694 return VK_SUCCESS;
1695 }
1696
1697 void anv_DestroyFramebuffer(
1698 VkDevice _device,
1699 VkFramebuffer _fb,
1700 const VkAllocationCallbacks* pAllocator)
1701 {
1702 ANV_FROM_HANDLE(anv_device, device, _device);
1703 ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
1704
1705 anv_free2(&device->alloc, pAllocator, fb);
1706 }
1707
1708 void vkCmdDbgMarkerBegin(
1709 VkCommandBuffer commandBuffer,
1710 const char* pMarker)
1711 __attribute__ ((visibility ("default")));
1712
1713 void vkCmdDbgMarkerEnd(
1714 VkCommandBuffer commandBuffer)
1715 __attribute__ ((visibility ("default")));
1716
1717 void vkCmdDbgMarkerBegin(
1718 VkCommandBuffer commandBuffer,
1719 const char* pMarker)
1720 {
1721 }
1722
1723 void vkCmdDbgMarkerEnd(
1724 VkCommandBuffer commandBuffer)
1725 {
1726 }