1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31 #include "mesa/main/git_sha1.h"
32 #include "util/strtod.h"
33 #include "util/debug.h"
34
35 #include "gen7_pack.h"
36
37 struct anv_dispatch_table dtable;
38
39 static void
40 compiler_debug_log(void *data, const char *fmt, ...)
41 { }
42
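/* Perf messages from the shader compiler are only printed when the user has
 * INTEL_DEBUG=perf set in the environment; everything else is dropped.
 */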
43 static void
44 compiler_perf_log(void *data, const char *fmt, ...)
45 {
46 va_list args;
47 va_start(args, fmt);
48
49 if (unlikely(INTEL_DEBUG & DEBUG_PERF))
50 vfprintf(stderr, fmt, args);
51
52 va_end(args);
53 }
54
55 static VkResult
56 anv_physical_device_init(struct anv_physical_device *device,
57 struct anv_instance *instance,
58 const char *path)
59 {
60 VkResult result;
61 int fd;
62
63 fd = open(path, O_RDWR | O_CLOEXEC);
64 if (fd < 0)
65 return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
66 "failed to open %s: %m", path);
67
68 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
69 device->instance = instance;
70 device->path = path;
71
72 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
73 if (!device->chipset_id) {
74 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
75 "failed to get chipset id: %m");
76 goto fail;
77 }
78
79 device->name = brw_get_device_name(device->chipset_id);
80 device->info = brw_get_device_info(device->chipset_id);
81 if (!device->info) {
82 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
83 "failed to get device info");
84 goto fail;
85 }
86
87 if (device->info->is_haswell) {
88 fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
89 } else if (device->info->gen == 7 && !device->info->is_baytrail) {
90 fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
91 } else if (device->info->gen == 7 && device->info->is_baytrail) {
92 fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
93 } else if (device->info->gen >= 8) {
94 /* Broadwell, Cherryview, Skylake, Broxton, and Kaby Lake are as fully
95 * supported as anything. */
96 } else {
97 result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
98 "Vulkan not yet supported on %s", device->name);
99 goto fail;
100 }
101
102 if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
103 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
104 "failed to get aperture size: %m");
105 goto fail;
106 }
107
108 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
109 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
110 "kernel missing gem wait");
111 goto fail;
112 }
113
114 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
115 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
116 "kernel missing execbuf2");
117 goto fail;
118 }
119
120 if (!device->info->has_llc &&
121 anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
122 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
123 "kernel missing wc mmap");
124 goto fail;
125 }
126
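/* Ask the kernel whether bit-6 address swizzling is in effect for X-tiled
 * buffers; isl needs to know this so its tiled surface layouts match what
 * the hardware actually does on such systems.
 */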
127 bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
128
129 close(fd);
130
131 brw_process_intel_debug_variable();
132
133 device->compiler = brw_compiler_create(NULL, device->info);
134 if (device->compiler == NULL) {
135 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
136 goto fail;
137 }
138 device->compiler->shader_debug_log = compiler_debug_log;
139 device->compiler->shader_perf_log = compiler_perf_log;
140
141 /* Pass the bit6 swizzle state queried from the kernel above through to isl. */
142 isl_device_init(&device->isl_dev, device->info, swizzled);
143
144 return VK_SUCCESS;
145
146 fail:
147 close(fd);
148 return result;
149 }
150
151 static void
152 anv_physical_device_finish(struct anv_physical_device *device)
153 {
154 ralloc_free(device->compiler);
155 }
156
157 static const VkExtensionProperties global_extensions[] = {
158 {
159 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
160 .specVersion = 25,
161 },
162 {
163 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
164 .specVersion = 5,
165 },
166 #ifdef HAVE_WAYLAND_PLATFORM
167 {
168 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
169 .specVersion = 4,
170 },
171 #endif
172 };
173
174 static const VkExtensionProperties device_extensions[] = {
175 {
176 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
177 .specVersion = 67,
178 },
179 };
180
181 static void *
182 default_alloc_func(void *pUserData, size_t size, size_t align,
183 VkSystemAllocationScope allocationScope)
184 {
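/* The requested alignment is ignored here; this assumes malloc()'s default
 * alignment is enough for the small (<= 8 byte) alignments the driver asks
 * the default allocator for.
 */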
185 return malloc(size);
186 }
187
188 static void *
189 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
190 size_t align, VkSystemAllocationScope allocationScope)
191 {
192 return realloc(pOriginal, size);
193 }
194
195 static void
196 default_free_func(void *pUserData, void *pMemory)
197 {
198 free(pMemory);
199 }
200
201 static const VkAllocationCallbacks default_alloc = {
202 .pUserData = NULL,
203 .pfnAllocation = default_alloc_func,
204 .pfnReallocation = default_realloc_func,
205 .pfnFree = default_free_func,
206 };
207
208 VkResult anv_CreateInstance(
209 const VkInstanceCreateInfo* pCreateInfo,
210 const VkAllocationCallbacks* pAllocator,
211 VkInstance* pInstance)
212 {
213 struct anv_instance *instance;
214
215 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
216
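/* VK_MAKE_VERSION packs versions as (major << 22) | (minor << 12) | patch,
 * so the plain integer comparisons below order versions correctly.
 */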
217 uint32_t client_version = pCreateInfo->pApplicationInfo->apiVersion;
218 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
219 client_version > VK_MAKE_VERSION(1, 0, 2)) {
220 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
221 "Client requested version %d.%d.%d",
222 VK_VERSION_MAJOR(client_version),
223 VK_VERSION_MINOR(client_version),
224 VK_VERSION_PATCH(client_version));
225 }
226
227 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
228 bool found = false;
229 for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
230 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
231 global_extensions[j].extensionName) == 0) {
232 found = true;
233 break;
234 }
235 }
236 if (!found)
237 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
238 }
239
240 instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
241 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
242 if (!instance)
243 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
244
245 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
246
247 if (pAllocator)
248 instance->alloc = *pAllocator;
249 else
250 instance->alloc = default_alloc;
251
252 instance->apiVersion = pCreateInfo->pApplicationInfo->apiVersion;
253 instance->physicalDeviceCount = -1;
254
255 _mesa_locale_init();
256
257 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
258
259 anv_init_wsi(instance);
260
261 *pInstance = anv_instance_to_handle(instance);
262
263 return VK_SUCCESS;
264 }
265
266 void anv_DestroyInstance(
267 VkInstance _instance,
268 const VkAllocationCallbacks* pAllocator)
269 {
270 ANV_FROM_HANDLE(anv_instance, instance, _instance);
271
272 if (instance->physicalDeviceCount > 0) {
273 /* We support at most one physical device. */
274 assert(instance->physicalDeviceCount == 1);
275 anv_physical_device_finish(&instance->physicalDevice);
276 }
277
278 anv_finish_wsi(instance);
279
280 VG(VALGRIND_DESTROY_MEMPOOL(instance));
281
282 _mesa_locale_fini();
283
284 anv_free(&instance->alloc, instance);
285 }
286
287 VkResult anv_EnumeratePhysicalDevices(
288 VkInstance _instance,
289 uint32_t* pPhysicalDeviceCount,
290 VkPhysicalDevice* pPhysicalDevices)
291 {
292 ANV_FROM_HANDLE(anv_instance, instance, _instance);
293 VkResult result;
294
295 if (instance->physicalDeviceCount < 0) {
296 result = anv_physical_device_init(&instance->physicalDevice,
297 instance, "/dev/dri/renderD128");
298 if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
299 instance->physicalDeviceCount = 0;
300 } else if (result == VK_SUCCESS) {
301 instance->physicalDeviceCount = 1;
302 } else {
303 return result;
304 }
305 }
306
307 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
308 * otherwise it's an inout parameter.
309 *
310 * The Vulkan spec (git aaed022) says:
311 *
312 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
313 * that is initialized with the number of devices the application is
314 * prepared to receive handles to. pPhysicalDevices is a pointer to
315 * an array of at least this many VkPhysicalDevice handles [...].
316 *
317 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
318 * overwrites the contents of the variable pointed to by
319 * pPhysicalDeviceCount with the number of physical devices in the
320 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
321 * pPhysicalDeviceCount with the number of physical handles written to
322 * pPhysicalDevices.
323 */
324 if (!pPhysicalDevices) {
325 *pPhysicalDeviceCount = instance->physicalDeviceCount;
326 } else if (*pPhysicalDeviceCount >= 1) {
327 pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
328 *pPhysicalDeviceCount = 1;
329 } else {
330 *pPhysicalDeviceCount = 0;
331 }
332
333 return VK_SUCCESS;
334 }
335
336 void anv_GetPhysicalDeviceFeatures(
337 VkPhysicalDevice physicalDevice,
338 VkPhysicalDeviceFeatures* pFeatures)
339 {
340 anv_finishme("Get correct values for PhysicalDeviceFeatures");
341
342 *pFeatures = (VkPhysicalDeviceFeatures) {
343 .robustBufferAccess = true,
344 .fullDrawIndexUint32 = false,
345 .imageCubeArray = false,
346 .independentBlend = false,
347 .geometryShader = true,
348 .tessellationShader = false,
349 .sampleRateShading = false,
350 .dualSrcBlend = true,
351 .logicOp = true,
352 .multiDrawIndirect = false,
353 .drawIndirectFirstInstance = false,
354 .depthClamp = false,
355 .depthBiasClamp = false,
356 .fillModeNonSolid = true,
357 .depthBounds = false,
358 .wideLines = true,
359 .largePoints = true,
360 .alphaToOne = true,
361 .multiViewport = true,
362 .samplerAnisotropy = false, /* FINISHME */
363 .textureCompressionETC2 = true,
364 .textureCompressionASTC_LDR = true,
365 .textureCompressionBC = true,
366 .occlusionQueryPrecise = false, /* FINISHME */
367 .pipelineStatisticsQuery = true,
368 .vertexPipelineStoresAndAtomics = false,
369 .fragmentStoresAndAtomics = true,
370 .shaderTessellationAndGeometryPointSize = true,
371 .shaderImageGatherExtended = true,
372 .shaderStorageImageExtendedFormats = false,
373 .shaderStorageImageMultisample = false,
374 .shaderUniformBufferArrayDynamicIndexing = true,
375 .shaderSampledImageArrayDynamicIndexing = false,
376 .shaderStorageBufferArrayDynamicIndexing = false,
377 .shaderStorageImageArrayDynamicIndexing = false,
378 .shaderStorageImageReadWithoutFormat = false,
379 .shaderStorageImageWriteWithoutFormat = true,
380 .shaderClipDistance = false,
381 .shaderCullDistance = false,
382 .shaderFloat64 = false,
383 .shaderInt64 = false,
384 .shaderInt16 = false,
386 .variableMultisampleRate = false,
387 .inheritedQueries = false,
388 };
389 }
390
391 void anv_GetPhysicalDeviceProperties(
392 VkPhysicalDevice physicalDevice,
393 VkPhysicalDeviceProperties* pProperties)
394 {
395 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
396 const struct brw_device_info *devinfo = pdevice->info;
397
398 anv_finishme("Get correct values for VkPhysicalDeviceLimits");
399
400 const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
401
402 VkSampleCountFlags sample_counts =
403 isl_device_get_sample_counts(&pdevice->isl_dev);
404
405 VkPhysicalDeviceLimits limits = {
406 .maxImageDimension1D = (1 << 14),
407 .maxImageDimension2D = (1 << 14),
408 .maxImageDimension3D = (1 << 10),
409 .maxImageDimensionCube = (1 << 14),
410 .maxImageArrayLayers = (1 << 10),
411 .maxTexelBufferElements = 128 * 1024 * 1024,
412 .maxUniformBufferRange = UINT32_MAX,
413 .maxStorageBufferRange = UINT32_MAX,
414 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
415 .maxMemoryAllocationCount = UINT32_MAX,
416 .maxSamplerAllocationCount = 64 * 1024,
417 .bufferImageGranularity = 64, /* A cache line */
418 .sparseAddressSpaceSize = 0,
419 .maxBoundDescriptorSets = MAX_SETS,
420 .maxPerStageDescriptorSamplers = 64,
421 .maxPerStageDescriptorUniformBuffers = 64,
422 .maxPerStageDescriptorStorageBuffers = 64,
423 .maxPerStageDescriptorSampledImages = 64,
424 .maxPerStageDescriptorStorageImages = 64,
425 .maxPerStageDescriptorInputAttachments = 64,
426 .maxPerStageResources = 128,
427 .maxDescriptorSetSamplers = 256,
428 .maxDescriptorSetUniformBuffers = 256,
429 .maxDescriptorSetUniformBuffersDynamic = 256,
430 .maxDescriptorSetStorageBuffers = 256,
431 .maxDescriptorSetStorageBuffersDynamic = 256,
432 .maxDescriptorSetSampledImages = 256,
433 .maxDescriptorSetStorageImages = 256,
434 .maxDescriptorSetInputAttachments = 256,
435 .maxVertexInputAttributes = 32,
436 .maxVertexInputBindings = 32,
437 .maxVertexInputAttributeOffset = 2047,
438 .maxVertexInputBindingStride = 2048,
439 .maxVertexOutputComponents = 128,
440 .maxTessellationGenerationLevel = 0,
441 .maxTessellationPatchSize = 0,
442 .maxTessellationControlPerVertexInputComponents = 0,
443 .maxTessellationControlPerVertexOutputComponents = 0,
444 .maxTessellationControlPerPatchOutputComponents = 0,
445 .maxTessellationControlTotalOutputComponents = 0,
446 .maxTessellationEvaluationInputComponents = 0,
447 .maxTessellationEvaluationOutputComponents = 0,
448 .maxGeometryShaderInvocations = 32,
449 .maxGeometryInputComponents = 64,
450 .maxGeometryOutputComponents = 128,
451 .maxGeometryOutputVertices = 256,
452 .maxGeometryTotalOutputComponents = 1024,
453 .maxFragmentInputComponents = 128,
454 .maxFragmentOutputAttachments = 8,
455 .maxFragmentDualSrcAttachments = 2,
456 .maxFragmentCombinedOutputResources = 8,
457 .maxComputeSharedMemorySize = 32768,
458 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
459 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
460 .maxComputeWorkGroupSize = {
461 16 * devinfo->max_cs_threads,
462 16 * devinfo->max_cs_threads,
463 16 * devinfo->max_cs_threads,
464 },
465 .subPixelPrecisionBits = 4 /* FIXME */,
466 .subTexelPrecisionBits = 4 /* FIXME */,
467 .mipmapPrecisionBits = 4 /* FIXME */,
468 .maxDrawIndexedIndexValue = UINT32_MAX,
469 .maxDrawIndirectCount = UINT32_MAX,
470 .maxSamplerLodBias = 16,
471 .maxSamplerAnisotropy = 16,
472 .maxViewports = MAX_VIEWPORTS,
473 .maxViewportDimensions = { (1 << 14), (1 << 14) },
474 .viewportBoundsRange = { -16384.0, 16384.0 },
475 .viewportSubPixelBits = 13, /* We take a float? */
476 .minMemoryMapAlignment = 4096, /* A page */
477 .minTexelBufferOffsetAlignment = 1,
478 .minUniformBufferOffsetAlignment = 1,
479 .minStorageBufferOffsetAlignment = 1,
480 .minTexelOffset = -8,
481 .maxTexelOffset = 7,
482 .minTexelGatherOffset = -8,
483 .maxTexelGatherOffset = 7,
484 .minInterpolationOffset = 0, /* FIXME */
485 .maxInterpolationOffset = 0, /* FIXME */
486 .subPixelInterpolationOffsetBits = 0, /* FIXME */
487 .maxFramebufferWidth = (1 << 14),
488 .maxFramebufferHeight = (1 << 14),
489 .maxFramebufferLayers = (1 << 10),
490 .framebufferColorSampleCounts = sample_counts,
491 .framebufferDepthSampleCounts = sample_counts,
492 .framebufferStencilSampleCounts = sample_counts,
493 .framebufferNoAttachmentsSampleCounts = sample_counts,
494 .maxColorAttachments = MAX_RTS,
495 .sampledImageColorSampleCounts = sample_counts,
496 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
497 .sampledImageDepthSampleCounts = sample_counts,
498 .sampledImageStencilSampleCounts = sample_counts,
499 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
500 .maxSampleMaskWords = 1,
501 .timestampComputeAndGraphics = false,
502 .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000),
503 .maxClipDistances = 0 /* FIXME */,
504 .maxCullDistances = 0 /* FIXME */,
505 .maxCombinedClipAndCullDistances = 0 /* FIXME */,
506 .discreteQueuePriorities = 1,
507 .pointSizeRange = { 0.125, 255.875 },
508 .lineWidthRange = { 0.0, 7.9921875 },
509 .pointSizeGranularity = (1.0 / 8.0),
510 .lineWidthGranularity = (1.0 / 128.0),
511 .strictLines = false, /* FINISHME */
512 .standardSampleLocations = true,
513 .optimalBufferCopyOffsetAlignment = 128,
514 .optimalBufferCopyRowPitchAlignment = 128,
515 .nonCoherentAtomSize = 64,
516 };
517
518 *pProperties = (VkPhysicalDeviceProperties) {
519 .apiVersion = VK_MAKE_VERSION(1, 0, 2),
520 .driverVersion = 1,
521 .vendorID = 0x8086,
522 .deviceID = pdevice->chipset_id,
523 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
524 .limits = limits,
525 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
526 };
527
528 strcpy(pProperties->deviceName, pdevice->name);
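/* MESA_GIT_SHA1 is expected to look like "git-<sha1>"; the "+ 4" skips the
 * "git-" prefix so the pipeline cache UUID becomes "anv-<sha1>".
 */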
529 snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_SIZE,
530 "anv-%s", MESA_GIT_SHA1 + 4);
531 }
532
533 void anv_GetPhysicalDeviceQueueFamilyProperties(
534 VkPhysicalDevice physicalDevice,
535 uint32_t* pCount,
536 VkQueueFamilyProperties* pQueueFamilyProperties)
537 {
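/* Standard Vulkan two-call idiom: when pQueueFamilyProperties is NULL the
 * caller only wants the number of queue families, which is always 1 here.
 */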
538 if (pQueueFamilyProperties == NULL) {
539 *pCount = 1;
540 return;
541 }
542
543 assert(*pCount >= 1);
544
545 *pQueueFamilyProperties = (VkQueueFamilyProperties) {
546 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
547 VK_QUEUE_COMPUTE_BIT |
548 VK_QUEUE_TRANSFER_BIT,
549 .queueCount = 1,
550 .timestampValidBits = 36, /* XXX: Real value here */
551 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
552 };
553 }
554
555 void anv_GetPhysicalDeviceMemoryProperties(
556 VkPhysicalDevice physicalDevice,
557 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
558 {
559 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
560 VkDeviceSize heap_size;
561
562 /* Reserve some wiggle room for the driver by exposing only 75% of the
563 * aperture to the heap.
564 */
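/* For example, a 4 GiB aperture is reported as a single 3 GiB heap. */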
565 heap_size = 3 * physical_device->aperture_size / 4;
566
567 if (physical_device->info->has_llc) {
568 /* Big core GPUs share LLC with the CPU and thus one memory type can be
569 * both cached and coherent at the same time.
570 */
571 pMemoryProperties->memoryTypeCount = 1;
572 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
573 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
574 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
575 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
576 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
577 .heapIndex = 0,
578 };
579 } else {
580 /* The spec requires that we expose a host-visible, coherent memory
581 * type, but Atom GPUs don't share LLC. Thus we offer two memory types
582 * to give the application a choice: one type that is cached but not
583 * coherent, and one that is coherent but uncached (write-combined).
584 */
585 pMemoryProperties->memoryTypeCount = 2;
586 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
587 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
588 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
589 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
590 .heapIndex = 0,
591 };
592 pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
593 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
594 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
595 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
596 .heapIndex = 0,
597 };
598 }
599
600 pMemoryProperties->memoryHeapCount = 1;
601 pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
602 .size = heap_size,
603 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
604 };
605 }
606
607 PFN_vkVoidFunction anv_GetInstanceProcAddr(
608 VkInstance instance,
609 const char* pName)
610 {
611 return anv_lookup_entrypoint(pName);
612 }
613
614 PFN_vkVoidFunction anv_GetDeviceProcAddr(
615 VkDevice device,
616 const char* pName)
617 {
618 return anv_lookup_entrypoint(pName);
619 }
620
621 static VkResult
622 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
623 {
624 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
625 queue->device = device;
626 queue->pool = &device->surface_state_pool;
627
628 return VK_SUCCESS;
629 }
630
631 static void
632 anv_queue_finish(struct anv_queue *queue)
633 {
634 }
635
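/* Helper: allocate pool state, copy 'p' into it and, on non-LLC parts, flush
 * the copy out of the CPU cache so the GPU sees it.
 */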
636 static struct anv_state
637 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
638 {
639 struct anv_state state;
640
641 state = anv_state_pool_alloc(pool, size, align);
642 memcpy(state.map, p, size);
643
644 if (!pool->block_pool->device->info.has_llc)
645 anv_state_clflush(state);
646
647 return state;
648 }
649
650 struct gen8_border_color {
651 union {
652 float float32[4];
653 uint32_t uint32[4];
654 };
655 /* Pad out to 64 bytes */
656 uint32_t _pad[12];
657 };
658
659 static void
660 anv_device_init_border_colors(struct anv_device *device)
661 {
662 static const struct gen8_border_color border_colors[] = {
663 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
664 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
665 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
666 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
667 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
668 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
669 };
670
671 device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
672 sizeof(border_colors), 64,
673 border_colors);
674 }
675
676 VkResult
677 anv_device_submit_simple_batch(struct anv_device *device,
678 struct anv_batch *batch)
679 {
680 struct anv_state state;
681 struct drm_i915_gem_execbuffer2 execbuf;
682 struct drm_i915_gem_exec_object2 exec2_objects[1];
683 struct anv_bo *bo = NULL;
684 VkResult result = VK_SUCCESS;
685 uint32_t size;
686 int64_t timeout;
687 int ret;
688
689 /* Kernel driver requires 8 byte aligned batch length */
690 size = align_u32(batch->next - batch->start, 8);
691 state = anv_state_pool_alloc(&device->dynamic_state_pool, MAX(size, 64), 32);
692 bo = &device->dynamic_state_pool.block_pool->bo;
693 memcpy(state.map, batch->start, size);
694
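/* The batch executes directly out of the dynamic state pool's BO; only the
 * batch_start_offset below distinguishes it from other allocations in that
 * pool.
 */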
695 exec2_objects[0].handle = bo->gem_handle;
696 exec2_objects[0].relocation_count = 0;
697 exec2_objects[0].relocs_ptr = 0;
698 exec2_objects[0].alignment = 0;
699 exec2_objects[0].offset = bo->offset;
700 exec2_objects[0].flags = 0;
701 exec2_objects[0].rsvd1 = 0;
702 exec2_objects[0].rsvd2 = 0;
703
704 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
705 execbuf.buffer_count = 1;
706 execbuf.batch_start_offset = state.offset;
707 execbuf.batch_len = size;
708 execbuf.cliprects_ptr = 0;
709 execbuf.num_cliprects = 0;
710 execbuf.DR1 = 0;
711 execbuf.DR4 = 0;
712
713 execbuf.flags =
714 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
715 execbuf.rsvd1 = device->context_id;
716 execbuf.rsvd2 = 0;
717
718 ret = anv_gem_execbuffer(device, &execbuf);
719 if (ret != 0) {
720 /* We don't know the real error. */
721 result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
722 goto fail;
723 }
724
725 timeout = INT64_MAX;
726 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
727 if (ret != 0) {
728 /* We don't know the real error. */
729 result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
730 goto fail;
731 }
732
733 fail:
734 anv_state_pool_free(&device->dynamic_state_pool, state);
735
736 return result;
737 }
738
739 VkResult anv_CreateDevice(
740 VkPhysicalDevice physicalDevice,
741 const VkDeviceCreateInfo* pCreateInfo,
742 const VkAllocationCallbacks* pAllocator,
743 VkDevice* pDevice)
744 {
745 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
746 VkResult result;
747 struct anv_device *device;
748
749 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
750
751 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
752 bool found = false;
753 for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
754 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
755 device_extensions[j].extensionName) == 0) {
756 found = true;
757 break;
758 }
759 }
760 if (!found)
761 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
762 }
763
764 anv_set_dispatch_devinfo(physical_device->info);
765
766 device = anv_alloc2(&physical_device->instance->alloc, pAllocator,
767 sizeof(*device), 8,
768 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
769 if (!device)
770 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
771
772 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
773 device->instance = physical_device->instance;
774
775 if (pAllocator)
776 device->alloc = *pAllocator;
777 else
778 device->alloc = physical_device->instance->alloc;
779
780 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
781 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
782 if (device->fd == -1) {
783 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
784 goto fail_device;
785 }
786
787 device->context_id = anv_gem_create_context(device);
788 if (device->context_id == -1) {
789 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
790 goto fail_fd;
791 }
792
793 device->info = *physical_device->info;
794 device->isl_dev = physical_device->isl_dev;
795
796 pthread_mutex_init(&device->mutex, NULL);
797
798 anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE);
799
800 anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
801
802 anv_state_pool_init(&device->dynamic_state_pool,
803 &device->dynamic_state_block_pool);
804
805 anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
806 anv_pipeline_cache_init(&device->default_pipeline_cache, device);
807
808 anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
809
810 anv_state_pool_init(&device->surface_state_pool,
811 &device->surface_state_block_pool);
812
813 anv_bo_init_new(&device->workaround_bo, device, 1024);
814
815 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
816
817 anv_queue_init(device, &device->queue);
818
819 switch (device->info.gen) {
820 case 7:
821 if (!device->info.is_haswell)
822 result = gen7_init_device_state(device);
823 else
824 result = gen75_init_device_state(device);
825 break;
826 case 8:
827 result = gen8_init_device_state(device);
828 break;
829 case 9:
830 result = gen9_init_device_state(device);
831 break;
832 }
833 if (result != VK_SUCCESS)
834 goto fail_fd;
835
836 result = anv_device_init_meta(device);
837 if (result != VK_SUCCESS)
838 goto fail_fd;
839
840 anv_device_init_border_colors(device);
841
842 *pDevice = anv_device_to_handle(device);
843
844 return VK_SUCCESS;
845
846 fail_fd:
847 close(device->fd);
848 fail_device:
849 anv_free(&device->alloc, device);
850
851 return result;
852 }
853
854 void anv_DestroyDevice(
855 VkDevice _device,
856 const VkAllocationCallbacks* pAllocator)
857 {
858 ANV_FROM_HANDLE(anv_device, device, _device);
859
860 anv_queue_finish(&device->queue);
861
862 anv_device_finish_meta(device);
863
864 #ifdef HAVE_VALGRIND
865 /* We only need to free these to prevent valgrind errors. The backing
866 * BO will go away in a couple of lines so we don't actually leak.
867 */
868 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
869 #endif
870
871 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
872 anv_gem_close(device, device->workaround_bo.gem_handle);
873
874 anv_bo_pool_finish(&device->batch_bo_pool);
875 anv_state_pool_finish(&device->dynamic_state_pool);
876 anv_block_pool_finish(&device->dynamic_state_block_pool);
877 anv_block_pool_finish(&device->instruction_block_pool);
878 anv_state_pool_finish(&device->surface_state_pool);
879 anv_block_pool_finish(&device->surface_state_block_pool);
880 anv_block_pool_finish(&device->scratch_block_pool);
881
882 close(device->fd);
883
884 pthread_mutex_destroy(&device->mutex);
885
886 anv_free(&device->alloc, device);
887 }
888
889 VkResult anv_EnumerateInstanceExtensionProperties(
890 const char* pLayerName,
891 uint32_t* pPropertyCount,
892 VkExtensionProperties* pProperties)
893 {
894 if (pProperties == NULL) {
895 *pPropertyCount = ARRAY_SIZE(global_extensions);
896 return VK_SUCCESS;
897 }
898
899 assert(*pPropertyCount >= ARRAY_SIZE(global_extensions));
900
901 *pPropertyCount = ARRAY_SIZE(global_extensions);
902 memcpy(pProperties, global_extensions, sizeof(global_extensions));
903
904 return VK_SUCCESS;
905 }
906
907 VkResult anv_EnumerateDeviceExtensionProperties(
908 VkPhysicalDevice physicalDevice,
909 const char* pLayerName,
910 uint32_t* pPropertyCount,
911 VkExtensionProperties* pProperties)
912 {
913 if (pProperties == NULL) {
914 *pPropertyCount = ARRAY_SIZE(device_extensions);
915 return VK_SUCCESS;
916 }
917
918 assert(*pPropertyCount >= ARRAY_SIZE(device_extensions));
919
920 *pPropertyCount = ARRAY_SIZE(device_extensions);
921 memcpy(pProperties, device_extensions, sizeof(device_extensions));
922
923 return VK_SUCCESS;
924 }
925
926 VkResult anv_EnumerateInstanceLayerProperties(
927 uint32_t* pPropertyCount,
928 VkLayerProperties* pProperties)
929 {
930 if (pProperties == NULL) {
931 *pPropertyCount = 0;
932 return VK_SUCCESS;
933 }
934
935 /* None supported at this time */
936 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
937 }
938
939 VkResult anv_EnumerateDeviceLayerProperties(
940 VkPhysicalDevice physicalDevice,
941 uint32_t* pPropertyCount,
942 VkLayerProperties* pProperties)
943 {
944 if (pProperties == NULL) {
945 *pPropertyCount = 0;
946 return VK_SUCCESS;
947 }
948
949 /* None supported at this time */
950 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
951 }
952
953 void anv_GetDeviceQueue(
954 VkDevice _device,
955 uint32_t queueNodeIndex,
956 uint32_t queueIndex,
957 VkQueue* pQueue)
958 {
959 ANV_FROM_HANDLE(anv_device, device, _device);
960
961 assert(queueIndex == 0);
962
963 *pQueue = anv_queue_to_handle(&device->queue);
964 }
965
966 VkResult anv_QueueSubmit(
967 VkQueue _queue,
968 uint32_t submitCount,
969 const VkSubmitInfo* pSubmits,
970 VkFence _fence)
971 {
972 ANV_FROM_HANDLE(anv_queue, queue, _queue);
973 ANV_FROM_HANDLE(anv_fence, fence, _fence);
974 struct anv_device *device = queue->device;
975 int ret;
976
977 for (uint32_t i = 0; i < submitCount; i++) {
978 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
979 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
980 pSubmits[i].pCommandBuffers[j]);
981 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
982
983 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf);
984 if (ret != 0) {
985 /* We don't know the real error. */
986 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
987 "execbuf2 failed: %m");
988 }
989
990 if (fence) {
991 ret = anv_gem_execbuffer(device, &fence->execbuf);
992 if (ret != 0) {
993 /* We don't know the real error. */
994 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
995 "execbuf2 failed: %m");
996 }
997 }
998
999 for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++)
1000 cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset;
1001 }
1002 }
1003
1004 return VK_SUCCESS;
1005 }
1006
1007 VkResult anv_QueueWaitIdle(
1008 VkQueue _queue)
1009 {
1010 ANV_FROM_HANDLE(anv_queue, queue, _queue);
1011
1012 return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device));
1013 }
1014
1015 VkResult anv_DeviceWaitIdle(
1016 VkDevice _device)
1017 {
1018 ANV_FROM_HANDLE(anv_device, device, _device);
1019 struct anv_batch batch;
1020
1021 uint32_t cmds[8];
1022 batch.start = batch.next = cmds;
1023 batch.end = (void *) cmds + sizeof(cmds);
1024
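/* Waiting is implemented by submitting a trivial batch (BATCH_BUFFER_END
 * padded with a NOOP to the 8-byte length the kernel requires) through
 * anv_device_submit_simple_batch(), which waits for the batch to complete.
 * Since execution on the context is in order, that implies all previously
 * submitted work has finished as well.
 */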
1025 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
1026 anv_batch_emit(&batch, GEN7_MI_NOOP);
1027
1028 return anv_device_submit_simple_batch(device, &batch);
1029 }
1030
1031 VkResult
1032 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1033 {
1034 bo->gem_handle = anv_gem_create(device, size);
1035 if (!bo->gem_handle)
1036 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1037
1038 bo->map = NULL;
1039 bo->index = 0;
1040 bo->offset = 0;
1041 bo->size = size;
1042
1043 return VK_SUCCESS;
1044 }
1045
1046 VkResult anv_AllocateMemory(
1047 VkDevice _device,
1048 const VkMemoryAllocateInfo* pAllocateInfo,
1049 const VkAllocationCallbacks* pAllocator,
1050 VkDeviceMemory* pMem)
1051 {
1052 ANV_FROM_HANDLE(anv_device, device, _device);
1053 struct anv_device_memory *mem;
1054 VkResult result;
1055
1056 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1057
1058 if (pAllocateInfo->allocationSize == 0) {
1059 /* Apparently, this is allowed */
1060 *pMem = VK_NULL_HANDLE;
1061 return VK_SUCCESS;
1062 }
1063
1064 /* We support exactly one memory heap. */
1065 assert(pAllocateInfo->memoryTypeIndex == 0 ||
1066 (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
1067
1068 /* FINISHME: Fail if allocation request exceeds heap size. */
1069
1070 mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1071 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1072 if (mem == NULL)
1073 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1074
1075 /* The kernel is going to give us whole pages anyway */
1076 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1077
1078 result = anv_bo_init_new(&mem->bo, device, alloc_size);
1079 if (result != VK_SUCCESS)
1080 goto fail;
1081
1082 mem->type_index = pAllocateInfo->memoryTypeIndex;
1083
1084 *pMem = anv_device_memory_to_handle(mem);
1085
1086 return VK_SUCCESS;
1087
1088 fail:
1089 anv_free2(&device->alloc, pAllocator, mem);
1090
1091 return result;
1092 }
1093
1094 void anv_FreeMemory(
1095 VkDevice _device,
1096 VkDeviceMemory _mem,
1097 const VkAllocationCallbacks* pAllocator)
1098 {
1099 ANV_FROM_HANDLE(anv_device, device, _device);
1100 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1101
1102 if (mem == NULL)
1103 return;
1104
1105 if (mem->bo.map)
1106 anv_gem_munmap(mem->bo.map, mem->bo.size);
1107
1108 if (mem->bo.gem_handle != 0)
1109 anv_gem_close(device, mem->bo.gem_handle);
1110
1111 anv_free2(&device->alloc, pAllocator, mem);
1112 }
1113
1114 VkResult anv_MapMemory(
1115 VkDevice _device,
1116 VkDeviceMemory _memory,
1117 VkDeviceSize offset,
1118 VkDeviceSize size,
1119 VkMemoryMapFlags flags,
1120 void** ppData)
1121 {
1122 ANV_FROM_HANDLE(anv_device, device, _device);
1123 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1124
1125 if (mem == NULL) {
1126 *ppData = NULL;
1127 return VK_SUCCESS;
1128 }
1129
1130 if (size == VK_WHOLE_SIZE)
1131 size = mem->bo.size - offset;
1132
1133 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1134 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1135 * at a time is valid. We could just mmap up front and return an offset
1136 * pointer here, but that may exhaust virtual memory on 32 bit
1137 * userspace. */
1138
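/* On non-LLC parts, memory type 0 is the coherent-but-uncached type (see
 * anv_GetPhysicalDeviceMemoryProperties), so map it write-combined; the
 * cached type 1 gets a regular mapping.
 */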
1139 uint32_t gem_flags = 0;
1140 if (!device->info.has_llc && mem->type_index == 0)
1141 gem_flags |= I915_MMAP_WC;
1142
1143 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
1144 uint64_t map_offset = offset & ~4095ull;
1145 assert(offset >= map_offset);
1146 uint64_t map_size = (offset + size) - map_offset;
1147
1148 /* Let's map whole pages */
1149 map_size = align_u64(map_size, 4096);
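/* For example, offset = 8200 and size = 100 gives map_offset = 8192 and
 * map_size = 4096, and *ppData below ends up pointing 8 bytes into the
 * mapping.
 */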
1150
1151 mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
1152 map_offset, map_size, gem_flags);
1153 mem->map_size = map_size;
1154
1155 *ppData = mem->map + (offset - map_offset);
1156
1157 return VK_SUCCESS;
1158 }
1159
1160 void anv_UnmapMemory(
1161 VkDevice _device,
1162 VkDeviceMemory _memory)
1163 {
1164 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1165
1166 if (mem == NULL)
1167 return;
1168
1169 anv_gem_munmap(mem->map, mem->map_size);
1170 }
1171
1172 static void
1173 clflush_mapped_ranges(struct anv_device *device,
1174 uint32_t count,
1175 const VkMappedMemoryRange *ranges)
1176 {
1177 for (uint32_t i = 0; i < count; i++) {
1178 ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
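/* clflush works on whole cache lines, so round the start address down to a
 * cache-line boundary and flush through the end of the range, clamped to
 * the size of the mapping.
 */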
1179 void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
1180 void *end;
1181
1182 if (ranges[i].offset + ranges[i].size > mem->map_size)
1183 end = mem->map + mem->map_size;
1184 else
1185 end = mem->map + ranges[i].offset + ranges[i].size;
1186
1187 while (p < end) {
1188 __builtin_ia32_clflush(p);
1189 p += CACHELINE_SIZE;
1190 }
1191 }
1192 }
1193
1194 VkResult anv_FlushMappedMemoryRanges(
1195 VkDevice _device,
1196 uint32_t memoryRangeCount,
1197 const VkMappedMemoryRange* pMemoryRanges)
1198 {
1199 ANV_FROM_HANDLE(anv_device, device, _device);
1200
1201 if (device->info.has_llc)
1202 return VK_SUCCESS;
1203
1204 /* Make sure the writes we're flushing have landed. */
1205 __builtin_ia32_mfence();
1206
1207 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1208
1209 return VK_SUCCESS;
1210 }
1211
1212 VkResult anv_InvalidateMappedMemoryRanges(
1213 VkDevice _device,
1214 uint32_t memoryRangeCount,
1215 const VkMappedMemoryRange* pMemoryRanges)
1216 {
1217 ANV_FROM_HANDLE(anv_device, device, _device);
1218
1219 if (device->info.has_llc)
1220 return VK_SUCCESS;
1221
1222 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1223
1224 /* Make sure no reads get moved up above the invalidate. */
1225 __builtin_ia32_mfence();
1226
1227 return VK_SUCCESS;
1228 }
1229
1230 void anv_GetBufferMemoryRequirements(
1231 VkDevice device,
1232 VkBuffer _buffer,
1233 VkMemoryRequirements* pMemoryRequirements)
1234 {
1235 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1236
1237 /* The Vulkan spec (git aaed022) says:
1238 *
1239 * memoryTypeBits is a bitfield and contains one bit set for every
1240 * supported memory type for the resource. The bit `1<<i` is set if and
1241 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1242 * structure for the physical device is supported.
1243 *
1244 * We support exactly one memory type.
1245 */
1246 pMemoryRequirements->memoryTypeBits = 1;
1247
1248 pMemoryRequirements->size = buffer->size;
1249 pMemoryRequirements->alignment = 16;
1250 }
1251
1252 void anv_GetImageMemoryRequirements(
1253 VkDevice device,
1254 VkImage _image,
1255 VkMemoryRequirements* pMemoryRequirements)
1256 {
1257 ANV_FROM_HANDLE(anv_image, image, _image);
1258
1259 /* The Vulkan spec (git aaed022) says:
1260 *
1261 * memoryTypeBits is a bitfield and contains one bit set for every
1262 * supported memory type for the resource. The bit `1<<i` is set if and
1263 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1264 * structure for the physical device is supported.
1265 *
1266 * We support exactly one memory type.
1267 */
1268 pMemoryRequirements->memoryTypeBits = 1;
1269
1270 pMemoryRequirements->size = image->size;
1271 pMemoryRequirements->alignment = image->alignment;
1272 }
1273
1274 void anv_GetImageSparseMemoryRequirements(
1275 VkDevice device,
1276 VkImage image,
1277 uint32_t* pSparseMemoryRequirementCount,
1278 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1279 {
1280 stub();
1281 }
1282
1283 void anv_GetDeviceMemoryCommitment(
1284 VkDevice device,
1285 VkDeviceMemory memory,
1286 VkDeviceSize* pCommittedMemoryInBytes)
1287 {
1288 *pCommittedMemoryInBytes = 0;
1289 }
1290
1291 VkResult anv_BindBufferMemory(
1292 VkDevice device,
1293 VkBuffer _buffer,
1294 VkDeviceMemory _memory,
1295 VkDeviceSize memoryOffset)
1296 {
1297 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1298 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1299
1300 if (mem) {
1301 buffer->bo = &mem->bo;
1302 buffer->offset = memoryOffset;
1303 } else {
1304 buffer->bo = NULL;
1305 buffer->offset = 0;
1306 }
1307
1308 return VK_SUCCESS;
1309 }
1310
1311 VkResult anv_BindImageMemory(
1312 VkDevice device,
1313 VkImage _image,
1314 VkDeviceMemory _memory,
1315 VkDeviceSize memoryOffset)
1316 {
1317 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1318 ANV_FROM_HANDLE(anv_image, image, _image);
1319
1320 if (mem) {
1321 image->bo = &mem->bo;
1322 image->offset = memoryOffset;
1323 } else {
1324 image->bo = NULL;
1325 image->offset = 0;
1326 }
1327
1328 return VK_SUCCESS;
1329 }
1330
1331 VkResult anv_QueueBindSparse(
1332 VkQueue queue,
1333 uint32_t bindInfoCount,
1334 const VkBindSparseInfo* pBindInfo,
1335 VkFence fence)
1336 {
1337 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1338 }
1339
1340 VkResult anv_CreateFence(
1341 VkDevice _device,
1342 const VkFenceCreateInfo* pCreateInfo,
1343 const VkAllocationCallbacks* pAllocator,
1344 VkFence* pFence)
1345 {
1346 ANV_FROM_HANDLE(anv_device, device, _device);
1347 struct anv_fence *fence;
1348 struct anv_batch batch;
1349 VkResult result;
1350
1351 const uint32_t fence_size = 128;
1352
1353 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1354
1355 fence = anv_alloc2(&device->alloc, pAllocator, sizeof(*fence), 8,
1356 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1357 if (fence == NULL)
1358 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1359
1360 result = anv_bo_init_new(&fence->bo, device, fence_size);
1361 if (result != VK_SUCCESS)
1362 goto fail;
1363
1364 fence->bo.map =
1365 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size, 0);
1366 batch.next = batch.start = fence->bo.map;
1367 batch.end = fence->bo.map + fence->bo.size;
1368 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
1369 anv_batch_emit(&batch, GEN7_MI_NOOP);
1370
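/* Without LLC the CPU writes above must be flushed before the GPU can see
 * the batch.  The asserts below check that the whole batch fits in a single
 * cache line, so one clflush is enough.
 */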
1371 if (!device->info.has_llc) {
1372 assert(((uintptr_t) fence->bo.map & CACHELINE_MASK) == 0);
1373 assert(batch.next - fence->bo.map <= CACHELINE_SIZE);
1374 __builtin_ia32_mfence();
1375 __builtin_ia32_clflush(fence->bo.map);
1376 }
1377
1378 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1379 fence->exec2_objects[0].relocation_count = 0;
1380 fence->exec2_objects[0].relocs_ptr = 0;
1381 fence->exec2_objects[0].alignment = 0;
1382 fence->exec2_objects[0].offset = fence->bo.offset;
1383 fence->exec2_objects[0].flags = 0;
1384 fence->exec2_objects[0].rsvd1 = 0;
1385 fence->exec2_objects[0].rsvd2 = 0;
1386
1387 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1388 fence->execbuf.buffer_count = 1;
1389 fence->execbuf.batch_start_offset = 0;
1390 fence->execbuf.batch_len = batch.next - fence->bo.map;
1391 fence->execbuf.cliprects_ptr = 0;
1392 fence->execbuf.num_cliprects = 0;
1393 fence->execbuf.DR1 = 0;
1394 fence->execbuf.DR4 = 0;
1395
1396 fence->execbuf.flags =
1397 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1398 fence->execbuf.rsvd1 = device->context_id;
1399 fence->execbuf.rsvd2 = 0;
1400
1401 fence->ready = false;
1402
1403 *pFence = anv_fence_to_handle(fence);
1404
1405 return VK_SUCCESS;
1406
1407 fail:
1408 anv_free2(&device->alloc, pAllocator, fence);
1409
1410 return result;
1411 }
1412
1413 void anv_DestroyFence(
1414 VkDevice _device,
1415 VkFence _fence,
1416 const VkAllocationCallbacks* pAllocator)
1417 {
1418 ANV_FROM_HANDLE(anv_device, device, _device);
1419 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1420
1421 anv_gem_munmap(fence->bo.map, fence->bo.size);
1422 anv_gem_close(device, fence->bo.gem_handle);
1423 anv_free2(&device->alloc, pAllocator, fence);
1424 }
1425
1426 VkResult anv_ResetFences(
1427 VkDevice _device,
1428 uint32_t fenceCount,
1429 const VkFence* pFences)
1430 {
1431 for (uint32_t i = 0; i < fenceCount; i++) {
1432 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1433 fence->ready = false;
1434 }
1435
1436 return VK_SUCCESS;
1437 }
1438
1439 VkResult anv_GetFenceStatus(
1440 VkDevice _device,
1441 VkFence _fence)
1442 {
1443 ANV_FROM_HANDLE(anv_device, device, _device);
1444 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1445 int64_t t = 0;
1446 int ret;
1447
1448 if (fence->ready)
1449 return VK_SUCCESS;
1450
1451 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1452 if (ret == 0) {
1453 fence->ready = true;
1454 return VK_SUCCESS;
1455 }
1456
1457 return VK_NOT_READY;
1458 }
1459
1460 VkResult anv_WaitForFences(
1461 VkDevice _device,
1462 uint32_t fenceCount,
1463 const VkFence* pFences,
1464 VkBool32 waitAll,
1465 uint64_t timeout)
1466 {
1467 ANV_FROM_HANDLE(anv_device, device, _device);
1468
1469 /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
1470 * to block indefinitely for timeouts <= 0. Unfortunately, this was broken
1471 * for a couple of kernel releases. Since there's no way to know
1472 * whether or not the kernel we're using is one of the broken ones, the
1473 * best we can do is to clamp the timeout to INT64_MAX. This limits the
1474 * maximum timeout from 584 years to 292 years - likely not a big deal.
1475 */
1476 if (timeout > INT64_MAX)
1477 timeout = INT64_MAX;
1478
1479 int64_t t = timeout;
1480
1481 /* FIXME: handle !waitAll */
1482
1483 for (uint32_t i = 0; i < fenceCount; i++) {
1484 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1485 int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1486 if (ret == -1 && errno == ETIME) {
1487 return VK_TIMEOUT;
1488 } else if (ret == -1) {
1489 /* We don't know the real error. */
1490 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
1491 "gem wait failed: %m");
1492 }
1493 }
1494
1495 return VK_SUCCESS;
1496 }
1497
1498 // Queue semaphore functions
1499
1500 VkResult anv_CreateSemaphore(
1501 VkDevice device,
1502 const VkSemaphoreCreateInfo* pCreateInfo,
1503 const VkAllocationCallbacks* pAllocator,
1504 VkSemaphore* pSemaphore)
1505 {
1506 /* The DRM execbuffer ioctl always executes in-order, even between different
1507 * rings. As such, there's nothing to do for the user space semaphore.
1508 */
1509
1510 *pSemaphore = (VkSemaphore)1;
1511
1512 return VK_SUCCESS;
1513 }
1514
1515 void anv_DestroySemaphore(
1516 VkDevice device,
1517 VkSemaphore semaphore,
1518 const VkAllocationCallbacks* pAllocator)
1519 {
1520 }
1521
1522 // Event functions
1523
1524 VkResult anv_CreateEvent(
1525 VkDevice _device,
1526 const VkEventCreateInfo* pCreateInfo,
1527 const VkAllocationCallbacks* pAllocator,
1528 VkEvent* pEvent)
1529 {
1530 ANV_FROM_HANDLE(anv_device, device, _device);
1531 struct anv_state state;
1532 struct anv_event *event;
1533
1534 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
1535
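/* Events live in GPU-visible dynamic state so that the command buffer paths
 * can have the GPU write the status dword that the CPU reads and writes
 * here.
 */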
1536 state = anv_state_pool_alloc(&device->dynamic_state_pool,
1537 sizeof(*event), 4);
1538 event = state.map;
1539 event->state = state;
1540 event->semaphore = VK_EVENT_RESET;
1541
1542 if (!device->info.has_llc) {
1543 /* Make sure the writes we're flushing have landed. */
1544 __builtin_ia32_mfence();
1545 __builtin_ia32_clflush(event);
1546 }
1547
1548 *pEvent = anv_event_to_handle(event);
1549
1550 return VK_SUCCESS;
1551 }
1552
1553 void anv_DestroyEvent(
1554 VkDevice _device,
1555 VkEvent _event,
1556 const VkAllocationCallbacks* pAllocator)
1557 {
1558 ANV_FROM_HANDLE(anv_device, device, _device);
1559 ANV_FROM_HANDLE(anv_event, event, _event);
1560
1561 anv_state_pool_free(&device->dynamic_state_pool, event->state);
1562 }
1563
1564 VkResult anv_GetEventStatus(
1565 VkDevice _device,
1566 VkEvent _event)
1567 {
1568 ANV_FROM_HANDLE(anv_device, device, _device);
1569 ANV_FROM_HANDLE(anv_event, event, _event);
1570
1571 if (!device->info.has_llc) {
1572 /* Invalidate read cache before reading event written by GPU. */
1573 __builtin_ia32_clflush(event);
1574 __builtin_ia32_mfence();
1575
1576 }
1577
1578 return event->semaphore;
1579 }
1580
1581 VkResult anv_SetEvent(
1582 VkDevice _device,
1583 VkEvent _event)
1584 {
1585 ANV_FROM_HANDLE(anv_device, device, _device);
1586 ANV_FROM_HANDLE(anv_event, event, _event);
1587
1588 event->semaphore = VK_EVENT_SET;
1589
1590 if (!device->info.has_llc) {
1591 /* Make sure the writes we're flushing have landed. */
1592 __builtin_ia32_mfence();
1593 __builtin_ia32_clflush(event);
1594 }
1595
1596 return VK_SUCCESS;
1597 }
1598
1599 VkResult anv_ResetEvent(
1600 VkDevice _device,
1601 VkEvent _event)
1602 {
1603 ANV_FROM_HANDLE(anv_device, device, _device);
1604 ANV_FROM_HANDLE(anv_event, event, _event);
1605
1606 event->semaphore = VK_EVENT_RESET;
1607
1608 if (!device->info.has_llc) {
1609 /* Make sure the writes we're flushing have landed. */
1610 __builtin_ia32_mfence();
1611 __builtin_ia32_clflush(event);
1612 }
1613
1614 return VK_SUCCESS;
1615 }
1616
1617 // Buffer functions
1618
1619 VkResult anv_CreateBuffer(
1620 VkDevice _device,
1621 const VkBufferCreateInfo* pCreateInfo,
1622 const VkAllocationCallbacks* pAllocator,
1623 VkBuffer* pBuffer)
1624 {
1625 ANV_FROM_HANDLE(anv_device, device, _device);
1626 struct anv_buffer *buffer;
1627
1628 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1629
1630 buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1631 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1632 if (buffer == NULL)
1633 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1634
1635 buffer->size = pCreateInfo->size;
1636 buffer->usage = pCreateInfo->usage;
1637 buffer->bo = NULL;
1638 buffer->offset = 0;
1639
1640 *pBuffer = anv_buffer_to_handle(buffer);
1641
1642 return VK_SUCCESS;
1643 }
1644
1645 void anv_DestroyBuffer(
1646 VkDevice _device,
1647 VkBuffer _buffer,
1648 const VkAllocationCallbacks* pAllocator)
1649 {
1650 ANV_FROM_HANDLE(anv_device, device, _device);
1651 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1652
1653 anv_free2(&device->alloc, pAllocator, buffer);
1654 }
1655
1656 void
1657 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
1658 enum isl_format format,
1659 uint32_t offset, uint32_t range, uint32_t stride)
1660 {
1661 switch (device->info.gen) {
1662 case 7:
1663 if (device->info.is_haswell)
1664 gen75_fill_buffer_surface_state(state.map, format, offset, range,
1665 stride);
1666 else
1667 gen7_fill_buffer_surface_state(state.map, format, offset, range,
1668 stride);
1669 break;
1670 case 8:
1671 gen8_fill_buffer_surface_state(state.map, format, offset, range, stride);
1672 break;
1673 case 9:
1674 gen9_fill_buffer_surface_state(state.map, format, offset, range, stride);
1675 break;
1676 default:
1677 unreachable("unsupported gen\n");
1678 }
1679
1680 if (!device->info.has_llc)
1681 anv_state_clflush(state);
1682 }
1683
1684 void anv_DestroySampler(
1685 VkDevice _device,
1686 VkSampler _sampler,
1687 const VkAllocationCallbacks* pAllocator)
1688 {
1689 ANV_FROM_HANDLE(anv_device, device, _device);
1690 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
1691
1692 anv_free2(&device->alloc, pAllocator, sampler);
1693 }
1694
1695 VkResult anv_CreateFramebuffer(
1696 VkDevice _device,
1697 const VkFramebufferCreateInfo* pCreateInfo,
1698 const VkAllocationCallbacks* pAllocator,
1699 VkFramebuffer* pFramebuffer)
1700 {
1701 ANV_FROM_HANDLE(anv_device, device, _device);
1702 struct anv_framebuffer *framebuffer;
1703
1704 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
1705
1706 size_t size = sizeof(*framebuffer) +
1707 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
1708 framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8,
1709 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1710 if (framebuffer == NULL)
1711 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1712
1713 framebuffer->attachment_count = pCreateInfo->attachmentCount;
1714 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
1715 VkImageView _iview = pCreateInfo->pAttachments[i];
1716 framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
1717 }
1718
1719 framebuffer->width = pCreateInfo->width;
1720 framebuffer->height = pCreateInfo->height;
1721 framebuffer->layers = pCreateInfo->layers;
1722
1723 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
1724
1725 return VK_SUCCESS;
1726 }
1727
1728 void anv_DestroyFramebuffer(
1729 VkDevice _device,
1730 VkFramebuffer _fb,
1731 const VkAllocationCallbacks* pAllocator)
1732 {
1733 ANV_FROM_HANDLE(anv_device, device, _device);
1734 ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
1735
1736 anv_free2(&device->alloc, pAllocator, fb);
1737 }
1738
1739 void vkCmdDbgMarkerBegin(
1740 VkCommandBuffer commandBuffer,
1741 const char* pMarker)
1742 __attribute__ ((visibility ("default")));
1743
1744 void vkCmdDbgMarkerEnd(
1745 VkCommandBuffer commandBuffer)
1746 __attribute__ ((visibility ("default")));
1747
1748 void vkCmdDbgMarkerBegin(
1749 VkCommandBuffer commandBuffer,
1750 const char* pMarker)
1751 {
1752 }
1753
1754 void vkCmdDbgMarkerEnd(
1755 VkCommandBuffer commandBuffer)
1756 {
1757 }