anv: kill off custom define HAVE_WAYLAND_PLATFORM
[mesa.git] / src / intel / vulkan / anv_device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31 #include "mesa/main/git_sha1.h"
32 #include "util/strtod.h"
33 #include "util/debug.h"
34
35 #include "genxml/gen7_pack.h"
36
37 struct anv_dispatch_table dtable;
38
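/* Logging hooks handed to the shared brw compiler: debug output is simply
 * dropped, while perf output is forwarded to stderr when INTEL_DEBUG=perf
 * is set.
 */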
39 static void
40 compiler_debug_log(void *data, const char *fmt, ...)
41 { }
42
43 static void
44 compiler_perf_log(void *data, const char *fmt, ...)
45 {
46 va_list args;
47 va_start(args, fmt);
48
49 if (unlikely(INTEL_DEBUG & DEBUG_PERF))
50 vfprintf(stderr, fmt, args);
51
52 va_end(args);
53 }
54
55 static VkResult
56 anv_physical_device_init(struct anv_physical_device *device,
57 struct anv_instance *instance,
58 const char *path)
59 {
60 VkResult result;
61 int fd;
62
63 fd = open(path, O_RDWR | O_CLOEXEC);
64 if (fd < 0)
65 return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
66 "failed to open %s: %m", path);
67
68 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
69 device->instance = instance;
70 device->path = path;
71
72 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
73 if (!device->chipset_id) {
74 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
75 "failed to get chipset id: %m");
76 goto fail;
77 }
78
79 device->name = brw_get_device_name(device->chipset_id);
80 device->info = brw_get_device_info(device->chipset_id);
81 if (!device->info) {
82 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
83 "failed to get device info");
84 goto fail;
85 }
86
87 if (device->info->is_haswell) {
88 fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
89 } else if (device->info->gen == 7 && !device->info->is_baytrail) {
90 fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
91 } else if (device->info->gen == 7 && device->info->is_baytrail) {
92 fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
93 } else if (device->info->gen >= 8) {
94 /* Broadwell, Cherryview, Skylake, Broxton, and Kaby Lake are as fully
95 * supported as anything */
96 } else {
97 result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
98 "Vulkan not yet supported on %s", device->name);
99 goto fail;
100 }
101
102 device->cmd_parser_version = -1;
103 if (device->info->gen == 7) {
104 device->cmd_parser_version =
105 anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
106 if (device->cmd_parser_version == -1) {
107 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
108 "failed to get command parser version");
109 goto fail;
110 }
111 }
112
113 if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
114 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
115 "failed to get aperture size: %m");
116 goto fail;
117 }
118
119 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
120 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
121 "kernel missing gem wait");
122 goto fail;
123 }
124
125 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
126 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
127 "kernel missing execbuf2");
128 goto fail;
129 }
130
131 if (!device->info->has_llc &&
132 anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
133 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
134 "kernel missing wc mmap");
135 goto fail;
136 }
137
138 bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
139
140 close(fd);
141
142 brw_process_intel_debug_variable();
143
144 device->compiler = brw_compiler_create(NULL, device->info);
145 if (device->compiler == NULL) {
146 result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
147 goto fail;
148 }
149 device->compiler->shader_debug_log = compiler_debug_log;
150 device->compiler->shader_perf_log = compiler_perf_log;
151
152 /* bit6 swizzling is detected above via anv_gem_get_bit6_swizzle() */
153 isl_device_init(&device->isl_dev, device->info, swizzled);
154
155 return VK_SUCCESS;
156
157 fail:
158 close(fd);
159 return result;
160 }
161
162 static void
163 anv_physical_device_finish(struct anv_physical_device *device)
164 {
165 ralloc_free(device->compiler);
166 }
167
168 static const VkExtensionProperties global_extensions[] = {
169 {
170 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
171 .specVersion = 25,
172 },
173 {
174 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
175 .specVersion = 5,
176 },
177 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
178 {
179 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
180 .specVersion = 4,
181 },
182 #endif
183 };
184
185 static const VkExtensionProperties device_extensions[] = {
186 {
187 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
188 .specVersion = 67,
189 },
190 };
191
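/* Fallback host allocation callbacks, used whenever the application passes no
 * VkAllocationCallbacks of its own. Note that the requested alignment is not
 * honored here; malloc's natural alignment is assumed to be sufficient.
 */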
192 static void *
193 default_alloc_func(void *pUserData, size_t size, size_t align,
194 VkSystemAllocationScope allocationScope)
195 {
196 return malloc(size);
197 }
198
199 static void *
200 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
201 size_t align, VkSystemAllocationScope allocationScope)
202 {
203 return realloc(pOriginal, size);
204 }
205
206 static void
207 default_free_func(void *pUserData, void *pMemory)
208 {
209 free(pMemory);
210 }
211
212 static const VkAllocationCallbacks default_alloc = {
213 .pUserData = NULL,
214 .pfnAllocation = default_alloc_func,
215 .pfnReallocation = default_realloc_func,
216 .pfnFree = default_free_func,
217 };
218
219 VkResult anv_CreateInstance(
220 const VkInstanceCreateInfo* pCreateInfo,
221 const VkAllocationCallbacks* pAllocator,
222 VkInstance* pInstance)
223 {
224 struct anv_instance *instance;
225
226 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
227
228 uint32_t client_version;
229 if (pCreateInfo->pApplicationInfo &&
230 pCreateInfo->pApplicationInfo->apiVersion != 0) {
231 client_version = pCreateInfo->pApplicationInfo->apiVersion;
232 } else {
233 client_version = VK_MAKE_VERSION(1, 0, 0);
234 }
235
236 if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
237 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
238 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
239 "Client requested version %d.%d.%d",
240 VK_VERSION_MAJOR(client_version),
241 VK_VERSION_MINOR(client_version),
242 VK_VERSION_PATCH(client_version));
243 }
244
245 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
246 bool found = false;
247 for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
248 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
249 global_extensions[j].extensionName) == 0) {
250 found = true;
251 break;
252 }
253 }
254 if (!found)
255 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
256 }
257
258 instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
259 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
260 if (!instance)
261 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
262
263 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
264
265 if (pAllocator)
266 instance->alloc = *pAllocator;
267 else
268 instance->alloc = default_alloc;
269
270 instance->apiVersion = client_version;
271 instance->physicalDeviceCount = -1;
272
273 memset(instance->wsi, 0, sizeof(instance->wsi));
274
275 _mesa_locale_init();
276
277 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
278
279 anv_init_wsi(instance);
280
281 *pInstance = anv_instance_to_handle(instance);
282
283 return VK_SUCCESS;
284 }
285
286 void anv_DestroyInstance(
287 VkInstance _instance,
288 const VkAllocationCallbacks* pAllocator)
289 {
290 ANV_FROM_HANDLE(anv_instance, instance, _instance);
291
292 if (instance->physicalDeviceCount > 0) {
293 /* We support at most one physical device. */
294 assert(instance->physicalDeviceCount == 1);
295 anv_physical_device_finish(&instance->physicalDevice);
296 }
297
298 anv_finish_wsi(instance);
299
300 VG(VALGRIND_DESTROY_MEMPOOL(instance));
301
302 _mesa_locale_fini();
303
304 anv_free(&instance->alloc, instance);
305 }
306
307 VkResult anv_EnumeratePhysicalDevices(
308 VkInstance _instance,
309 uint32_t* pPhysicalDeviceCount,
310 VkPhysicalDevice* pPhysicalDevices)
311 {
312 ANV_FROM_HANDLE(anv_instance, instance, _instance);
313 VkResult result;
314
315 if (instance->physicalDeviceCount < 0) {
316 result = anv_physical_device_init(&instance->physicalDevice,
317 instance, "/dev/dri/renderD128");
318 if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
319 instance->physicalDeviceCount = 0;
320 } else if (result == VK_SUCCESS) {
321 instance->physicalDeviceCount = 1;
322 } else {
323 return result;
324 }
325 }
326
327 /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
328 * otherwise it's an inout parameter.
329 *
330 * The Vulkan spec (git aaed022) says:
331 *
332 * pPhysicalDeviceCount is a pointer to an unsigned integer variable
333 * that is initialized with the number of devices the application is
334 * prepared to receive handles to. pname:pPhysicalDevices is pointer to
335 * an array of at least this many VkPhysicalDevice handles [...].
336 *
337 * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
338 * overwrites the contents of the variable pointed to by
339 * pPhysicalDeviceCount with the number of physical devices in the
340 * instance; otherwise, vkEnumeratePhysicalDevices overwrites
341 * pPhysicalDeviceCount with the number of physical handles written to
342 * pPhysicalDevices.
343 */
344 if (!pPhysicalDevices) {
345 *pPhysicalDeviceCount = instance->physicalDeviceCount;
346 } else if (*pPhysicalDeviceCount >= 1) {
347 pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
348 *pPhysicalDeviceCount = 1;
349 } else {
350 *pPhysicalDeviceCount = 0;
351 }
352
353 return VK_SUCCESS;
354 }
355
356 void anv_GetPhysicalDeviceFeatures(
357 VkPhysicalDevice physicalDevice,
358 VkPhysicalDeviceFeatures* pFeatures)
359 {
360 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
361
362 *pFeatures = (VkPhysicalDeviceFeatures) {
363 .robustBufferAccess = true,
364 .fullDrawIndexUint32 = true,
365 .imageCubeArray = false,
366 .independentBlend = pdevice->info->gen >= 8,
367 .geometryShader = true,
368 .tessellationShader = false,
369 .sampleRateShading = false,
370 .dualSrcBlend = true,
371 .logicOp = true,
372 .multiDrawIndirect = false,
373 .drawIndirectFirstInstance = false,
374 .depthClamp = false,
375 .depthBiasClamp = false,
376 .fillModeNonSolid = true,
377 .depthBounds = false,
378 .wideLines = true,
379 .largePoints = true,
380 .alphaToOne = true,
381 .multiViewport = true,
382 .samplerAnisotropy = false, /* FINISHME */
383 .textureCompressionETC2 = pdevice->info->gen >= 8 ||
384 pdevice->info->is_baytrail,
385 .textureCompressionASTC_LDR = false, /* FINISHME */
386 .textureCompressionBC = true,
387 .occlusionQueryPrecise = true,
388 .pipelineStatisticsQuery = false,
389 .fragmentStoresAndAtomics = true,
390 .shaderTessellationAndGeometryPointSize = true,
391 .shaderImageGatherExtended = true,
392 .shaderStorageImageExtendedFormats = false,
393 .shaderStorageImageMultisample = false,
394 .shaderUniformBufferArrayDynamicIndexing = true,
395 .shaderSampledImageArrayDynamicIndexing = true,
396 .shaderStorageBufferArrayDynamicIndexing = true,
397 .shaderStorageImageArrayDynamicIndexing = true,
398 .shaderStorageImageReadWithoutFormat = false,
399 .shaderStorageImageWriteWithoutFormat = true,
400 .shaderClipDistance = false,
401 .shaderCullDistance = false,
402 .shaderFloat64 = false,
403 .shaderInt64 = false,
404 .shaderInt16 = false,
406 .variableMultisampleRate = false,
407 .inheritedQueries = false,
408 };
409
410 /* We can't do image stores in vec4 shaders */
411 pFeatures->vertexPipelineStoresAndAtomics =
412 pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
413 pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
414 }
415
416 void
417 anv_device_get_cache_uuid(void *uuid)
418 {
419 memset(uuid, 0, VK_UUID_SIZE);
420 snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
421 }
422
423 void anv_GetPhysicalDeviceProperties(
424 VkPhysicalDevice physicalDevice,
425 VkPhysicalDeviceProperties* pProperties)
426 {
427 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
428 const struct brw_device_info *devinfo = pdevice->info;
429
430 anv_finishme("Get correct values for VkPhysicalDeviceLimits");
431
432 const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
433
434 VkSampleCountFlags sample_counts =
435 isl_device_get_sample_counts(&pdevice->isl_dev);
436
437 VkPhysicalDeviceLimits limits = {
438 .maxImageDimension1D = (1 << 14),
439 .maxImageDimension2D = (1 << 14),
440 .maxImageDimension3D = (1 << 11),
441 .maxImageDimensionCube = (1 << 14),
442 .maxImageArrayLayers = (1 << 11),
443 .maxTexelBufferElements = 128 * 1024 * 1024,
444 .maxUniformBufferRange = UINT32_MAX,
445 .maxStorageBufferRange = UINT32_MAX,
446 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
447 .maxMemoryAllocationCount = UINT32_MAX,
448 .maxSamplerAllocationCount = 64 * 1024,
449 .bufferImageGranularity = 64, /* A cache line */
450 .sparseAddressSpaceSize = 0,
451 .maxBoundDescriptorSets = MAX_SETS,
452 .maxPerStageDescriptorSamplers = 64,
453 .maxPerStageDescriptorUniformBuffers = 64,
454 .maxPerStageDescriptorStorageBuffers = 64,
455 .maxPerStageDescriptorSampledImages = 64,
456 .maxPerStageDescriptorStorageImages = 64,
457 .maxPerStageDescriptorInputAttachments = 64,
458 .maxPerStageResources = 128,
459 .maxDescriptorSetSamplers = 256,
460 .maxDescriptorSetUniformBuffers = 256,
461 .maxDescriptorSetUniformBuffersDynamic = 256,
462 .maxDescriptorSetStorageBuffers = 256,
463 .maxDescriptorSetStorageBuffersDynamic = 256,
464 .maxDescriptorSetSampledImages = 256,
465 .maxDescriptorSetStorageImages = 256,
466 .maxDescriptorSetInputAttachments = 256,
467 .maxVertexInputAttributes = 32,
468 .maxVertexInputBindings = 32,
469 .maxVertexInputAttributeOffset = 2047,
470 .maxVertexInputBindingStride = 2048,
471 .maxVertexOutputComponents = 128,
472 .maxTessellationGenerationLevel = 0,
473 .maxTessellationPatchSize = 0,
474 .maxTessellationControlPerVertexInputComponents = 0,
475 .maxTessellationControlPerVertexOutputComponents = 0,
476 .maxTessellationControlPerPatchOutputComponents = 0,
477 .maxTessellationControlTotalOutputComponents = 0,
478 .maxTessellationEvaluationInputComponents = 0,
479 .maxTessellationEvaluationOutputComponents = 0,
480 .maxGeometryShaderInvocations = 32,
481 .maxGeometryInputComponents = 64,
482 .maxGeometryOutputComponents = 128,
483 .maxGeometryOutputVertices = 256,
484 .maxGeometryTotalOutputComponents = 1024,
485 .maxFragmentInputComponents = 128,
486 .maxFragmentOutputAttachments = 8,
487 .maxFragmentDualSrcAttachments = 2,
488 .maxFragmentCombinedOutputResources = 8,
489 .maxComputeSharedMemorySize = 32768,
490 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
491 .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
492 .maxComputeWorkGroupSize = {
493 16 * devinfo->max_cs_threads,
494 16 * devinfo->max_cs_threads,
495 16 * devinfo->max_cs_threads,
496 },
497 .subPixelPrecisionBits = 4 /* FIXME */,
498 .subTexelPrecisionBits = 4 /* FIXME */,
499 .mipmapPrecisionBits = 4 /* FIXME */,
500 .maxDrawIndexedIndexValue = UINT32_MAX,
501 .maxDrawIndirectCount = UINT32_MAX,
502 .maxSamplerLodBias = 16,
503 .maxSamplerAnisotropy = 16,
504 .maxViewports = MAX_VIEWPORTS,
505 .maxViewportDimensions = { (1 << 14), (1 << 14) },
506 .viewportBoundsRange = { -16384.0, 16384.0 },
507 .viewportSubPixelBits = 13, /* We take a float? */
508 .minMemoryMapAlignment = 4096, /* A page */
509 .minTexelBufferOffsetAlignment = 1,
510 .minUniformBufferOffsetAlignment = 1,
511 .minStorageBufferOffsetAlignment = 1,
512 .minTexelOffset = -8,
513 .maxTexelOffset = 7,
514 .minTexelGatherOffset = -8,
515 .maxTexelGatherOffset = 7,
516 .minInterpolationOffset = 0, /* FIXME */
517 .maxInterpolationOffset = 0, /* FIXME */
518 .subPixelInterpolationOffsetBits = 0, /* FIXME */
519 .maxFramebufferWidth = (1 << 14),
520 .maxFramebufferHeight = (1 << 14),
521 .maxFramebufferLayers = (1 << 10),
522 .framebufferColorSampleCounts = sample_counts,
523 .framebufferDepthSampleCounts = sample_counts,
524 .framebufferStencilSampleCounts = sample_counts,
525 .framebufferNoAttachmentsSampleCounts = sample_counts,
526 .maxColorAttachments = MAX_RTS,
527 .sampledImageColorSampleCounts = sample_counts,
528 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
529 .sampledImageDepthSampleCounts = sample_counts,
530 .sampledImageStencilSampleCounts = sample_counts,
531 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
532 .maxSampleMaskWords = 1,
533 .timestampComputeAndGraphics = false,
534 .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000),
535 .maxClipDistances = 0 /* FIXME */,
536 .maxCullDistances = 0 /* FIXME */,
537 .maxCombinedClipAndCullDistances = 0 /* FIXME */,
538 .discreteQueuePriorities = 1,
539 .pointSizeRange = { 0.125, 255.875 },
540 .lineWidthRange = { 0.0, 7.9921875 },
541 .pointSizeGranularity = (1.0 / 8.0),
542 .lineWidthGranularity = (1.0 / 128.0),
543 .strictLines = false, /* FINISHME */
544 .standardSampleLocations = true,
545 .optimalBufferCopyOffsetAlignment = 128,
546 .optimalBufferCopyRowPitchAlignment = 128,
547 .nonCoherentAtomSize = 64,
548 };
549
550 *pProperties = (VkPhysicalDeviceProperties) {
551 .apiVersion = VK_MAKE_VERSION(1, 0, 5),
552 .driverVersion = 1,
553 .vendorID = 0x8086,
554 .deviceID = pdevice->chipset_id,
555 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
556 .limits = limits,
557 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
558 };
559
560 strcpy(pProperties->deviceName, pdevice->name);
561 anv_device_get_cache_uuid(pProperties->pipelineCacheUUID);
562 }
563
564 void anv_GetPhysicalDeviceQueueFamilyProperties(
565 VkPhysicalDevice physicalDevice,
566 uint32_t* pCount,
567 VkQueueFamilyProperties* pQueueFamilyProperties)
568 {
569 if (pQueueFamilyProperties == NULL) {
570 *pCount = 1;
571 return;
572 }
573
574 assert(*pCount >= 1);
575
576 *pQueueFamilyProperties = (VkQueueFamilyProperties) {
577 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
578 VK_QUEUE_COMPUTE_BIT |
579 VK_QUEUE_TRANSFER_BIT,
580 .queueCount = 1,
581 .timestampValidBits = 36, /* XXX: Real value here */
582 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
583 };
584 }
585
586 void anv_GetPhysicalDeviceMemoryProperties(
587 VkPhysicalDevice physicalDevice,
588 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
589 {
590 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
591 VkDeviceSize heap_size;
592
593 /* Reserve some wiggle room for the driver by exposing only 75% of the
594 * aperture to the heap.
595 */
596 heap_size = 3 * physical_device->aperture_size / 4;
597
598 if (physical_device->info->has_llc) {
599 /* Big core GPUs share LLC with the CPU and thus one memory type can be
600 * both cached and coherent at the same time.
601 */
602 pMemoryProperties->memoryTypeCount = 1;
603 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
604 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
605 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
606 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
607 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
608 .heapIndex = 0,
609 };
610 } else {
611 /* The spec requires that we expose a host-visible, coherent memory
612 * type, but Atom GPUs don't share LLC. Thus we offer two memory types
613 * to give the application a choice between cached but not coherent, and
614 * coherent but uncached (write-combined).
615 */
616 pMemoryProperties->memoryTypeCount = 2;
617 pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
618 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
619 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
620 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
621 .heapIndex = 0,
622 };
623 pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
624 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
625 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
626 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
627 .heapIndex = 0,
628 };
629 }
630
631 pMemoryProperties->memoryHeapCount = 1;
632 pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
633 .size = heap_size,
634 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
635 };
636 }
637
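/* Instance and device proc lookups both resolve through the same generated
 * entrypoint table via anv_lookup_entrypoint().
 */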
638 PFN_vkVoidFunction anv_GetInstanceProcAddr(
639 VkInstance instance,
640 const char* pName)
641 {
642 return anv_lookup_entrypoint(pName);
643 }
644
645 /* The loader wants us to expose a second GetInstanceProcAddr function
646 * to work around certain LD_PRELOAD issues seen in apps.
647 */
648 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
649 VkInstance instance,
650 const char* pName);
651
652 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
653 VkInstance instance,
654 const char* pName)
655 {
656 return anv_GetInstanceProcAddr(instance, pName);
657 }
658
659 PFN_vkVoidFunction anv_GetDeviceProcAddr(
660 VkDevice device,
661 const char* pName)
662 {
663 return anv_lookup_entrypoint(pName);
664 }
665
666 static VkResult
667 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
668 {
669 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
670 queue->device = device;
671 queue->pool = &device->surface_state_pool;
672
673 return VK_SUCCESS;
674 }
675
676 static void
677 anv_queue_finish(struct anv_queue *queue)
678 {
679 }
680
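/* Allocate a state from the pool, copy `size` bytes of data into it and, on
 * non-LLC platforms, flush it out of the CPU cache so the GPU sees the data.
 */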
681 static struct anv_state
682 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
683 {
684 struct anv_state state;
685
686 state = anv_state_pool_alloc(pool, size, align);
687 memcpy(state.map, p, size);
688
689 if (!pool->block_pool->device->info.has_llc)
690 anv_state_clflush(state);
691
692 return state;
693 }
694
695 struct gen8_border_color {
696 union {
697 float float32[4];
698 uint32_t uint32[4];
699 };
700 /* Pad out to 64 bytes */
701 uint32_t _pad[12];
702 };
703
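/* Upload one 64-byte border-color entry per VkBorderColor value into the
 * dynamic state pool; device->border_colors records where the table landed.
 */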
704 static void
705 anv_device_init_border_colors(struct anv_device *device)
706 {
707 static const struct gen8_border_color border_colors[] = {
708 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
709 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
710 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
711 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
712 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
713 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
714 };
715
716 device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
717 sizeof(border_colors), 64,
718 border_colors);
719 }
720
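/* Copy a CPU-built batch into a freshly allocated BO, submit it with
 * execbuf2 and block until it completes. Used for one-off batches such as
 * the MI_BATCH_BUFFER_END batch built in anv_DeviceWaitIdle() below.
 */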
721 VkResult
722 anv_device_submit_simple_batch(struct anv_device *device,
723 struct anv_batch *batch)
724 {
725 struct drm_i915_gem_execbuffer2 execbuf;
726 struct drm_i915_gem_exec_object2 exec2_objects[1];
727 struct anv_bo bo;
728 VkResult result = VK_SUCCESS;
729 uint32_t size;
730 int64_t timeout;
731 int ret;
732
733 /* Kernel driver requires 8 byte aligned batch length */
734 size = align_u32(batch->next - batch->start, 8);
735 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
736 if (result != VK_SUCCESS)
737 return result;
738
739 memcpy(bo.map, batch->start, size);
740 if (!device->info.has_llc)
741 anv_clflush_range(bo.map, size);
742
743 exec2_objects[0].handle = bo.gem_handle;
744 exec2_objects[0].relocation_count = 0;
745 exec2_objects[0].relocs_ptr = 0;
746 exec2_objects[0].alignment = 0;
747 exec2_objects[0].offset = bo.offset;
748 exec2_objects[0].flags = 0;
749 exec2_objects[0].rsvd1 = 0;
750 exec2_objects[0].rsvd2 = 0;
751
752 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
753 execbuf.buffer_count = 1;
754 execbuf.batch_start_offset = 0;
755 execbuf.batch_len = size;
756 execbuf.cliprects_ptr = 0;
757 execbuf.num_cliprects = 0;
758 execbuf.DR1 = 0;
759 execbuf.DR4 = 0;
760
761 execbuf.flags =
762 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
763 execbuf.rsvd1 = device->context_id;
764 execbuf.rsvd2 = 0;
765
766 ret = anv_gem_execbuffer(device, &execbuf);
767 if (ret != 0) {
768 /* We don't know the real error. */
769 result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
770 goto fail;
771 }
772
773 timeout = INT64_MAX;
774 ret = anv_gem_wait(device, bo.gem_handle, &timeout);
775 if (ret != 0) {
776 /* We don't know the real error. */
777 result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "gem wait failed: %m");
778 goto fail;
779 }
780
781 fail:
782 anv_bo_pool_free(&device->batch_bo_pool, &bo);
783
784 return result;
785 }
786
787 VkResult anv_CreateDevice(
788 VkPhysicalDevice physicalDevice,
789 const VkDeviceCreateInfo* pCreateInfo,
790 const VkAllocationCallbacks* pAllocator,
791 VkDevice* pDevice)
792 {
793 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
794 VkResult result;
795 struct anv_device *device;
796
797 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
798
799 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
800 bool found = false;
801 for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
802 if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
803 device_extensions[j].extensionName) == 0) {
804 found = true;
805 break;
806 }
807 }
808 if (!found)
809 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
810 }
811
812 anv_set_dispatch_devinfo(physical_device->info);
813
814 device = anv_alloc2(&physical_device->instance->alloc, pAllocator,
815 sizeof(*device), 8,
816 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
817 if (!device)
818 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
819
820 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
821 device->instance = physical_device->instance;
822 device->chipset_id = physical_device->chipset_id;
823
824 if (pAllocator)
825 device->alloc = *pAllocator;
826 else
827 device->alloc = physical_device->instance->alloc;
828
829 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
830 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
831 if (device->fd == -1) {
832 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
833 goto fail_device;
834 }
835
836 device->context_id = anv_gem_create_context(device);
837 if (device->context_id == -1) {
838 result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
839 goto fail_fd;
840 }
841
842 device->info = *physical_device->info;
843 device->isl_dev = physical_device->isl_dev;
844
845 /* On Broadwell and later, we can use batch chaining to more efficiently
846 * implement growing command buffers. On Gen7 (including Haswell), the kernel
847 * command parser gets in the way and we have to fall back to growing
848 * the batch.
849 */
850 device->can_chain_batches = device->info.gen >= 8;
851
852 pthread_mutex_init(&device->mutex, NULL);
853
854 anv_bo_pool_init(&device->batch_bo_pool, device);
855
856 anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
857
858 anv_state_pool_init(&device->dynamic_state_pool,
859 &device->dynamic_state_block_pool);
860
861 anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
862 anv_pipeline_cache_init(&device->default_pipeline_cache, device);
863
864 anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
865
866 anv_state_pool_init(&device->surface_state_pool,
867 &device->surface_state_block_pool);
868
869 anv_bo_init_new(&device->workaround_bo, device, 1024);
870
871 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
872
873 anv_queue_init(device, &device->queue);
874
875 switch (device->info.gen) {
876 case 7:
877 if (!device->info.is_haswell)
878 result = gen7_init_device_state(device);
879 else
880 result = gen75_init_device_state(device);
881 break;
882 case 8:
883 result = gen8_init_device_state(device);
884 break;
885 case 9:
886 result = gen9_init_device_state(device);
887 break;
888 default:
889 /* Shouldn't get here as we don't create physical devices for any other
890 * gens. */
891 unreachable("unhandled gen");
892 }
893 if (result != VK_SUCCESS)
894 goto fail_fd;
895
896 result = anv_device_init_meta(device);
897 if (result != VK_SUCCESS)
898 goto fail_fd;
899
900 anv_device_init_border_colors(device);
901
902 *pDevice = anv_device_to_handle(device);
903
904 return VK_SUCCESS;
905
906 fail_fd:
907 close(device->fd);
908 fail_device:
909 anv_free(&device->alloc, device);
910
911 return result;
912 }
913
914 void anv_DestroyDevice(
915 VkDevice _device,
916 const VkAllocationCallbacks* pAllocator)
917 {
918 ANV_FROM_HANDLE(anv_device, device, _device);
919
920 anv_queue_finish(&device->queue);
921
922 anv_device_finish_meta(device);
923
924 #ifdef HAVE_VALGRIND
925 /* We only need to free these to prevent valgrind errors. The backing
926 * BO will go away in a couple of lines so we don't actually leak.
927 */
928 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
929 #endif
930
931 anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
932 anv_gem_close(device, device->workaround_bo.gem_handle);
933
934 anv_bo_pool_finish(&device->batch_bo_pool);
935 anv_state_pool_finish(&device->dynamic_state_pool);
936 anv_block_pool_finish(&device->dynamic_state_block_pool);
937 anv_block_pool_finish(&device->instruction_block_pool);
938 anv_state_pool_finish(&device->surface_state_pool);
939 anv_block_pool_finish(&device->surface_state_block_pool);
940 anv_block_pool_finish(&device->scratch_block_pool);
941
942 close(device->fd);
943
944 pthread_mutex_destroy(&device->mutex);
945
946 anv_free(&device->alloc, device);
947 }
948
949 VkResult anv_EnumerateInstanceExtensionProperties(
950 const char* pLayerName,
951 uint32_t* pPropertyCount,
952 VkExtensionProperties* pProperties)
953 {
954 if (pProperties == NULL) {
955 *pPropertyCount = ARRAY_SIZE(global_extensions);
956 return VK_SUCCESS;
957 }
958
959 assert(*pPropertyCount >= ARRAY_SIZE(global_extensions));
960
961 *pPropertyCount = ARRAY_SIZE(global_extensions);
962 memcpy(pProperties, global_extensions, sizeof(global_extensions));
963
964 return VK_SUCCESS;
965 }
966
967 VkResult anv_EnumerateDeviceExtensionProperties(
968 VkPhysicalDevice physicalDevice,
969 const char* pLayerName,
970 uint32_t* pPropertyCount,
971 VkExtensionProperties* pProperties)
972 {
973 if (pProperties == NULL) {
974 *pPropertyCount = ARRAY_SIZE(device_extensions);
975 return VK_SUCCESS;
976 }
977
978 assert(*pPropertyCount >= ARRAY_SIZE(device_extensions));
979
980 *pPropertyCount = ARRAY_SIZE(device_extensions);
981 memcpy(pProperties, device_extensions, sizeof(device_extensions));
982
983 return VK_SUCCESS;
984 }
985
986 VkResult anv_EnumerateInstanceLayerProperties(
987 uint32_t* pPropertyCount,
988 VkLayerProperties* pProperties)
989 {
990 if (pProperties == NULL) {
991 *pPropertyCount = 0;
992 return VK_SUCCESS;
993 }
994
995 /* None supported at this time */
996 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
997 }
998
999 VkResult anv_EnumerateDeviceLayerProperties(
1000 VkPhysicalDevice physicalDevice,
1001 uint32_t* pPropertyCount,
1002 VkLayerProperties* pProperties)
1003 {
1004 if (pProperties == NULL) {
1005 *pPropertyCount = 0;
1006 return VK_SUCCESS;
1007 }
1008
1009 /* None supported at this time */
1010 return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1011 }
1012
1013 void anv_GetDeviceQueue(
1014 VkDevice _device,
1015 uint32_t queueNodeIndex,
1016 uint32_t queueIndex,
1017 VkQueue* pQueue)
1018 {
1019 ANV_FROM_HANDLE(anv_device, device, _device);
1020
1021 assert(queueIndex == 0);
1022
1023 *pQueue = anv_queue_to_handle(&device->queue);
1024 }
1025
1026 VkResult anv_QueueSubmit(
1027 VkQueue _queue,
1028 uint32_t submitCount,
1029 const VkSubmitInfo* pSubmits,
1030 VkFence _fence)
1031 {
1032 ANV_FROM_HANDLE(anv_queue, queue, _queue);
1033 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1034 struct anv_device *device = queue->device;
1035 int ret;
1036
1037 for (uint32_t i = 0; i < submitCount; i++) {
1038 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1039 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
1040 pSubmits[i].pCommandBuffers[j]);
1041 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1042
1043 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf);
1044 if (ret != 0) {
1045 /* We don't know the real error. */
1046 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
1047 "execbuf2 failed: %m");
1048 }
1049
1050 for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++)
1051 cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset;
1052 }
1053 }
1054
1055 if (fence) {
1056 ret = anv_gem_execbuffer(device, &fence->execbuf);
1057 if (ret != 0) {
1058 /* We don't know the real error. */
1059 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
1060 "execbuf2 failed: %m");
1061 }
1062 }
1063
1064 return VK_SUCCESS;
1065 }
1066
1067 VkResult anv_QueueWaitIdle(
1068 VkQueue _queue)
1069 {
1070 ANV_FROM_HANDLE(anv_queue, queue, _queue);
1071
1072 return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device));
1073 }
1074
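/* Build a minimal batch containing just MI_BATCH_BUFFER_END (padded with a
 * NOOP) and submit it synchronously; since the kernel executes batches in
 * order, waiting on this batch drains all previously submitted work.
 */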
1075 VkResult anv_DeviceWaitIdle(
1076 VkDevice _device)
1077 {
1078 ANV_FROM_HANDLE(anv_device, device, _device);
1079 struct anv_batch batch;
1080
1081 uint32_t cmds[8];
1082 batch.start = batch.next = cmds;
1083 batch.end = (void *) cmds + sizeof(cmds);
1084
1085 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1086 anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1087
1088 return anv_device_submit_simple_batch(device, &batch);
1089 }
1090
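/* Create a fresh GEM buffer object of the requested size; the map, index and
 * offset fields start out unset.
 */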
1091 VkResult
1092 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1093 {
1094 bo->gem_handle = anv_gem_create(device, size);
1095 if (!bo->gem_handle)
1096 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1097
1098 bo->map = NULL;
1099 bo->index = 0;
1100 bo->offset = 0;
1101 bo->size = size;
1102 bo->is_winsys_bo = false;
1103
1104 return VK_SUCCESS;
1105 }
1106
1107 VkResult anv_AllocateMemory(
1108 VkDevice _device,
1109 const VkMemoryAllocateInfo* pAllocateInfo,
1110 const VkAllocationCallbacks* pAllocator,
1111 VkDeviceMemory* pMem)
1112 {
1113 ANV_FROM_HANDLE(anv_device, device, _device);
1114 struct anv_device_memory *mem;
1115 VkResult result;
1116
1117 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1118
1119 if (pAllocateInfo->allocationSize == 0) {
1120 /* Apparently, this is allowed */
1121 *pMem = VK_NULL_HANDLE;
1122 return VK_SUCCESS;
1123 }
1124
1125 /* We support exactly one memory heap. */
1126 assert(pAllocateInfo->memoryTypeIndex == 0 ||
1127 (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
1128
1129 /* FINISHME: Fail if allocation request exceeds heap size. */
1130
1131 mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1132 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1133 if (mem == NULL)
1134 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1135
1136 /* The kernel is going to give us whole pages anyway */
1137 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1138
1139 result = anv_bo_init_new(&mem->bo, device, alloc_size);
1140 if (result != VK_SUCCESS)
1141 goto fail;
1142
1143 mem->type_index = pAllocateInfo->memoryTypeIndex;
1144
1145 *pMem = anv_device_memory_to_handle(mem);
1146
1147 return VK_SUCCESS;
1148
1149 fail:
1150 anv_free2(&device->alloc, pAllocator, mem);
1151
1152 return result;
1153 }
1154
1155 void anv_FreeMemory(
1156 VkDevice _device,
1157 VkDeviceMemory _mem,
1158 const VkAllocationCallbacks* pAllocator)
1159 {
1160 ANV_FROM_HANDLE(anv_device, device, _device);
1161 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1162
1163 if (mem == NULL)
1164 return;
1165
1166 if (mem->bo.map)
1167 anv_gem_munmap(mem->bo.map, mem->bo.size);
1168
1169 if (mem->bo.gem_handle != 0)
1170 anv_gem_close(device, mem->bo.gem_handle);
1171
1172 anv_free2(&device->alloc, pAllocator, mem);
1173 }
1174
1175 VkResult anv_MapMemory(
1176 VkDevice _device,
1177 VkDeviceMemory _memory,
1178 VkDeviceSize offset,
1179 VkDeviceSize size,
1180 VkMemoryMapFlags flags,
1181 void** ppData)
1182 {
1183 ANV_FROM_HANDLE(anv_device, device, _device);
1184 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1185
1186 if (mem == NULL) {
1187 *ppData = NULL;
1188 return VK_SUCCESS;
1189 }
1190
1191 if (size == VK_WHOLE_SIZE)
1192 size = mem->bo.size - offset;
1193
1194 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1195 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1196 * at a time is valid. We could just mmap up front and return an offset
1197 * pointer here, but that may exhaust virtual memory on 32 bit
1198 * userspace. */
1199
1200 uint32_t gem_flags = 0;
1201 if (!device->info.has_llc && mem->type_index == 0)
1202 gem_flags |= I915_MMAP_WC;
1203
1204 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
1205 uint64_t map_offset = offset & ~4095ull;
1206 assert(offset >= map_offset);
1207 uint64_t map_size = (offset + size) - map_offset;
1208
1209 /* Let's map whole pages */
1210 map_size = align_u64(map_size, 4096);
1211
1212 mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
1213 map_offset, map_size, gem_flags);
1214 mem->map_size = map_size;
1215
1216 *ppData = mem->map + (offset - map_offset);
1217
1218 return VK_SUCCESS;
1219 }
1220
1221 void anv_UnmapMemory(
1222 VkDevice _device,
1223 VkDeviceMemory _memory)
1224 {
1225 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1226
1227 if (mem == NULL)
1228 return;
1229
1230 anv_gem_munmap(mem->map, mem->map_size);
1231 }
1232
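/* Flush the CPU cache for each mapped range, one cacheline at a time,
 * clamping each range to the size that was actually mapped.
 */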
1233 static void
1234 clflush_mapped_ranges(struct anv_device *device,
1235 uint32_t count,
1236 const VkMappedMemoryRange *ranges)
1237 {
1238 for (uint32_t i = 0; i < count; i++) {
1239 ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
1240 void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
1241 void *end;
1242
1243 if (ranges[i].offset + ranges[i].size > mem->map_size)
1244 end = mem->map + mem->map_size;
1245 else
1246 end = mem->map + ranges[i].offset + ranges[i].size;
1247
1248 while (p < end) {
1249 __builtin_ia32_clflush(p);
1250 p += CACHELINE_SIZE;
1251 }
1252 }
1253 }
1254
1255 VkResult anv_FlushMappedMemoryRanges(
1256 VkDevice _device,
1257 uint32_t memoryRangeCount,
1258 const VkMappedMemoryRange* pMemoryRanges)
1259 {
1260 ANV_FROM_HANDLE(anv_device, device, _device);
1261
1262 if (device->info.has_llc)
1263 return VK_SUCCESS;
1264
1265 /* Make sure the writes we're flushing have landed. */
1266 __builtin_ia32_mfence();
1267
1268 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1269
1270 return VK_SUCCESS;
1271 }
1272
1273 VkResult anv_InvalidateMappedMemoryRanges(
1274 VkDevice _device,
1275 uint32_t memoryRangeCount,
1276 const VkMappedMemoryRange* pMemoryRanges)
1277 {
1278 ANV_FROM_HANDLE(anv_device, device, _device);
1279
1280 if (device->info.has_llc)
1281 return VK_SUCCESS;
1282
1283 clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1284
1285 /* Make sure no reads get moved up above the invalidate. */
1286 __builtin_ia32_mfence();
1287
1288 return VK_SUCCESS;
1289 }
1290
1291 void anv_GetBufferMemoryRequirements(
1292 VkDevice device,
1293 VkBuffer _buffer,
1294 VkMemoryRequirements* pMemoryRequirements)
1295 {
1296 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1297
1298 /* The Vulkan spec (git aaed022) says:
1299 *
1300 * memoryTypeBits is a bitfield and contains one bit set for every
1301 * supported memory type for the resource. The bit `1<<i` is set if and
1302 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1303 * structure for the physical device is supported.
1304 *
1305 * We support exactly one memory type.
1306 */
1307 pMemoryRequirements->memoryTypeBits = 1;
1308
1309 pMemoryRequirements->size = buffer->size;
1310 pMemoryRequirements->alignment = 16;
1311 }
1312
1313 void anv_GetImageMemoryRequirements(
1314 VkDevice device,
1315 VkImage _image,
1316 VkMemoryRequirements* pMemoryRequirements)
1317 {
1318 ANV_FROM_HANDLE(anv_image, image, _image);
1319
1320 /* The Vulkan spec (git aaed022) says:
1321 *
1322 * memoryTypeBits is a bitfield and contains one bit set for every
1323 * supported memory type for the resource. The bit `1<<i` is set if and
1324 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1325 * structure for the physical device is supported.
1326 *
1327 * We support exactly one memory type.
1328 */
1329 pMemoryRequirements->memoryTypeBits = 1;
1330
1331 pMemoryRequirements->size = image->size;
1332 pMemoryRequirements->alignment = image->alignment;
1333 }
1334
1335 void anv_GetImageSparseMemoryRequirements(
1336 VkDevice device,
1337 VkImage image,
1338 uint32_t* pSparseMemoryRequirementCount,
1339 VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
1340 {
1341 stub();
1342 }
1343
1344 void anv_GetDeviceMemoryCommitment(
1345 VkDevice device,
1346 VkDeviceMemory memory,
1347 VkDeviceSize* pCommittedMemoryInBytes)
1348 {
1349 *pCommittedMemoryInBytes = 0;
1350 }
1351
1352 VkResult anv_BindBufferMemory(
1353 VkDevice device,
1354 VkBuffer _buffer,
1355 VkDeviceMemory _memory,
1356 VkDeviceSize memoryOffset)
1357 {
1358 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1359 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1360
1361 if (mem) {
1362 buffer->bo = &mem->bo;
1363 buffer->offset = memoryOffset;
1364 } else {
1365 buffer->bo = NULL;
1366 buffer->offset = 0;
1367 }
1368
1369 return VK_SUCCESS;
1370 }
1371
1372 VkResult anv_BindImageMemory(
1373 VkDevice device,
1374 VkImage _image,
1375 VkDeviceMemory _memory,
1376 VkDeviceSize memoryOffset)
1377 {
1378 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1379 ANV_FROM_HANDLE(anv_image, image, _image);
1380
1381 if (mem) {
1382 image->bo = &mem->bo;
1383 image->offset = memoryOffset;
1384 } else {
1385 image->bo = NULL;
1386 image->offset = 0;
1387 }
1388
1389 return VK_SUCCESS;
1390 }
1391
1392 VkResult anv_QueueBindSparse(
1393 VkQueue queue,
1394 uint32_t bindInfoCount,
1395 const VkBindSparseInfo* pBindInfo,
1396 VkFence fence)
1397 {
1398 stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1399 }
1400
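/* A fence is backed by a small BO holding both the CPU-side struct and a tiny
 * batch ending in MI_BATCH_BUFFER_END; vkQueueSubmit() submits that batch
 * after the user's command buffers, so fence status checks reduce to GEM
 * waits on the BO.
 */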
1401 VkResult anv_CreateFence(
1402 VkDevice _device,
1403 const VkFenceCreateInfo* pCreateInfo,
1404 const VkAllocationCallbacks* pAllocator,
1405 VkFence* pFence)
1406 {
1407 ANV_FROM_HANDLE(anv_device, device, _device);
1408 struct anv_bo fence_bo;
1409 struct anv_fence *fence;
1410 struct anv_batch batch;
1411 VkResult result;
1412
1413 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1414
1415 result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
1416 if (result != VK_SUCCESS)
1417 return result;
1418
1419 /* Fences are small. Just store the CPU data structure in the BO. */
1420 fence = fence_bo.map;
1421 fence->bo = fence_bo;
1422
1423 /* Place the batch after the CPU data but on its own cache line. */
1424 const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
1425 batch.next = batch.start = fence->bo.map + batch_offset;
1426 batch.end = fence->bo.map + fence->bo.size;
1427 anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1428 anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1429
1430 if (!device->info.has_llc) {
1431 assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
1432 assert(batch.next - batch.start <= CACHELINE_SIZE);
1433 __builtin_ia32_mfence();
1434 __builtin_ia32_clflush(batch.start);
1435 }
1436
1437 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1438 fence->exec2_objects[0].relocation_count = 0;
1439 fence->exec2_objects[0].relocs_ptr = 0;
1440 fence->exec2_objects[0].alignment = 0;
1441 fence->exec2_objects[0].offset = fence->bo.offset;
1442 fence->exec2_objects[0].flags = 0;
1443 fence->exec2_objects[0].rsvd1 = 0;
1444 fence->exec2_objects[0].rsvd2 = 0;
1445
1446 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1447 fence->execbuf.buffer_count = 1;
1448 fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
1449 fence->execbuf.batch_len = batch.next - batch.start;
1450 fence->execbuf.cliprects_ptr = 0;
1451 fence->execbuf.num_cliprects = 0;
1452 fence->execbuf.DR1 = 0;
1453 fence->execbuf.DR4 = 0;
1454
1455 fence->execbuf.flags =
1456 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1457 fence->execbuf.rsvd1 = device->context_id;
1458 fence->execbuf.rsvd2 = 0;
1459
1460 fence->ready = false;
1461
1462 *pFence = anv_fence_to_handle(fence);
1463
1464 return VK_SUCCESS;
1465 }
1466
1467 void anv_DestroyFence(
1468 VkDevice _device,
1469 VkFence _fence,
1470 const VkAllocationCallbacks* pAllocator)
1471 {
1472 ANV_FROM_HANDLE(anv_device, device, _device);
1473 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1474
1475 assert(fence->bo.map == fence);
1476 anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
1477 }
1478
1479 VkResult anv_ResetFences(
1480 VkDevice _device,
1481 uint32_t fenceCount,
1482 const VkFence* pFences)
1483 {
1484 for (uint32_t i = 0; i < fenceCount; i++) {
1485 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1486 fence->ready = false;
1487 }
1488
1489 return VK_SUCCESS;
1490 }
1491
1492 VkResult anv_GetFenceStatus(
1493 VkDevice _device,
1494 VkFence _fence)
1495 {
1496 ANV_FROM_HANDLE(anv_device, device, _device);
1497 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1498 int64_t t = 0;
1499 int ret;
1500
1501 if (fence->ready)
1502 return VK_SUCCESS;
1503
1504 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1505 if (ret == 0) {
1506 fence->ready = true;
1507 return VK_SUCCESS;
1508 }
1509
1510 return VK_NOT_READY;
1511 }
1512
1513 VkResult anv_WaitForFences(
1514 VkDevice _device,
1515 uint32_t fenceCount,
1516 const VkFence* pFences,
1517 VkBool32 waitAll,
1518 uint64_t timeout)
1519 {
1520 ANV_FROM_HANDLE(anv_device, device, _device);
1521
1522 /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
1523 * to block indefinitely for timeouts <= 0. Unfortunately, this was broken
1524 * for a couple of kernel releases. Since there's no way to know
1525 * whether or not the kernel we're using is one of the broken ones, the
1526 * best we can do is to clamp the timeout to INT64_MAX. This limits the
1527 * maximum timeout from 584 years to 292 years - likely not a big deal.
1528 */
1529 if (timeout > INT64_MAX)
1530 timeout = INT64_MAX;
1531
1532 int64_t t = timeout;
1533
1534 /* FIXME: handle !waitAll */
1535
1536 for (uint32_t i = 0; i < fenceCount; i++) {
1537 ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1538 int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1539 if (ret == -1 && errno == ETIME) {
1540 return VK_TIMEOUT;
1541 } else if (ret == -1) {
1542 /* We don't know the real error. */
1543 return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
1544 "gem wait failed: %m");
1545 }
1546 }
1547
1548 return VK_SUCCESS;
1549 }
1550
1551 // Queue semaphore functions
1552
1553 VkResult anv_CreateSemaphore(
1554 VkDevice device,
1555 const VkSemaphoreCreateInfo* pCreateInfo,
1556 const VkAllocationCallbacks* pAllocator,
1557 VkSemaphore* pSemaphore)
1558 {
1559 /* The DRM execbuffer ioctl always executes in order, even between different
1560 * rings. As such, there's nothing to do for the user space semaphore.
1561 */
1562
1563 *pSemaphore = (VkSemaphore)1;
1564
1565 return VK_SUCCESS;
1566 }
1567
1568 void anv_DestroySemaphore(
1569 VkDevice device,
1570 VkSemaphore semaphore,
1571 const VkAllocationCallbacks* pAllocator)
1572 {
1573 }
1574
1575 // Event functions
1576
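/* Events live in the dynamic state pool so that both the CPU and the GPU can
 * read and update them; on non-LLC platforms every access is bracketed with
 * mfence/clflush to keep the two views coherent.
 */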
1577 VkResult anv_CreateEvent(
1578 VkDevice _device,
1579 const VkEventCreateInfo* pCreateInfo,
1580 const VkAllocationCallbacks* pAllocator,
1581 VkEvent* pEvent)
1582 {
1583 ANV_FROM_HANDLE(anv_device, device, _device);
1584 struct anv_state state;
1585 struct anv_event *event;
1586
1587 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
1588
1589 state = anv_state_pool_alloc(&device->dynamic_state_pool,
1590 sizeof(*event), 8);
1591 event = state.map;
1592 event->state = state;
1593 event->semaphore = VK_EVENT_RESET;
1594
1595 if (!device->info.has_llc) {
1596 /* Make sure the writes we're flushing have landed. */
1597 __builtin_ia32_mfence();
1598 __builtin_ia32_clflush(event);
1599 }
1600
1601 *pEvent = anv_event_to_handle(event);
1602
1603 return VK_SUCCESS;
1604 }
1605
1606 void anv_DestroyEvent(
1607 VkDevice _device,
1608 VkEvent _event,
1609 const VkAllocationCallbacks* pAllocator)
1610 {
1611 ANV_FROM_HANDLE(anv_device, device, _device);
1612 ANV_FROM_HANDLE(anv_event, event, _event);
1613
1614 anv_state_pool_free(&device->dynamic_state_pool, event->state);
1615 }
1616
1617 VkResult anv_GetEventStatus(
1618 VkDevice _device,
1619 VkEvent _event)
1620 {
1621 ANV_FROM_HANDLE(anv_device, device, _device);
1622 ANV_FROM_HANDLE(anv_event, event, _event);
1623
1624 if (!device->info.has_llc) {
1625 /* Invalidate read cache before reading event written by GPU. */
1626 __builtin_ia32_clflush(event);
1627 __builtin_ia32_mfence();
1628
1629 }
1630
1631 return event->semaphore;
1632 }
1633
1634 VkResult anv_SetEvent(
1635 VkDevice _device,
1636 VkEvent _event)
1637 {
1638 ANV_FROM_HANDLE(anv_device, device, _device);
1639 ANV_FROM_HANDLE(anv_event, event, _event);
1640
1641 event->semaphore = VK_EVENT_SET;
1642
1643 if (!device->info.has_llc) {
1644 /* Make sure the writes we're flushing have landed. */
1645 __builtin_ia32_mfence();
1646 __builtin_ia32_clflush(event);
1647 }
1648
1649 return VK_SUCCESS;
1650 }
1651
1652 VkResult anv_ResetEvent(
1653 VkDevice _device,
1654 VkEvent _event)
1655 {
1656 ANV_FROM_HANDLE(anv_device, device, _device);
1657 ANV_FROM_HANDLE(anv_event, event, _event);
1658
1659 event->semaphore = VK_EVENT_RESET;
1660
1661 if (!device->info.has_llc) {
1662 /* Make sure the writes we're flushing have landed. */
1663 __builtin_ia32_mfence();
1664 __builtin_ia32_clflush(event);
1665 }
1666
1667 return VK_SUCCESS;
1668 }
1669
1670 // Buffer functions
1671
1672 VkResult anv_CreateBuffer(
1673 VkDevice _device,
1674 const VkBufferCreateInfo* pCreateInfo,
1675 const VkAllocationCallbacks* pAllocator,
1676 VkBuffer* pBuffer)
1677 {
1678 ANV_FROM_HANDLE(anv_device, device, _device);
1679 struct anv_buffer *buffer;
1680
1681 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1682
1683 buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1684 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1685 if (buffer == NULL)
1686 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1687
1688 buffer->size = pCreateInfo->size;
1689 buffer->usage = pCreateInfo->usage;
1690 buffer->bo = NULL;
1691 buffer->offset = 0;
1692
1693 *pBuffer = anv_buffer_to_handle(buffer);
1694
1695 return VK_SUCCESS;
1696 }
1697
1698 void anv_DestroyBuffer(
1699 VkDevice _device,
1700 VkBuffer _buffer,
1701 const VkAllocationCallbacks* pAllocator)
1702 {
1703 ANV_FROM_HANDLE(anv_device, device, _device);
1704 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1705
1706 anv_free2(&device->alloc, pAllocator, buffer);
1707 }
1708
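/* Fill an ISL buffer surface state at `state` for the given format, offset,
 * range and stride, flushing it on non-LLC platforms. (Presumably used when
 * building buffer and texel-buffer views elsewhere in the driver.)
 */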
1709 void
1710 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
1711 enum isl_format format,
1712 uint32_t offset, uint32_t range, uint32_t stride)
1713 {
1714 isl_buffer_fill_state(&device->isl_dev, state.map,
1715 .address = offset,
1716 .mocs = device->default_mocs,
1717 .size = range,
1718 .format = format,
1719 .stride = stride);
1720
1721 if (!device->info.has_llc)
1722 anv_state_clflush(state);
1723 }
1724
1725 void anv_DestroySampler(
1726 VkDevice _device,
1727 VkSampler _sampler,
1728 const VkAllocationCallbacks* pAllocator)
1729 {
1730 ANV_FROM_HANDLE(anv_device, device, _device);
1731 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
1732
1733 anv_free2(&device->alloc, pAllocator, sampler);
1734 }
1735
1736 VkResult anv_CreateFramebuffer(
1737 VkDevice _device,
1738 const VkFramebufferCreateInfo* pCreateInfo,
1739 const VkAllocationCallbacks* pAllocator,
1740 VkFramebuffer* pFramebuffer)
1741 {
1742 ANV_FROM_HANDLE(anv_device, device, _device);
1743 struct anv_framebuffer *framebuffer;
1744
1745 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
1746
1747 size_t size = sizeof(*framebuffer) +
1748 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
1749 framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8,
1750 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1751 if (framebuffer == NULL)
1752 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1753
1754 framebuffer->attachment_count = pCreateInfo->attachmentCount;
1755 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
1756 VkImageView _iview = pCreateInfo->pAttachments[i];
1757 framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
1758 }
1759
1760 framebuffer->width = pCreateInfo->width;
1761 framebuffer->height = pCreateInfo->height;
1762 framebuffer->layers = pCreateInfo->layers;
1763
1764 *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
1765
1766 return VK_SUCCESS;
1767 }
1768
1769 void anv_DestroyFramebuffer(
1770 VkDevice _device,
1771 VkFramebuffer _fb,
1772 const VkAllocationCallbacks* pAllocator)
1773 {
1774 ANV_FROM_HANDLE(anv_device, device, _device);
1775 ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
1776
1777 anv_free2(&device->alloc, pAllocator, fb);
1778 }
1779
1780 void vkCmdDbgMarkerBegin(
1781 VkCommandBuffer commandBuffer,
1782 const char* pMarker)
1783 __attribute__ ((visibility ("default")));
1784
1785 void vkCmdDbgMarkerEnd(
1786 VkCommandBuffer commandBuffer)
1787 __attribute__ ((visibility ("default")));
1788
1789 void vkCmdDbgMarkerBegin(
1790 VkCommandBuffer commandBuffer,
1791 const char* pMarker)
1792 {
1793 }
1794
1795 void vkCmdDbgMarkerEnd(
1796 VkCommandBuffer commandBuffer)
1797 {
1798 }