X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_device.c;h=fc5554ea2aa825f40e16283a56654156b2fa506f;hb=a44744e01d73f7187406200d57dd67aa235a7d13;hp=35ef4c486b2b8f8a45723307e6bee0c6c29415b6;hpb=f195d40eca49800799d85d110939a125041f4028;p=mesa.git diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 35ef4c486b2..fc5554ea2aa 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -29,12 +29,14 @@ #include #include #include +#include #include "anv_private.h" #include "util/strtod.h" #include "util/debug.h" #include "util/build_id.h" -#include "util/vk_util.h" +#include "util/mesa-sha1.h" +#include "vk_util.h" #include "genxml/gen7_pack.h" @@ -49,7 +51,7 @@ compiler_perf_log(void *data, const char *fmt, ...) va_start(args, fmt); if (unlikely(INTEL_DEBUG & DEBUG_PERF)) - vfprintf(stderr, fmt, args); + intel_logd_v(fmt, args); va_end(args); } @@ -63,10 +65,11 @@ anv_compute_heap_size(int fd, uint64_t *heap_size) /* If, for whatever reason, we can't actually get the GTT size from the * kernel (too old?) fall back to the aperture size. */ - anv_perf_warn("Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m"); + anv_perf_warn(NULL, NULL, + "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m"); if (anv_gem_get_aperture(fd, >t_size) == -1) { - return vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + return vk_errorf(NULL, NULL, VK_ERROR_INITIALIZATION_FAILED, "failed to get aperture size: %m"); } } @@ -96,19 +99,179 @@ anv_compute_heap_size(int fd, uint64_t *heap_size) return VK_SUCCESS; } -static bool -anv_device_get_cache_uuid(void *uuid) +static VkResult +anv_physical_device_init_heaps(struct anv_physical_device *device, int fd) { - const struct build_id_note *note = build_id_find_nhdr("libvulkan_intel.so"); - if (!note) - return false; + /* The kernel query only tells us whether or not the kernel supports the + * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the + * hardware has actual 48bit address support. + */ + device->supports_48bit_addresses = + (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd); + + uint64_t heap_size; + VkResult result = anv_compute_heap_size(fd, &heap_size); + if (result != VK_SUCCESS) + return result; - unsigned len = build_id_length(note); - if (len < VK_UUID_SIZE) - return false; + if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) { + /* When running with an overridden PCI ID, we may get a GTT size from + * the kernel that is greater than 2 GiB but the execbuf check for 48bit + * address support can still fail. Just clamp the address space size to + * 2 GiB if we don't have 48-bit support. + */ + intel_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but " + "not support for 48-bit addresses", + __FILE__, __LINE__); + heap_size = 2ull << 30; + } - memcpy(uuid, build_id_data(note), VK_UUID_SIZE); - return true; + if (heap_size <= 3ull * (1ull << 30)) { + /* In this case, everything fits nicely into the 32-bit address space, + * so there's no need for supporting 48bit addresses on client-allocated + * memory objects. + */ + device->memory.heap_count = 1; + device->memory.heaps[0] = (struct anv_memory_heap) { + .size = heap_size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .supports_48bit_addresses = false, + }; + } else { + /* Not everything will fit nicely into a 32-bit address space. In this + * case we need a 64-bit heap. Advertise a small 32-bit heap and a + * larger 48-bit heap. 
If we're in this case, then we have a total heap + * size larger than 3GiB which most likely means they have 8 GiB of + * video memory and so carving off 1 GiB for the 32-bit heap should be + * reasonable. + */ + const uint64_t heap_size_32bit = 1ull << 30; + const uint64_t heap_size_48bit = heap_size - heap_size_32bit; + + assert(device->supports_48bit_addresses); + + device->memory.heap_count = 2; + device->memory.heaps[0] = (struct anv_memory_heap) { + .size = heap_size_48bit, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .supports_48bit_addresses = true, + }; + device->memory.heaps[1] = (struct anv_memory_heap) { + .size = heap_size_32bit, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .supports_48bit_addresses = false, + }; + } + + uint32_t type_count = 0; + for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) { + uint32_t valid_buffer_usage = ~0; + + /* There appears to be a hardware issue in the VF cache where it only + * considers the bottom 32 bits of memory addresses. If you happen to + * have two vertex buffers which get placed exactly 4 GiB apart and use + * them in back-to-back draw calls, you can get collisions. In order to + * solve this problem, we require vertex and index buffers be bound to + * memory allocated out of the 32-bit heap. + */ + if (device->memory.heaps[heap].supports_48bit_addresses) { + valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); + } + + if (device->info.has_llc) { + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. + */ + device->memory.types[type_count++] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = heap, + .valid_buffer_usage = valid_buffer_usage, + }; + } else { + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). + */ + device->memory.types[type_count++] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = heap, + .valid_buffer_usage = valid_buffer_usage, + }; + device->memory.types[type_count++] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = heap, + .valid_buffer_usage = valid_buffer_usage, + }; + } + } + device->memory.type_count = type_count; + + return VK_SUCCESS; +} + +static VkResult +anv_physical_device_init_uuids(struct anv_physical_device *device) +{ + const struct build_id_note *note = + build_id_find_nhdr_for_addr(anv_physical_device_init_uuids); + if (!note) { + return vk_errorf(device->instance, device, + VK_ERROR_INITIALIZATION_FAILED, + "Failed to find build-id"); + } + + unsigned build_id_len = build_id_length(note); + if (build_id_len < 20) { + return vk_errorf(device->instance, device, + VK_ERROR_INITIALIZATION_FAILED, + "build-id too short. It needs to be a SHA"); + } + + struct mesa_sha1 sha1_ctx; + uint8_t sha1[20]; + STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1)); + + /* The pipeline cache UUID is used for determining when a pipeline cache is + * invalid. 
It needs both a driver build and the PCI ID of the device. + */ + _mesa_sha1_init(&sha1_ctx); + _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); + _mesa_sha1_update(&sha1_ctx, &device->chipset_id, + sizeof(device->chipset_id)); + _mesa_sha1_final(&sha1_ctx, sha1); + memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE); + + /* The driver UUID is used for determining sharability of images and memory + * between two Vulkan instances in separate processes. People who want to + * share memory need to also check the device UUID (below) so all this + * needs to be is the build-id. + */ + memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE); + + /* The device UUID uniquely identifies the given device within the machine. + * Since we never have more than one device, this doesn't need to be a real + * UUID. However, on the off-chance that someone tries to use this to + * cache pre-tiled images or something of the like, we use the PCI ID and + * some bits of ISL info to ensure that this is safe. + */ + _mesa_sha1_init(&sha1_ctx); + _mesa_sha1_update(&sha1_ctx, &device->chipset_id, + sizeof(device->chipset_id)); + _mesa_sha1_update(&sha1_ctx, &device->isl_dev.has_bit6_swizzling, + sizeof(device->isl_dev.has_bit6_swizzling)); + _mesa_sha1_final(&sha1_ctx, sha1); + memcpy(device->device_uuid, sha1, VK_UUID_SIZE); + + return VK_SUCCESS; } static VkResult @@ -119,6 +282,8 @@ anv_physical_device_init(struct anv_physical_device *device, VkResult result; int fd; + brw_process_intel_debug_variable(); + fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER); @@ -142,16 +307,19 @@ anv_physical_device_init(struct anv_physical_device *device, } if (device->info.is_haswell) { - fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n"); + intel_logw("Haswell Vulkan support is incomplete"); } else if (device->info.gen == 7 && !device->info.is_baytrail) { - fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n"); + intel_logw("Ivy Bridge Vulkan support is incomplete"); } else if (device->info.gen == 7 && device->info.is_baytrail) { - fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n"); - } else if (device->info.gen >= 8) { - /* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully - * supported as anything */ + intel_logw("Bay Trail Vulkan support is incomplete"); + } else if (device->info.gen >= 8 && device->info.gen <= 9) { + /* Broadwell, Cherryview, Skylake, Broxton, Kabylake, Coffelake is as + * fully supported as anything */ + } else if (device->info.gen == 10) { + intel_logw("Cannonlake Vulkan support is alpha"); } else { - result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + result = vk_errorf(device->instance, device, + VK_ERROR_INCOMPATIBLE_DRIVER, "Vulkan not yet supported on %s", device->name); goto fail; } @@ -161,42 +329,46 @@ anv_physical_device_init(struct anv_physical_device *device, device->cmd_parser_version = anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION); if (device->cmd_parser_version == -1) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + result = vk_errorf(device->instance, device, + VK_ERROR_INITIALIZATION_FAILED, "failed to get command parser version"); goto fail; } } if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + result = vk_errorf(device->instance, device, + VK_ERROR_INITIALIZATION_FAILED, "kernel missing gem wait"); goto fail; } if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) { - result = 
vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + result = vk_errorf(device->instance, device, + VK_ERROR_INITIALIZATION_FAILED, "kernel missing execbuf2"); goto fail; } if (!device->info.has_llc && anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, + result = vk_errorf(device->instance, device, + VK_ERROR_INITIALIZATION_FAILED, "kernel missing wc mmap"); goto fail; } - device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd); - - result = anv_compute_heap_size(fd, &device->heap_size); + result = anv_physical_device_init_heaps(device, fd); if (result != VK_SUCCESS) goto fail; - if (!anv_device_get_cache_uuid(device->uuid)) { - result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, - "cannot generate UUID"); - goto fail; - } + device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC); + device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE); + device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE); + device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY); + device->has_syncobj_wait = device->has_syncobj && + anv_gem_supports_syncobj_wait(fd); + bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X); /* GENs prior to 8 do not support EU/Subslice info */ @@ -209,8 +381,7 @@ anv_physical_device_init(struct anv_physical_device *device, * many platforms, but otherwise, things will just work. */ if (device->subslice_total < 1 || device->eu_total < 1) { - fprintf(stderr, "WARNING: Kernel 4.1 required to properly" - " query GPU properties.\n"); + intel_logw("Kernel 4.1 required to properly query GPU properties"); } } else if (device->info.gen == 7) { device->subslice_total = 1 << (device->info.gt - 1); @@ -218,16 +389,15 @@ anv_physical_device_init(struct anv_physical_device *device, if (device->info.is_cherryview && device->subslice_total > 0 && device->eu_total > 0) { - /* Logical CS threads = EUs per subslice * 7 threads per EU */ - uint32_t max_cs_threads = device->eu_total / device->subslice_total * 7; + /* Logical CS threads = EUs per subslice * num threads per EU */ + uint32_t max_cs_threads = + device->eu_total / device->subslice_total * device->info.num_thread_per_eu; /* Fuse configurations may give more threads than expected, never less. 
*/ if (max_cs_threads > device->info.max_cs_threads) device->info.max_cs_threads = max_cs_threads; } - brw_process_intel_debug_variable(); - device->compiler = brw_compiler_create(NULL, &device->info); if (device->compiler == NULL) { result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); @@ -235,6 +405,13 @@ anv_physical_device_init(struct anv_physical_device *device, } device->compiler->shader_debug_log = compiler_debug_log; device->compiler->shader_perf_log = compiler_perf_log; + device->compiler->supports_pull_constants = false; + + isl_device_init(&device->isl_dev, &device->info, swizzled); + + result = anv_physical_device_init_uuids(device); + if (result != VK_SUCCESS) + goto fail; result = anv_init_wsi(device); if (result != VK_SUCCESS) { @@ -242,8 +419,6 @@ anv_physical_device_init(struct anv_physical_device *device, goto fail; } - isl_device_init(&device->isl_dev, &device->info, swizzled); - device->local_fd = fd; return VK_SUCCESS; @@ -260,66 +435,6 @@ anv_physical_device_finish(struct anv_physical_device *device) close(device->local_fd); } -static const VkExtensionProperties global_extensions[] = { - { - .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, - .specVersion = 25, - }, -#ifdef VK_USE_PLATFORM_XCB_KHR - { - .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, - .specVersion = 6, - }, -#endif -#ifdef VK_USE_PLATFORM_XLIB_KHR - { - .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, - .specVersion = 6, - }, -#endif -#ifdef VK_USE_PLATFORM_WAYLAND_KHR - { - .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, - .specVersion = 5, - }, -#endif - { - .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, - .specVersion = 1, - }, -}; - -static const VkExtensionProperties device_extensions[] = { - { - .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, - .specVersion = 68, - }, - { - .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_MAINTENANCE1_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, - .specVersion = 1, - }, - { - .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME, - .specVersion = 1, - }, -}; - static void * default_alloc_func(void *pUserData, size_t size, size_t align, VkSystemAllocationScope allocationScope) @@ -356,6 +471,13 @@ VkResult anv_CreateInstance( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); + /* Check if user passed a debug report callback to be used during + * Create/Destroy of instance. + */ + const VkDebugReportCallbackCreateInfoEXT *ctor_cb = + vk_find_struct_const(pCreateInfo->pNext, + DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT); + uint32_t client_version; if (pCreateInfo->pApplicationInfo && pCreateInfo->pApplicationInfo->apiVersion != 0) { @@ -366,7 +488,18 @@ VkResult anv_CreateInstance( if (VK_MAKE_VERSION(1, 0, 0) > client_version || client_version > VK_MAKE_VERSION(1, 0, 0xfff)) { - return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, + + if (ctor_cb && ctor_cb->flags & VK_DEBUG_REPORT_ERROR_BIT_EXT) + ctor_cb->pfnCallback(VK_DEBUG_REPORT_ERROR_BIT_EXT, + VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT, + VK_NULL_HANDLE, /* No handle available yet. 
*/ + __LINE__, + 0, + "anv", + "incompatible driver version", + ctor_cb->pUserData); + + return vk_errorf(NULL, NULL, VK_ERROR_INCOMPATIBLE_DRIVER, "Client requested version %d.%d.%d", VK_VERSION_MAJOR(client_version), VK_VERSION_MINOR(client_version), @@ -374,15 +507,8 @@ VkResult anv_CreateInstance( } for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - bool found = false; - for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) { - if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - global_extensions[j].extensionName) == 0) { - found = true; - break; - } - } - if (!found) + const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; + if (!anv_instance_extension_supported(ext_name)) return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } @@ -401,6 +527,20 @@ VkResult anv_CreateInstance( instance->apiVersion = client_version; instance->physicalDeviceCount = -1; + if (pthread_mutex_init(&instance->callbacks_mutex, NULL) != 0) { + vk_free2(&default_alloc, pAllocator, instance); + return vk_error(VK_ERROR_INITIALIZATION_FAILED); + } + + list_inithead(&instance->callbacks); + + /* Store report debug callback to be used during DestroyInstance. */ + if (ctor_cb) { + instance->destroy_debug_cb.flags = ctor_cb->flags; + instance->destroy_debug_cb.callback = ctor_cb->pfnCallback; + instance->destroy_debug_cb.data = ctor_cb->pUserData; + } + _mesa_locale_init(); VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); @@ -427,6 +567,8 @@ void anv_DestroyInstance( VG(VALGRIND_DESTROY_MEMPOOL(instance)); + pthread_mutex_destroy(&instance->callbacks_mutex); + _mesa_locale_fini(); vk_free(&instance->alloc, instance); @@ -442,7 +584,7 @@ anv_enumerate_devices(struct anv_instance *instance) instance->physicalDeviceCount = 0; - max_devices = drmGetDevices2(0, devices, sizeof(devices)); + max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); if (max_devices < 1) return VK_ERROR_INCOMPATIBLE_DRIVER; @@ -458,6 +600,7 @@ anv_enumerate_devices(struct anv_instance *instance) break; } } + drmFreeDevices(devices, max_devices); if (result == VK_SUCCESS) instance->physicalDeviceCount = 1; @@ -508,7 +651,7 @@ void anv_GetPhysicalDeviceFeatures( .sampleRateShading = true, .dualSrcBlend = true, .logicOp = true, - .multiDrawIndirect = false, + .multiDrawIndirect = true, .drawIndirectFirstInstance = true, .depthClamp = true, .depthBiasClamp = true, @@ -560,6 +703,29 @@ void anv_GetPhysicalDeviceFeatures2KHR( vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: { + VkPhysicalDeviceMultiviewFeaturesKHX *features = + (VkPhysicalDeviceMultiviewFeaturesKHX *)ext; + features->multiview = true; + features->multiviewGeometryShader = true; + features->multiviewTessellationShader = true; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: { + VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext; + features->variablePointersStorageBuffer = true; + features->variablePointers = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES_KHR: { + VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *features = + (VkPhysicalDeviceSamplerYcbcrConversionFeaturesKHR *) ext; + features->samplerYcbcrConversion = true; + break; + } + default: anv_debug_ignored_stype(ext->sType); break; @@ -578,6 +744,9 @@ void anv_GetPhysicalDeviceProperties( const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ? 
(1ul << 30) : (1ul << 27); + const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ? + 128 : 16; + VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -596,13 +765,13 @@ void anv_GetPhysicalDeviceProperties( .bufferImageGranularity = 64, /* A cache line */ .sparseAddressSpaceSize = 0, .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = 64, + .maxPerStageDescriptorSamplers = max_samplers, .maxPerStageDescriptorUniformBuffers = 64, .maxPerStageDescriptorStorageBuffers = 64, - .maxPerStageDescriptorSampledImages = 64, + .maxPerStageDescriptorSampledImages = max_samplers, .maxPerStageDescriptorStorageImages = 64, .maxPerStageDescriptorInputAttachments = 64, - .maxPerStageResources = 128, + .maxPerStageResources = 250, .maxDescriptorSetSamplers = 256, .maxDescriptorSetUniformBuffers = 256, .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2, @@ -678,7 +847,7 @@ void anv_GetPhysicalDeviceProperties( .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = false, - .timestampPeriod = devinfo->timebase_scale, + .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency, .maxClipDistances = 8, .maxCullDistances = 8, .maxCombinedClipAndCullDistances = 8, @@ -695,8 +864,8 @@ void anv_GetPhysicalDeviceProperties( }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = VK_MAKE_VERSION(1, 0, 42), - .driverVersion = 1, + .apiVersion = anv_physical_device_api_version(pdevice), + .driverVersion = vk_get_driver_version(), .vendorID = 0x8086, .deviceID = pdevice->chipset_id, .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, @@ -704,14 +873,18 @@ void anv_GetPhysicalDeviceProperties( .sparseProperties = {0}, /* Broadwell doesn't do sparse. */ }; - strcpy(pProperties->deviceName, pdevice->name); - memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE); + snprintf(pProperties->deviceName, sizeof(pProperties->deviceName), + "%s", pdevice->name); + memcpy(pProperties->pipelineCacheUUID, + pdevice->pipeline_cache_uuid, VK_UUID_SIZE); } void anv_GetPhysicalDeviceProperties2KHR( VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR* pProperties) { + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice); + anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); vk_foreach_struct(ext, pProperties->pNext) { @@ -724,6 +897,32 @@ void anv_GetPhysicalDeviceProperties2KHR( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: { + VkPhysicalDeviceIDPropertiesKHR *id_props = + (VkPhysicalDeviceIDPropertiesKHR *)ext; + memcpy(id_props->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); + memcpy(id_props->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); + /* The LUID is for Windows. 
*/ + id_props->deviceLUIDValid = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: { + VkPhysicalDeviceMultiviewPropertiesKHX *properties = + (VkPhysicalDeviceMultiviewPropertiesKHX *)ext; + properties->maxMultiviewViewCount = 16; + properties->maxMultiviewInstanceIndex = UINT32_MAX / 16; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: { + VkPhysicalDevicePointClippingPropertiesKHR *properties = + (VkPhysicalDevicePointClippingPropertiesKHR *) ext; + properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR; + anv_finishme("Implement pop-free point clipping"); + break; + } + default: anv_debug_ignored_stype(ext->sType); break; @@ -777,44 +976,21 @@ void anv_GetPhysicalDeviceMemoryProperties( { ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice); - if (physical_device->info.has_llc) { - /* Big core GPUs share LLC with the CPU and thus one memory type can be - * both cached and coherent at the same time. - */ - pMemoryProperties->memoryTypeCount = 1; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - } else { - /* The spec requires that we expose a host-visible, coherent memory - * type, but Atom GPUs don't share LLC. Thus we offer two memory types - * to give the application a choice between cached, but not coherent and - * coherent but uncached (WC though). - */ - pMemoryProperties->memoryTypeCount = 2; - pMemoryProperties->memoryTypes[0] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - pMemoryProperties->memoryTypes[1] = (VkMemoryType) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, + pMemoryProperties->memoryTypeCount = physical_device->memory.type_count; + for (uint32_t i = 0; i < physical_device->memory.type_count; i++) { + pMemoryProperties->memoryTypes[i] = (VkMemoryType) { + .propertyFlags = physical_device->memory.types[i].propertyFlags, + .heapIndex = physical_device->memory.types[i].heapIndex, }; } - pMemoryProperties->memoryHeapCount = 1; - pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) { - .size = physical_device->heap_size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - }; + pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count; + for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) { + pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) { + .size = physical_device->memory.heaps[i].size, + .flags = physical_device->memory.heaps[i].flags, + }; + } } void anv_GetPhysicalDeviceMemoryProperties2KHR( @@ -885,7 +1061,7 @@ anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, state = anv_state_pool_alloc(pool, size, align); memcpy(state.map, p, size); - anv_state_flush(pool->block_pool->device, state); + anv_state_flush(pool->block_pool.device, state); return state; } @@ -916,60 +1092,30 @@ anv_device_init_border_colors(struct anv_device *device) border_colors); } -VkResult -anv_device_submit_simple_batch(struct anv_device *device, - struct anv_batch *batch) +static void +anv_device_init_trivial_batch(struct anv_device *device) { - struct drm_i915_gem_execbuffer2 
execbuf; - struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo bo, *exec_bos[1]; - VkResult result = VK_SUCCESS; - uint32_t size; + anv_bo_init_new(&device->trivial_batch_bo, device, 4096); - /* Kernel driver requires 8 byte aligned batch length */ - size = align_u32(batch->next - batch->start, 8); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size); - if (result != VK_SUCCESS) - return result; + if (device->instance->physicalDevice.has_exec_async) + device->trivial_batch_bo.flags |= EXEC_OBJECT_ASYNC; - memcpy(bo.map, batch->start, size); - if (!device->info.has_llc) - anv_flush_range(bo.map, size); - - exec_bos[0] = &bo; - exec2_objects[0].handle = bo.gem_handle; - exec2_objects[0].relocation_count = 0; - exec2_objects[0].relocs_ptr = 0; - exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo.offset; - exec2_objects[0].flags = 0; - exec2_objects[0].rsvd1 = 0; - exec2_objects[0].rsvd2 = 0; - - execbuf.buffers_ptr = (uintptr_t) exec2_objects; - execbuf.buffer_count = 1; - execbuf.batch_start_offset = 0; - execbuf.batch_len = size; - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; - - execbuf.flags = - I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER; - execbuf.rsvd1 = device->context_id; - execbuf.rsvd2 = 0; - - result = anv_device_execbuf(device, &execbuf, exec_bos); - if (result != VK_SUCCESS) - goto fail; + void *map = anv_gem_mmap(device, device->trivial_batch_bo.gem_handle, + 0, 4096, 0); - result = anv_device_wait(device, &bo, INT64_MAX); + struct anv_batch batch = { + .start = map, + .next = map, + .end = map + 4096, + }; - fail: - anv_bo_pool_free(&device->batch_bo_pool, &bo); + anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe); + anv_batch_emit(&batch, GEN7_MI_NOOP, noop); - return result; + if (!device->info.has_llc) + gen_clflush_range(map, batch.next - map); + + anv_gem_munmap(map, device->trivial_batch_bo.size); } VkResult anv_CreateDevice( @@ -985,18 +1131,24 @@ VkResult anv_CreateDevice( assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - bool found = false; - for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) { - if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], - device_extensions[j].extensionName) == 0) { - found = true; - break; - } - } - if (!found) + const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; + if (!anv_physical_device_extension_supported(physical_device, ext_name)) return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); } + /* Check enabled features */ + if (pCreateInfo->pEnabledFeatures) { + VkPhysicalDeviceFeatures supported_features; + anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); + VkBool32 *supported_feature = (VkBool32 *)&supported_features; + VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; + unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); + } + } + device = vk_alloc2(&physical_device->instance->alloc, pAllocator, sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -1061,36 +1213,42 @@ VkResult anv_CreateDevice( } pthread_condattr_destroy(&condattr); - anv_bo_pool_init(&device->batch_bo_pool, device); + uint64_t bo_flags = + (physical_device->supports_48bit_addresses ? 
EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) | + (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) | + (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0); - result = anv_block_pool_init(&device->dynamic_state_block_pool, device, - 16384); + anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags); + + result = anv_bo_cache_init(&device->bo_cache); if (result != VK_SUCCESS) goto fail_batch_bo_pool; - anv_state_pool_init(&device->dynamic_state_pool, - &device->dynamic_state_block_pool); + /* For the state pools we explicitly disable 48bit. */ + bo_flags = physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0; - result = anv_block_pool_init(&device->instruction_block_pool, device, - 1024 * 1024); + result = anv_state_pool_init(&device->dynamic_state_pool, device, 16384, + bo_flags); if (result != VK_SUCCESS) - goto fail_dynamic_state_pool; + goto fail_bo_cache; - anv_state_pool_init(&device->instruction_state_pool, - &device->instruction_block_pool); + result = anv_state_pool_init(&device->instruction_state_pool, device, 16384, + bo_flags | + (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0)); + if (result != VK_SUCCESS) + goto fail_dynamic_state_pool; - result = anv_block_pool_init(&device->surface_state_block_pool, device, - 4096); + result = anv_state_pool_init(&device->surface_state_pool, device, 4096, + bo_flags); if (result != VK_SUCCESS) goto fail_instruction_state_pool; - anv_state_pool_init(&device->surface_state_pool, - &device->surface_state_block_pool); - result = anv_bo_init_new(&device->workaround_bo, device, 1024); if (result != VK_SUCCESS) goto fail_surface_state_pool; + anv_device_init_trivial_batch(device); + anv_scratch_pool_init(device, &device->scratch_pool); anv_queue_init(device, &device->queue); @@ -1108,6 +1266,9 @@ VkResult anv_CreateDevice( case 9: result = gen9_init_device_state(device); break; + case 10: + result = gen10_init_device_state(device); + break; default: /* Shouldn't get here as we don't create physical devices for any other * gens. 
*/ @@ -1131,13 +1292,12 @@ VkResult anv_CreateDevice( anv_gem_close(device, device->workaround_bo.gem_handle); fail_surface_state_pool: anv_state_pool_finish(&device->surface_state_pool); - anv_block_pool_finish(&device->surface_state_block_pool); fail_instruction_state_pool: anv_state_pool_finish(&device->instruction_state_pool); - anv_block_pool_finish(&device->instruction_block_pool); fail_dynamic_state_pool: anv_state_pool_finish(&device->dynamic_state_pool); - anv_block_pool_finish(&device->dynamic_state_block_pool); + fail_bo_cache: + anv_bo_cache_finish(&device->bo_cache); fail_batch_bo_pool: anv_bo_pool_finish(&device->batch_bo_pool); pthread_cond_destroy(&device->queue_submit); @@ -1178,12 +1338,13 @@ void anv_DestroyDevice( anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); anv_gem_close(device, device->workaround_bo.gem_handle); + anv_gem_close(device, device->trivial_batch_bo.gem_handle); + anv_state_pool_finish(&device->surface_state_pool); - anv_block_pool_finish(&device->surface_state_block_pool); anv_state_pool_finish(&device->instruction_state_pool); - anv_block_pool_finish(&device->instruction_block_pool); anv_state_pool_finish(&device->dynamic_state_pool); - anv_block_pool_finish(&device->dynamic_state_block_pool); + + anv_bo_cache_finish(&device->bo_cache); anv_bo_pool_finish(&device->batch_bo_pool); @@ -1197,45 +1358,6 @@ void anv_DestroyDevice( vk_free(&device->alloc, device); } -VkResult anv_EnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(global_extensions); - return VK_SUCCESS; - } - - *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions)); - typed_memcpy(pProperties, global_extensions, *pPropertyCount); - - if (*pPropertyCount < ARRAY_SIZE(global_extensions)) - return VK_INCOMPLETE; - - return VK_SUCCESS; -} - -VkResult anv_EnumerateDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - if (pProperties == NULL) { - *pPropertyCount = ARRAY_SIZE(device_extensions); - return VK_SUCCESS; - } - - *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions)); - typed_memcpy(pProperties, device_extensions, *pPropertyCount); - - if (*pPropertyCount < ARRAY_SIZE(device_extensions)) - return VK_INCOMPLETE; - - return VK_SUCCESS; -} - VkResult anv_EnumerateInstanceLayerProperties( uint32_t* pPropertyCount, VkLayerProperties* pProperties) @@ -1276,26 +1398,6 @@ void anv_GetDeviceQueue( *pQueue = anv_queue_to_handle(&device->queue); } -VkResult -anv_device_execbuf(struct anv_device *device, - struct drm_i915_gem_execbuffer2 *execbuf, - struct anv_bo **execbuf_bos) -{ - int ret = anv_gem_execbuffer(device, execbuf); - if (ret != 0) { - /* We don't know the real error. */ - device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m"); - } - - struct drm_i915_gem_exec_object2 *objects = - (void *)(uintptr_t)execbuf->buffers_ptr; - for (uint32_t k = 0; k < execbuf->buffer_count; k++) - execbuf_bos[k]->offset = objects[k].offset; - - return VK_SUCCESS; -} - VkResult anv_device_query_status(struct anv_device *device) { @@ -1311,16 +1413,17 @@ anv_device_query_status(struct anv_device *device) if (ret == -1) { /* We don't know the real error. 
*/ device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, "get_reset_stats failed: %m"); + return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST, + "get_reset_stats failed: %m"); } if (active) { device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, + return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST, "GPU hung on one of our command buffers"); } else if (pending) { device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, + return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST, "GPU hung with commands in-flight"); } @@ -1340,7 +1443,8 @@ anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo) } else if (ret == -1) { /* We don't know the real error. */ device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m"); + return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST, + "gem wait failed: %m"); } /* Query for device status after the busy call. If the BO we're checking @@ -1362,7 +1466,8 @@ anv_device_wait(struct anv_device *device, struct anv_bo *bo, } else if (ret == -1) { /* We don't know the real error. */ device->lost = true; - return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m"); + return vk_errorf(device->instance, device, VK_ERROR_DEVICE_LOST, + "gem wait failed: %m"); } /* Query for device status after the wait. If the BO we're waiting on got @@ -1373,119 +1478,6 @@ anv_device_wait(struct anv_device *device, struct anv_bo *bo, return anv_device_query_status(device); } -VkResult anv_QueueSubmit( - VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo* pSubmits, - VkFence _fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - ANV_FROM_HANDLE(anv_fence, fence, _fence); - struct anv_device *device = queue->device; - - /* Query for device status prior to submitting. Technically, we don't need - * to do this. However, if we have a client that's submitting piles of - * garbage, we would rather break as early as possible to keep the GPU - * hanging contained. If we don't check here, we'll either be waiting for - * the kernel to kick us or we'll have to wait until the client waits on a - * fence before we actually know whether or not we've hung. - */ - VkResult result = anv_device_query_status(device); - if (result != VK_SUCCESS) - return result; - - /* We lock around QueueSubmit for three main reasons: - * - * 1) When a block pool is resized, we create a new gem handle with a - * different size and, in the case of surface states, possibly a - * different center offset but we re-use the same anv_bo struct when - * we do so. If this happens in the middle of setting up an execbuf, - * we could end up with our list of BOs out of sync with our list of - * gem handles. - * - * 2) The algorithm we use for building the list of unique buffers isn't - * thread-safe. While the client is supposed to syncronize around - * QueueSubmit, this would be extremely difficult to debug if it ever - * came up in the wild due to a broken app. It's better to play it - * safe and just lock around QueueSubmit. - * - * 3) The anv_cmd_buffer_execbuf function may perform relocations in - * userspace. Due to the fact that the surface state buffer is shared - * between batches, we can't afford to have that happen from multiple - * threads at the same time. Even though the user is supposed to - * ensure this doesn't happen, we play it safe as in (2) above. 
- * - * Since the only other things that ever take the device lock such as block - * pool resize only rarely happen, this will almost never be contended so - * taking a lock isn't really an expensive operation in this case. - */ - pthread_mutex_lock(&device->mutex); - - for (uint32_t i = 0; i < submitCount; i++) { - for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, - pSubmits[i].pCommandBuffers[j]); - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - assert(!anv_batch_has_error(&cmd_buffer->batch)); - - result = anv_cmd_buffer_execbuf(device, cmd_buffer); - if (result != VK_SUCCESS) - goto out; - } - } - - if (fence) { - struct anv_bo *fence_bo = &fence->bo; - result = anv_device_execbuf(device, &fence->execbuf, &fence_bo); - if (result != VK_SUCCESS) - goto out; - - /* Update the fence and wake up any waiters */ - assert(fence->state == ANV_FENCE_STATE_RESET); - fence->state = ANV_FENCE_STATE_SUBMITTED; - pthread_cond_broadcast(&device->queue_submit); - } - -out: - if (result != VK_SUCCESS) { - /* In the case that something has gone wrong we may end up with an - * inconsistent state from which it may not be trivial to recover. - * For example, we might have computed address relocations and - * any future attempt to re-submit this job will need to know about - * this and avoid computing relocation addresses again. - * - * To avoid this sort of issues, we assume that if something was - * wrong during submission we must already be in a really bad situation - * anyway (such us being out of memory) and return - * VK_ERROR_DEVICE_LOST to ensure that clients do not attempt to - * submit the same job again to this device. - */ - result = VK_ERROR_DEVICE_LOST; - device->lost = true; - - /* If we return VK_ERROR_DEVICE LOST here, we need to ensure that - * vkWaitForFences() and vkGetFenceStatus() return a valid result - * (VK_SUCCESS or VK_ERROR_DEVICE_LOST) in a finite amount of time. - * Setting the fence status to SIGNALED ensures this will happen in - * any case. - */ - if (fence) - fence->state = ANV_FENCE_STATE_SIGNALED; - } - - pthread_mutex_unlock(&device->mutex); - - return result; -} - -VkResult anv_QueueWaitIdle( - VkQueue _queue) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - - return anv_DeviceWaitIdle(anv_device_to_handle(queue->device)); -} - VkResult anv_DeviceWaitIdle( VkDevice _device) { @@ -1514,9 +1506,6 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size) anv_bo_init(bo, gem_handle, size); - if (device->instance->physicalDevice.supports_48bit_addresses) - bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; - return VK_SUCCESS; } @@ -1527,17 +1516,34 @@ VkResult anv_AllocateMemory( VkDeviceMemory* pMem) { ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_physical_device *pdevice = &device->instance->physicalDevice; struct anv_device_memory *mem; - VkResult result; + VkResult result = VK_SUCCESS; assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */ assert(pAllocateInfo->allocationSize > 0); - /* We support exactly one memory heap. */ - assert(pAllocateInfo->memoryTypeIndex == 0 || - (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2)); + /* The kernel relocation API has a limitation of a 32-bit delta value + * applied to the address before it is written which, in spite of it being + * unsigned, is treated as signed . 
Because of the way that this maps to + * the Vulkan API, we cannot handle an offset into a buffer that does not + * fit into a signed 32 bits. The only mechanism we have for dealing with + * this at the moment is to limit all VkDeviceMemory objects to a maximum + * of 2GB each. The Vulkan spec allows us to do this: + * + * "Some platforms may have a limit on the maximum size of a single + * allocation. For example, certain systems may fail to create + * allocations with a size greater than or equal to 4GB. Such a limit is + * implementation-dependent, and if such a failure occurs then the error + * VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned." + * + * We don't use vk_error here because it's not an error so much as an + * indication to the application that the allocation is too large. + */ + if (pAllocateInfo->allocationSize > (1ull << 31)) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* FINISHME: Fail if allocation request exceeds heap size. */ @@ -1546,17 +1552,109 @@ VkResult anv_AllocateMemory( if (mem == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - /* The kernel is going to give us whole pages anyway */ - uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); + assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count); + mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex]; + mem->map = NULL; + mem->map_size = 0; - result = anv_bo_init_new(&mem->bo, device, alloc_size); - if (result != VK_SUCCESS) - goto fail; + const VkImportMemoryFdInfoKHR *fd_info = + vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); - mem->type_index = pAllocateInfo->memoryTypeIndex; + /* The Vulkan spec permits handleType to be 0, in which case the struct is + * ignored. + */ + if (fd_info && fd_info->handleType) { + /* At the moment, we support only the below handle types. */ + assert(fd_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || + fd_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + result = anv_bo_cache_import(device, &device->bo_cache, + fd_info->fd, &mem->bo); + if (result != VK_SUCCESS) + goto fail; - mem->map = NULL; - mem->map_size = 0; + VkDeviceSize aligned_alloc_size = + align_u64(pAllocateInfo->allocationSize, 4096); + + /* For security purposes, we reject importing the bo if it's smaller + * than the requested allocation size. This prevents a malicious client + * from passing a buffer to a trusted client, lying about the size, and + * telling the trusted client to try and texture from an image that goes + * out-of-bounds. This sort of thing could lead to GPU hangs or worse + * in the trusted client. The trusted client can protect itself against + * this sort of attack but only if it can trust the buffer size. + */ + if (mem->bo->size < aligned_alloc_size) { + result = vk_errorf(device->instance, device, + VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + "aligned allocationSize too large for " + "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: " + "%"PRIu64"B > %"PRIu64"B", + aligned_alloc_size, mem->bo->size); + anv_bo_cache_release(device, &device->bo_cache, mem->bo); + goto fail; + } + + /* From the Vulkan spec: + * + * "Importing memory from a file descriptor transfers ownership of + * the file descriptor from the application to the Vulkan + * implementation. The application must not perform any operations on + * the file descriptor after a successful import." + * + * If the import fails, we leave the file descriptor open. 
+ */ + close(fd_info->fd); + } else { + result = anv_bo_cache_alloc(device, &device->bo_cache, + pAllocateInfo->allocationSize, + &mem->bo); + if (result != VK_SUCCESS) + goto fail; + + const VkMemoryDedicatedAllocateInfoKHR *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO_KHR); + if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) { + ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + + /* For images using modifiers, we require a dedicated allocation + * and we set the BO tiling to match the tiling of the underlying + * modifier. This is a bit unfortunate as this is completely + * pointless for Vulkan. However, GL needs to be able to map things + * so it needs the tiling to be set. The only way to do this in a + * non-racy way is to set the tiling in the creator of the BO so that + * makes it our job. + * + * One of these days, once the GL driver learns to not map things + * through the GTT in random places, we can drop this and start + * allowing multiple modified images in the same BO. + */ + if (image->drm_format_mod != DRM_FORMAT_MOD_INVALID) { + assert(isl_drm_modifier_get_info(image->drm_format_mod)->tiling == + image->planes[0].surface.isl.tiling); + const uint32_t i915_tiling = + isl_tiling_to_i915_tiling(image->planes[0].surface.isl.tiling); + int ret = anv_gem_set_tiling(device, mem->bo->gem_handle, + image->planes[0].surface.isl.row_pitch, + i915_tiling); + if (ret) { + anv_bo_cache_release(device, &device->bo_cache, mem->bo); + return vk_errorf(device->instance, NULL, + VK_ERROR_OUT_OF_DEVICE_MEMORY, + "failed to set BO tiling: %m"); + } + } + } + } + + assert(mem->type->heapIndex < pdevice->memory.heap_count); + if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses) + mem->bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + + if (pdevice->has_exec_async) + mem->bo->flags |= EXEC_OBJECT_ASYNC; *pMem = anv_device_memory_to_handle(mem); @@ -1568,6 +1666,50 @@ VkResult anv_AllocateMemory( return result; } +VkResult anv_GetMemoryFdKHR( + VkDevice device_h, + const VkMemoryGetFdInfoKHR* pGetFdInfo, + int* pFd) +{ + ANV_FROM_HANDLE(anv_device, dev, device_h); + ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); + + assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR || + pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + return anv_bo_cache_export(dev, &dev->bo_cache, mem->bo, pFd); +} + +VkResult anv_GetMemoryFdPropertiesKHR( + VkDevice _device, + VkExternalMemoryHandleTypeFlagBitsKHR handleType, + int fd, + VkMemoryFdPropertiesKHR* pMemoryFdProperties) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_physical_device *pdevice = &device->instance->physicalDevice; + + switch (handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR: + /* dma-buf can be imported as any memory type */ + pMemoryFdProperties->memoryTypeBits = + (1 << pdevice->memory.type_count) - 1; + return VK_SUCCESS; + + default: + /* The valid usage section for this function says: + * + * "handleType must not be one of the handle types defined as + * opaque." + * + * So opaque handle types fall into the default "unsupported" case. 
+ */ + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR); + } +} + void anv_FreeMemory( VkDevice _device, VkDeviceMemory _mem, @@ -1582,11 +1724,7 @@ void anv_FreeMemory( if (mem->map) anv_UnmapMemory(_device, _mem); - if (mem->bo.map) - anv_gem_munmap(mem->bo.map, mem->bo.size); - - if (mem->bo.gem_handle != 0) - anv_gem_close(device, mem->bo.gem_handle); + anv_bo_cache_release(device, &device->bo_cache, mem->bo); vk_free2(&device->alloc, pAllocator, mem); } @@ -1608,7 +1746,7 @@ VkResult anv_MapMemory( } if (size == VK_WHOLE_SIZE) - size = mem->bo.size - offset; + size = mem->bo->size - offset; /* From the Vulkan spec version 1.0.32 docs for MapMemory: * @@ -1618,7 +1756,7 @@ VkResult anv_MapMemory( * equal to the size of the memory minus offset */ assert(size > 0); - assert(offset + size <= mem->bo.size); + assert(offset + size <= mem->bo->size); /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only * takes a VkDeviceMemory pointer, it seems like only one map of the memory @@ -1627,7 +1765,9 @@ VkResult anv_MapMemory( * userspace. */ uint32_t gem_flags = 0; - if (!device->info.has_llc && mem->type_index == 0) + + if (!device->info.has_llc && + (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) gem_flags |= I915_MMAP_WC; /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */ @@ -1638,7 +1778,7 @@ VkResult anv_MapMemory( /* Let's map whole pages */ map_size = align_u64(map_size, 4096); - void *map = anv_gem_mmap(device, mem->bo.gem_handle, + void *map = anv_gem_mmap(device, mem->bo->gem_handle, map_offset, map_size, gem_flags); if (map == MAP_FAILED) return vk_error(VK_ERROR_MEMORY_MAP_FAILED); @@ -1676,7 +1816,7 @@ clflush_mapped_ranges(struct anv_device *device, if (ranges[i].offset >= mem->map_size) continue; - anv_clflush_range(mem->map + ranges[i].offset, + gen_clflush_range(mem->map + ranges[i].offset, MIN2(ranges[i].size, mem->map_size - ranges[i].offset)); } } @@ -1724,6 +1864,7 @@ void anv_GetBufferMemoryRequirements( { ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_physical_device *pdevice = &device->instance->physicalDevice; /* The Vulkan spec (git aaed022) says: * @@ -1731,13 +1872,41 @@ void anv_GetBufferMemoryRequirements( * supported memory type for the resource. The bit `1<memoryTypeBits = device->info.has_llc ? 
1 : 3; + uint32_t memory_types = 0; + for (uint32_t i = 0; i < pdevice->memory.type_count; i++) { + uint32_t valid_usage = pdevice->memory.types[i].valid_buffer_usage; + if ((valid_usage & buffer->usage) == buffer->usage) + memory_types |= (1u << i); + } pMemoryRequirements->size = buffer->size; pMemoryRequirements->alignment = 16; + pMemoryRequirements->memoryTypeBits = memory_types; +} + +void anv_GetBufferMemoryRequirements2KHR( + VkDevice _device, + const VkBufferMemoryRequirementsInfo2KHR* pInfo, + VkMemoryRequirements2KHR* pMemoryRequirements) +{ + anv_GetBufferMemoryRequirements(_device, pInfo->buffer, + &pMemoryRequirements->memoryRequirements); + + vk_foreach_struct(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: { + VkMemoryDedicatedRequirementsKHR *requirements = (void *)ext; + requirements->prefersDedicatedAllocation = VK_FALSE; + requirements->requiresDedicatedAllocation = VK_FALSE; + break; + } + + default: + anv_debug_ignored_stype(ext->sType); + break; + } + } } void anv_GetImageMemoryRequirements( @@ -1747,6 +1916,7 @@ void anv_GetImageMemoryRequirements( { ANV_FROM_HANDLE(anv_image, image, _image); ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_physical_device *pdevice = &device->instance->physicalDevice; /* The Vulkan spec (git aaed022) says: * @@ -1755,12 +1925,85 @@ void anv_GetImageMemoryRequirements( * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties * structure for the physical device is supported. * - * We support exactly one memory type on LLC, two on non-LLC. + * All types are currently supported for images. */ - pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3; + uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1; pMemoryRequirements->size = image->size; pMemoryRequirements->alignment = image->alignment; + pMemoryRequirements->memoryTypeBits = memory_types; +} + +void anv_GetImageMemoryRequirements2KHR( + VkDevice _device, + const VkImageMemoryRequirementsInfo2KHR* pInfo, + VkMemoryRequirements2KHR* pMemoryRequirements) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_image, image, pInfo->image); + + anv_GetImageMemoryRequirements(_device, pInfo->image, + &pMemoryRequirements->memoryRequirements); + + vk_foreach_struct_const(ext, pInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO_KHR: { + struct anv_physical_device *pdevice = &device->instance->physicalDevice; + const VkImagePlaneMemoryRequirementsInfoKHR *plane_reqs = + (const VkImagePlaneMemoryRequirementsInfoKHR *) ext; + uint32_t plane = anv_image_aspect_to_plane(image->aspects, + plane_reqs->planeAspect); + + assert(image->planes[plane].offset == 0); + + /* The Vulkan spec (git aaed022) says: + * + * memoryTypeBits is a bitfield and contains one bit set for every + * supported memory type for the resource. 
The bit `1<memoryRequirements.memoryTypeBits = + (1ull << pdevice->memory.type_count) - 1; + + pMemoryRequirements->memoryRequirements.size = image->planes[plane].size; + pMemoryRequirements->memoryRequirements.alignment = + image->planes[plane].alignment; + break; + } + + default: + anv_debug_ignored_stype(ext->sType); + break; + } + } + + vk_foreach_struct(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR: { + VkMemoryDedicatedRequirementsKHR *requirements = (void *)ext; + if (image->drm_format_mod != DRM_FORMAT_MOD_INVALID) { + /* Require a dedicated allocation for images with modifiers. + * + * See also anv_AllocateMemory. + */ + requirements->prefersDedicatedAllocation = VK_TRUE; + requirements->requiresDedicatedAllocation = VK_TRUE; + } else { + requirements->prefersDedicatedAllocation = VK_FALSE; + requirements->requiresDedicatedAllocation = VK_FALSE; + } + break; + } + + default: + anv_debug_ignored_stype(ext->sType); + break; + } + } } void anv_GetImageSparseMemoryRequirements( @@ -1772,6 +2015,15 @@ void anv_GetImageSparseMemoryRequirements( *pSparseMemoryRequirementCount = 0; } +void anv_GetImageSparseMemoryRequirements2KHR( + VkDevice device, + const VkImageSparseMemoryRequirementsInfo2KHR* pInfo, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2KHR* pSparseMemoryRequirements) +{ + *pSparseMemoryRequirementCount = 0; +} + void anv_GetDeviceMemoryCommitment( VkDevice device, VkDeviceMemory memory, @@ -1780,332 +2032,63 @@ void anv_GetDeviceMemoryCommitment( *pCommittedMemoryInBytes = 0; } -VkResult anv_BindBufferMemory( - VkDevice device, - VkBuffer _buffer, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) +static void +anv_bind_buffer_memory(const VkBindBufferMemoryInfoKHR *pBindInfo) { - ANV_FROM_HANDLE(anv_device_memory, mem, _memory); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory); + ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer); + + assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR); if (mem) { - buffer->bo = &mem->bo; - buffer->offset = memoryOffset; + assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage); + buffer->bo = mem->bo; + buffer->offset = pBindInfo->memoryOffset; } else { buffer->bo = NULL; buffer->offset = 0; } - - return VK_SUCCESS; -} - -VkResult anv_QueueBindSparse( - VkQueue _queue, - uint32_t bindInfoCount, - const VkBindSparseInfo* pBindInfo, - VkFence fence) -{ - ANV_FROM_HANDLE(anv_queue, queue, _queue); - if (unlikely(queue->device->lost)) - return VK_ERROR_DEVICE_LOST; - - return vk_error(VK_ERROR_FEATURE_NOT_PRESENT); } -VkResult anv_CreateFence( - VkDevice _device, - const VkFenceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkFence* pFence) +VkResult anv_BindBufferMemory( + VkDevice device, + VkBuffer buffer, + VkDeviceMemory memory, + VkDeviceSize memoryOffset) { - ANV_FROM_HANDLE(anv_device, device, _device); - struct anv_bo fence_bo; - struct anv_fence *fence; - struct anv_batch batch; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO); - - result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096); - if (result != VK_SUCCESS) - return result; - - /* Fences are small. Just store the CPU data structure in the BO. */ - fence = fence_bo.map; - fence->bo = fence_bo; - - /* Place the batch after the CPU data but on its own cache line. 
@@ -1780,332 +2032,63 @@ void anv_GetDeviceMemoryCommitment(
    *pCommittedMemoryInBytes = 0;
 }
 
-VkResult anv_BindBufferMemory(
-    VkDevice                                    device,
-    VkBuffer                                    _buffer,
-    VkDeviceMemory                              _memory,
-    VkDeviceSize                                memoryOffset)
+static void
+anv_bind_buffer_memory(const VkBindBufferMemoryInfoKHR *pBindInfo)
 {
-   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
-   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+   ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
+   ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
+
+   assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR);
 
    if (mem) {
-      buffer->bo = &mem->bo;
-      buffer->offset = memoryOffset;
+      assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage);
+      buffer->bo = mem->bo;
+      buffer->offset = pBindInfo->memoryOffset;
    } else {
       buffer->bo = NULL;
       buffer->offset = 0;
    }
-
-   return VK_SUCCESS;
-}
-
-VkResult anv_QueueBindSparse(
-    VkQueue                                     _queue,
-    uint32_t                                    bindInfoCount,
-    const VkBindSparseInfo*                     pBindInfo,
-    VkFence                                     fence)
-{
-   ANV_FROM_HANDLE(anv_queue, queue, _queue);
-   if (unlikely(queue->device->lost))
-      return VK_ERROR_DEVICE_LOST;
-
-   return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
 }
 
-VkResult anv_CreateFence(
-    VkDevice                                    _device,
-    const VkFenceCreateInfo*                    pCreateInfo,
-    const VkAllocationCallbacks*                pAllocator,
-    VkFence*                                    pFence)
+VkResult anv_BindBufferMemory(
+    VkDevice                                    device,
+    VkBuffer                                    buffer,
+    VkDeviceMemory                              memory,
+    VkDeviceSize                                memoryOffset)
 {
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   struct anv_bo fence_bo;
-   struct anv_fence *fence;
-   struct anv_batch batch;
-   VkResult result;
-
-   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
-
-   result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
-   if (result != VK_SUCCESS)
-      return result;
-
-   /* Fences are small.  Just store the CPU data structure in the BO. */
-   fence = fence_bo.map;
-   fence->bo = fence_bo;
-
-   /* Place the batch after the CPU data but on its own cache line. */
-   const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
-   batch.next = batch.start = fence->bo.map + batch_offset;
-   batch.end = fence->bo.map + fence->bo.size;
-   anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
-   anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
-
-   if (!device->info.has_llc) {
-      assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
-      assert(batch.next - batch.start <= CACHELINE_SIZE);
-      __builtin_ia32_mfence();
-      __builtin_ia32_clflush(batch.start);
-   }
-
-   fence->exec2_objects[0].handle = fence->bo.gem_handle;
-   fence->exec2_objects[0].relocation_count = 0;
-   fence->exec2_objects[0].relocs_ptr = 0;
-   fence->exec2_objects[0].alignment = 0;
-   fence->exec2_objects[0].offset = fence->bo.offset;
-   fence->exec2_objects[0].flags = 0;
-   fence->exec2_objects[0].rsvd1 = 0;
-   fence->exec2_objects[0].rsvd2 = 0;
-
-   fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
-   fence->execbuf.buffer_count = 1;
-   fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
-   fence->execbuf.batch_len = batch.next - batch.start;
-   fence->execbuf.cliprects_ptr = 0;
-   fence->execbuf.num_cliprects = 0;
-   fence->execbuf.DR1 = 0;
-   fence->execbuf.DR4 = 0;
-
-   fence->execbuf.flags =
-      I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
-   fence->execbuf.rsvd1 = device->context_id;
-   fence->execbuf.rsvd2 = 0;
-
-   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
-      fence->state = ANV_FENCE_STATE_SIGNALED;
-   } else {
-      fence->state = ANV_FENCE_STATE_RESET;
-   }
-
-   *pFence = anv_fence_to_handle(fence);
+   anv_bind_buffer_memory(
+      &(VkBindBufferMemoryInfoKHR) {
+         .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHR,
+         .buffer = buffer,
+         .memory = memory,
+         .memoryOffset = memoryOffset,
+      });
 
    return VK_SUCCESS;
 }
 
-void anv_DestroyFence(
-    VkDevice                                    _device,
-    VkFence                                     _fence,
-    const VkAllocationCallbacks*                pAllocator)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   ANV_FROM_HANDLE(anv_fence, fence, _fence);
-
-   if (!fence)
-      return;
-
-   assert(fence->bo.map == fence);
-   anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
-}
-
-VkResult anv_ResetFences(
-    VkDevice                                    _device,
-    uint32_t                                    fenceCount,
-    const VkFence*                              pFences)
+VkResult anv_BindBufferMemory2KHR(
+    VkDevice                                    device,
+    uint32_t                                    bindInfoCount,
+    const VkBindBufferMemoryInfoKHR*            pBindInfos)
 {
-   for (uint32_t i = 0; i < fenceCount; i++) {
-      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-      fence->state = ANV_FENCE_STATE_RESET;
-   }
+   for (uint32_t i = 0; i < bindInfoCount; i++)
+      anv_bind_buffer_memory(&pBindInfos[i]);
 
    return VK_SUCCESS;
 }
 
-VkResult anv_GetFenceStatus(
-    VkDevice                                    _device,
-    VkFence                                     _fence)
-{
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   ANV_FROM_HANDLE(anv_fence, fence, _fence);
-
-   if (unlikely(device->lost))
-      return VK_ERROR_DEVICE_LOST;
-
-   switch (fence->state) {
-   case ANV_FENCE_STATE_RESET:
-      /* If it hasn't even been sent off to the GPU yet, it's not ready */
-      return VK_NOT_READY;
-
-   case ANV_FENCE_STATE_SIGNALED:
-      /* It's been signaled, return success */
-      return VK_SUCCESS;
-
-   case ANV_FENCE_STATE_SUBMITTED: {
-      VkResult result = anv_device_bo_busy(device, &fence->bo);
-      if (result == VK_SUCCESS) {
-         fence->state = ANV_FENCE_STATE_SIGNALED;
-         return VK_SUCCESS;
-      } else {
-         return result;
-      }
-   }
-   default:
-      unreachable("Invalid fence status");
-   }
-}
-
-#define NSEC_PER_SEC 1000000000
-#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
-
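The NSEC_PER_SEC and INT_TYPE_MAX helpers above, and the anv_WaitForFences implementation removed below, convert a relative nanosecond timeout into the absolute deadline that pthread_cond_timedwait expects: the timeout is clamped to INT64_MAX (2^63 ns is roughly 292 years, so the clamp costs nothing in practice) and then added to a CLOCK_MONOTONIC timestamp with the tv_sec/tv_nsec split done by hand. A condensed sketch of that conversion (illustrative only, not part of this diff; assumes a condition variable configured for CLOCK_MONOTONIC and omits the 32-bit tv_sec clamp the removed code performs with INT_TYPE_MAX):

#include <stdint.h>
#include <time.h>

#define SKETCH_NSEC_PER_SEC 1000000000ull

/* Illustrative sketch: relative timeout in nanoseconds -> absolute timespec,
 * suitable for passing to pthread_cond_timedwait(). */
static struct timespec
relative_to_abstime(uint64_t timeout_ns)
{
   struct timespec now, abstime;
   clock_gettime(CLOCK_MONOTONIC, &now);

   uint64_t nsec = (uint64_t)now.tv_nsec + timeout_ns % SKETCH_NSEC_PER_SEC;
   abstime.tv_sec = now.tv_sec + timeout_ns / SKETCH_NSEC_PER_SEC +
                    nsec / SKETCH_NSEC_PER_SEC;
   abstime.tv_nsec = nsec % SKETCH_NSEC_PER_SEC;
   return abstime;
}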
-VkResult anv_WaitForFences(
-    VkDevice                                    _device,
-    uint32_t                                    fenceCount,
-    const VkFence*                              pFences,
-    VkBool32                                    waitAll,
-    uint64_t                                    _timeout)
+VkResult anv_QueueBindSparse(
+    VkQueue                                     _queue,
+    uint32_t                                    bindInfoCount,
+    const VkBindSparseInfo*                     pBindInfo,
+    VkFence                                     fence)
 {
-   ANV_FROM_HANDLE(anv_device, device, _device);
-   int ret;
-
-   if (unlikely(device->lost))
-      return VK_ERROR_DEVICE_LOST;
-
-   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
-    * to block indefinitely timeouts <= 0.  Unfortunately, this was broken
-    * for a couple of kernel releases.  Since there's no way to know
-    * whether or not the kernel we're using is one of the broken ones, the
-    * best we can do is to clamp the timeout to INT64_MAX.  This limits the
-    * maximum timeout from 584 years to 292 years - likely not a big deal.
-    */
-   int64_t timeout = MIN2(_timeout, INT64_MAX);
-
-   VkResult result = VK_SUCCESS;
-   uint32_t pending_fences = fenceCount;
-   while (pending_fences) {
-      pending_fences = 0;
-      bool signaled_fences = false;
-      for (uint32_t i = 0; i < fenceCount; i++) {
-         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-         switch (fence->state) {
-         case ANV_FENCE_STATE_RESET:
-            /* This fence hasn't been submitted yet, we'll catch it the next
-             * time around.  Yes, this may mean we dead-loop but, short of
-             * lots of locking and a condition variable, there's not much that
-             * we can do about that.
-             */
-            pending_fences++;
-            continue;
-
-         case ANV_FENCE_STATE_SIGNALED:
-            /* This fence is not pending.  If waitAll isn't set, we can return
-             * early.  Otherwise, we have to keep going.
-             */
-            if (!waitAll) {
-               result = VK_SUCCESS;
-               goto done;
-            }
-            continue;
-
-         case ANV_FENCE_STATE_SUBMITTED:
-            /* These are the fences we really care about.  Go ahead and wait
-             * on it until we hit a timeout.
-             */
-            result = anv_device_wait(device, &fence->bo, timeout);
-            switch (result) {
-            case VK_SUCCESS:
-               fence->state = ANV_FENCE_STATE_SIGNALED;
-               signaled_fences = true;
-               if (!waitAll)
-                  goto done;
-               break;
-
-            case VK_TIMEOUT:
-               goto done;
-
-            default:
-               return result;
-            }
-         }
-      }
-
-      if (pending_fences && !signaled_fences) {
-         /* If we've hit this then someone decided to vkWaitForFences before
-          * they've actually submitted any of them to a queue.  This is a
-          * fairly pessimal case, so it's ok to lock here and use a standard
-          * pthreads condition variable.
-          */
-         pthread_mutex_lock(&device->mutex);
-
-         /* It's possible that some of the fences have changed state since the
-          * last time we checked.  Now that we have the lock, check for
-          * pending fences again and don't wait if it's changed.
-          */
-         uint32_t now_pending_fences = 0;
-         for (uint32_t i = 0; i < fenceCount; i++) {
-            ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
-            if (fence->state == ANV_FENCE_STATE_RESET)
-               now_pending_fences++;
-         }
-         assert(now_pending_fences <= pending_fences);
-
-         if (now_pending_fences == pending_fences) {
-            struct timespec before;
-            clock_gettime(CLOCK_MONOTONIC, &before);
-
-            uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
-            uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
-                               (timeout / NSEC_PER_SEC);
-            abs_nsec %= NSEC_PER_SEC;
-
-            /* Avoid roll-over in tv_sec on 32-bit systems if the user
-             * provided timeout is UINT64_MAX
-             */
-            struct timespec abstime;
-            abstime.tv_nsec = abs_nsec;
-            abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
-
-            ret = pthread_cond_timedwait(&device->queue_submit,
-                                         &device->mutex, &abstime);
-            assert(ret != EINVAL);
-
-            struct timespec after;
-            clock_gettime(CLOCK_MONOTONIC, &after);
-            uint64_t time_elapsed =
-               ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
-               ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
-
-            if (time_elapsed >= timeout) {
-               pthread_mutex_unlock(&device->mutex);
-               result = VK_TIMEOUT;
-               goto done;
-            }
-
-            timeout -= time_elapsed;
-         }
-
-         pthread_mutex_unlock(&device->mutex);
-      }
-   }
-
-done:
-   if (unlikely(device->lost))
+   ANV_FROM_HANDLE(anv_queue, queue, _queue);
+   if (unlikely(queue->device->lost))
       return VK_ERROR_DEVICE_LOST;
 
-   return result;
-}
-
-// Queue semaphore functions
-
-VkResult anv_CreateSemaphore(
-    VkDevice                                    device,
-    const VkSemaphoreCreateInfo*                pCreateInfo,
-    const VkAllocationCallbacks*                pAllocator,
-    VkSemaphore*                                pSemaphore)
-{
-   /* The DRM execbuffer ioctl always execute in-oder, even between different
-    * rings. As such, there's nothing to do for the user space semaphore.
-    */
-
-   *pSemaphore = (VkSemaphore)1;
-
-   return VK_SUCCESS;
-}
-
-void anv_DestroySemaphore(
-    VkDevice                                    device,
-    VkSemaphore                                 semaphore,
-    const VkAllocationCallbacks*                pAllocator)
-{
+   return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
 }
 
 // Event functions
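Since the anv_QueueBindSparse stub added above reports VK_ERROR_FEATURE_NOT_PRESENT, sparse binding is effectively unsupported here, and portable code is expected to gate any vkQueueBindSparse usage on the feature bit rather than rely on that error return. A small sketch of the check (illustrative only, not part of this diff; assumes <vulkan/vulkan.h> and <stdbool.h>):

/* Illustrative sketch: only use sparse binding if the device advertises it. */
static bool
can_use_sparse_binding(VkPhysicalDevice physical_device)
{
   VkPhysicalDeviceFeatures features;
   vkGetPhysicalDeviceFeatures(physical_device, &features);
   return features.sparseBinding == VK_TRUE;
}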