* IN THE SOFTWARE.
*/
+#include <dlfcn.h>
#include <assert.h>
#include <stdbool.h>
#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
-#include "mesa/main/git_sha1.h"
#include "util/strtod.h"
#include "util/debug.h"
va_end(args);
}
+static bool
+anv_get_function_timestamp(void *ptr, uint32_t* timestamp)
+{
+ Dl_info info;
+ struct stat st;
+ if (!dladdr(ptr, &info) || !info.dli_fname)
+ return false;
+
+ if (stat(info.dli_fname, &st))
+ return false;
+
+ *timestamp = st.st_mtim.tv_sec;
+ return true;
+}
+
+static bool
+anv_device_get_cache_uuid(void *uuid)
+{
+ uint32_t timestamp;
+
+ memset(uuid, 0, VK_UUID_SIZE);
+ if (!anv_get_function_timestamp(anv_device_get_cache_uuid, ×tamp))
+ return false;
+
+ snprintf(uuid, VK_UUID_SIZE, "anv-%d", timestamp);
+ return true;
+}
+
static VkResult
anv_physical_device_init(struct anv_physical_device *device,
struct anv_instance *instance,
fd = open(path, O_RDWR | O_CLOEXEC);
if (fd < 0)
- return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
- "failed to open %s: %m", path);
+ return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
device->instance = instance;
- device->path = path;
+
+ assert(strlen(path) < ARRAY_SIZE(device->path));
+ strncpy(device->path, path, ARRAY_SIZE(device->path));
device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
if (!device->chipset_id) {
- result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
- "failed to get chipset id: %m");
+ result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
goto fail;
}
- device->name = brw_get_device_name(device->chipset_id);
- device->info = brw_get_device_info(device->chipset_id);
- if (!device->info) {
- result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
- "failed to get device info");
+ device->name = gen_get_device_name(device->chipset_id);
+ if (!gen_get_device_info(device->chipset_id, &device->info)) {
+ result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
goto fail;
}
- if (device->info->is_haswell) {
+ if (device->info.is_haswell) {
fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
- } else if (device->info->gen == 7 && !device->info->is_baytrail) {
+ } else if (device->info.gen == 7 && !device->info.is_baytrail) {
fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
- } else if (device->info->gen == 7 && device->info->is_baytrail) {
+ } else if (device->info.gen == 7 && device->info.is_baytrail) {
fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
- } else if (device->info->gen >= 8) {
+ } else if (device->info.gen >= 8) {
/* Broadwell, Cherryview, Skylake, Broxton, Kabylake is as fully
* supported as anything */
} else {
}
device->cmd_parser_version = -1;
- if (device->info->gen == 7) {
+ if (device->info.gen == 7) {
device->cmd_parser_version =
anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
if (device->cmd_parser_version == -1) {
goto fail;
}
- if (!device->info->has_llc &&
+ if (!device->info.has_llc &&
anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
"kernel missing wc mmap");
goto fail;
}
+ if (!anv_device_get_cache_uuid(device->uuid)) {
+ result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
+ "cannot generate UUID");
+ goto fail;
+ }
bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
- close(fd);
+ /* GENs prior to 8 do not support EU/Subslice info */
+ if (device->info.gen >= 8) {
+ device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
+ device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
+
+ /* Without this information, we cannot get the right Braswell
+ * brandstrings, and we have to use conservative numbers for GPGPU on
+ * many platforms, but otherwise, things will just work.
+ */
+ if (device->subslice_total < 1 || device->eu_total < 1) {
+ fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
+ " query GPU properties.\n");
+ }
+ } else if (device->info.gen == 7) {
+ device->subslice_total = 1 << (device->info.gt - 1);
+ }
+
+ if (device->info.is_cherryview &&
+ device->subslice_total > 0 && device->eu_total > 0) {
+ /* Logical CS threads = EUs per subslice * 7 threads per EU */
+ uint32_t max_cs_threads = device->eu_total / device->subslice_total * 7;
+
+ /* Fuse configurations may give more threads than expected, never less. */
+ if (max_cs_threads > device->info.max_cs_threads)
+ device->info.max_cs_threads = max_cs_threads;
+ }
brw_process_intel_debug_variable();
- device->compiler = brw_compiler_create(NULL, device->info);
+ device->compiler = brw_compiler_create(NULL, &device->info);
if (device->compiler == NULL) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
device->compiler->shader_debug_log = compiler_debug_log;
device->compiler->shader_perf_log = compiler_perf_log;
- /* XXX: Actually detect bit6 swizzling */
- isl_device_init(&device->isl_dev, device->info, swizzled);
+ result = anv_init_wsi(device);
+ if (result != VK_SUCCESS) {
+ ralloc_free(device->compiler);
+ goto fail;
+ }
+ isl_device_init(&device->isl_dev, &device->info, swizzled);
+
+ close(fd);
return VK_SUCCESS;
fail:
static void
anv_physical_device_finish(struct anv_physical_device *device)
{
+ anv_finish_wsi(device);
ralloc_free(device->compiler);
}
.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
.specVersion = 25,
},
+#ifdef VK_USE_PLATFORM_XCB_KHR
{
.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
- .specVersion = 5,
+ .specVersion = 6,
},
-#ifdef HAVE_WAYLAND_PLATFORM
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+ {
+ .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+ .specVersion = 6,
+ },
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
{
.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
- .specVersion = 4,
+ .specVersion = 5,
},
#endif
};
static const VkExtensionProperties device_extensions[] = {
{
.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
- .specVersion = 67,
+ .specVersion = 68,
},
+ {
+ .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
+ .specVersion = 1,
+ }
};
static void *
-default_alloc_func(void *pUserData, size_t size, size_t align,
+default_alloc_func(void *pUserData, size_t size, size_t align,
VkSystemAllocationScope allocationScope)
{
return malloc(size);
return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
}
- instance = anv_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
+ instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!instance)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
instance->apiVersion = client_version;
instance->physicalDeviceCount = -1;
- memset(instance->wsi, 0, sizeof(instance->wsi));
-
_mesa_locale_init();
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
- anv_init_wsi(instance);
-
*pInstance = anv_instance_to_handle(instance);
return VK_SUCCESS;
anv_physical_device_finish(&instance->physicalDevice);
}
- anv_finish_wsi(instance);
-
VG(VALGRIND_DESTROY_MEMPOOL(instance));
_mesa_locale_fini();
- anv_free(&instance->alloc, instance);
+ vk_free(&instance->alloc, instance);
}
VkResult anv_EnumeratePhysicalDevices(
VkResult result;
if (instance->physicalDeviceCount < 0) {
- result = anv_physical_device_init(&instance->physicalDevice,
- instance, "/dev/dri/renderD128");
+ char path[20];
+ for (unsigned i = 0; i < 8; i++) {
+ snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
+ result = anv_physical_device_init(&instance->physicalDevice,
+ instance, path);
+ if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
+ break;
+ }
+
if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
instance->physicalDeviceCount = 0;
} else if (result == VK_SUCCESS) {
} else if (*pPhysicalDeviceCount >= 1) {
pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
*pPhysicalDeviceCount = 1;
+ } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
+ return VK_INCOMPLETE;
} else {
*pPhysicalDeviceCount = 0;
}
*pFeatures = (VkPhysicalDeviceFeatures) {
.robustBufferAccess = true,
.fullDrawIndexUint32 = true,
- .imageCubeArray = false,
- .independentBlend = pdevice->info->gen >= 8,
+ .imageCubeArray = true,
+ .independentBlend = true,
.geometryShader = true,
- .tessellationShader = false,
- .sampleRateShading = false,
+ .tessellationShader = true,
+ .sampleRateShading = true,
.dualSrcBlend = true,
.logicOp = true,
.multiDrawIndirect = false,
- .drawIndirectFirstInstance = false,
- .depthClamp = false,
- .depthBiasClamp = false,
+ .drawIndirectFirstInstance = true,
+ .depthClamp = true,
+ .depthBiasClamp = true,
.fillModeNonSolid = true,
.depthBounds = false,
.wideLines = true,
.largePoints = true,
.alphaToOne = true,
.multiViewport = true,
- .samplerAnisotropy = false, /* FINISHME */
- .textureCompressionETC2 = true,
- .textureCompressionASTC_LDR = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = pdevice->info.gen >= 8 ||
+ pdevice->info.is_baytrail,
+ .textureCompressionASTC_LDR = pdevice->info.gen >= 9, /* FINISHME CHV */
.textureCompressionBC = true,
.occlusionQueryPrecise = true,
.pipelineStatisticsQuery = false,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = true,
.shaderImageGatherExtended = true,
- .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageExtendedFormats = true,
.shaderStorageImageMultisample = false,
+ .shaderStorageImageReadWithoutFormat = false,
+ .shaderStorageImageWriteWithoutFormat = false,
.shaderUniformBufferArrayDynamicIndexing = true,
.shaderSampledImageArrayDynamicIndexing = true,
.shaderStorageBufferArrayDynamicIndexing = true,
.shaderStorageImageArrayDynamicIndexing = true,
- .shaderStorageImageReadWithoutFormat = false,
- .shaderStorageImageWriteWithoutFormat = true,
- .shaderClipDistance = false,
- .shaderCullDistance = false,
- .shaderFloat64 = false,
+ .shaderClipDistance = true,
+ .shaderCullDistance = true,
+ .shaderFloat64 = pdevice->info.gen >= 8,
.shaderInt64 = false,
.shaderInt16 = false,
- .alphaToOne = true,
+ .shaderResourceMinLod = false,
.variableMultisampleRate = false,
.inheritedQueries = false,
};
pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
}
-void
-anv_device_get_cache_uuid(void *uuid)
-{
- memset(uuid, 0, VK_UUID_SIZE);
- snprintf(uuid, VK_UUID_SIZE, "anv-%s", MESA_GIT_SHA1 + 4);
-}
-
void anv_GetPhysicalDeviceProperties(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties* pProperties)
{
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
- const struct brw_device_info *devinfo = pdevice->info;
-
- anv_finishme("Get correct values for VkPhysicalDeviceLimits");
+ const struct gen_device_info *devinfo = &pdevice->info;
const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
+ /* See assertions made when programming the buffer surface state. */
+ const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
+ (1ul << 30) : (1ul << 27);
+
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
.maxImageDimensionCube = (1 << 14),
.maxImageArrayLayers = (1 << 11),
.maxTexelBufferElements = 128 * 1024 * 1024,
- .maxUniformBufferRange = UINT32_MAX,
- .maxStorageBufferRange = UINT32_MAX,
+ .maxUniformBufferRange = (1ul << 27),
+ .maxStorageBufferRange = max_raw_buffer_sz,
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = UINT32_MAX,
.maxSamplerAllocationCount = 64 * 1024,
.maxVertexInputAttributeOffset = 2047,
.maxVertexInputBindingStride = 2048,
.maxVertexOutputComponents = 128,
- .maxTessellationGenerationLevel = 0,
- .maxTessellationPatchSize = 0,
- .maxTessellationControlPerVertexInputComponents = 0,
- .maxTessellationControlPerVertexOutputComponents = 0,
- .maxTessellationControlPerPatchOutputComponents = 0,
- .maxTessellationControlTotalOutputComponents = 0,
- .maxTessellationEvaluationInputComponents = 0,
- .maxTessellationEvaluationOutputComponents = 0,
+ .maxTessellationGenerationLevel = 64,
+ .maxTessellationPatchSize = 32,
+ .maxTessellationControlPerVertexInputComponents = 128,
+ .maxTessellationControlPerVertexOutputComponents = 128,
+ .maxTessellationControlPerPatchOutputComponents = 128,
+ .maxTessellationControlTotalOutputComponents = 2048,
+ .maxTessellationEvaluationInputComponents = 128,
+ .maxTessellationEvaluationOutputComponents = 128,
.maxGeometryShaderInvocations = 32,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxGeometryTotalOutputComponents = 1024,
.maxFragmentInputComponents = 128,
.maxFragmentOutputAttachments = 8,
- .maxFragmentDualSrcAttachments = 2,
+ .maxFragmentDualSrcAttachments = 1,
.maxFragmentCombinedOutputResources = 8,
.maxComputeSharedMemorySize = 32768,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
.maxSamplerAnisotropy = 16,
.maxViewports = MAX_VIEWPORTS,
.maxViewportDimensions = { (1 << 14), (1 << 14) },
- .viewportBoundsRange = { -16384.0, 16384.0 },
+ .viewportBoundsRange = { INT16_MIN, INT16_MAX },
.viewportSubPixelBits = 13, /* We take a float? */
.minMemoryMapAlignment = 4096, /* A page */
.minTexelBufferOffsetAlignment = 1,
- .minUniformBufferOffsetAlignment = 1,
- .minStorageBufferOffsetAlignment = 1,
+ .minUniformBufferOffsetAlignment = 16,
+ .minStorageBufferOffsetAlignment = 4,
.minTexelOffset = -8,
.maxTexelOffset = 7,
- .minTexelGatherOffset = -8,
- .maxTexelGatherOffset = 7,
- .minInterpolationOffset = 0, /* FIXME */
- .maxInterpolationOffset = 0, /* FIXME */
- .subPixelInterpolationOffsetBits = 0, /* FIXME */
+ .minTexelGatherOffset = -32,
+ .maxTexelGatherOffset = 31,
+ .minInterpolationOffset = -0.5,
+ .maxInterpolationOffset = 0.4375,
+ .subPixelInterpolationOffsetBits = 4,
.maxFramebufferWidth = (1 << 14),
.maxFramebufferHeight = (1 << 14),
- .maxFramebufferLayers = (1 << 10),
+ .maxFramebufferLayers = (1 << 11),
.framebufferColorSampleCounts = sample_counts,
.framebufferDepthSampleCounts = sample_counts,
.framebufferStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
- .timestampPeriod = time_stamp_base / (1000 * 1000 * 1000),
- .maxClipDistances = 0 /* FIXME */,
- .maxCullDistances = 0 /* FIXME */,
- .maxCombinedClipAndCullDistances = 0 /* FIXME */,
+ .timestampPeriod = time_stamp_base,
+ .maxClipDistances = 8,
+ .maxCullDistances = 8,
+ .maxCombinedClipAndCullDistances = 8,
.discreteQueuePriorities = 1,
.pointSizeRange = { 0.125, 255.875 },
.lineWidthRange = { 0.0, 7.9921875 },
};
strcpy(pProperties->deviceName, pdevice->name);
- anv_device_get_cache_uuid(pProperties->pipelineCacheUUID);
+ memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}
void anv_GetPhysicalDeviceQueueFamilyProperties(
return;
}
- assert(*pCount >= 1);
+ /* The spec implicitly allows the incoming count to be 0. From the Vulkan
+ * 1.0.38 spec, Section 4.1 Physical Devices:
+ *
+ * If the value referenced by pQueueFamilyPropertyCount is not 0 [then
+ * do stuff].
+ */
+ if (*pCount == 0)
+ return;
*pQueueFamilyProperties = (VkQueueFamilyProperties) {
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
.timestampValidBits = 36, /* XXX: Real value here */
.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
};
+
+ *pCount = 1;
}
void anv_GetPhysicalDeviceMemoryProperties(
*/
heap_size = 3 * physical_device->aperture_size / 4;
- if (physical_device->info->has_llc) {
+ if (physical_device->info.has_llc) {
/* Big core GPUs share LLC with the CPU and thus one memory type can be
* both cached and coherent at the same time.
*/
VkInstance instance,
const char* pName)
{
- return anv_lookup_entrypoint(pName);
+ return anv_lookup_entrypoint(NULL, pName);
}
-/* The loader wants us to expose a second GetInstanceProcAddr function
- * to work around certain LD_PRELOAD issues seen in apps.
+/* With version 1+ of the loader interface the ICD should expose
+ * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
*/
+PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName);
+PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
VkInstance instance,
const char* pName)
}
PFN_vkVoidFunction anv_GetDeviceProcAddr(
- VkDevice device,
+ VkDevice _device,
const char* pName)
{
- return anv_lookup_entrypoint(pName);
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ return anv_lookup_entrypoint(&device->info, pName);
}
-static VkResult
+static void
anv_queue_init(struct anv_device *device, struct anv_queue *queue)
{
queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
queue->device = device;
queue->pool = &device->surface_state_pool;
-
- return VK_SUCCESS;
}
static void
{
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_exec_object2 exec2_objects[1];
- struct anv_bo bo;
+ struct anv_bo bo, *exec_bos[1];
VkResult result = VK_SUCCESS;
uint32_t size;
int64_t timeout;
if (!device->info.has_llc)
anv_clflush_range(bo.map, size);
+ exec_bos[0] = &bo;
exec2_objects[0].handle = bo.gem_handle;
exec2_objects[0].relocation_count = 0;
exec2_objects[0].relocs_ptr = 0;
execbuf.rsvd1 = device->context_id;
execbuf.rsvd2 = 0;
- ret = anv_gem_execbuffer(device, &execbuf);
- if (ret != 0) {
- /* We don't know the real error. */
- result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
+ result = anv_device_execbuf(device, &execbuf, exec_bos);
+ if (result != VK_SUCCESS)
goto fail;
- }
timeout = INT64_MAX;
ret = anv_gem_wait(device, bo.gem_handle, &timeout);
if (ret != 0) {
/* We don't know the real error. */
- result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
+ result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
goto fail;
}
return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
}
- anv_set_dispatch_devinfo(physical_device->info);
-
- device = anv_alloc2(&physical_device->instance->alloc, pAllocator,
+ device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
sizeof(*device), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device)
goto fail_fd;
}
- device->info = *physical_device->info;
+ device->info = physical_device->info;
device->isl_dev = physical_device->isl_dev;
/* On Broadwell and later, we can use batch chaining to more efficiently
*/
device->can_chain_batches = device->info.gen >= 8;
+ device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
+ pCreateInfo->pEnabledFeatures->robustBufferAccess;
+
pthread_mutex_init(&device->mutex, NULL);
+ pthread_condattr_t condattr;
+ pthread_condattr_init(&condattr);
+ pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+ pthread_cond_init(&device->queue_submit, NULL);
+ pthread_condattr_destroy(&condattr);
+
anv_bo_pool_init(&device->batch_bo_pool, device);
anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
&device->dynamic_state_block_pool);
anv_block_pool_init(&device->instruction_block_pool, device, 128 * 1024);
- anv_pipeline_cache_init(&device->default_pipeline_cache, device);
+ anv_state_pool_init(&device->instruction_state_pool,
+ &device->instruction_block_pool);
anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
anv_bo_init_new(&device->workaround_bo, device, 1024);
- anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
+ anv_scratch_pool_init(device, &device->scratch_pool);
anv_queue_init(device, &device->queue);
if (result != VK_SUCCESS)
goto fail_fd;
- result = anv_device_init_meta(device);
- if (result != VK_SUCCESS)
- goto fail_fd;
+ anv_device_init_blorp(device);
anv_device_init_border_colors(device);
fail_fd:
close(device->fd);
fail_device:
- anv_free(&device->alloc, device);
+ vk_free(&device->alloc, device);
return result;
}
{
ANV_FROM_HANDLE(anv_device, device, _device);
- anv_queue_finish(&device->queue);
+ anv_device_finish_blorp(device);
- anv_device_finish_meta(device);
+ anv_queue_finish(&device->queue);
#ifdef HAVE_VALGRIND
/* We only need to free these to prevent valgrind errors. The backing
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
#endif
+ anv_scratch_pool_finish(device, &device->scratch_pool);
+
anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
anv_gem_close(device, device->workaround_bo.gem_handle);
- anv_bo_pool_finish(&device->batch_bo_pool);
- anv_state_pool_finish(&device->dynamic_state_pool);
- anv_block_pool_finish(&device->dynamic_state_block_pool);
- anv_block_pool_finish(&device->instruction_block_pool);
anv_state_pool_finish(&device->surface_state_pool);
anv_block_pool_finish(&device->surface_state_block_pool);
- anv_block_pool_finish(&device->scratch_block_pool);
+ anv_state_pool_finish(&device->instruction_state_pool);
+ anv_block_pool_finish(&device->instruction_block_pool);
+ anv_state_pool_finish(&device->dynamic_state_pool);
+ anv_block_pool_finish(&device->dynamic_state_block_pool);
- close(device->fd);
+ anv_bo_pool_finish(&device->batch_bo_pool);
+ pthread_cond_destroy(&device->queue_submit);
pthread_mutex_destroy(&device->mutex);
- anv_free(&device->alloc, device);
+ anv_gem_destroy_context(device, device->context_id);
+
+ close(device->fd);
+
+ vk_free(&device->alloc, device);
}
VkResult anv_EnumerateInstanceExtensionProperties(
return VK_SUCCESS;
}
- assert(*pPropertyCount >= ARRAY_SIZE(global_extensions));
+ *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
+ typed_memcpy(pProperties, global_extensions, *pPropertyCount);
- *pPropertyCount = ARRAY_SIZE(global_extensions);
- memcpy(pProperties, global_extensions, sizeof(global_extensions));
+ if (*pPropertyCount < ARRAY_SIZE(global_extensions))
+ return VK_INCOMPLETE;
return VK_SUCCESS;
}
return VK_SUCCESS;
}
- assert(*pPropertyCount >= ARRAY_SIZE(device_extensions));
+ *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
+ typed_memcpy(pProperties, device_extensions, *pPropertyCount);
- *pPropertyCount = ARRAY_SIZE(device_extensions);
- memcpy(pProperties, device_extensions, sizeof(device_extensions));
+ if (*pPropertyCount < ARRAY_SIZE(device_extensions))
+ return VK_INCOMPLETE;
return VK_SUCCESS;
}
*pQueue = anv_queue_to_handle(&device->queue);
}
+VkResult
+anv_device_execbuf(struct anv_device *device,
+ struct drm_i915_gem_execbuffer2 *execbuf,
+ struct anv_bo **execbuf_bos)
+{
+ int ret = anv_gem_execbuffer(device, execbuf);
+ if (ret != 0) {
+ /* We don't know the real error. */
+ return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
+ }
+
+ struct drm_i915_gem_exec_object2 *objects =
+ (void *)(uintptr_t)execbuf->buffers_ptr;
+ for (uint32_t k = 0; k < execbuf->buffer_count; k++)
+ execbuf_bos[k]->offset = objects[k].offset;
+
+ return VK_SUCCESS;
+}
+
VkResult anv_QueueSubmit(
VkQueue _queue,
uint32_t submitCount,
ANV_FROM_HANDLE(anv_queue, queue, _queue);
ANV_FROM_HANDLE(anv_fence, fence, _fence);
struct anv_device *device = queue->device;
- int ret;
+ VkResult result = VK_SUCCESS;
+
+ /* We lock around QueueSubmit for three main reasons:
+ *
+ * 1) When a block pool is resized, we create a new gem handle with a
+ * different size and, in the case of surface states, possibly a
+ * different center offset but we re-use the same anv_bo struct when
+ * we do so. If this happens in the middle of setting up an execbuf,
+ * we could end up with our list of BOs out of sync with our list of
+ * gem handles.
+ *
+ * 2) The algorithm we use for building the list of unique buffers isn't
+ * thread-safe. While the client is supposed to syncronize around
+ * QueueSubmit, this would be extremely difficult to debug if it ever
+ * came up in the wild due to a broken app. It's better to play it
+ * safe and just lock around QueueSubmit.
+ *
+ * 3) The anv_cmd_buffer_execbuf function may perform relocations in
+ * userspace. Due to the fact that the surface state buffer is shared
+ * between batches, we can't afford to have that happen from multiple
+ * threads at the same time. Even though the user is supposed to
+ * ensure this doesn't happen, we play it safe as in (2) above.
+ *
+ * Since the only other things that ever take the device lock such as block
+ * pool resize only rarely happen, this will almost never be contended so
+ * taking a lock isn't really an expensive operation in this case.
+ */
+ pthread_mutex_lock(&device->mutex);
for (uint32_t i = 0; i < submitCount; i++) {
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
pSubmits[i].pCommandBuffers[j]);
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
- ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf);
- if (ret != 0) {
- /* We don't know the real error. */
- return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
- "execbuf2 failed: %m");
- }
-
- for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++)
- cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset;
+ result = anv_cmd_buffer_execbuf(device, cmd_buffer);
+ if (result != VK_SUCCESS)
+ goto out;
}
}
if (fence) {
- ret = anv_gem_execbuffer(device, &fence->execbuf);
- if (ret != 0) {
- /* We don't know the real error. */
- return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
- "execbuf2 failed: %m");
- }
+ struct anv_bo *fence_bo = &fence->bo;
+ result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
+ if (result != VK_SUCCESS)
+ goto out;
+
+ /* Update the fence and wake up any waiters */
+ assert(fence->state == ANV_FENCE_STATE_RESET);
+ fence->state = ANV_FENCE_STATE_SUBMITTED;
+ pthread_cond_broadcast(&device->queue_submit);
}
- return VK_SUCCESS;
+out:
+ pthread_mutex_unlock(&device->mutex);
+
+ return result;
}
VkResult anv_QueueWaitIdle(
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
- return ANV_CALL(DeviceWaitIdle)(anv_device_to_handle(queue->device));
+ return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
}
VkResult anv_DeviceWaitIdle(
batch.start = batch.next = cmds;
batch.end = (void *) cmds + sizeof(cmds);
- anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
- anv_batch_emit(&batch, GEN7_MI_NOOP);
+ anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
+ anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
return anv_device_submit_simple_batch(device, &batch);
}
VkResult
anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
{
- bo->gem_handle = anv_gem_create(device, size);
- if (!bo->gem_handle)
+ uint32_t gem_handle = anv_gem_create(device, size);
+ if (!gem_handle)
return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
- bo->map = NULL;
- bo->index = 0;
- bo->offset = 0;
- bo->size = size;
- bo->is_winsys_bo = false;
+ anv_bo_init(bo, gem_handle, size);
return VK_SUCCESS;
}
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
- if (pAllocateInfo->allocationSize == 0) {
- /* Apparently, this is allowed */
- *pMem = VK_NULL_HANDLE;
- return VK_SUCCESS;
- }
+ /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
+ assert(pAllocateInfo->allocationSize > 0);
/* We support exactly one memory heap. */
assert(pAllocateInfo->memoryTypeIndex == 0 ||
/* FINISHME: Fail if allocation request exceeds heap size. */
- mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
+ mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (mem == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
mem->type_index = pAllocateInfo->memoryTypeIndex;
+ mem->map = NULL;
+ mem->map_size = 0;
+
*pMem = anv_device_memory_to_handle(mem);
return VK_SUCCESS;
fail:
- anv_free2(&device->alloc, pAllocator, mem);
+ vk_free2(&device->alloc, pAllocator, mem);
return result;
}
if (mem == NULL)
return;
+ if (mem->map)
+ anv_UnmapMemory(_device, _mem);
+
if (mem->bo.map)
anv_gem_munmap(mem->bo.map, mem->bo.size);
if (mem->bo.gem_handle != 0)
anv_gem_close(device, mem->bo.gem_handle);
- anv_free2(&device->alloc, pAllocator, mem);
+ vk_free2(&device->alloc, pAllocator, mem);
}
VkResult anv_MapMemory(
if (size == VK_WHOLE_SIZE)
size = mem->bo.size - offset;
+ /* From the Vulkan spec version 1.0.32 docs for MapMemory:
+ *
+ * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
+ * assert(size != 0);
+ * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
+ * equal to the size of the memory minus offset
+ */
+ assert(size > 0);
+ assert(offset + size <= mem->bo.size);
+
/* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
* takes a VkDeviceMemory pointer, it seems like only one map of the memory
* at a time is valid. We could just mmap up front and return an offset
/* Let's map whole pages */
map_size = align_u64(map_size, 4096);
- mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
- map_offset, map_size, gem_flags);
+ void *map = anv_gem_mmap(device, mem->bo.gem_handle,
+ map_offset, map_size, gem_flags);
+ if (map == MAP_FAILED)
+ return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+
+ mem->map = map;
mem->map_size = map_size;
*ppData = mem->map + (offset - map_offset);
return;
anv_gem_munmap(mem->map, mem->map_size);
+
+ mem->map = NULL;
+ mem->map_size = 0;
}
static void
return VK_SUCCESS;
}
-VkResult anv_BindImageMemory(
- VkDevice device,
- VkImage _image,
- VkDeviceMemory _memory,
- VkDeviceSize memoryOffset)
-{
- ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
- ANV_FROM_HANDLE(anv_image, image, _image);
-
- if (mem) {
- image->bo = &mem->bo;
- image->offset = memoryOffset;
- } else {
- image->bo = NULL;
- image->offset = 0;
- }
-
- return VK_SUCCESS;
-}
-
VkResult anv_QueueBindSparse(
VkQueue queue,
uint32_t bindInfoCount,
const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
batch.next = batch.start = fence->bo.map + batch_offset;
batch.end = fence->bo.map + fence->bo.size;
- anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END);
- anv_batch_emit(&batch, GEN7_MI_NOOP);
+ anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
+ anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
if (!device->info.has_llc) {
assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
fence->execbuf.rsvd1 = device->context_id;
fence->execbuf.rsvd2 = 0;
- fence->ready = false;
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ } else {
+ fence->state = ANV_FENCE_STATE_RESET;
+ }
*pFence = anv_fence_to_handle(fence);
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_fence, fence, _fence);
+ if (!fence)
+ return;
+
assert(fence->bo.map == fence);
anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
}
{
for (uint32_t i = 0; i < fenceCount; i++) {
ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- fence->ready = false;
+ fence->state = ANV_FENCE_STATE_RESET;
}
return VK_SUCCESS;
int64_t t = 0;
int ret;
- if (fence->ready)
- return VK_SUCCESS;
+ switch (fence->state) {
+ case ANV_FENCE_STATE_RESET:
+ /* If it hasn't even been sent off to the GPU yet, it's not ready */
+ return VK_NOT_READY;
- ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
- if (ret == 0) {
- fence->ready = true;
+ case ANV_FENCE_STATE_SIGNALED:
+ /* It's been signaled, return success */
return VK_SUCCESS;
- }
- return VK_NOT_READY;
+ case ANV_FENCE_STATE_SUBMITTED:
+ /* It's been submitted to the GPU but we don't know if it's done yet. */
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ if (ret == 0) {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ return VK_SUCCESS;
+ } else {
+ return VK_NOT_READY;
+ }
+ default:
+ unreachable("Invalid fence status");
+ }
}
+#define NSEC_PER_SEC 1000000000
+#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
+
VkResult anv_WaitForFences(
VkDevice _device,
uint32_t fenceCount,
const VkFence* pFences,
VkBool32 waitAll,
- uint64_t timeout)
+ uint64_t _timeout)
{
ANV_FROM_HANDLE(anv_device, device, _device);
+ int ret;
/* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
* to block indefinitely timeouts <= 0. Unfortunately, this was broken
* best we can do is to clamp the timeout to INT64_MAX. This limits the
* maximum timeout from 584 years to 292 years - likely not a big deal.
*/
- if (timeout > INT64_MAX)
- timeout = INT64_MAX;
-
- int64_t t = timeout;
+ int64_t timeout = MIN2(_timeout, INT64_MAX);
+
+ uint32_t pending_fences = fenceCount;
+ while (pending_fences) {
+ pending_fences = 0;
+ bool signaled_fences = false;
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ switch (fence->state) {
+ case ANV_FENCE_STATE_RESET:
+ /* This fence hasn't been submitted yet, we'll catch it the next
+ * time around. Yes, this may mean we dead-loop but, short of
+ * lots of locking and a condition variable, there's not much that
+ * we can do about that.
+ */
+ pending_fences++;
+ continue;
+
+ case ANV_FENCE_STATE_SIGNALED:
+ /* This fence is not pending. If waitAll isn't set, we can return
+ * early. Otherwise, we have to keep going.
+ */
+ if (!waitAll)
+ return VK_SUCCESS;
+ continue;
+
+ case ANV_FENCE_STATE_SUBMITTED:
+ /* These are the fences we really care about. Go ahead and wait
+ * on it until we hit a timeout.
+ */
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
+ if (ret == -1 && errno == ETIME) {
+ return VK_TIMEOUT;
+ } else if (ret == -1) {
+ /* We don't know the real error. */
+ return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
+ } else {
+ fence->state = ANV_FENCE_STATE_SIGNALED;
+ signaled_fences = true;
+ if (!waitAll)
+ return VK_SUCCESS;
+ continue;
+ }
+ }
+ }
- /* FIXME: handle !waitAll */
+ if (pending_fences && !signaled_fences) {
+ /* If we've hit this then someone decided to vkWaitForFences before
+ * they've actually submitted any of them to a queue. This is a
+ * fairly pessimal case, so it's ok to lock here and use a standard
+ * pthreads condition variable.
+ */
+ pthread_mutex_lock(&device->mutex);
+
+ /* It's possible that some of the fences have changed state since the
+ * last time we checked. Now that we have the lock, check for
+ * pending fences again and don't wait if it's changed.
+ */
+ uint32_t now_pending_fences = 0;
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ if (fence->state == ANV_FENCE_STATE_RESET)
+ now_pending_fences++;
+ }
+ assert(now_pending_fences <= pending_fences);
+
+ if (now_pending_fences == pending_fences) {
+ struct timespec before;
+ clock_gettime(CLOCK_MONOTONIC, &before);
+
+ uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
+ uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
+ (timeout / NSEC_PER_SEC);
+ abs_nsec %= NSEC_PER_SEC;
+
+ /* Avoid roll-over in tv_sec on 32-bit systems if the user
+ * provided timeout is UINT64_MAX
+ */
+ struct timespec abstime;
+ abstime.tv_nsec = abs_nsec;
+ abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
+
+ ret = pthread_cond_timedwait(&device->queue_submit,
+ &device->mutex, &abstime);
+ assert(ret != EINVAL);
+
+ struct timespec after;
+ clock_gettime(CLOCK_MONOTONIC, &after);
+ uint64_t time_elapsed =
+ ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
+ ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
+
+ if (time_elapsed >= timeout) {
+ pthread_mutex_unlock(&device->mutex);
+ return VK_TIMEOUT;
+ }
+
+ timeout -= time_elapsed;
+ }
- for (uint32_t i = 0; i < fenceCount; i++) {
- ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
- int ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
- if (ret == -1 && errno == ETIME) {
- return VK_TIMEOUT;
- } else if (ret == -1) {
- /* We don't know the real error. */
- return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
- "gem wait failed: %m");
+ pthread_mutex_unlock(&device->mutex);
}
}
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_event, event, _event);
+ if (!event)
+ return;
+
anv_state_pool_free(&device->dynamic_state_pool, event->state);
}
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
- buffer = anv_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
+ buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (buffer == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
- anv_free2(&device->alloc, pAllocator, buffer);
+ if (!buffer)
+ return;
+
+ vk_free2(&device->alloc, pAllocator, buffer);
}
void
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
- anv_free2(&device->alloc, pAllocator, sampler);
+ if (!sampler)
+ return;
+
+ vk_free2(&device->alloc, pAllocator, sampler);
}
VkResult anv_CreateFramebuffer(
size_t size = sizeof(*framebuffer) +
sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
- framebuffer = anv_alloc2(&device->alloc, pAllocator, size, 8,
+ framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (framebuffer == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
- anv_free2(&device->alloc, pAllocator, fb);
-}
-
-void vkCmdDbgMarkerBegin(
- VkCommandBuffer commandBuffer,
- const char* pMarker)
- __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerEnd(
- VkCommandBuffer commandBuffer)
- __attribute__ ((visibility ("default")));
-
-void vkCmdDbgMarkerBegin(
- VkCommandBuffer commandBuffer,
- const char* pMarker)
-{
-}
+ if (!fb)
+ return;
-void vkCmdDbgMarkerEnd(
- VkCommandBuffer commandBuffer)
-{
+ vk_free2(&device->alloc, pAllocator, fb);
}