From: Jason Ekstrand Date: Mon, 2 Dec 2019 22:03:56 +0000 (-0600) Subject: anv: Add allocator support for client-visible addresses X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a8e59b37081f169a83918de149dab7c31812577c;p=mesa.git anv: Add allocator support for client-visible addresses When a BO is flagged as having a client visible address, we put it in its own heap. We also support the client explicitly specifying an address in said heap. If an address collision happens, we return false from anv_vma_alloc which turns into a VK_ERROR_OUT_OF_DEVICE_MEMORY. Reviewed-by: Ivan Briano Reviewed-by: Lionel Landwerlin --- diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index eed6a194dd7..6e75904ab1c 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -531,6 +531,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, VkResult result = anv_device_import_bo_from_host_ptr(pool->device, map, size, bo_alloc_flags, + 0 /* client_address */, &new_bo); if (result != VK_SUCCESS) { munmap(map, size); @@ -1551,6 +1552,8 @@ anv_device_alloc_bo(struct anv_device *device, .size = size, .flags = bo_flags, .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL), + .has_client_visible_address = + (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, }; if (alloc_flags & ANV_BO_ALLOC_MAPPED) { @@ -1585,8 +1588,7 @@ anv_device_alloc_bo(struct anv_device *device, new_bo.has_fixed_address = true; new_bo.offset = explicit_address; } else { - assert(explicit_address == 0); - if (!anv_vma_alloc(device, &new_bo)) { + if (!anv_vma_alloc(device, &new_bo, explicit_address)) { if (new_bo.map) anv_gem_munmap(new_bo.map, size); anv_gem_close(device, new_bo.gem_handle); @@ -1613,6 +1615,7 @@ VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device, void *host_ptr, uint32_t size, enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, struct anv_bo **bo_out) { assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED | @@ -1643,6 +1646,24 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device, VK_ERROR_INVALID_EXTERNAL_HANDLE, "same host pointer imported two different ways"); } + + if (bo->has_client_visible_address != + ((alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0)) { + pthread_mutex_unlock(&cache->mutex); + return vk_errorf(device->instance, NULL, + VK_ERROR_INVALID_EXTERNAL_HANDLE, + "The same BO was imported with and without buffer " + "device address"); + } + + if (client_address && client_address != gen_48b_address(bo->offset)) { + pthread_mutex_unlock(&cache->mutex); + return vk_errorf(device->instance, NULL, + VK_ERROR_INVALID_EXTERNAL_HANDLE, + "The same BO was imported at two different " + "addresses"); + } + __sync_fetch_and_add(&bo->refcount, 1); } else { struct anv_bo new_bo = { @@ -1654,9 +1675,12 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device, .flags = bo_flags, .is_external = true, .from_host_ptr = true, + .has_client_visible_address = + (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, }; - if (!anv_vma_alloc(device, &new_bo)) { + assert(client_address == gen_48b_address(client_address)); + if (!anv_vma_alloc(device, &new_bo, client_address)) { anv_gem_close(device, new_bo.gem_handle); pthread_mutex_unlock(&cache->mutex); return vk_errorf(device->instance, NULL, @@ -1677,6 +1701,7 @@ VkResult anv_device_import_bo(struct anv_device *device, int fd, enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, struct anv_bo **bo_out) { assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED | @@ -1739,6 +1764,23 @@ anv_device_import_bo(struct anv_device *device, "The same BO was imported on two different heaps"); } + if (bo->has_client_visible_address != + ((alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0)) { + pthread_mutex_unlock(&cache->mutex); + return vk_errorf(device->instance, NULL, + VK_ERROR_INVALID_EXTERNAL_HANDLE, + "The same BO was imported with and without buffer " + "device address"); + } + + if (client_address && client_address != gen_48b_address(bo->offset)) { + pthread_mutex_unlock(&cache->mutex); + return vk_errorf(device->instance, NULL, + VK_ERROR_INVALID_EXTERNAL_HANDLE, + "The same BO was imported at two different " + "addresses"); + } + bo->flags = new_flags; __sync_fetch_and_add(&bo->refcount, 1); @@ -1757,9 +1799,12 @@ anv_device_import_bo(struct anv_device *device, .size = size, .flags = bo_flags, .is_external = true, + .has_client_visible_address = + (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, }; - if (!anv_vma_alloc(device, &new_bo)) { + assert(client_address == gen_48b_address(client_address)); + if (!anv_vma_alloc(device, &new_bo, client_address)) { anv_gem_close(device, new_bo.gem_handle); pthread_mutex_unlock(&cache->mutex); return vk_errorf(device->instance, NULL, diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c index 1ca39d16c08..6a411a9b798 100644 --- a/src/intel/vulkan/anv_android.c +++ b/src/intel/vulkan/anv_android.c @@ -307,7 +307,9 @@ anv_import_ahw_memory(VkDevice device_h, if (dma_buf < 0) return VK_ERROR_INVALID_EXTERNAL_HANDLE; - VkResult result = anv_device_import_bo(device, dma_buf, 0, &mem->bo); + VkResult result = anv_device_import_bo(device, dma_buf, 0, + 0 /* client_address */, + &mem->bo); assert(VK_SUCCESS); /* "If the vkAllocateMemory command succeeds, the implementation must @@ -468,6 +470,7 @@ anv_image_from_gralloc(VkDevice device_h, result = anv_device_import_bo(device, dma_buf, ANV_BO_ALLOC_IMPLICIT_SYNC | ANV_BO_ALLOC_IMPLICIT_WRITE, + 0 /* client_address */, &bo); if (result != VK_SUCCESS) { return vk_errorf(device->instance, device, result, diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index d300350b789..f3366dedd5c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -2500,6 +2500,9 @@ VkResult anv_CreateDevice( util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE); + util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS, + CLIENT_VISIBLE_HEAP_SIZE); + /* Leave the last 4GiB out of the high vma range, so that no state * base address + size can overflow 48 bits. For more information see * the comment about Wa32bitGeneralStateOffset in anv_allocator.c @@ -2690,6 +2693,7 @@ VkResult anv_CreateDevice( fail_vmas: if (physical_device->use_softpin) { util_vma_heap_finish(&device->vma_hi); + util_vma_heap_finish(&device->vma_cva); util_vma_heap_finish(&device->vma_lo); } fail_queue: @@ -2754,6 +2758,7 @@ void anv_DestroyDevice( if (physical_device->use_softpin) { util_vma_heap_finish(&device->vma_hi); + util_vma_heap_finish(&device->vma_cva); util_vma_heap_finish(&device->vma_lo); } @@ -2955,15 +2960,38 @@ VkResult anv_DeviceWaitIdle( } bool -anv_vma_alloc(struct anv_device *device, struct anv_bo *bo) +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo, + uint64_t client_address) { - if (!(bo->flags & EXEC_OBJECT_PINNED)) + if (!(bo->flags & EXEC_OBJECT_PINNED)) { + assert(!(bo->has_client_visible_address)); return true; + } pthread_mutex_lock(&device->vma_mutex); bo->offset = 0; + if (bo->has_client_visible_address) { + assert(bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); + if (client_address) { + if (util_vma_heap_alloc_addr(&device->vma_cva, + client_address, bo->size)) { + bo->offset = gen_canonical_address(client_address); + } + } else { + uint64_t addr = util_vma_heap_alloc(&device->vma_cva, bo->size, 4096); + if (addr) { + bo->offset = gen_canonical_address(addr); + assert(addr == gen_48b_address(bo->offset)); + } + } + /* We don't want to fall back to other heaps */ + goto done; + } + + assert(client_address == 0); + if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) { uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096); if (addr) { @@ -2980,6 +3008,7 @@ anv_vma_alloc(struct anv_device *device, struct anv_bo *bo) } } +done: pthread_mutex_unlock(&device->vma_mutex); return bo->offset != 0; @@ -2998,6 +3027,9 @@ anv_vma_free(struct anv_device *device, struct anv_bo *bo) if (addr_48b >= LOW_HEAP_MIN_ADDRESS && addr_48b <= LOW_HEAP_MAX_ADDRESS) { util_vma_heap_free(&device->vma_lo, addr_48b, bo->size); + } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS && + addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) { + util_vma_heap_free(&device->vma_cva, addr_48b, bo->size); } else { assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS); util_vma_heap_free(&device->vma_hi, addr_48b, bo->size); @@ -3117,7 +3149,7 @@ VkResult anv_AllocateMemory( VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); result = anv_device_import_bo(device, fd_info->fd, alloc_flags, - &mem->bo); + 0 /* client_address */, &mem->bo); if (result != VK_SUCCESS) goto fail; @@ -3173,6 +3205,7 @@ VkResult anv_AllocateMemory( host_ptr_info->pHostPointer, pAllocateInfo->allocationSize, alloc_flags, + 0 /* client_address */, &mem->bo); if (result != VK_SUCCESS) diff --git a/src/intel/vulkan/anv_intel.c b/src/intel/vulkan/anv_intel.c index 1b6fd32b00d..d73bc3d3768 100644 --- a/src/intel/vulkan/anv_intel.c +++ b/src/intel/vulkan/anv_intel.c @@ -74,6 +74,7 @@ VkResult anv_CreateDmaBufImageINTEL( result = anv_device_import_bo(device, pCreateInfo->fd, ANV_BO_ALLOC_IMPLICIT_SYNC, + 0 /* address */, &mem->bo); if (result != VK_SUCCESS) goto fail_import; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3e168f2b66d..81e11c8d72d 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -127,7 +127,9 @@ struct gen_perf_config; #define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */ #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL -#define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ +#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */ +#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS 0x0002bfffffffULL +#define HIGH_HEAP_MIN_ADDRESS 0x0002c0000000ULL /* 11 GiB */ #define LOW_HEAP_SIZE \ (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1) @@ -139,6 +141,8 @@ struct gen_perf_config; (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1) #define INSTRUCTION_STATE_POOL_SIZE \ (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1) +#define CLIENT_VISIBLE_HEAP_SIZE \ + (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1) /* Allowing different clear colors requires us to perform a depth resolve at * the end of certain render passes. This is because while slow clears store @@ -662,6 +666,9 @@ struct anv_bo { /** True if this BO wraps a host pointer */ bool from_host_ptr:1; + + /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */ + bool has_client_visible_address:1; }; static inline struct anv_bo * @@ -1208,6 +1215,7 @@ struct anv_device { pthread_mutex_t vma_mutex; struct util_vma_heap vma_lo; + struct util_vma_heap vma_cva; struct util_vma_heap vma_hi; /** List of all anv_device_memory objects */ @@ -1348,6 +1356,9 @@ enum anv_bo_alloc_flags { * This is equivalent to EXEC_OBJECT_WRITE. */ ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7), + + /** Has an address which is visible to the client */ + ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8), }; VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size, @@ -1357,9 +1368,11 @@ VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size, VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device, void *host_ptr, uint32_t size, enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, struct anv_bo **bo_out); VkResult anv_device_import_bo(struct anv_device *device, int fd, enum anv_bo_alloc_flags alloc_flags, + uint64_t client_address, struct anv_bo **bo); VkResult anv_device_export_bo(struct anv_device *device, struct anv_bo *bo, int *fd_out); @@ -1433,7 +1446,8 @@ int anv_gem_syncobj_wait(struct anv_device *device, uint32_t *handles, uint32_t num_handles, int64_t abs_timeout_ns, bool wait_all); -bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo); +bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo, + uint64_t client_address); void anv_vma_free(struct anv_device *device, struct anv_bo *bo); struct anv_reloc_list { diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index 86010dbba0f..791975d1fcf 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -1911,6 +1911,7 @@ VkResult anv_ImportSemaphoreFdKHR( VkResult result = anv_device_import_bo(device, fd, ANV_BO_ALLOC_EXTERNAL | ANV_BO_ALLOC_IMPLICIT_SYNC, + 0 /* client_address */, &new_impl.bo); if (result != VK_SUCCESS) return result;