anv: Add allocator support for client-visible addresses
author Jason Ekstrand <jason@jlekstrand.net>
Mon, 2 Dec 2019 22:03:56 +0000 (16:03 -0600)
committer Jason Ekstrand <jason@jlekstrand.net>
Thu, 5 Dec 2019 16:59:10 +0000 (10:59 -0600)
When a BO is flagged as having a client-visible address, we put it in
its own heap.  We also support the client explicitly specifying an
address in said heap.  If an address collision happens, we return false
from anv_vma_alloc, which turns into a VK_ERROR_OUT_OF_DEVICE_MEMORY.

Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
src/intel/vulkan/anv_allocator.c
src/intel/vulkan/anv_android.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_intel.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/anv_queue.c

index eed6a194dd74f30d610a2c3cc1497f63d079c385..6e75904ab1ca64e6a4d1454b2369242dc5dde6b1 100644 (file)
@@ -531,6 +531,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
       VkResult result = anv_device_import_bo_from_host_ptr(pool->device,
                                                            map, size,
                                                            bo_alloc_flags,
+                                                           0 /* client_address */,
                                                            &new_bo);
       if (result != VK_SUCCESS) {
          munmap(map, size);
@@ -1551,6 +1552,8 @@ anv_device_alloc_bo(struct anv_device *device,
       .size = size,
       .flags = bo_flags,
       .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL),
+      .has_client_visible_address =
+         (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
    };
 
    if (alloc_flags & ANV_BO_ALLOC_MAPPED) {
@@ -1585,8 +1588,7 @@ anv_device_alloc_bo(struct anv_device *device,
       new_bo.has_fixed_address = true;
       new_bo.offset = explicit_address;
    } else {
-      assert(explicit_address == 0);
-      if (!anv_vma_alloc(device, &new_bo)) {
+      if (!anv_vma_alloc(device, &new_bo, explicit_address)) {
          if (new_bo.map)
             anv_gem_munmap(new_bo.map, size);
          anv_gem_close(device, new_bo.gem_handle);
@@ -1613,6 +1615,7 @@ VkResult
 anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                    void *host_ptr, uint32_t size,
                                    enum anv_bo_alloc_flags alloc_flags,
+                                   uint64_t client_address,
                                    struct anv_bo **bo_out)
 {
    assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED |
@@ -1643,6 +1646,24 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device,
                           VK_ERROR_INVALID_EXTERNAL_HANDLE,
                           "same host pointer imported two different ways");
       }
+
+      if (bo->has_client_visible_address !=
+          ((alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0)) {
+         pthread_mutex_unlock(&cache->mutex);
+         return vk_errorf(device->instance, NULL,
+                          VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                          "The same BO was imported with and without buffer "
+                          "device address");
+      }
+
+      if (client_address && client_address != gen_48b_address(bo->offset)) {
+         pthread_mutex_unlock(&cache->mutex);
+         return vk_errorf(device->instance, NULL,
+                          VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                          "The same BO was imported at two different "
+                          "addresses");
+      }
+
       __sync_fetch_and_add(&bo->refcount, 1);
    } else {
       struct anv_bo new_bo = {
@@ -1654,9 +1675,12 @@ anv_device_import_bo_from_host_ptr(struct anv_device *device,
          .flags = bo_flags,
          .is_external = true,
          .from_host_ptr = true,
+         .has_client_visible_address =
+            (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
       };
 
-      if (!anv_vma_alloc(device, &new_bo)) {
+      assert(client_address == gen_48b_address(client_address));
+      if (!anv_vma_alloc(device, &new_bo, client_address)) {
          anv_gem_close(device, new_bo.gem_handle);
          pthread_mutex_unlock(&cache->mutex);
          return vk_errorf(device->instance, NULL,
@@ -1677,6 +1701,7 @@ VkResult
 anv_device_import_bo(struct anv_device *device,
                      int fd,
                      enum anv_bo_alloc_flags alloc_flags,
+                     uint64_t client_address,
                      struct anv_bo **bo_out)
 {
    assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED |
@@ -1739,6 +1764,23 @@ anv_device_import_bo(struct anv_device *device,
                           "The same BO was imported on two different heaps");
       }
 
+      if (bo->has_client_visible_address !=
+          ((alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0)) {
+         pthread_mutex_unlock(&cache->mutex);
+         return vk_errorf(device->instance, NULL,
+                          VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                          "The same BO was imported with and without buffer "
+                          "device address");
+      }
+
+      if (client_address && client_address != gen_48b_address(bo->offset)) {
+         pthread_mutex_unlock(&cache->mutex);
+         return vk_errorf(device->instance, NULL,
+                          VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                          "The same BO was imported at two different "
+                          "addresses");
+      }
+
       bo->flags = new_flags;
 
       __sync_fetch_and_add(&bo->refcount, 1);
@@ -1757,9 +1799,12 @@ anv_device_import_bo(struct anv_device *device,
          .size = size,
          .flags = bo_flags,
          .is_external = true,
+         .has_client_visible_address =
+            (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
       };
 
-      if (!anv_vma_alloc(device, &new_bo)) {
+      assert(client_address == gen_48b_address(client_address));
+      if (!anv_vma_alloc(device, &new_bo, client_address)) {
          anv_gem_close(device, new_bo.gem_handle);
          pthread_mutex_unlock(&cache->mutex);
          return vk_errorf(device->instance, NULL,
index 1ca39d16c08fbebe2fc6697103682d1ebe6ae3da..6a411a9b798b4612df19ffead6ac8f482cf28391 100644 (file)
@@ -307,7 +307,9 @@ anv_import_ahw_memory(VkDevice device_h,
    if (dma_buf < 0)
       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
 
-   VkResult result = anv_device_import_bo(device, dma_buf, 0, &mem->bo);
+   VkResult result = anv_device_import_bo(device, dma_buf, 0,
+                                          0 /* client_address */,
+                                          &mem->bo);
    assert(VK_SUCCESS);
 
    /* "If the vkAllocateMemory command succeeds, the implementation must
@@ -468,6 +470,7 @@ anv_image_from_gralloc(VkDevice device_h,
    result = anv_device_import_bo(device, dma_buf,
                                  ANV_BO_ALLOC_IMPLICIT_SYNC |
                                  ANV_BO_ALLOC_IMPLICIT_WRITE,
+                                 0 /* client_address */,
                                  &bo);
    if (result != VK_SUCCESS) {
       return vk_errorf(device->instance, device, result,
index d300350b78940b456eb05f1a0313cf1909e0eb50..f3366dedd5ca67ce597681610c259237d5d9e633 100644 (file)
@@ -2500,6 +2500,9 @@ VkResult anv_CreateDevice(
       util_vma_heap_init(&device->vma_lo,
                          LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
 
+      util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
+                         CLIENT_VISIBLE_HEAP_SIZE);
+
       /* Leave the last 4GiB out of the high vma range, so that no state
        * base address + size can overflow 48 bits. For more information see
        * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
@@ -2690,6 +2693,7 @@ VkResult anv_CreateDevice(
  fail_vmas:
    if (physical_device->use_softpin) {
       util_vma_heap_finish(&device->vma_hi);
+      util_vma_heap_finish(&device->vma_cva);
       util_vma_heap_finish(&device->vma_lo);
    }
  fail_queue:
@@ -2754,6 +2758,7 @@ void anv_DestroyDevice(
 
    if (physical_device->use_softpin) {
       util_vma_heap_finish(&device->vma_hi);
+      util_vma_heap_finish(&device->vma_cva);
       util_vma_heap_finish(&device->vma_lo);
    }
 
@@ -2955,15 +2960,38 @@ VkResult anv_DeviceWaitIdle(
 }
 
 bool
-anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
+anv_vma_alloc(struct anv_device *device, struct anv_bo *bo,
+              uint64_t client_address)
 {
-   if (!(bo->flags & EXEC_OBJECT_PINNED))
+   if (!(bo->flags & EXEC_OBJECT_PINNED)) {
+      assert(!(bo->has_client_visible_address));
       return true;
+   }
 
    pthread_mutex_lock(&device->vma_mutex);
 
    bo->offset = 0;
 
+   if (bo->has_client_visible_address) {
+      assert(bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS);
+      if (client_address) {
+         if (util_vma_heap_alloc_addr(&device->vma_cva,
+                                      client_address, bo->size)) {
+            bo->offset = gen_canonical_address(client_address);
+         }
+      } else {
+         uint64_t addr = util_vma_heap_alloc(&device->vma_cva, bo->size, 4096);
+         if (addr) {
+            bo->offset = gen_canonical_address(addr);
+            assert(addr == gen_48b_address(bo->offset));
+         }
+      }
+      /* We don't want to fall back to other heaps */
+      goto done;
+   }
+
+   assert(client_address == 0);
+
    if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) {
       uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096);
       if (addr) {
@@ -2980,6 +3008,7 @@ anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
       }
    }
 
+done:
    pthread_mutex_unlock(&device->vma_mutex);
 
    return bo->offset != 0;
@@ -2998,6 +3027,9 @@ anv_vma_free(struct anv_device *device, struct anv_bo *bo)
    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
       util_vma_heap_free(&device->vma_lo, addr_48b, bo->size);
+   } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
+              addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
+      util_vma_heap_free(&device->vma_cva, addr_48b, bo->size);
    } else {
       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
       util_vma_heap_free(&device->vma_hi, addr_48b, bo->size);
@@ -3117,7 +3149,7 @@ VkResult anv_AllocateMemory(
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
 
       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
-                                    &mem->bo);
+                                    0 /* client_address */, &mem->bo);
       if (result != VK_SUCCESS)
          goto fail;
 
@@ -3173,6 +3205,7 @@ VkResult anv_AllocateMemory(
                                                   host_ptr_info->pHostPointer,
                                                   pAllocateInfo->allocationSize,
                                                   alloc_flags,
+                                                  0 /* client_address */,
                                                   &mem->bo);
 
       if (result != VK_SUCCESS)
index 1b6fd32b00dee0106b07fe69b80b872ee6262ae4..d73bc3d37685cdc642e4df98a33025d23339d2c1 100644 (file)
@@ -74,6 +74,7 @@ VkResult anv_CreateDmaBufImageINTEL(
 
    result = anv_device_import_bo(device, pCreateInfo->fd,
                                  ANV_BO_ALLOC_IMPLICIT_SYNC,
+                                 0 /* address */,
                                  &mem->bo);
    if (result != VK_SUCCESS)
       goto fail_import;
index 3e168f2b66d513e1af326c9fe42f43718b311300..81e11c8d72d0c8c3ae52ac7060143aaf51d9129f 100644 (file)
@@ -127,7 +127,9 @@ struct gen_perf_config;
 #define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
-#define HIGH_HEAP_MIN_ADDRESS              0x0001c0000000ULL /* 7 GiB */
+#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
+#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
+#define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */
 
 #define LOW_HEAP_SIZE               \
    (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
@@ -139,6 +141,8 @@ struct gen_perf_config;
    (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
 #define INSTRUCTION_STATE_POOL_SIZE \
    (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
+#define CLIENT_VISIBLE_HEAP_SIZE               \
+   (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
 
 /* Allowing different clear colors requires us to perform a depth resolve at
  * the end of certain render passes. This is because while slow clears store
@@ -662,6 +666,9 @@ struct anv_bo {
 
    /** True if this BO wraps a host pointer */
    bool from_host_ptr:1;
+
+   /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
+   bool has_client_visible_address:1;
 };
 
 static inline struct anv_bo *
@@ -1208,6 +1215,7 @@ struct anv_device {
 
     pthread_mutex_t                             vma_mutex;
     struct util_vma_heap                        vma_lo;
+    struct util_vma_heap                        vma_cva;
     struct util_vma_heap                        vma_hi;
 
     /** List of all anv_device_memory objects */
@@ -1348,6 +1356,9 @@ enum anv_bo_alloc_flags {
     * This is equivalent to EXEC_OBJECT_WRITE.
     */
    ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
+
+   /** Has an address which is visible to the client */
+   ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
 };
 
 VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
@@ -1357,9 +1368,11 @@ VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                             void *host_ptr, uint32_t size,
                                             enum anv_bo_alloc_flags alloc_flags,
+                                            uint64_t client_address,
                                             struct anv_bo **bo_out);
 VkResult anv_device_import_bo(struct anv_device *device, int fd,
                               enum anv_bo_alloc_flags alloc_flags,
+                              uint64_t client_address,
                               struct anv_bo **bo);
 VkResult anv_device_export_bo(struct anv_device *device,
                               struct anv_bo *bo, int *fd_out);
@@ -1433,7 +1446,8 @@ int anv_gem_syncobj_wait(struct anv_device *device,
                          uint32_t *handles, uint32_t num_handles,
                          int64_t abs_timeout_ns, bool wait_all);
 
-bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo);
+bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo,
+                   uint64_t client_address);
 void anv_vma_free(struct anv_device *device, struct anv_bo *bo);
 
 struct anv_reloc_list {
index 86010dbba0fe2b19f403d2c5c8869959a0ead2ac..791975d1fcf2011a30876af35823ca008a977e80 100644 (file)
@@ -1911,6 +1911,7 @@ VkResult anv_ImportSemaphoreFdKHR(
          VkResult result = anv_device_import_bo(device, fd,
                                                 ANV_BO_ALLOC_EXTERNAL |
                                                 ANV_BO_ALLOC_IMPLICIT_SYNC,
+                                                0 /* client_address */,
                                                 &new_impl.bo);
          if (result != VK_SUCCESS)
             return result;