From 3178e583c86a8324b629981e9d90f684f5a6cbc0 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Fri, 25 Oct 2019 16:10:11 -0500
Subject: [PATCH] anv: Rework anv_block_pool_expand_range

The growing algorithms for the softpin case and the userptr version are
almost entirely different.  Having this weird join doesn't make the code
more comprehensible.  This rework does a few things:

 1. Move the comment about 48-bit addresses to anv_device_init where we
    actually unset the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag.

 2. Separate the paths in anv_block_pool_expand_range so it's easier to
    see what happens in the two different cases.

 3. Use the anv_block_pool::bos array for storing all allocated BOs in
    both paths rather than using the cleanup list in both paths.  This
    lets us make the cleanups array only used for mmaps of the memfd for
    the userptr case.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 src/intel/vulkan/anv_allocator.c | 154 ++++++++++---------------------
 src/intel/vulkan/anv_device.c    |  25 +++++
 2 files changed, 76 insertions(+), 103 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 80c2c6f3368..af313176196 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -109,11 +109,8 @@
 struct anv_mmap_cleanup {
    void *map;
    size_t size;
-   uint32_t gem_handle;
 };
 
-#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0})
-
 static inline uint32_t
 ilog2_round_up(uint32_t value)
 {
@@ -487,20 +484,17 @@ anv_block_pool_init(struct anv_block_pool *pool,
 void
 anv_block_pool_finish(struct anv_block_pool *pool)
 {
-   struct anv_mmap_cleanup *cleanup;
-   const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED);
-
-   u_vector_foreach(cleanup, &pool->mmap_cleanups) {
-      if (use_softpin)
-         anv_gem_munmap(cleanup->map, cleanup->size);
-      else
-         munmap(cleanup->map, cleanup->size);
-
-      if (cleanup->gem_handle)
-         anv_gem_close(pool->device, cleanup->gem_handle);
+   anv_block_pool_foreach_bo(bo, pool) {
+      if (bo->map)
+         anv_gem_munmap(bo->map, bo->size);
+      anv_gem_close(pool->device, bo->gem_handle);
    }
 
+   struct anv_mmap_cleanup *cleanup;
+   u_vector_foreach(cleanup, &pool->mmap_cleanups)
+      munmap(cleanup->map, cleanup->size);
    u_vector_finish(&pool->mmap_cleanups);
+
    if (!(pool->bo_flags & EXEC_OBJECT_PINNED))
       close(pool->fd);
 }
@@ -509,9 +503,6 @@ static VkResult
 anv_block_pool_expand_range(struct anv_block_pool *pool,
                             uint32_t center_bo_offset, uint32_t size)
 {
-   void *map;
-   uint32_t gem_handle;
-   struct anv_mmap_cleanup *cleanup;
    const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED);
 
    /* Assert that we only ever grow the pool */
@@ -524,22 +515,35 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
           size - center_bo_offset <=
           BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER);
 
-   cleanup = u_vector_add(&pool->mmap_cleanups);
-   if (!cleanup)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-   *cleanup = ANV_MMAP_CLEANUP_INIT;
-
-   uint32_t newbo_size = size - pool->size;
    if (use_softpin) {
-      gem_handle = anv_gem_create(pool->device, newbo_size);
-      map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
+      uint32_t newbo_size = size - pool->size;
+      uint32_t gem_handle = anv_gem_create(pool->device, newbo_size);
+      void *map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
       if (map == MAP_FAILED) {
          anv_gem_close(pool->device, gem_handle);
          return vk_errorf(pool->device->instance, pool->device,
                           VK_ERROR_MEMORY_MAP_FAILED, "gem mmap failed: %m");
       }
+
+      /* Regular objects are created I915_CACHING_CACHED on LLC platforms and
+       * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are
+       * always created as I915_CACHING_CACHED, which on non-LLC means
+       * snooped.
+       *
+       * On platforms that support softpin, we are not going to use userptr
+       * anymore, but we still want to rely on the snooped states. So make
+       * sure everything is set to I915_CACHING_CACHED.
+       */
+      if (!pool->device->info.has_llc)
+         anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_CACHED);
+
       assert(center_bo_offset == 0);
+
+      struct anv_bo *bo = &pool->bos[pool->nbos++];
+      anv_bo_init(bo, gem_handle, newbo_size);
+      bo->offset = pool->start_address + pool->size;
+      bo->flags = pool->bo_flags;
+      bo->map = map;
    } else {
       /* Just leak the old map until we destroy the pool.  We can't munmap it
        * without races or imposing locking on the block allocate fast path. On
@@ -547,97 +551,41 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
        * current map.  MAP_POPULATE seems like the right thing to do, but we
        * should try to get some numbers.
        */
-      map = mmap(NULL, size, PROT_READ | PROT_WRITE,
-                 MAP_SHARED | MAP_POPULATE, pool->fd,
-                 BLOCK_POOL_MEMFD_CENTER - center_bo_offset);
+      void *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                       MAP_SHARED | MAP_POPULATE, pool->fd,
+                       BLOCK_POOL_MEMFD_CENTER - center_bo_offset);
       if (map == MAP_FAILED)
          return vk_errorf(pool->device->instance, pool->device,
                           VK_ERROR_MEMORY_MAP_FAILED, "mmap failed: %m");
 
-      /* Now that we mapped the new memory, we can write the new
-       * center_bo_offset back into pool and update pool->map. */
-      pool->center_bo_offset = center_bo_offset;
-      pool->map = map + center_bo_offset;
-      gem_handle = anv_gem_userptr(pool->device, map, size);
+      uint32_t gem_handle = anv_gem_userptr(pool->device, map, size);
       if (gem_handle == 0) {
          munmap(map, size);
          return vk_errorf(pool->device->instance, pool->device,
                           VK_ERROR_TOO_MANY_OBJECTS, "userptr failed: %m");
       }
-   }
-
-   cleanup->map = map;
-   cleanup->size = use_softpin ? newbo_size : size;
-   cleanup->gem_handle = gem_handle;
 
-   /* Regular objects are created I915_CACHING_CACHED on LLC platforms and
-    * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are
-    * always created as I915_CACHING_CACHED, which on non-LLC means
-    * snooped.
-    *
-    * On platforms that support softpin, we are not going to use userptr
-    * anymore, but we still want to rely on the snooped states. So make sure
-    * everything is set to I915_CACHING_CACHED.
-    */
-   if (!pool->device->info.has_llc)
-      anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_CACHED);
-
-   /* For block pool BOs we have to be a bit careful about where we place them
-    * in the GTT.  There are two documented workarounds for state base address
-    * placement : Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
-    * which state that those two base addresses do not support 48-bit
-    * addresses and need to be placed in the bottom 32-bit range.
-    * Unfortunately, this is not quite accurate.
-    *
-    * The real problem is that we always set the size of our state pools in
-    * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
-    * likely significantly smaller.  We do this because we do not no at the
-    * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
-    * the pool during command buffer building so we don't actually have a
-    * valid final size.  If the address + size, as seen by STATE_BASE_ADDRESS
-    * overflows 48 bits, the GPU appears to treat all accesses to the buffer
-    * as being out of bounds and returns zero.  For dynamic state, this
-    * usually just leads to rendering corruptions, but shaders that are all
-    * zero hang the GPU immediately.
-    *
-    * The easiest solution to do is exactly what the bogus workarounds say to
-    * do: restrict these buffers to 32-bit addresses.  We could also pin the
-    * BO to some particular location of our choosing, but that's significantly
-    * more work than just not setting a flag.  So, we explicitly DO NOT set
-    * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
-    * hard work for us.
-    */
-   struct anv_bo *bo;
-   uint32_t bo_size;
-   uint64_t bo_offset;
+      struct anv_mmap_cleanup *cleanup = u_vector_add(&pool->mmap_cleanups);
+      if (!cleanup) {
+         munmap(map, size);
+         anv_gem_close(pool->device, gem_handle);
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+      cleanup->map = map;
+      cleanup->size = size;
 
-   assert(pool->nbos < ANV_MAX_BLOCK_POOL_BOS);
+      /* Now that we mapped the new memory, we can write the new
+       * center_bo_offset back into pool and update pool->map. */
+      pool->center_bo_offset = center_bo_offset;
+      pool->map = map + center_bo_offset;
 
-   if (use_softpin) {
-      /* With softpin, we add a new BO to the pool, and set its offset to right
-       * where the previous BO ends (the end of the pool).
-       */
-      bo = &pool->bos[pool->nbos++];
-      bo_size = newbo_size;
-      bo_offset = pool->start_address + pool->size;
-   } else {
-      /* Without softpin, we just need one BO, and we already have a pointer to
-       * it. Simply "allocate" it from our array if we didn't do it before.
-       * The offset doesn't matter since we are not pinning the BO anyway.
-       */
-      if (pool->nbos == 0) {
-         pool->wrapper_bo.map = &pool->bos[0];
-         pool->nbos++;
-      }
-      bo = pool->wrapper_bo.map;
-      bo_size = size;
-      bo_offset = 0;
+      struct anv_bo *bo = &pool->bos[pool->nbos++];
+      anv_bo_init(bo, gem_handle, size);
+      bo->flags = pool->bo_flags;
+      pool->wrapper_bo.map = bo;
    }
 
-   anv_bo_init(bo, gem_handle, bo_size);
-   bo->offset = bo_offset;
-   bo->flags = pool->bo_flags;
-   bo->map = map;
+   assert(pool->nbos < ANV_MAX_BLOCK_POOL_BOS);
    pool->size = size;
 
    return VK_SUCCESS;
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 996705f0988..65024de3ed9 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -2619,6 +2619,31 @@ VkResult anv_CreateDevice(
    if (result != VK_SUCCESS)
       goto fail_batch_bo_pool;
 
+   /* For state pool BOs we have to be a bit careful about where we place them
+    * in the GTT.  There are two documented workarounds for state base address
+    * placement : Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
+    * which state that those two base addresses do not support 48-bit
+    * addresses and need to be placed in the bottom 32-bit range.
+    * Unfortunately, this is not quite accurate.
+    *
+    * The real problem is that we always set the size of our state pools in
+    * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
+    * likely significantly smaller.  We do this because we do not know at the
+    * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
+    * the pool during command buffer building so we don't actually have a
+    * valid final size.  If the address + size, as seen by STATE_BASE_ADDRESS
+    * overflows 48 bits, the GPU appears to treat all accesses to the buffer
+    * as being out of bounds and returns zero.  For dynamic state, this
+    * usually just leads to rendering corruptions, but shaders that are all
+    * zero hang the GPU immediately.
+    *
+    * The easiest solution to do is exactly what the bogus workarounds say to
+    * do: restrict these buffers to 32-bit addresses.  We could also pin the
+    * BO to some particular location of our choosing, but that's significantly
+    * more work than just not setting a flag.  So, we explicitly DO NOT set
+    * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
+    * hard work for us.
+    */
    if (!physical_device->use_softpin)
       bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
-- 
2.30.2