anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t initial_size,
- uint64_t bo_flags)
+ uint32_t initial_size)
{
VkResult result;
pool->device = device;
- pool->bo_flags = bo_flags;
+ pool->use_softpin = device->instance->physicalDevice.use_softpin;
pool->nbos = 0;
pool->size = 0;
pool->center_bo_offset = 0;
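+ /* 48-bit graphics addresses must be in canonical form, i.e. bits 63:48
+ * sign-extended from bit 47, so canonicalize the start address up front.
+ */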
pool->start_address = gen_canonical_address(start_address);
pool->map = NULL;
- if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) {
+ if (pool->use_softpin) {
+ /* This pointer will always point to the first BO in the list */
+ anv_bo_init(&pool->bos[0], 0, 0);
+ pool->bo = &pool->bos[0];
+
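+ /* Softpin pools are built from individually GEM-allocated and mapped BOs,
+ * so there is no backing memfd.
+ */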
+ pool->fd = -1;
+ } else {
/* Just make it 2GB up-front. The Linux kernel won't actually back it
* with pages until we either map and fault on one of them or we use
* userptr and send a chunk of it off to the GPU.
anv_bo_init(&pool->wrapper_bo, 0, 0);
pool->wrapper_bo.is_wrapper = true;
pool->bo = &pool->wrapper_bo;
- } else {
- /* This pointer will always point to the first BO in the list */
- anv_bo_init(&pool->bos[0], 0, 0);
- pool->bo = &pool->bos[0];
-
- pool->fd = -1;
}
if (!u_vector_init(&pool->mmap_cleanups,
fail_mmap_cleanups:
u_vector_finish(&pool->mmap_cleanups);
fail_fd:
- if (!(pool->bo_flags & EXEC_OBJECT_PINNED))
+ if (pool->fd >= 0)
close(pool->fd);
return result;
munmap(cleanup->map, cleanup->size);
u_vector_finish(&pool->mmap_cleanups);
- if (!(pool->bo_flags & EXEC_OBJECT_PINNED))
+ if (pool->fd >= 0)
close(pool->fd);
}
anv_block_pool_expand_range(struct anv_block_pool *pool,
uint32_t center_bo_offset, uint32_t size)
{
- const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED);
-
/* Assert that we only ever grow the pool */
assert(center_bo_offset >= pool->back_state.end);
assert(size - center_bo_offset >= pool->state.end);
/* Assert that we don't go outside the bounds of the memfd */
assert(center_bo_offset <= BLOCK_POOL_MEMFD_CENTER);
- assert(use_softpin ||
+ assert(pool->use_softpin ||
size - center_bo_offset <=
BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER);
- if (use_softpin) {
+ /* For state pool BOs we have to be a bit careful about where we place them
+ * in the GTT. There are two documented workarounds for state base address
+ * placement: Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
+ * which state that those two base addresses do not support 48-bit
+ * addresses and need to be placed in the bottom 32-bit range.
+ * Unfortunately, this is not quite accurate.
+ *
+ * The real problem is that we always set the size of our state pools in
+ * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
+ * likely significantly smaller. We do this because we do not know at the
+ * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
+ * the pool during command buffer building so we don't actually have a
+ * valid final size. If the address + size, as seen by STATE_BASE_ADDRESS
+ * overflows 48 bits, the GPU appears to treat all accesses to the buffer
+ * as being out of bounds and returns zero. For dynamic state, this
+ * usually just leads to rendering corruption, but shaders that are all
+ * zero hang the GPU immediately.
+ *
+ * The easiest solution is to do exactly what the bogus workarounds say:
+ * restrict these buffers to 32-bit addresses. We could also pin the
+ * BO to some particular location of our choosing, but that's significantly
+ * more work than just not setting a flag. So, we explicitly DO NOT set
+ * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
+ * hard work for us. When using softpin, we're in control and the fixed
+ * addresses we choose are fine for base addresses.
+ */
+ uint64_t bo_flags = 0;
+ if (pool->use_softpin) {
+ bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+ EXEC_OBJECT_PINNED;
+ }
+
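+ /* EXEC_OBJECT_ASYNC opts these BOs out of the kernel's implicit
+ * synchronization.
+ */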
+ if (pool->device->instance->physicalDevice.has_exec_async)
+ bo_flags |= EXEC_OBJECT_ASYNC;
+
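+ /* EXEC_OBJECT_CAPTURE asks the kernel to include the BO contents in any
+ * GPU error state dump, which makes hangs easier to debug.
+ */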
+ if (pool->device->instance->physicalDevice.has_exec_capture)
+ bo_flags |= EXEC_OBJECT_CAPTURE;
+
+ if (pool->use_softpin) {
uint32_t newbo_size = size - pool->size;
uint32_t gem_handle = anv_gem_create(pool->device, newbo_size);
void *map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
struct anv_bo *bo = &pool->bos[pool->nbos++];
anv_bo_init(bo, gem_handle, newbo_size);
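+ /* Pin the new chunk at the first address past the current end of the pool
+ * so the pool stays contiguous in the GPU address space even though each
+ * chunk gets its own CPU mapping.
+ */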
bo->offset = pool->start_address + pool->size;
- bo->flags = pool->bo_flags;
+ bo->flags = bo_flags;
bo->map = map;
} else {
/* Just leak the old map until we destroy the pool. We can't munmap it
struct anv_bo *bo = &pool->bos[pool->nbos++];
anv_bo_init(bo, gem_handle, size);
- bo->flags = pool->bo_flags;
+ bo->flags = bo_flags;
pool->wrapper_bo.map = bo;
}
void*
anv_block_pool_map(struct anv_block_pool *pool, int32_t offset)
{
- if (pool->bo_flags & EXEC_OBJECT_PINNED) {
+ if (pool->use_softpin) {
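+ /* Each softpin chunk has its own CPU mapping, so walk the BO list to find
+ * the chunk that contains this offset.
+ */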
struct anv_bo *bo = NULL;
int32_t bo_offset = 0;
anv_block_pool_foreach_bo(iter_bo, pool) {
if (state.next + block_size <= state.end) {
return state.next;
} else if (state.next <= state.end) {
- if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) {
+ if (pool->use_softpin && state.next < state.end) {
/* We need to grow the block pool, but still have some leftover
* space that can't be used by that particular allocation. So we
* add that as a "padding", and return it.
anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t block_size,
- uint64_t bo_flags)
+ uint32_t block_size)
{
VkResult result = anv_block_pool_init(&pool->block_pool, device,
start_address,
- block_size * 16,
- bo_flags);
+ block_size * 16);
if (result != VK_SUCCESS)
return result;
if (result != VK_SUCCESS)
goto fail_batch_bo_pool;
- /* For state pool BOs we have to be a bit careful about where we place them
- * in the GTT. There are two documented workarounds for state base address
- * placement : Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
- * which state that those two base addresses do not support 48-bit
- * addresses and need to be placed in the bottom 32-bit range.
- * Unfortunately, this is not quite accurate.
- *
- * The real problem is that we always set the size of our state pools in
- * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
- * likely significantly smaller. We do this because we do not no at the
- * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
- * the pool during command buffer building so we don't actually have a
- * valid final size. If the address + size, as seen by STATE_BASE_ADDRESS
- * overflows 48 bits, the GPU appears to treat all accesses to the buffer
- * as being out of bounds and returns zero. For dynamic state, this
- * usually just leads to rendering corruptions, but shaders that are all
- * zero hang the GPU immediately.
- *
- * The easiest solution to do is exactly what the bogus workarounds say to
- * do: restrict these buffers to 32-bit addresses. We could also pin the
- * BO to some particular location of our choosing, but that's significantly
- * more work than just not setting a flag. So, we explicitly DO NOT set
- * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
- * hard work for us.
- */
- if (!physical_device->use_softpin)
- bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
result = anv_state_pool_init(&device->dynamic_state_pool, device,
- DYNAMIC_STATE_POOL_MIN_ADDRESS,
- 16384,
- bo_flags);
+ DYNAMIC_STATE_POOL_MIN_ADDRESS, 16384);
if (result != VK_SUCCESS)
goto fail_bo_cache;
result = anv_state_pool_init(&device->instruction_state_pool, device,
- INSTRUCTION_STATE_POOL_MIN_ADDRESS,
- 16384,
- bo_flags);
+ INSTRUCTION_STATE_POOL_MIN_ADDRESS, 16384);
if (result != VK_SUCCESS)
goto fail_dynamic_state_pool;
result = anv_state_pool_init(&device->surface_state_pool, device,
- SURFACE_STATE_POOL_MIN_ADDRESS,
- 4096,
- bo_flags);
+ SURFACE_STATE_POOL_MIN_ADDRESS, 4096);
if (result != VK_SUCCESS)
goto fail_instruction_state_pool;
if (physical_device->use_softpin) {
result = anv_state_pool_init(&device->binding_table_pool, device,
- BINDING_TABLE_POOL_MIN_ADDRESS,
- 4096,
- bo_flags);
+ BINDING_TABLE_POOL_MIN_ADDRESS, 4096);
if (result != VK_SUCCESS)
goto fail_surface_state_pool;
}
struct anv_block_pool {
struct anv_device *device;
-
- uint64_t bo_flags;
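+ /* True when pool BOs are placed at fixed (softpinned) GPU addresses rather
+ * than going through the relocation/memfd path.
+ */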
+ bool use_softpin;
/* Wrapper BO for use in relocation lists. This BO is simply a wrapper
* around the actual BO so that we grow the pool after the wrapper BO has
VkResult anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t initial_size,
- uint64_t bo_flags);
+ uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
uint32_t block_size, uint32_t *padding);
VkResult anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t block_size,
- uint64_t bo_flags);
+ uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
uint32_t state_size, uint32_t alignment);
const uint32_t block_size = 16 * 1024;
const uint32_t initial_size = block_size / 2;
- anv_block_pool_init(&pool, &device, 4096, initial_size, EXEC_OBJECT_PINNED);
+ anv_block_pool_init(&pool, &device, 4096, initial_size);
assert(pool.size == initial_size);
uint32_t padding;
struct anv_block_pool pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_block_pool_init(&pool, &device, 4096, 4096, 0);
+ anv_block_pool_init(&pool, &device, 4096, 4096);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &pool;
pthread_mutex_init(&device.mutex, NULL);
for (unsigned i = 0; i < NUM_RUNS; i++) {
- anv_state_pool_init(&state_pool, &device, 4096, 256, 0);
+ anv_state_pool_init(&state_pool, &device, 4096, 256);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
struct anv_state_pool state_pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_state_pool_init(&state_pool, &device, 4096, 4096, 0);
+ anv_state_pool_init(&state_pool, &device, 4096, 4096);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
struct anv_state_pool state_pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_state_pool_init(&state_pool, &device, 4096, 64, 0);
+ anv_state_pool_init(&state_pool, &device, 4096, 64);
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
};
struct anv_state_pool state_pool;
- anv_state_pool_init(&state_pool, &device, 4096, 4096, EXEC_OBJECT_PINNED);
+ anv_state_pool_init(&state_pool, &device, 4096, 4096);
/* Get the size of the underlying block_pool */
struct anv_block_pool *bp = &state_pool.block_pool;