anv/allocator: Drop the block_size field from block_pool
authorJason Ekstrand <jason.ekstrand@intel.com>
Wed, 26 Apr 2017 08:27:33 +0000 (01:27 -0700)
committerJason Ekstrand <jason.ekstrand@intel.com>
Fri, 5 May 2017 02:07:54 +0000 (19:07 -0700)
Since the state_stream is now pulling from a state_pool, the only thing
pulling directly off the block pool is the state pool, so we can just
move the block_size there.  The one exception is when we allocate
binding tables, but we can simply reference the state pool there as well.

The only functional change here is that we no longer grow the block pool
immediately upon creation, so no BO gets allocated until our first state
allocation.

Reviewed-by: Juan A. Suarez Romero <jasuarez@igalia.com>
src/intel/vulkan/anv_allocator.c
src/intel/vulkan/anv_batch_chain.c
src/intel/vulkan/anv_blorp.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/tests/block_pool_no_free.c
src/intel/vulkan/tests/state_pool.c
src/intel/vulkan/tests/state_pool_free_list_only.c
src/intel/vulkan/tests/state_pool_no_free.c

index 97bcb0170ba4e283c2aec55135203d34ced27523..cd300122395f49ed1d1f7c618be5236eb3d4c53e 100644 (file)
@@ -245,20 +245,19 @@ anv_ptr_free_list_push(void **list, void *elem)
    } while (old != current);
 }
 
-static uint32_t
-anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
+static VkResult
+anv_block_pool_expand_range(struct anv_block_pool *pool,
+                            uint32_t center_bo_offset, uint32_t size);
 
 VkResult
 anv_block_pool_init(struct anv_block_pool *pool,
-                    struct anv_device *device, uint32_t block_size)
+                    struct anv_device *device,
+                    uint32_t initial_size)
 {
    VkResult result;
 
-   assert(util_is_power_of_two(block_size));
-
    pool->device = device;
    anv_bo_init(&pool->bo, 0, 0);
-   pool->block_size = block_size;
    pool->free_list = ANV_FREE_LIST_EMPTY;
    pool->back_free_list = ANV_FREE_LIST_EMPTY;
 
@@ -287,11 +286,14 @@ anv_block_pool_init(struct anv_block_pool *pool,
    pool->back_state.next = 0;
    pool->back_state.end = 0;
 
-   /* Immediately grow the pool so we'll have a backing bo. */
-   pool->state.end = anv_block_pool_grow(pool, &pool->state);
+   result = anv_block_pool_expand_range(pool, 0, initial_size);
+   if (result != VK_SUCCESS)
+      goto fail_mmap_cleanups;
 
    return VK_SUCCESS;
 
+ fail_mmap_cleanups:
+   u_vector_finish(&pool->mmap_cleanups);
  fail_fd:
    close(pool->fd);
 
@@ -432,7 +434,8 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
  *     the pool and a 4K CPU page.
  */
 static uint32_t
-anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
+anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state,
+                    uint32_t block_size)
 {
    uint32_t size;
    VkResult result = VK_SUCCESS;
@@ -471,7 +474,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
 
    if (old_size == 0) {
       /* This is the first allocation */
-      size = MAX2(32 * pool->block_size, PAGE_SIZE);
+      size = MAX2(32 * block_size, PAGE_SIZE);
    } else {
       size = old_size * 2;
    }
@@ -500,7 +503,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
       center_bo_offset = ((uint64_t)size * back_used) / total_used;
 
       /* Align down to a multiple of both the block size and page size */
-      uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE);
+      uint32_t granularity = MAX2(block_size, PAGE_SIZE);
       assert(util_is_power_of_two(granularity));
       center_bo_offset &= ~(granularity - 1);
 
@@ -515,7 +518,7 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
          center_bo_offset = size - pool->state.end;
    }
 
-   assert(center_bo_offset % pool->block_size == 0);
+   assert(center_bo_offset % block_size == 0);
    assert(center_bo_offset % PAGE_SIZE == 0);
 
    result = anv_block_pool_expand_range(pool, center_bo_offset, size);
@@ -544,12 +547,15 @@ done:
 
 static uint32_t
 anv_block_pool_alloc_new(struct anv_block_pool *pool,
-                         struct anv_block_state *pool_state)
+                         struct anv_block_state *pool_state,
+                         uint32_t block_size)
 {
    struct anv_block_state state, old, new;
 
+   assert(util_is_power_of_two(block_size));
+
    while (1) {
-      state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
+      state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
       if (state.next < state.end) {
          assert(pool->map);
          return state.next;
@@ -558,9 +564,8 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
           * pool_state->next acts a mutex: threads who try to allocate now will
           * get block indexes above the current limit and hit futex_wait
           * below. */
-         new.next = state.next + pool->block_size;
-         new.end = anv_block_pool_grow(pool, pool_state);
-         assert(new.end >= new.next && new.end % pool->block_size == 0);
+         new.next = state.next + block_size;
+         new.end = anv_block_pool_grow(pool, pool_state, block_size);
          old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
          if (old.next != state.next)
             futex_wake(&pool_state->end, INT_MAX);
@@ -573,7 +578,8 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool,
 }
 
 int32_t
-anv_block_pool_alloc(struct anv_block_pool *pool)
+anv_block_pool_alloc(struct anv_block_pool *pool,
+                     uint32_t block_size)
 {
    int32_t offset;
 
@@ -584,7 +590,7 @@ anv_block_pool_alloc(struct anv_block_pool *pool)
       return offset;
    }
 
-   return anv_block_pool_alloc_new(pool, &pool->state);
+   return anv_block_pool_alloc_new(pool, &pool->state, block_size);
 }
 
 /* Allocates a block out of the back of the block pool.
@@ -597,7 +603,8 @@ anv_block_pool_alloc(struct anv_block_pool *pool)
  * gymnastics with the block pool's BO when doing relocations.
  */
 int32_t
-anv_block_pool_alloc_back(struct anv_block_pool *pool)
+anv_block_pool_alloc_back(struct anv_block_pool *pool,
+                          uint32_t block_size)
 {
    int32_t offset;
 
@@ -608,7 +615,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
       return offset;
    }
 
-   offset = anv_block_pool_alloc_new(pool, &pool->back_state);
+   offset = anv_block_pool_alloc_new(pool, &pool->back_state, block_size);
 
    /* The offset we get out of anv_block_pool_alloc_new() is actually the
     * number of bytes downwards from the middle to the end of the block.
@@ -616,7 +623,7 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool)
     * start of the block.
     */
    assert(offset >= 0);
-   return -(offset + pool->block_size);
+   return -(offset + block_size);
 }
 
 void
@@ -631,9 +638,12 @@ anv_block_pool_free(struct anv_block_pool *pool, int32_t offset)
 
 void
 anv_state_pool_init(struct anv_state_pool *pool,
-                    struct anv_block_pool *block_pool)
+                    struct anv_block_pool *block_pool,
+                    uint32_t block_size)
 {
    pool->block_pool = block_pool;
+   assert(util_is_power_of_two(block_size));
+   pool->block_size = block_size;
    for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
       pool->buckets[i].free_list = ANV_FREE_LIST_EMPTY;
       pool->buckets[i].block.next = 0;
@@ -651,7 +661,8 @@ anv_state_pool_finish(struct anv_state_pool *pool)
 static uint32_t
 anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
                                     struct anv_block_pool *block_pool,
-                                    uint32_t state_size)
+                                    uint32_t state_size,
+                                    uint32_t block_size)
 {
    struct anv_block_state block, old, new;
    uint32_t offset;
@@ -662,9 +673,9 @@ anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
    if (block.next < block.end) {
       return block.next;
    } else if (block.next == block.end) {
-      offset = anv_block_pool_alloc(block_pool);
+      offset = anv_block_pool_alloc(block_pool, block_size);
       new.next = offset + state_size;
-      new.end = offset + block_pool->block_size;
+      new.end = offset + block_size;
       old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
       if (old.next != block.next)
          futex_wake(&pool->block.end, INT_MAX);
@@ -697,7 +708,8 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
 
    state.offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
                                                       pool->block_pool,
-                                                      state.alloc_size);
+                                                      state.alloc_size,
+                                                      pool->block_size);
 
 done:
    state.map = pool->block_pool->map + state.offset;
index 0529f22b842dd8c085c85a53cfc3413b13bb82a4..79dbf266d21df5b1846b2b7ebb4bf86d92a9bad2 100644 (file)
@@ -623,12 +623,13 @@ anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
 {
    struct anv_block_pool *block_pool =
        &cmd_buffer->device->surface_state_block_pool;
+   struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
    int32_t *bt_block = u_vector_head(&cmd_buffer->bt_blocks);
    struct anv_state state;
 
    state.alloc_size = align_u32(entries * 4, 32);
 
-   if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
+   if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
       return (struct anv_state) { 0 };
 
    state.offset = cmd_buffer->bt_next;
@@ -663,6 +664,7 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_block_pool *block_pool =
        &cmd_buffer->device->surface_state_block_pool;
+   struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
 
    int32_t *offset = u_vector_add(&cmd_buffer->bt_blocks);
    if (offset == NULL) {
@@ -670,7 +672,7 @@ anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
    }
 
-   *offset = anv_block_pool_alloc_back(block_pool);
+   *offset = anv_block_pool_alloc_back(block_pool, state_pool->block_size);
    cmd_buffer->bt_next = 0;
 
    return VK_SUCCESS;
index d17b73dcc7e4d89b7085e498e602956009caa28d..e3e952060af3fd5769ecd169433c9111a1e26019 100644 (file)
@@ -686,7 +686,7 @@ void anv_CmdUpdateBuffer(
     * little data at the top to build its linked list.
     */
    const uint32_t max_update_size =
-      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
+      cmd_buffer->device->dynamic_state_pool.block_size - 64;
 
    assert(max_update_size < MAX_SURFACE_DIM * 4);
 
index 23dff906ac2b052da8b9e322946f450afc54a175..71967b6dbd249c716ae6b9415e8867cddd495848 100644 (file)
@@ -1110,28 +1110,31 @@ VkResult anv_CreateDevice(
       goto fail_batch_bo_pool;
 
    result = anv_block_pool_init(&device->dynamic_state_block_pool, device,
-                                16384);
+                                16384 * 16);
    if (result != VK_SUCCESS)
       goto fail_bo_cache;
 
    anv_state_pool_init(&device->dynamic_state_pool,
-                       &device->dynamic_state_block_pool);
+                       &device->dynamic_state_block_pool,
+                       16384);
 
    result = anv_block_pool_init(&device->instruction_block_pool, device,
-                                1024 * 1024);
+                                1024 * 1024 * 16);
    if (result != VK_SUCCESS)
       goto fail_dynamic_state_pool;
 
    anv_state_pool_init(&device->instruction_state_pool,
-                       &device->instruction_block_pool);
+                       &device->instruction_block_pool,
+                       1024 * 1024);
 
    result = anv_block_pool_init(&device->surface_state_block_pool, device,
-                                4096);
+                                4096 * 16);
    if (result != VK_SUCCESS)
       goto fail_instruction_state_pool;
 
    anv_state_pool_init(&device->surface_state_pool,
-                       &device->surface_state_block_pool);
+                       &device->surface_state_block_pool,
+                       4096);
 
    result = anv_bo_init_new(&device->workaround_bo, device, 1024);
    if (result != VK_SUCCESS)
index 7db110e5a32d5eef1408a87e7ec58e2afae0005d..81a026451a13f2b9d0e2a9fa96767f371c872ef6 100644 (file)
@@ -461,8 +461,6 @@ struct anv_block_pool {
     */
    struct u_vector mmap_cleanups;
 
-   uint32_t block_size;
-
    union anv_free_list free_list;
    struct anv_block_state state;
 
@@ -504,6 +502,10 @@ struct anv_fixed_size_state_pool {
 
 struct anv_state_pool {
    struct anv_block_pool *block_pool;
+
+   /* The size of blocks which will be allocated from the block pool */
+   uint32_t block_size;
+
    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
 };
 
@@ -555,13 +557,17 @@ anv_invalidate_range(void *start, size_t size)
 }
 
 VkResult anv_block_pool_init(struct anv_block_pool *pool,
-                             struct anv_device *device, uint32_t block_size);
+                             struct anv_device *device,
+                             uint32_t initial_size);
 void anv_block_pool_finish(struct anv_block_pool *pool);
-int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
-int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
+int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
+                             uint32_t block_size);
+int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
+                                  uint32_t block_size);
 void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset);
 void anv_state_pool_init(struct anv_state_pool *pool,
-                         struct anv_block_pool *block_pool);
+                         struct anv_block_pool *block_pool,
+                         uint32_t block_size);
 void anv_state_pool_finish(struct anv_state_pool *pool);
 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
                                       uint32_t state_size, uint32_t alignment);
index 0a61818e42cc1818ee8d53c626cfef3c0abf935d..ac2b7801f7fbd6fbdc51dbc9405f9a3cf257d665 100644 (file)
@@ -25,6 +25,7 @@
 
 #include "anv_private.h"
 
+#define BLOCK_SIZE 16
 #define NUM_THREADS 16
 #define BLOCKS_PER_THREAD 1024
 #define NUM_RUNS 64
@@ -44,13 +45,13 @@ static void *alloc_blocks(void *_job)
    int32_t block, *data;
 
    for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
-      block = anv_block_pool_alloc(job->pool);
+      block = anv_block_pool_alloc(job->pool, BLOCK_SIZE);
       data = job->pool->map + block;
       *data = block;
       assert(block >= 0);
       job->blocks[i] = block;
 
-      block = anv_block_pool_alloc_back(job->pool);
+      block = anv_block_pool_alloc_back(job->pool, BLOCK_SIZE);
       data = job->pool->map + block;
       *data = block;
       assert(block < 0);
@@ -114,7 +115,7 @@ static void run_test()
    struct anv_block_pool pool;
 
    pthread_mutex_init(&device.mutex, NULL);
-   anv_block_pool_init(&pool, &device, 16);
+   anv_block_pool_init(&pool, &device, 4096);
 
    for (unsigned i = 0; i < NUM_THREADS; i++) {
       jobs[i].pool = &pool;
index 90c9bdea514cebcceb3a1f7aa41ee2c51ea0e2e0..db3f3ec08a4d10e8acc04068178379c654614be3 100644 (file)
@@ -44,8 +44,8 @@ int main(int argc, char **argv)
    pthread_mutex_init(&device.mutex, NULL);
 
    for (unsigned i = 0; i < NUM_RUNS; i++) {
-      anv_block_pool_init(&block_pool, &device, 256);
-      anv_state_pool_init(&state_pool, &block_pool);
+      anv_block_pool_init(&block_pool, &device, 4096);
+      anv_state_pool_init(&state_pool, &block_pool, 256);
 
       /* Grab one so a zero offset is impossible */
       anv_state_pool_alloc(&state_pool, 16, 16);
index 868815cf933ee38c4268c6400b52ca9a29a34f21..93b71efd437f4c187c4ac8f6a81b29922022120a 100644 (file)
@@ -42,7 +42,7 @@ int main(int argc, char **argv)
 
    pthread_mutex_init(&device.mutex, NULL);
    anv_block_pool_init(&block_pool, &device, 4096);
-   anv_state_pool_init(&state_pool, &block_pool);
+   anv_state_pool_init(&state_pool, &block_pool, 4096);
 
    /* Grab one so a zero offset is impossible */
    anv_state_pool_alloc(&state_pool, 16, 16);
index 6e012e468343bd1efb18cde4317c23eb87cdff7b..c3c7c24a0a31456402ba7e38f9ae012f0bdde98d 100644 (file)
@@ -62,8 +62,8 @@ static void run_test()
    struct anv_state_pool state_pool;
 
    pthread_mutex_init(&device.mutex, NULL);
-   anv_block_pool_init(&block_pool, &device, 64);
-   anv_state_pool_init(&state_pool, &block_pool);
+   anv_block_pool_init(&block_pool, &device, 4096);
+   anv_state_pool_init(&state_pool, &block_pool, 64);
 
    pthread_barrier_init(&barrier, NULL, NUM_THREADS);