From e3f1a08c5641a6b100371a3c17f0b484e1da9b68 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 28 Jan 2020 17:42:31 -0600 Subject: [PATCH] anv/block_pool: Ensure allocations have contiguous maps Because softpin block pools are made up of a set of BOs with different maps, it was possible for a single state to end up straddling blocks. To fix this, we pass a contiguous size to anv_block_pool_grow and it ensures that the next allocation in the pool will have at least that size. We also add an assert in anv_block_pool_map to ensure we always get contiguous maps. Prior to the changes to anv_block_pool_grow, the unit tests failed with this assert. With this patch, the tests pass. This was causing problems on Gen12 where we allocate the pages for the AUX table from the dynamic state pool. The first chunk, which gets allocated very early in the pool's history, is 1MB which was enough that it was getting multiple BOs. This caused the gen_aux_map code to write outside of the map and overwrite the instruction state pool buffer which led to GPU hangs. Fixes: 731c4adcf9b "anv/allocator: Add support for non-userptr" Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_allocator.c | 32 ++++++++++++++----- src/intel/vulkan/anv_private.h | 3 +- src/intel/vulkan/genX_blorp_exec.c | 2 +- .../vulkan/tests/block_pool_grow_first.c | 2 +- src/intel/vulkan/tests/block_pool_no_free.c | 8 ++--- 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 636d9114d0d..7965008e060 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -565,7 +565,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, * rather than the start of the block pool BO map. 
*/ void* -anv_block_pool_map(struct anv_block_pool *pool, int32_t offset) +anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t size) { if (pool->use_softpin) { struct anv_bo *bo = NULL; @@ -579,6 +579,7 @@ anv_block_pool_map(struct anv_block_pool *pool, int32_t offset) } assert(bo != NULL); assert(offset >= bo_offset); + assert((offset - bo_offset) + size <= bo->size); return bo->map + (offset - bo_offset); } else { @@ -611,7 +612,8 @@ anv_block_pool_map(struct anv_block_pool *pool, int32_t offset) * the pool and a 4K CPU page. */ static uint32_t -anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) +anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state, + uint32_t contiguous_size) { VkResult result = VK_SUCCESS; @@ -642,12 +644,24 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state) */ assert(old_size > 0); + const uint32_t old_back = pool->center_bo_offset; + const uint32_t old_front = old_size - pool->center_bo_offset; + /* The back_used and front_used may actually be smaller than the actual * requirement because they are based on the next pointers which are * updated prior to calling this function. */ - uint32_t back_required = MAX2(back_used, pool->center_bo_offset); - uint32_t front_required = MAX2(front_used, old_size - pool->center_bo_offset); + uint32_t back_required = MAX2(back_used, old_back); + uint32_t front_required = MAX2(front_used, old_front); + + if (pool->use_softpin) { + /* With softpin, the pool is made up of a bunch of buffers with separate + * maps. Make sure we have enough contiguous space that we can get a + * properly contiguous map for the next chunk. 
+ */ + assert(old_back == 0); + front_required = MAX2(front_required, old_front + contiguous_size); + } if (back_used * 2 <= back_required && front_used * 2 <= front_required) { /* If we're in this case then this isn't the firsta allocation and we @@ -756,7 +770,7 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool, */ new.next = state.next + block_size; do { - new.end = anv_block_pool_grow(pool, pool_state); + new.end = anv_block_pool_grow(pool, pool_state, block_size); } while (new.end < new.next); old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64); @@ -929,7 +943,9 @@ anv_state_pool_return_blocks(struct anv_state_pool *pool, st_idx + i); state_i->alloc_size = block_size; state_i->offset = chunk_offset + block_size * i; - state_i->map = anv_block_pool_map(&pool->block_pool, state_i->offset); + state_i->map = anv_block_pool_map(&pool->block_pool, + state_i->offset, + state_i->alloc_size); } uint32_t block_bucket = anv_state_pool_get_bucket(block_size); @@ -1070,7 +1086,7 @@ anv_state_pool_alloc_no_vg(struct anv_state_pool *pool, state = anv_state_table_get(&pool->table, idx); state->offset = offset; state->alloc_size = alloc_size; - state->map = anv_block_pool_map(&pool->block_pool, offset); + state->map = anv_block_pool_map(&pool->block_pool, offset, alloc_size); if (padding > 0) { uint32_t return_offset = offset - padding; @@ -1114,7 +1130,7 @@ anv_state_pool_alloc_back(struct anv_state_pool *pool) state = anv_state_table_get(&pool->table, idx); state->offset = offset; state->alloc_size = alloc_size; - state->map = anv_block_pool_map(&pool->block_pool, state->offset); + state->map = anv_block_pool_map(&pool->block_pool, offset, alloc_size); done: VG(VALGRIND_MEMPOOL_ALLOC(pool, state->map, state->alloc_size)); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index d7edcc89927..4c8f58e5f34 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -906,7 +906,8 @@ int32_t 
anv_block_pool_alloc(struct anv_block_pool *pool, uint32_t block_size, uint32_t *padding); int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool, uint32_t block_size); -void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset); +void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t +size); VkResult anv_state_pool_init(struct anv_state_pool *pool, struct anv_device *device, diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index 1a706b8c065..7a83eda14e8 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -66,7 +66,7 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, anv_batch_set_error(&cmd_buffer->batch, result); void *dest = anv_block_pool_map( - &cmd_buffer->device->surface_state_pool.block_pool, ss_offset); + &cmd_buffer->device->surface_state_pool.block_pool, ss_offset, 8); write_reloc(cmd_buffer->device, dest, address_u64, false); } diff --git a/src/intel/vulkan/tests/block_pool_grow_first.c b/src/intel/vulkan/tests/block_pool_grow_first.c index dde7e87261d..0c300a91059 100644 --- a/src/intel/vulkan/tests/block_pool_grow_first.c +++ b/src/intel/vulkan/tests/block_pool_grow_first.c @@ -60,7 +60,7 @@ int main(int argc, char **argv) assert(offset == initial_size); /* Use the memory to ensure it is valid. 
*/ - void *map = anv_block_pool_map(&pool, offset); + void *map = anv_block_pool_map(&pool, offset, block_size); memset(map, 22, block_size); anv_block_pool_finish(&pool); diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c index af3c58b78d9..b4b82693ae8 100644 --- a/src/intel/vulkan/tests/block_pool_no_free.c +++ b/src/intel/vulkan/tests/block_pool_no_free.c @@ -49,13 +49,13 @@ static void *alloc_blocks(void *_job) for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { block = anv_block_pool_alloc(job->pool, block_size, NULL); - data = anv_block_pool_map(job->pool, block); + data = anv_block_pool_map(job->pool, block, block_size); *data = block; assert(block >= 0); job->blocks[i] = block; block = anv_block_pool_alloc_back(job->pool, block_size); - data = anv_block_pool_map(job->pool, block); + data = anv_block_pool_map(job->pool, block, block_size); *data = block; assert(block < 0); job->back_blocks[i] = -block; @@ -63,11 +63,11 @@ static void *alloc_blocks(void *_job) for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) { block = job->blocks[i]; - data = anv_block_pool_map(job->pool, block); + data = anv_block_pool_map(job->pool, block, block_size); assert(*data == block); block = -job->back_blocks[i]; - data = anv_block_pool_map(job->pool, block); + data = anv_block_pool_map(job->pool, block, block_size); assert(*data == block); } -- 2.30.2