From: Jason Ekstrand Date: Fri, 25 Oct 2019 23:18:52 +0000 (-0500) Subject: anv: Choose BO flags internally in anv_block_pool X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5c664dff7593959d43f89d3fadcc148303134675;p=mesa.git anv: Choose BO flags internally in anv_block_pool All block pools are allocated with the same flags. There's no good reason why it needs to be configurable. Reviewed-by: Lionel Landwerlin --- diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 055bb882988..c0df6646981 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -416,20 +416,25 @@ VkResult anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint64_t start_address, - uint32_t initial_size, - uint64_t bo_flags) + uint32_t initial_size) { VkResult result; pool->device = device; - pool->bo_flags = bo_flags; + pool->use_softpin = device->instance->physicalDevice.use_softpin; pool->nbos = 0; pool->size = 0; pool->center_bo_offset = 0; pool->start_address = gen_canonical_address(start_address); pool->map = NULL; - if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) { + if (pool->use_softpin) { + /* This pointer will always point to the first BO in the list */ + anv_bo_init(&pool->bos[0], 0, 0); + pool->bo = &pool->bos[0]; + + pool->fd = -1; + } else { /* Just make it 2GB up-front. The Linux kernel won't actually back it * with pages until we either map and fault on one of them or we use * userptr and send a chunk of it off to the GPU. @@ -441,12 +446,6 @@ anv_block_pool_init(struct anv_block_pool *pool, anv_bo_init(&pool->wrapper_bo, 0, 0); pool->wrapper_bo.is_wrapper = true; pool->bo = &pool->wrapper_bo; - } else { - /* This pointer will always point to the first BO in the list */ - anv_bo_init(&pool->bos[0], 0, 0); - pool->bo = &pool->bos[0]; - - pool->fd = -1; } if (!u_vector_init(&pool->mmap_cleanups, @@ -475,7 +474,7 @@ anv_block_pool_init(struct anv_block_pool *pool, fail_mmap_cleanups: u_vector_finish(&pool->mmap_cleanups); fail_fd: - if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + if (pool->fd >= 0) close(pool->fd); return result; @@ -495,7 +494,7 @@ anv_block_pool_finish(struct anv_block_pool *pool) munmap(cleanup->map, cleanup->size); u_vector_finish(&pool->mmap_cleanups); - if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) + if (pool->fd >= 0) close(pool->fd); } @@ -503,19 +502,55 @@ static VkResult anv_block_pool_expand_range(struct anv_block_pool *pool, uint32_t center_bo_offset, uint32_t size) { - const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED); - /* Assert that we only ever grow the pool */ assert(center_bo_offset >= pool->back_state.end); assert(size - center_bo_offset >= pool->state.end); /* Assert that we don't go outside the bounds of the memfd */ assert(center_bo_offset <= BLOCK_POOL_MEMFD_CENTER); - assert(use_softpin || + assert(pool->use_softpin || size - center_bo_offset <= BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER); - if (use_softpin) { + /* For state pool BOs we have to be a bit careful about where we place them + * in the GTT. There are two documented workarounds for state base address + * placement : Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset + * which state that those two base addresses do not support 48-bit + * addresses and need to be placed in the bottom 32-bit range. + * Unfortunately, this is not quite accurate. + * + * The real problem is that we always set the size of our state pools in + * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most + * likely significantly smaller. We do this because we do not no at the + * time we emit STATE_BASE_ADDRESS whether or not we will need to expand + * the pool during command buffer building so we don't actually have a + * valid final size. If the address + size, as seen by STATE_BASE_ADDRESS + * overflows 48 bits, the GPU appears to treat all accesses to the buffer + * as being out of bounds and returns zero. For dynamic state, this + * usually just leads to rendering corruptions, but shaders that are all + * zero hang the GPU immediately. + * + * The easiest solution to do is exactly what the bogus workarounds say to + * do: restrict these buffers to 32-bit addresses. We could also pin the + * BO to some particular location of our choosing, but that's significantly + * more work than just not setting a flag. So, we explicitly DO NOT set + * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the + * hard work for us. When using softpin, we're in control and the fixed + * addresses we choose are fine for base addresses. + */ + uint64_t bo_flags = 0; + if (pool->use_softpin) { + bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS | + EXEC_OBJECT_PINNED; + } + + if (pool->device->instance->physicalDevice.has_exec_async) + bo_flags |= EXEC_OBJECT_ASYNC; + + if (pool->device->instance->physicalDevice.has_exec_capture) + bo_flags |= EXEC_OBJECT_CAPTURE; + + if (pool->use_softpin) { uint32_t newbo_size = size - pool->size; uint32_t gem_handle = anv_gem_create(pool->device, newbo_size); void *map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0); @@ -542,7 +577,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, struct anv_bo *bo = &pool->bos[pool->nbos++]; anv_bo_init(bo, gem_handle, newbo_size); bo->offset = pool->start_address + pool->size; - bo->flags = pool->bo_flags; + bo->flags = bo_flags; bo->map = map; } else { /* Just leak the old map until we destroy the pool. We can't munmap it @@ -581,7 +616,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, struct anv_bo *bo = &pool->bos[pool->nbos++]; anv_bo_init(bo, gem_handle, size); - bo->flags = pool->bo_flags; + bo->flags = bo_flags; pool->wrapper_bo.map = bo; } @@ -600,7 +635,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool, void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset) { - if (pool->bo_flags & EXEC_OBJECT_PINNED) { + if (pool->use_softpin) { struct anv_bo *bo = NULL; int32_t bo_offset = 0; anv_block_pool_foreach_bo(iter_bo, pool) { @@ -766,7 +801,7 @@ anv_block_pool_alloc_new(struct anv_block_pool *pool, if (state.next + block_size <= state.end) { return state.next; } else if (state.next <= state.end) { - if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) { + if (pool->use_softpin && state.next < state.end) { /* We need to grow the block pool, but still have some leftover * space that can't be used by that particular allocation. So we * add that as a "padding", and return it. @@ -843,13 +878,11 @@ VkResult anv_state_pool_init(struct anv_state_pool *pool, struct anv_device *device, uint64_t start_address, - uint32_t block_size, - uint64_t bo_flags) + uint32_t block_size) { VkResult result = anv_block_pool_init(&pool->block_pool, device, start_address, - block_size * 16, - bo_flags); + block_size * 16); if (result != VK_SUCCESS) return result; diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index a07d68e9dba..81cee9fe496 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -2618,60 +2618,24 @@ VkResult anv_CreateDevice( if (result != VK_SUCCESS) goto fail_batch_bo_pool; - /* For state pool BOs we have to be a bit careful about where we place them - * in the GTT. There are two documented workarounds for state base address - * placement : Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset - * which state that those two base addresses do not support 48-bit - * addresses and need to be placed in the bottom 32-bit range. - * Unfortunately, this is not quite accurate. - * - * The real problem is that we always set the size of our state pools in - * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most - * likely significantly smaller. We do this because we do not no at the - * time we emit STATE_BASE_ADDRESS whether or not we will need to expand - * the pool during command buffer building so we don't actually have a - * valid final size. If the address + size, as seen by STATE_BASE_ADDRESS - * overflows 48 bits, the GPU appears to treat all accesses to the buffer - * as being out of bounds and returns zero. For dynamic state, this - * usually just leads to rendering corruptions, but shaders that are all - * zero hang the GPU immediately. - * - * The easiest solution to do is exactly what the bogus workarounds say to - * do: restrict these buffers to 32-bit addresses. We could also pin the - * BO to some particular location of our choosing, but that's significantly - * more work than just not setting a flag. So, we explicitly DO NOT set - * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the - * hard work for us. - */ - if (!physical_device->use_softpin) - bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; - result = anv_state_pool_init(&device->dynamic_state_pool, device, - DYNAMIC_STATE_POOL_MIN_ADDRESS, - 16384, - bo_flags); + DYNAMIC_STATE_POOL_MIN_ADDRESS, 16384); if (result != VK_SUCCESS) goto fail_bo_cache; result = anv_state_pool_init(&device->instruction_state_pool, device, - INSTRUCTION_STATE_POOL_MIN_ADDRESS, - 16384, - bo_flags); + INSTRUCTION_STATE_POOL_MIN_ADDRESS, 16384); if (result != VK_SUCCESS) goto fail_dynamic_state_pool; result = anv_state_pool_init(&device->surface_state_pool, device, - SURFACE_STATE_POOL_MIN_ADDRESS, - 4096, - bo_flags); + SURFACE_STATE_POOL_MIN_ADDRESS, 4096); if (result != VK_SUCCESS) goto fail_instruction_state_pool; if (physical_device->use_softpin) { result = anv_state_pool_init(&device->binding_table_pool, device, - BINDING_TABLE_POOL_MIN_ADDRESS, - 4096, - bo_flags); + BINDING_TABLE_POOL_MIN_ADDRESS, 4096); if (result != VK_SUCCESS) goto fail_surface_state_pool; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 39c107ecd7a..081606deda2 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -707,8 +707,7 @@ struct anv_block_state { struct anv_block_pool { struct anv_device *device; - - uint64_t bo_flags; + bool use_softpin; /* Wrapper BO for use in relocation lists. This BO is simply a wrapper * around the actual BO so that we grow the pool after the wrapper BO has @@ -846,8 +845,7 @@ struct anv_state_stream { VkResult anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint64_t start_address, - uint32_t initial_size, - uint64_t bo_flags); + uint32_t initial_size); void anv_block_pool_finish(struct anv_block_pool *pool); int32_t anv_block_pool_alloc(struct anv_block_pool *pool, uint32_t block_size, uint32_t *padding); @@ -858,8 +856,7 @@ void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset); VkResult anv_state_pool_init(struct anv_state_pool *pool, struct anv_device *device, uint64_t start_address, - uint32_t block_size, - uint64_t bo_flags); + uint32_t block_size); void anv_state_pool_finish(struct anv_state_pool *pool); struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool, uint32_t state_size, uint32_t alignment); diff --git a/src/intel/vulkan/tests/block_pool_grow_first.c b/src/intel/vulkan/tests/block_pool_grow_first.c index aea12b29de8..8c221f7300b 100644 --- a/src/intel/vulkan/tests/block_pool_grow_first.c +++ b/src/intel/vulkan/tests/block_pool_grow_first.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) const uint32_t block_size = 16 * 1024; const uint32_t initial_size = block_size / 2; - anv_block_pool_init(&pool, &device, 4096, initial_size, EXEC_OBJECT_PINNED); + anv_block_pool_init(&pool, &device, 4096, initial_size); assert(pool.size == initial_size); uint32_t padding; diff --git a/src/intel/vulkan/tests/block_pool_no_free.c b/src/intel/vulkan/tests/block_pool_no_free.c index f6aa47688af..49fda232dbd 100644 --- a/src/intel/vulkan/tests/block_pool_no_free.c +++ b/src/intel/vulkan/tests/block_pool_no_free.c @@ -118,7 +118,7 @@ static void run_test() struct anv_block_pool pool; pthread_mutex_init(&device.mutex, NULL); - anv_block_pool_init(&pool, &device, 4096, 4096, 0); + anv_block_pool_init(&pool, &device, 4096, 4096); for (unsigned i = 0; i < NUM_THREADS; i++) { jobs[i].pool = &pool; diff --git a/src/intel/vulkan/tests/state_pool.c b/src/intel/vulkan/tests/state_pool.c index 34743c601c3..d79583e70bb 100644 --- a/src/intel/vulkan/tests/state_pool.c +++ b/src/intel/vulkan/tests/state_pool.c @@ -45,7 +45,7 @@ int main(int argc, char **argv) pthread_mutex_init(&device.mutex, NULL); for (unsigned i = 0; i < NUM_RUNS; i++) { - anv_state_pool_init(&state_pool, &device, 4096, 256, 0); + anv_state_pool_init(&state_pool, &device, 4096, 256); /* Grab one so a zero offset is impossible */ anv_state_pool_alloc(&state_pool, 16, 16); diff --git a/src/intel/vulkan/tests/state_pool_free_list_only.c b/src/intel/vulkan/tests/state_pool_free_list_only.c index 9f1eb866e4e..e34deba2f1a 100644 --- a/src/intel/vulkan/tests/state_pool_free_list_only.c +++ b/src/intel/vulkan/tests/state_pool_free_list_only.c @@ -42,7 +42,7 @@ int main(int argc, char **argv) struct anv_state_pool state_pool; pthread_mutex_init(&device.mutex, NULL); - anv_state_pool_init(&state_pool, &device, 4096, 4096, 0); + anv_state_pool_init(&state_pool, &device, 4096, 4096); /* Grab one so a zero offset is impossible */ anv_state_pool_alloc(&state_pool, 16, 16); diff --git a/src/intel/vulkan/tests/state_pool_no_free.c b/src/intel/vulkan/tests/state_pool_no_free.c index cb6591266e8..ae4a3c16bdc 100644 --- a/src/intel/vulkan/tests/state_pool_no_free.c +++ b/src/intel/vulkan/tests/state_pool_no_free.c @@ -63,7 +63,7 @@ static void run_test() struct anv_state_pool state_pool; pthread_mutex_init(&device.mutex, NULL); - anv_state_pool_init(&state_pool, &device, 4096, 64, 0); + anv_state_pool_init(&state_pool, &device, 4096, 64); pthread_barrier_init(&barrier, NULL, NUM_THREADS); diff --git a/src/intel/vulkan/tests/state_pool_padding.c b/src/intel/vulkan/tests/state_pool_padding.c index 44509cde07a..b51300a39ff 100644 --- a/src/intel/vulkan/tests/state_pool_padding.c +++ b/src/intel/vulkan/tests/state_pool_padding.c @@ -33,7 +33,7 @@ int main(int argc, char **argv) }; struct anv_state_pool state_pool; - anv_state_pool_init(&state_pool, &device, 4096, 4096, EXEC_OBJECT_PINNED); + anv_state_pool_init(&state_pool, &device, 4096, 4096); /* Get the size of the underlying block_pool */ struct anv_block_pool *bp = &state_pool.block_pool;