anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t initial_size,
- uint64_t bo_flags)
+ uint32_t initial_size)
{
VkResult result;
pool->device = device;
- pool->bo_flags = bo_flags;
+ pool->use_softpin = device->instance->physicalDevice.use_softpin;
pool->nbos = 0;
pool->size = 0;
pool->center_bo_offset = 0;
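+ /* 48-bit graphics addresses must be in canonical form, i.e. bits 63:48
+ * sign-extended from bit 47, so canonicalize the start address up front.
+ */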
pool->start_address = gen_canonical_address(start_address);
pool->map = NULL;
- if (!(pool->bo_flags & EXEC_OBJECT_PINNED)) {
+ if (pool->use_softpin) {
+ /* This pointer will always point to the first BO in the list */
+ anv_bo_init(&pool->bos[0], 0, 0);
+ pool->bo = &pool->bos[0];
+
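+ /* Softpin pools are built from individually GEM-allocated and mapped BOs,
+ * so there is no backing memfd.
+ */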
+ pool->fd = -1;
+ } else {
/* Just make it 2GB up-front. The Linux kernel won't actually back it
* with pages until we either map and fault on one of them or we use
* userptr and send a chunk of it off to the GPU.
anv_bo_init(&pool->wrapper_bo, 0, 0);
pool->wrapper_bo.is_wrapper = true;
pool->bo = &pool->wrapper_bo;
- } else {
- /* This pointer will always point to the first BO in the list */
- anv_bo_init(&pool->bos[0], 0, 0);
- pool->bo = &pool->bos[0];
-
- pool->fd = -1;
}
if (!u_vector_init(&pool->mmap_cleanups,
fail_mmap_cleanups:
u_vector_finish(&pool->mmap_cleanups);
fail_fd:
- if (!(pool->bo_flags & EXEC_OBJECT_PINNED))
+ if (pool->fd >= 0)
close(pool->fd);
return result;
munmap(cleanup->map, cleanup->size);
u_vector_finish(&pool->mmap_cleanups);
- if (!(pool->bo_flags & EXEC_OBJECT_PINNED))
+ if (pool->fd >= 0)
close(pool->fd);
}
anv_block_pool_expand_range(struct anv_block_pool *pool,
uint32_t center_bo_offset, uint32_t size)
{
- const bool use_softpin = !!(pool->bo_flags & EXEC_OBJECT_PINNED);
-
/* Assert that we only ever grow the pool */
assert(center_bo_offset >= pool->back_state.end);
assert(size - center_bo_offset >= pool->state.end);
/* Assert that we don't go outside the bounds of the memfd */
assert(center_bo_offset <= BLOCK_POOL_MEMFD_CENTER);
- assert(use_softpin ||
+ assert(pool->use_softpin ||
size - center_bo_offset <=
BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER);
- if (use_softpin) {
+ /* For state pool BOs we have to be a bit careful about where we place them
+ * in the GTT. There are two documented workarounds for state base address
+ * placement: Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
+ * which state that those two base addresses do not support 48-bit
+ * addresses and need to be placed in the bottom 32-bit range.
+ * Unfortunately, this is not quite accurate.
+ *
+ * The real problem is that we always set the size of our state pools in
+ * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
+ * likely significantly smaller. We do this because we do not know at the
+ * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
+ * the pool during command buffer building so we don't actually have a
+ * valid final size. If the address + size, as seen by STATE_BASE_ADDRESS
+ * overflows 48 bits, the GPU appears to treat all accesses to the buffer
+ * as being out of bounds and returns zero. For dynamic state, this
+ * usually just leads to rendering corruption, but shaders that are all
+ * zero hang the GPU immediately.
+ *
+ * The easiest solution is to do exactly what the bogus workarounds say:
+ * restrict these buffers to 32-bit addresses. We could also pin the
+ * BO to some particular location of our choosing, but that's significantly
+ * more work than just not setting a flag. So, we explicitly DO NOT set
+ * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
+ * hard work for us. When using softpin, we're in control and the fixed
+ * addresses we choose are fine for base addresses.
+ */
+ uint64_t bo_flags = 0;
+ if (pool->use_softpin) {
+ bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+ EXEC_OBJECT_PINNED;
+ }
+
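+ /* EXEC_OBJECT_ASYNC opts these BOs out of the kernel's implicit
+ * synchronization.
+ */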
+ if (pool->device->instance->physicalDevice.has_exec_async)
+ bo_flags |= EXEC_OBJECT_ASYNC;
+
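+ /* EXEC_OBJECT_CAPTURE asks the kernel to include the BO contents in any
+ * GPU error state dump, which makes hangs easier to debug.
+ */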
+ if (pool->device->instance->physicalDevice.has_exec_capture)
+ bo_flags |= EXEC_OBJECT_CAPTURE;
+
+ if (pool->use_softpin) {
uint32_t newbo_size = size - pool->size;
uint32_t gem_handle = anv_gem_create(pool->device, newbo_size);
void *map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
struct anv_bo *bo = &pool->bos[pool->nbos++];
anv_bo_init(bo, gem_handle, newbo_size);
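+ /* Pin the new chunk at the first address past the current end of the pool
+ * so the pool stays contiguous in the GPU address space even though each
+ * chunk gets its own CPU mapping.
+ */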
bo->offset = pool->start_address + pool->size;
- bo->flags = pool->bo_flags;
+ bo->flags = bo_flags;
bo->map = map;
} else {
/* Just leak the old map until we destroy the pool. We can't munmap it
struct anv_bo *bo = &pool->bos[pool->nbos++];
anv_bo_init(bo, gem_handle, size);
- bo->flags = pool->bo_flags;
+ bo->flags = bo_flags;
pool->wrapper_bo.map = bo;
}
void*
anv_block_pool_map(struct anv_block_pool *pool, int32_t offset)
{
- if (pool->bo_flags & EXEC_OBJECT_PINNED) {
+ if (pool->use_softpin) {
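+ /* Each softpin chunk has its own CPU mapping, so walk the BO list to find
+ * the chunk that contains this offset.
+ */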
struct anv_bo *bo = NULL;
int32_t bo_offset = 0;
anv_block_pool_foreach_bo(iter_bo, pool) {
if (state.next + block_size <= state.end) {
return state.next;
} else if (state.next <= state.end) {
- if (pool->bo_flags & EXEC_OBJECT_PINNED && state.next < state.end) {
+ if (pool->use_softpin && state.next < state.end) {
/* We need to grow the block pool, but still have some leftover
* space that can't be used by that particular allocation. So we
* add that as a "padding", and return it.
anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t block_size,
- uint64_t bo_flags)
+ uint32_t block_size)
{
VkResult result = anv_block_pool_init(&pool->block_pool, device,
start_address,
- block_size * 16,
- bo_flags);
+ block_size * 16);
if (result != VK_SUCCESS)
return result;
if (result != VK_SUCCESS)
goto fail_batch_bo_pool;
- /* For state pool BOs we have to be a bit careful about where we place them
- * in the GTT. There are two documented workarounds for state base address
- * placement : Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
- * which state that those two base addresses do not support 48-bit
- * addresses and need to be placed in the bottom 32-bit range.
- * Unfortunately, this is not quite accurate.
- *
- * The real problem is that we always set the size of our state pools in
- * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
- * likely significantly smaller. We do this because we do not no at the
- * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
- * the pool during command buffer building so we don't actually have a
- * valid final size. If the address + size, as seen by STATE_BASE_ADDRESS
- * overflows 48 bits, the GPU appears to treat all accesses to the buffer
- * as being out of bounds and returns zero. For dynamic state, this
- * usually just leads to rendering corruptions, but shaders that are all
- * zero hang the GPU immediately.
- *
- * The easiest solution to do is exactly what the bogus workarounds say to
- * do: restrict these buffers to 32-bit addresses. We could also pin the
- * BO to some particular location of our choosing, but that's significantly
- * more work than just not setting a flag. So, we explicitly DO NOT set
- * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
- * hard work for us.
- */
- if (!physical_device->use_softpin)
- bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
result = anv_state_pool_init(&device->dynamic_state_pool, device,
- DYNAMIC_STATE_POOL_MIN_ADDRESS,
- 16384,
- bo_flags);
+ DYNAMIC_STATE_POOL_MIN_ADDRESS, 16384);
if (result != VK_SUCCESS)
goto fail_bo_cache;
result = anv_state_pool_init(&device->instruction_state_pool, device,
- INSTRUCTION_STATE_POOL_MIN_ADDRESS,
- 16384,
- bo_flags);
+ INSTRUCTION_STATE_POOL_MIN_ADDRESS, 16384);
if (result != VK_SUCCESS)
goto fail_dynamic_state_pool;
result = anv_state_pool_init(&device->surface_state_pool, device,
- SURFACE_STATE_POOL_MIN_ADDRESS,
- 4096,
- bo_flags);
+ SURFACE_STATE_POOL_MIN_ADDRESS, 4096);
if (result != VK_SUCCESS)
goto fail_instruction_state_pool;
if (physical_device->use_softpin) {
result = anv_state_pool_init(&device->binding_table_pool, device,
- BINDING_TABLE_POOL_MIN_ADDRESS,
- 4096,
- bo_flags);
+ BINDING_TABLE_POOL_MIN_ADDRESS, 4096);
if (result != VK_SUCCESS)
goto fail_surface_state_pool;
}
struct anv_block_pool {
struct anv_device *device;
-
- uint64_t bo_flags;
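+ /* True when pool BOs are placed at fixed (softpinned) GPU addresses rather
+ * than going through the relocation/memfd path.
+ */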
+ bool use_softpin;
/* Wrapper BO for use in relocation lists. This BO is simply a wrapper
* around the actual BO so that we grow the pool after the wrapper BO has
VkResult anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t initial_size,
- uint64_t bo_flags);
+ uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
uint32_t block_size, uint32_t *padding);
VkResult anv_state_pool_init(struct anv_state_pool *pool,
struct anv_device *device,
uint64_t start_address,
- uint32_t block_size,
- uint64_t bo_flags);
+ uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
uint32_t state_size, uint32_t alignment);
const uint32_t block_size = 16 * 1024;
const uint32_t initial_size = block_size / 2;
- anv_block_pool_init(&pool, &device, 4096, initial_size, EXEC_OBJECT_PINNED);
+ anv_block_pool_init(&pool, &device, 4096, initial_size);
assert(pool.size == initial_size);
uint32_t padding;
struct anv_block_pool pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_block_pool_init(&pool, &device, 4096, 4096, 0);
+ anv_block_pool_init(&pool, &device, 4096, 4096);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &pool;
pthread_mutex_init(&device.mutex, NULL);
for (unsigned i = 0; i < NUM_RUNS; i++) {
- anv_state_pool_init(&state_pool, &device, 4096, 256, 0);
+ anv_state_pool_init(&state_pool, &device, 4096, 256);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
struct anv_state_pool state_pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_state_pool_init(&state_pool, &device, 4096, 4096, 0);
+ anv_state_pool_init(&state_pool, &device, 4096, 4096);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
struct anv_state_pool state_pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_state_pool_init(&state_pool, &device, 4096, 64, 0);
+ anv_state_pool_init(&state_pool, &device, 4096, 64);
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
};
struct anv_state_pool state_pool;
- anv_state_pool_init(&state_pool, &device, 4096, 4096, EXEC_OBJECT_PINNED);
+ anv_state_pool_init(&state_pool, &device, 4096, 4096);
/* Get the size of the underlying block_pool */
struct anv_block_pool *bp = &state_pool.block_pool;