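-/* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
- * We use it to indicate the free list is empty. */
+/* Free lists link entries by state table index rather than by byte offset, so
+ * any small value can be a valid entry. UINT32_MAX is the sentinel that marks
+ * an empty free list. */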
-#define EMPTY 1
+#define EMPTY UINT32_MAX
+
+#define PAGE_SIZE 4096
struct anv_mmap_cleanup {
void *map;
return 1 << ilog2_round_up(value);
}
-static bool
-anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
+/* Bookkeeping for one mmap() of the state table's memfd: the base address and
+ * length of the mapping, which anv_state_table_finish() passes to munmap().
+ */
+struct anv_state_table_cleanup {
+ void *map;
+ size_t size;
+};
+
+/* Zero-initialized cleanup record (map == NULL, size == 0). */
+#define ANV_STATE_TABLE_CLEANUP_INIT ((struct anv_state_table_cleanup){0})
+/* Size in bytes of one state table entry. */
+#define ANV_STATE_ENTRY_SIZE (sizeof(struct anv_free_entry))
+
+/* Defined further down; declared here because anv_state_table_init() calls
+ * it.
+ */
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size);
+
+/* Initializes a state table backed by an anonymous memfd.
+ *
+ * Creates the memfd, sizes it to BLOCK_POOL_MEMFD_SIZE, sets up the vector
+ * that tracks mappings of it, and maps room for `initial_entries` entries.
+ * Returns VK_SUCCESS, or an error after closing the fd (and finishing the
+ * vector if it had already been initialized).
+ */
+VkResult
+anv_state_table_init(struct anv_state_table *table,
+ struct anv_device *device,
+ uint32_t initial_entries)
{
- union anv_free_list current, new, old;
+ VkResult result;
- current.u64 = list->u64;
- while (current.offset != EMPTY) {
- /* We have to add a memory barrier here so that the list head (and
- * offset) gets read before we read the map pointer. This way we
- * know that the map pointer is valid for the given offset at the
- * point where we read it.
- */
- __sync_synchronize();
+ table->device = device;
- int32_t *next_ptr = *map + current.offset;
- new.offset = VG_NOACCESS_READ(next_ptr);
- new.count = current.count + 1;
- old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
- if (old.u64 == current.u64) {
- *offset = current.offset;
- return true;
- }
- current = old;
+ table->fd = memfd_create("state table", MFD_CLOEXEC);
+ if (table->fd == -1)
+ return vk_error(VK_ERROR_INITIALIZATION_FAILED);
+
+ /* Just make it 2GB up-front. The Linux kernel won't actually back it
+ * with pages until we either map and fault on one of them or we use
+ * userptr and send a chunk of it off to the GPU.
+ */
+ if (ftruncate(table->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_fd;
}
- return false;
+ /* Tracks every mapping made of the memfd so that anv_state_table_finish()
+ * can unmap them.
+ */
+ if (!u_vector_init(&table->mmap_cleanups,
+ round_to_power_of_two(sizeof(struct anv_state_table_cleanup)),
+ 128)) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_fd;
+ }
+
+ /* Start with no entries handed out and no mapping recorded. */
+ table->state.next = 0;
+ table->state.end = 0;
+ table->size = 0;
+
+ /* Map enough of the memfd to hold the requested number of entries. */
+ uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE;
+ result = anv_state_table_expand_range(table, initial_size);
+ if (result != VK_SUCCESS)
+ goto fail_mmap_cleanups;
+
+ return VK_SUCCESS;
+
+ /* Error unwinding: each label undoes one step and falls through to the
+ * next.
+ */
+ fail_mmap_cleanups:
+ u_vector_finish(&table->mmap_cleanups);
+ fail_fd:
+ close(table->fd);
+
+ return result;
}
-static void
-anv_free_list_push(union anv_free_list *list, void *map, int32_t offset,
- uint32_t size, uint32_t count)
+/* Maps the first `size` bytes of the table's memfd and makes that mapping the
+ * table's current one.
+ *
+ * Every mapping is recorded in table->mmap_cleanups so that
+ * anv_state_table_finish() can unmap it; older mappings are intentionally
+ * kept alive (see below). Returns VK_ERROR_OUT_OF_HOST_MEMORY if `size`
+ * exceeds the memfd, if no cleanup record can be added, or if mmap fails.
+ */
+static VkResult
+anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
{
- union anv_free_list current, old, new;
- int32_t *next_ptr = map + offset;
+ void *map;
+ struct anv_state_table_cleanup *cleanup;
+
+ /* Assert that we only ever grow the pool */
+ assert(size >= table->state.end);
- /* If we're returning more than one chunk, we need to build a chain to add
- * to the list. Fortunately, we can do this without any atomics since we
- * own everything in the chain right now. `offset` is left pointing to the
- * head of our chain list while `next_ptr` points to the tail.
+ /* Make sure that we don't go outside the bounds of the memfd */
+ if (size > BLOCK_POOL_MEMFD_SIZE)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ cleanup = u_vector_add(&table->mmap_cleanups);
+ if (!cleanup)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* The vector's elements were sized for struct anv_state_table_cleanup in
+ * anv_state_table_init(), so the slot must be initialized as that type.
+ * It stays zeroed (map == NULL) if the mmap below fails, which
+ * anv_state_table_finish() skips.
+ */
+ *cleanup = ANV_STATE_TABLE_CLEANUP_INIT;
+
+ /* Just leak the old map until we destroy the pool. We can't munmap it
+ * without races or imposing locking on the block allocate fast path. On
+ * the whole the leaked maps adds up to less than the size of the
+ * current map. MAP_POPULATE seems like the right thing to do, but we
+ * should try to get some numbers.
*/
- for (uint32_t i = 1; i < count; i++) {
- VG_NOACCESS_WRITE(next_ptr, offset + i * size);
- next_ptr = map + offset + i * size;
+ map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, table->fd, 0);
+ if (map == MAP_FAILED) {
+ return vk_errorf(table->device->instance, table->device,
+ VK_ERROR_OUT_OF_HOST_MEMORY, "mmap failed: %m");
}
+ cleanup->map = map;
+ cleanup->size = size;
+
+ table->map = map;
+ table->size = size;
+
+ return VK_SUCCESS;
+}
+
+/* Grows the state table's mapping if it is getting full.
+ *
+ * Returns VK_SUCCESS when no growth was needed, otherwise the result of
+ * anv_state_table_expand_range() for the new size.
+ */
+static VkResult
+anv_state_table_grow(struct anv_state_table *table)
+{
+   /* Bytes covered by the entries handed out so far, rounded up to a page. */
+   const uint32_t in_use = align_u32(table->state.next * ANV_STATE_ENTRY_SIZE,
+                                     PAGE_SIZE);
+   const uint32_t cur_size = table->size;
+
+   /* Growing a table that has no mapping yet is a programming error. */
+   assert(cur_size > 0);
+
+   const uint32_t needed = MAX2(in_use, cur_size);
+
+   /* The current mapping already holds at least twice what is in use, so
+    * there is nothing to do.
+    */
+   if (in_use * 2 <= needed)
+      return VK_SUCCESS;
+
+   /* Double the current size until it covers what is needed. */
+   uint32_t new_size;
+   for (new_size = cur_size * 2; new_size < needed; new_size *= 2)
+      ;
+
+   assert(new_size > table->size);
+
+   return anv_state_table_expand_range(table, new_size);
+}
+
+/* Releases everything owned by the state table: unmaps every recorded
+ * mapping, finishes the cleanup vector and closes the memfd.
+ */
+void
+anv_state_table_finish(struct anv_state_table *table)
+{
+ struct anv_state_table_cleanup *cleanup;
+
+ u_vector_foreach(cleanup, &table->mmap_cleanups) {
+ /* Records whose map is NULL hold no mapping and are skipped. */
+ if (cleanup->map)
+ munmap(cleanup->map, cleanup->size);
+ }
+
+ u_vector_finish(&table->mmap_cleanups);
+
+ close(table->fd);
+}
+
+/* Reserves `count` consecutive entries in the state table.
+ *
+ * On success, *idx receives the index of the first reserved entry and each
+ * reserved entry's state.idx has been set to its own index. Returns
+ * VK_SUCCESS, or the error from anv_state_table_grow() if the table had to
+ * be grown and growing failed.
+ */
+VkResult
+anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
+ uint32_t count)
+{
+ struct anv_block_state state, old, new;
+ VkResult result;
+
+ assert(idx);
+
+ while(1) {
+ /* Atomically add count to the packed next/end word; `state` holds the
+ * value from before the add.
+ */
+ state.u64 = __sync_fetch_and_add(&table->state.u64, count);
+ if (state.next + count <= state.end) {
+ /* The whole range lies below the current end: hand it out. */
+ assert(table->map);
+ struct anv_free_entry *entry = &table->map[state.next];
+ for (int i = 0; i < count; i++) {
+ entry[i].state.idx = state.next + i;
+ }
+ *idx = state.next;
+ return VK_SUCCESS;
+ } else if (state.next <= state.end) {
+ /* We allocated the first block outside the pool so we have to grow
+ * the pool. pool_state->next acts a mutex: threads who try to
+ * allocate now will get block indexes above the current limit and
+ * hit futex_wait below.
+ */
+ new.next = state.next + count;
+ /* Keep growing until the mapped size covers new.next entries. */
+ do {
+ result = anv_state_table_grow(table);
+ if (result != VK_SUCCESS)
+ return result;
+ new.end = table->size / ANV_STATE_ENTRY_SIZE;
+ } while (new.end < new.next);
+
+ /* Publish the new next/end pair with a single atomic exchange, then
+ * wake any threads sleeping in futex_wait below if next no longer
+ * equals the value we fetched.
+ */
+ old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64);
+ if (old.next != state.next)
+ futex_wake(&table->state.end, INT_MAX);
+ /* NOTE(review): there is no return here, so the loop runs again and
+ * performs another fetch-and-add even though new.next already covers
+ * this request -- confirm that is intended.
+ */
+ } else {
+ /* Our range starts beyond end, which per the comment above means
+ * another thread is growing the table; wait on end, then retry.
+ */
+ futex_wait(&table->state.end, state.end, NULL);
+ continue;
+ }
+ }
+}
+
+/* Pushes the `count` consecutive state table entries starting at index
+ * `first` onto `list`.
+ *
+ * The entries are chained first -> first + 1 -> ... through their `next`
+ * fields, the last one is pointed at the current list head, and the head is
+ * then switched to `first` with a compare-and-swap that retries until it
+ * succeeds.
+ */
+void
+anv_free_list_push(union anv_free_list *list,
+ struct anv_state_table *table,
+ uint32_t first, uint32_t count)
+{
+ union anv_free_list current, old, new;
+ uint32_t last = first;
+
+ /* Link the entries to each other; no atomics are used for this part. After
+ * the loop `last` is the index of the final entry in the chain.
+ */
+ for (uint32_t i = 1; i < count; i++, last++)
+ table->map[last].next = last + 1;
+
old = *list;
do {
current = old;
- VG_NOACCESS_WRITE(next_ptr, current.offset);
- new.offset = offset;
+ /* Hook the tail of our chain onto the head we last observed. */
+ table->map[last].next = current.offset;
+ new.offset = first;
new.count = current.count + 1;
old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
} while (old.u64 != current.u64);
}
+/* Pops the head entry off `list`.
+ *
+ * Returns a pointer to the anv_state stored in the popped state table entry,
+ * or NULL if the list head is EMPTY. The head is advanced with a
+ * compare-and-swap; when that fails the loop retries with the head value the
+ * compare-and-swap returned.
+ */
+struct anv_state *
+anv_free_list_pop(union anv_free_list *list,
+ struct anv_state_table *table)
+{
+ union anv_free_list current, new, old;
+
+ current.u64 = list->u64;
+ while (current.offset != EMPTY) {
+ /* Full barrier between observing the list head and reading the table
+ * entry it points at.
+ */
+ __sync_synchronize();
+ new.offset = table->map[current.offset].next;
+ new.count = current.count + 1;
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ if (old.u64 == current.u64) {
+ /* The swap succeeded, so the entry at current.offset is ours. */
+ struct anv_free_entry *entry = &table->map[current.offset];
+ return &entry->state;
+ }
+ current = old;
+ }
+
+ return NULL;
+}
+
/* All pointers in the ptr_free_list are assumed to be page-aligned. This
* means that the bottom 12 bits should all be zero.
*/
close(pool->fd);
}
-#define PAGE_SIZE 4096
-
static VkResult
anv_block_pool_expand_range(struct anv_block_pool *pool,
uint32_t center_bo_offset, uint32_t size)
return VK_SUCCESS;
}
+/** Returns the current memory map of the block pool.
+ *
+ * The returned pointer points to the map for the memory at the specified
+ * offset. The offset parameter is relative to the "center" of the block pool
+ * rather than the start of the block pool BO map.
+ *
+ * The result is simply pool->map advanced by offset; no bounds checking is
+ * done here.
+ */
+void*
+anv_block_pool_map(struct anv_block_pool *pool, int32_t offset)
+{
+ return pool->map + offset;
+}
+
/** Grows and re-centers the block pool.
*
* We grow the block pool in one or both directions in such a way that the
if (result != VK_SUCCESS)
return result;
+ result = anv_state_table_init(&pool->table, device, 64);
+ if (result != VK_SUCCESS) {
+ anv_block_pool_finish(&pool->block_pool);
+ return result;
+ }
+
assert(util_is_power_of_two_or_zero(block_size));
pool->block_size = block_size;
pool->back_alloc_free_list = ANV_FREE_LIST_EMPTY;
anv_state_pool_finish(struct anv_state_pool *pool)
{
VG(VALGRIND_DESTROY_MEMPOOL(pool));
+ anv_state_table_finish(&pool->table);
anv_block_pool_finish(&pool->block_pool);
}
return 1 << size_log2;
}
+/** Helper to push a chunk into the state table.
+ *
+ * Reserves 'count' entries in the state table, fills in the size, offset and
+ * map of each one so that together they describe 'count' consecutive blocks
+ * of 'block_size' bytes starting at 'chunk_offset', and pushes them onto the
+ * free list of the bucket matching 'block_size'. Does nothing when 'count'
+ * is 0.
+ */
+static void
+anv_state_pool_return_blocks(struct anv_state_pool *pool,
+                             uint32_t chunk_offset, uint32_t count,
+                             uint32_t block_size)
+{
+   if (count == 0)
+      return;
+
+   /* Make sure we always return chunks aligned to the block_size */
+   assert(chunk_offset % block_size == 0);
+
+   uint32_t st_idx;
+   VkResult result = anv_state_table_add(&pool->table, &st_idx, count);
+   assert(result == VK_SUCCESS);
+
+   /* The index is unsigned to match 'count' and the offset arithmetic. */
+   for (uint32_t i = 0; i < count; i++) {
+      /* update states that were added back to the state table */
+      struct anv_state *state_i = anv_state_table_get(&pool->table,
+                                                      st_idx + i);
+      state_i->alloc_size = block_size;
+      state_i->offset = chunk_offset + block_size * i;
+      state_i->map = anv_block_pool_map(&pool->block_pool, state_i->offset);
+   }
+
+   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
+   anv_free_list_push(&pool->buckets[block_bucket].free_list,
+                      &pool->table, st_idx, count);
+}
+
static struct anv_state
anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
uint32_t size, uint32_t align)
{
uint32_t bucket = anv_state_pool_get_bucket(MAX2(size, align));
- struct anv_state state;
- state.alloc_size = anv_state_pool_get_bucket_size(bucket);
+ struct anv_state *state;
+ uint32_t alloc_size = anv_state_pool_get_bucket_size(bucket);
+ int32_t offset;
/* Try free list first. */
- if (anv_free_list_pop(&pool->buckets[bucket].free_list,
- &pool->block_pool.map, &state.offset)) {
- assert(state.offset >= 0);
+ state = anv_free_list_pop(&pool->buckets[bucket].free_list,
+ &pool->table);
+ if (state) {
+ assert(state->offset >= 0);
goto done;
}
/* Try to grab a chunk from some larger bucket and split it up */
for (unsigned b = bucket + 1; b < ANV_STATE_BUCKETS; b++) {
- int32_t chunk_offset;
- if (anv_free_list_pop(&pool->buckets[b].free_list,
- &pool->block_pool.map, &chunk_offset)) {
+ state = anv_free_list_pop(&pool->buckets[b].free_list, &pool->table);
+ if (state) {
unsigned chunk_size = anv_state_pool_get_bucket_size(b);
+ int32_t chunk_offset = state->offset;
+
+ /* First lets update the state we got to its new size. offset and map
+ * remain the same.
+ */
+ state->alloc_size = alloc_size;
/* We've found a chunk that's larger than the requested state size.
* There are a couple of options as to what we do with it:
* We choose option (3).
*/
if (chunk_size > pool->block_size &&
- state.alloc_size < pool->block_size) {
+ alloc_size < pool->block_size) {
assert(chunk_size % pool->block_size == 0);
/* We don't want to split giant chunks into tiny chunks. Instead,
* break anything bigger than a block into block-sized chunks and
* then break it down into bucket-sized chunks from there. Return
* all but the first block of the chunk to the block bucket.
*/
- const uint32_t block_bucket =
- anv_state_pool_get_bucket(pool->block_size);
- anv_free_list_push(&pool->buckets[block_bucket].free_list,
- pool->block_pool.map,
- chunk_offset + pool->block_size,
- pool->block_size,
- (chunk_size / pool->block_size) - 1);
+ uint32_t push_back = (chunk_size / pool->block_size) - 1;
+ anv_state_pool_return_blocks(pool, chunk_offset + pool->block_size,
+ push_back, pool->block_size);
chunk_size = pool->block_size;
}
- assert(chunk_size % state.alloc_size == 0);
- anv_free_list_push(&pool->buckets[bucket].free_list,
- pool->block_pool.map,
- chunk_offset + state.alloc_size,
- state.alloc_size,
- (chunk_size / state.alloc_size) - 1);
-
- state.offset = chunk_offset;
+ assert(chunk_size % alloc_size == 0);
+ uint32_t push_back = (chunk_size / alloc_size) - 1;
+ anv_state_pool_return_blocks(pool, chunk_offset + alloc_size,
+ push_back, alloc_size);
goto done;
}
}
- state.offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
- &pool->block_pool,
- state.alloc_size,
- pool->block_size);
+ offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
+ &pool->block_pool,
+ alloc_size,
+ pool->block_size);
+ /* Everytime we allocate a new state, add it to the state pool */
+ uint32_t idx;
+ VkResult result = anv_state_table_add(&pool->table, &idx, 1);
+ assert(result == VK_SUCCESS);
+
+ state = anv_state_table_get(&pool->table, idx);
+ state->offset = offset;
+ state->alloc_size = alloc_size;
+ state->map = anv_block_pool_map(&pool->block_pool, offset);
done:
- state.map = pool->block_pool.map + state.offset;
- return state;
+ return *state;
}
struct anv_state
struct anv_state
anv_state_pool_alloc_back(struct anv_state_pool *pool)
{
- struct anv_state state;
- state.alloc_size = pool->block_size;
+ struct anv_state *state;
+ uint32_t alloc_size = pool->block_size;
- if (anv_free_list_pop(&pool->back_alloc_free_list,
- &pool->block_pool.map, &state.offset)) {
- assert(state.offset < 0);
+ /* Reuse a previously freed back allocation if one is available. */
+ state = anv_free_list_pop(&pool->back_alloc_free_list, &pool->table);
+ if (state) {
+ assert(state->offset < 0);
goto done;
}
- state.offset = anv_block_pool_alloc_back(&pool->block_pool,
- pool->block_size);
+ int32_t offset;
+ offset = anv_block_pool_alloc_back(&pool->block_pool,
+ pool->block_size);
+ /* Every newly allocated state gets its own entry in the state table. */
+ uint32_t idx;
+ VkResult result = anv_state_table_add(&pool->table, &idx, 1);
+ assert(result == VK_SUCCESS);
+
+ state = anv_state_table_get(&pool->table, idx);
+ state->offset = offset;
+ state->alloc_size = alloc_size;
+ /* Go through the accessor, like the other state allocation paths do. */
+ state->map = anv_block_pool_map(&pool->block_pool, state->offset);
done:
- state.map = pool->block_pool.map + state.offset;
- VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, state.alloc_size));
- return state;
+ VG(VALGRIND_MEMPOOL_ALLOC(pool, state->map, state->alloc_size));
+ return *state;
}
static void
if (state.offset < 0) {
assert(state.alloc_size == pool->block_size);
anv_free_list_push(&pool->back_alloc_free_list,
- pool->block_pool.map, state.offset,
- state.alloc_size, 1);
+ &pool->table, state.idx, 1);
} else {
anv_free_list_push(&pool->buckets[bucket].free_list,
- pool->block_pool.map, state.offset,
- state.alloc_size, 1);
+ &pool->table, state.idx, 1);
}
}
VkResult
anv_bo_cache_init(struct anv_bo_cache *cache)
{
- cache->bo_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
+ cache->bo_map = _mesa_pointer_hash_table_create(NULL);
if (!cache->bo_map)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);