Since the state_stream now pulls from a state_pool, the only consumer
allocating directly off the block pool is the state pool, so we can
move block_size there. The one exception is binding table allocation,
but we can reference the state pool there as well.

The only functional change is that we no longer grow the block pool
immediately upon creation, so no BO gets allocated until our first
state allocation.
Reviewed-by: Juan A. Suarez Romero <jasuarez@igalia.com>
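
A minimal sketch of the calling convention after this patch (a valid
`device` pointer is assumed; the sizes are illustrative, not mandated):

   struct anv_block_pool block_pool;
   struct anv_state_pool state_pool;

   /* The block pool is now sized by an initial size in bytes... */
   VkResult result = anv_block_pool_init(&block_pool, device, 4096);
   if (result != VK_SUCCESS)
      return result;

   /* ...while the block granularity lives on the state pool. */
   anv_state_pool_init(&state_pool, &block_pool, 4096);

   /* States are carved out of block_size-byte blocks on demand. */
   struct anv_state state = anv_state_pool_alloc(&state_pool, 64, 64);

   anv_state_pool_finish(&state_pool);
   anv_block_pool_finish(&block_pool);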
} while (old != current);
}
-static uint32_t
-anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
+static VkResult
+anv_block_pool_expand_range(struct anv_block_pool *pool,
+ uint32_t center_bo_offset, uint32_t size);
VkResult
anv_block_pool_init(struct anv_block_pool *pool,
- struct anv_device *device, uint32_t block_size)
+ struct anv_device *device,
+ uint32_t initial_size)
- assert(util_is_power_of_two(block_size));
-
pool->device = device;
anv_bo_init(&pool->bo, 0, 0);
- pool->block_size = block_size;
pool->free_list = ANV_FREE_LIST_EMPTY;
pool->back_free_list = ANV_FREE_LIST_EMPTY;
pool->back_state.next = 0;
pool->back_state.end = 0;
- /* Immediately grow the pool so we'll have a backing bo. */
- pool->state.end = anv_block_pool_grow(pool, &pool->state);
+ result = anv_block_pool_expand_range(pool, 0, initial_size);
+ if (result != VK_SUCCESS)
+ goto fail_mmap_cleanups;
+ fail_mmap_cleanups:
+ u_vector_finish(&pool->mmap_cleanups);
fail_fd:
close(pool->fd);
* the pool and a 4K CPU page.
*/
static uint32_t
-anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
+anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state,
+ uint32_t block_size)
{
uint32_t size;
VkResult result = VK_SUCCESS;
if (old_size == 0) {
/* This is the first allocation */
- size = MAX2(32 * pool->block_size, PAGE_SIZE);
+ size = MAX2(32 * block_size, PAGE_SIZE);
} else {
size = old_size * 2;
}
center_bo_offset = ((uint64_t)size * back_used) / total_used;
/* Align down to a multiple of both the block size and page size */
- uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE);
+ uint32_t granularity = MAX2(block_size, PAGE_SIZE);
assert(util_is_power_of_two(granularity));
center_bo_offset &= ~(granularity - 1);
center_bo_offset = size - pool->state.end;
}
- assert(center_bo_offset % pool->block_size == 0);
+ assert(center_bo_offset % block_size == 0);
assert(center_bo_offset % PAGE_SIZE == 0);
result = anv_block_pool_expand_range(pool, center_bo_offset, size);
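
The pool grows from its center outward, so the new size is split between
the back (negative offsets) and the front in proportion to how much of
each side is in use, then aligned down so both halves stay block- and
page-aligned. A worked example with hypothetical numbers:

   /* Hypothetical: size = 65536, back_used = 10000, total_used = 49152,
    * block_size = 4096, PAGE_SIZE = 4096. */
   uint32_t center_bo_offset = ((uint64_t)65536 * 10000) / 49152; /* 13333 */
   uint32_t granularity = MAX2(4096, 4096);                       /* 4096 */
   center_bo_offset &= ~(granularity - 1);                        /* 12288 */
   /* The back half gets [0, 12288) of the BO; the front gets the
    * remaining 53248 bytes. */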
static uint32_t
anv_block_pool_alloc_new(struct anv_block_pool *pool,
- struct anv_block_state *pool_state)
+ struct anv_block_state *pool_state,
+ uint32_t block_size)
{
struct anv_block_state state, old, new;
+ assert(util_is_power_of_two(block_size));
+
- state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
+ state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
if (state.next < state.end) {
assert(pool->map);
return state.next;
* pool_state->next acts a mutex: threads who try to allocate now will
* get block indexes above the current limit and hit futex_wait
* below. */
- new.next = state.next + pool->block_size;
- new.end = anv_block_pool_grow(pool, pool_state);
- assert(new.end >= new.next && new.end % pool->block_size == 0);
+ new.next = state.next + block_size;
+ new.end = anv_block_pool_grow(pool, pool_state, block_size);
+ assert(new.end >= new.next && new.end % block_size == 0);
old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
if (old.next != state.next)
futex_wake(&pool_state->end, INT_MAX);
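
The hunk above is the heart of the allocator: next and end are packed
into one 64-bit word, so a single fetch-and-add both claims a block and
detects when the pool must grow. A condensed, self-contained sketch of
the pattern, with a hypothetical fake_grow() standing in for
anv_block_pool_grow() and a spin loop standing in for the futex:

   #include <stdint.h>

   struct block_state {
      union {
         struct {
            uint32_t next;
            uint32_t end;
         };
         uint64_t u64;
      };
   };

   /* Stand-in for anv_block_pool_grow(): pretend doubling the mapping
    * always succeeds. */
   static uint32_t
   fake_grow(uint32_t old_end, uint32_t block_size)
   {
      return old_end == 0 ? 32 * block_size : old_end * 2;
   }

   static uint32_t
   bump_alloc(struct block_state *st, uint32_t block_size)
   {
      struct block_state state, old, new;

      while (1) {
         state.u64 = __sync_fetch_and_add(&st->u64, block_size);
         if (state.next < state.end) {
            /* Fast path: the claimed block is already backed. */
            return state.next;
         } else if (state.next == state.end) {
            /* This thread crossed the limit first, so it grows the
             * pool; next acts as a mutex for everyone behind it. */
            new.next = state.next + block_size;
            new.end = fake_grow(state.end, block_size);
            old.u64 = __sync_lock_test_and_set(&st->u64, new.u64);
            (void)old; /* the driver futex_wakes waiters here */
            return state.next;
         } else {
            /* Another thread is growing; the driver futex_waits on
             * end. Spin until a new end is published, then retry. */
            while (__sync_fetch_and_add(&st->end, 0) == state.end)
               ;
         }
      }
   }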
-anv_block_pool_alloc(struct anv_block_pool *pool)
+anv_block_pool_alloc(struct anv_block_pool *pool,
+ uint32_t block_size)
- return anv_block_pool_alloc_new(pool, &pool->state);
+ return anv_block_pool_alloc_new(pool, &pool->state, block_size);
}
/* Allocates a block out of the back of the block pool.
* gymnastics with the block pool's BO when doing relocations.
*/
int32_t
-anv_block_pool_alloc_back(struct anv_block_pool *pool)
+anv_block_pool_alloc_back(struct anv_block_pool *pool,
+ uint32_t block_size)
- offset = anv_block_pool_alloc_new(pool, &pool->back_state);
+ offset = anv_block_pool_alloc_new(pool, &pool->back_state, block_size);
/* The offset we get out of anv_block_pool_alloc_new() is actually the
* number of bytes downwards from the middle to the end of the block.
* start of the block.
*/
assert(offset >= 0);
- return -(offset + pool->block_size);
+ return -(offset + block_size);
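
Offsets in this pool are relative to its center, so back-of-pool blocks
come back negative: each new block ends where the previous one begins.
With a hypothetical 4096-byte block size:

   int32_t b0 = anv_block_pool_alloc_back(pool, 4096);   /* -4096 */
   int32_t b1 = anv_block_pool_alloc_back(pool, 4096);   /* -8192 */
   /* pool->map + b0 is the start of a block spanning [-4096, 0)
    * relative to the pool's center. */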
void
anv_state_pool_init(struct anv_state_pool *pool,
- struct anv_block_pool *block_pool)
+ struct anv_block_pool *block_pool,
+ uint32_t block_size)
{
pool->block_pool = block_pool;
+ assert(util_is_power_of_two(block_size));
+ pool->block_size = block_size;
for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
pool->buckets[i].free_list = ANV_FREE_LIST_EMPTY;
pool->buckets[i].block.next = 0;
static uint32_t
anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
struct anv_block_pool *block_pool,
+ uint32_t state_size,
+ uint32_t block_size)
{
struct anv_block_state block, old, new;
uint32_t offset;
if (block.next < block.end) {
return block.next;
} else if (block.next == block.end) {
- offset = anv_block_pool_alloc(block_pool);
+ offset = anv_block_pool_alloc(block_pool, block_size);
new.next = offset + state_size;
- new.end = offset + block_pool->block_size;
+ new.end = offset + block_size;
old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
if (old.next != block.next)
futex_wake(&pool->block.end, INT_MAX);
state.offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
pool->block_pool,
+ state.alloc_size,
+ pool->block_size);
done:
state.map = pool->block_pool->map + state.offset;
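
Each fixed-size bucket carves whole states out of one block at a time;
with block_size now supplied by the state pool, the carving arithmetic
looks like this (hypothetical numbers):

   uint32_t offset     = 8192;   /* fresh block from anv_block_pool_alloc() */
   uint32_t state_size = 64;     /* this bucket's fixed state size */
   uint32_t block_size = 4096;   /* pool->block_size */

   uint32_t first = offset;                /* returned to the caller */
   uint32_t next  = offset + state_size;   /* 8256: next free state */
   uint32_t end   = offset + block_size;   /* 12288: block exhausted */
   /* 4096 / 64 = 64 states fit before the bucket needs a new block. */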
{
struct anv_block_pool *block_pool =
&cmd_buffer->device->surface_state_block_pool;
+ struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
int32_t *bt_block = u_vector_head(&cmd_buffer->bt_blocks);
struct anv_state state;
state.alloc_size = align_u32(entries * 4, 32);
- if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
+ if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
return (struct anv_state) { 0 };
state.offset = cmd_buffer->bt_next;
{
struct anv_block_pool *block_pool =
&cmd_buffer->device->surface_state_block_pool;
+ struct anv_state_pool *state_pool = &cmd_buffer->device->surface_state_pool;
int32_t *offset = u_vector_add(&cmd_buffer->bt_blocks);
if (offset == NULL) {
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
- *offset = anv_block_pool_alloc_back(block_pool);
+ *offset = anv_block_pool_alloc_back(block_pool, state_pool->block_size);
cmd_buffer->bt_next = 0;
return VK_SUCCESS;
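
Binding tables are sub-allocated linearly from one back-of-pool block at
a time: bt_next is the watermark within the current block, checked
against the surface state pool's block size and reset to 0 whenever a
fresh block is fetched. A condensed sketch with hypothetical sizes:

   /* Fresh 4096-byte block off the back of the pool, e.g. -4096. */
   int32_t bt_block = anv_block_pool_alloc_back(block_pool, 4096);
   uint32_t bt_next = 0;

   /* A 25-entry binding table: 25 * 4 = 100 bytes, aligned to 128. */
   uint32_t alloc_size = align_u32(25 * 4, 32);
   if (bt_next + alloc_size > 4096) {
      /* Block exhausted: fetch another back block, reset bt_next. */
   }
   uint32_t offset = bt_next;   /* table lives at bt_block + offset */
   bt_next += alloc_size;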
* little data at the top to build its linked list.
*/
const uint32_t max_update_size =
- cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
+ cmd_buffer->device->dynamic_state_pool.block_size - 64;
assert(max_update_size < MAX_SURFACE_DIM * 4);
goto fail_batch_bo_pool;
result = anv_block_pool_init(&device->dynamic_state_block_pool, device,
if (result != VK_SUCCESS)
goto fail_bo_cache;
anv_state_pool_init(&device->dynamic_state_pool,
- &device->dynamic_state_block_pool);
+ &device->dynamic_state_block_pool,
+ 16384);
result = anv_block_pool_init(&device->instruction_block_pool, device,
if (result != VK_SUCCESS)
goto fail_dynamic_state_pool;
anv_state_pool_init(&device->instruction_state_pool,
- &device->instruction_block_pool);
+ &device->instruction_block_pool,
+ 1024 * 1024);
result = anv_block_pool_init(&device->surface_state_block_pool, device,
if (result != VK_SUCCESS)
goto fail_instruction_state_pool;
anv_state_pool_init(&device->surface_state_pool,
- &device->surface_state_block_pool);
+ &device->surface_state_block_pool,
+ 4096);
result = anv_bo_init_new(&device->workaround_bo, device, 1024);
if (result != VK_SUCCESS)
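
For reference, the block sizes this patch assigns to each state pool
(previously a property of the backing block pools):

   dynamic_state_pool       16384         /* 16 KiB blocks */
   instruction_state_pool   1024 * 1024   /* 1 MiB blocks */
   surface_state_pool       4096          /* 4 KiB blocks */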
*/
struct u_vector mmap_cleanups;
union anv_free_list free_list;
struct anv_block_state state;
struct anv_state_pool {
struct anv_block_pool *block_pool;
+
+ /* The size of blocks which will be allocated from the block pool */
+ uint32_t block_size;
+
struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
};
}
VkResult anv_block_pool_init(struct anv_block_pool *pool,
- struct anv_device *device, uint32_t block_size);
+ struct anv_device *device,
+ uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
-int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
-int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
+int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
+ uint32_t block_size);
+int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
+ uint32_t block_size);
void anv_block_pool_free(struct anv_block_pool *pool, int32_t offset);
void anv_state_pool_init(struct anv_state_pool *pool,
- struct anv_block_pool *block_pool);
+ struct anv_block_pool *block_pool,
+ uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
uint32_t state_size, uint32_t alignment);
#define NUM_THREADS 16
#define BLOCKS_PER_THREAD 1024
#define NUM_RUNS 64
int32_t block, *data;
for (unsigned i = 0; i < BLOCKS_PER_THREAD; i++) {
- block = anv_block_pool_alloc(job->pool);
+ block = anv_block_pool_alloc(job->pool, BLOCK_SIZE);
data = job->pool->map + block;
*data = block;
assert(block >= 0);
job->blocks[i] = block;
- block = anv_block_pool_alloc_back(job->pool);
+ block = anv_block_pool_alloc_back(job->pool, BLOCK_SIZE);
data = job->pool->map + block;
*data = block;
assert(block < 0);
struct anv_block_pool pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_block_pool_init(&pool, &device, 16);
+ anv_block_pool_init(&pool, &device, 4096);
for (unsigned i = 0; i < NUM_THREADS; i++) {
jobs[i].pool = &pool;
pthread_mutex_init(&device.mutex, NULL);
for (unsigned i = 0; i < NUM_RUNS; i++) {
- anv_block_pool_init(&block_pool, &device, 256);
- anv_state_pool_init(&state_pool, &block_pool);
+ anv_block_pool_init(&block_pool, &device, 4096);
+ anv_state_pool_init(&state_pool, &block_pool, 256);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
pthread_mutex_init(&device.mutex, NULL);
anv_block_pool_init(&block_pool, &device, 4096);
- anv_state_pool_init(&state_pool, &block_pool);
+ anv_state_pool_init(&state_pool, &block_pool, 4096);
/* Grab one so a zero offset is impossible */
anv_state_pool_alloc(&state_pool, 16, 16);
struct anv_state_pool state_pool;
pthread_mutex_init(&device.mutex, NULL);
- anv_block_pool_init(&block_pool, &device, 64);
- anv_state_pool_init(&state_pool, &block_pool);
+ anv_block_pool_init(&block_pool, &device, 4096);
+ anv_state_pool_init(&state_pool, &block_pool, 64);
pthread_barrier_init(&barrier, NULL, NUM_THREADS);
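
With the tests now passing the block size at allocation time, a minimal
single-threaded smoke test of the same API would look like this
(BLOCK_SIZE as defined in the test file):

   struct anv_block_pool pool;
   anv_block_pool_init(&pool, &device, 4096);

   int32_t front = anv_block_pool_alloc(&pool, BLOCK_SIZE);
   int32_t back  = anv_block_pool_alloc_back(&pool, BLOCK_SIZE);
   assert(front >= 0);
   assert(back < 0);

   anv_block_pool_finish(&pool);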