From: Jason Ekstrand Date: Mon, 28 Oct 2019 20:42:20 +0000 (-0500) Subject: anv: Allocate batch and fence buffers from the cache X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ee77938733cd06f5fbc86d42c0b4ad0a64ca2579;p=mesa.git anv: Allocate batch and fence buffers from the cache Reviewed-by: Lionel Landwerlin --- diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c index 6ae14257480..f484b8343d8 100644 --- a/src/intel/vulkan/anv_allocator.c +++ b/src/intel/vulkan/anv_allocator.c @@ -357,57 +357,6 @@ anv_free_list_pop(union anv_free_list *list, return NULL; } -/* All pointers in the ptr_free_list are assumed to be page-aligned. This - * means that the bottom 12 bits should all be zero. - */ -#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff) -#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff)) -#define PFL_PACK(ptr, count) ({ \ - (void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)); \ -}) - -static bool -anv_ptr_free_list_pop(void **list, void **elem) -{ - void *current = *list; - while (PFL_PTR(current) != NULL) { - void **next_ptr = PFL_PTR(current); - void *new_ptr = VG_NOACCESS_READ(next_ptr); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(new_ptr, new_count); - void *old = __sync_val_compare_and_swap(list, current, new); - if (old == current) { - *elem = PFL_PTR(current); - return true; - } - current = old; - } - - return false; -} - -static void -anv_ptr_free_list_push(void **list, void *elem) -{ - void *old, *current; - void **next_ptr = elem; - - /* The pointer-based free list requires that the pointer be - * page-aligned. This is because we use the bottom 12 bits of the - * pointer to store a counter to solve the ABA concurrency problem. - */ - assert(((uintptr_t)elem & 0xfff) == 0); - - old = *list; - do { - current = old; - VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current)); - unsigned new_count = PFL_COUNT(current) + 1; - void *new = PFL_PACK(elem, new_count); - old = __sync_val_compare_and_swap(list, current, new); - } while (old != current); -} - static VkResult anv_block_pool_expand_range(struct anv_block_pool *pool, uint32_t center_bo_offset, uint32_t size); @@ -1311,18 +1260,17 @@ anv_state_stream_alloc(struct anv_state_stream *stream, return state; } -struct bo_pool_bo_link { - struct bo_pool_bo_link *next; - struct anv_bo bo; -}; - void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, uint64_t bo_flags) { pool->device = device; pool->bo_flags = bo_flags; - memset(pool->free_list, 0, sizeof(pool->free_list)); + for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) { + util_sparse_array_free_list_init(&pool->free_list[i], + &device->bo_cache.bo_map, 0, + offsetof(struct anv_bo, free_index)); + } VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false)); } @@ -1331,14 +1279,15 @@ void anv_bo_pool_finish(struct anv_bo_pool *pool) { for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) { - struct bo_pool_bo_link *link = PFL_PTR(pool->free_list[i]); - while (link != NULL) { - struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link); - - anv_gem_munmap(link_copy.bo.map, link_copy.bo.size); - anv_vma_free(pool->device, &link_copy.bo); - anv_gem_close(pool->device, link_copy.bo.gem_handle); - link = link_copy.next; + while (1) { + struct anv_bo *bo = + util_sparse_array_free_list_pop_elem(&pool->free_list[i]); + if (bo == NULL) + break; + + /* anv_device_release_bo is going to "free" it */ + VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1)); + anv_device_release_bo(pool->device, bo); } } @@ -1346,80 +1295,53 @@ anv_bo_pool_finish(struct anv_bo_pool *pool) } VkResult -anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size) +anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size, + struct anv_bo **bo_out) { - VkResult result; - const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size); const unsigned pow2_size = 1 << size_log2; const unsigned bucket = size_log2 - 12; assert(bucket < ARRAY_SIZE(pool->free_list)); - void *next_free_void; - if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) { - struct bo_pool_bo_link *next_free = next_free_void; - *bo = VG_NOACCESS_READ(&next_free->bo); - assert(bo->gem_handle); - assert(bo->map == next_free); - assert(size <= bo->size); - + struct anv_bo *bo = + util_sparse_array_free_list_pop_elem(&pool->free_list[bucket]); + if (bo != NULL) { VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); - + *bo_out = bo; return VK_SUCCESS; } - struct anv_bo new_bo; - - result = anv_bo_init_new(&new_bo, pool->device, pow2_size); + VkResult result = anv_device_alloc_bo(pool->device, + pow2_size, + ANV_BO_ALLOC_MAPPED | + ANV_BO_ALLOC_SNOOPED, + &bo); if (result != VK_SUCCESS) return result; - new_bo.flags = pool->bo_flags; - - if (!anv_vma_alloc(pool->device, &new_bo)) - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); - - assert(new_bo.size == pow2_size); - - new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0); - if (new_bo.map == MAP_FAILED) { - anv_gem_close(pool->device, new_bo.gem_handle); - anv_vma_free(pool->device, &new_bo); - return vk_error(VK_ERROR_MEMORY_MAP_FAILED); - } - - /* We are removing the state flushes, so lets make sure that these buffers - * are cached/snooped. - */ - if (!pool->device->info.has_llc) { - anv_gem_set_caching(pool->device, new_bo.gem_handle, - I915_CACHING_CACHED); - } - - *bo = new_bo; - + /* We want it to look like it came from this pool */ + VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0)); VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size)); + *bo_out = bo; + return VK_SUCCESS; } void -anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in) +anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo) { - /* Make a copy in case the anv_bo happens to be storred in the BO */ - struct anv_bo bo = *bo_in; - - VG(VALGRIND_MEMPOOL_FREE(pool, bo.map)); - - struct bo_pool_bo_link *link = bo.map; - VG_NOACCESS_WRITE(&link->bo, bo); + VG(VALGRIND_MEMPOOL_FREE(pool, bo->map)); - assert(util_is_power_of_two_or_zero(bo.size)); - const unsigned size_log2 = ilog2_round_up(bo.size); + assert(util_is_power_of_two_or_zero(bo->size)); + const unsigned size_log2 = ilog2_round_up(bo->size); const unsigned bucket = size_log2 - 12; assert(bucket < ARRAY_SIZE(pool->free_list)); - anv_ptr_free_list_push(&pool->free_list[bucket], link); + assert(util_sparse_array_get(&pool->device->bo_cache.bo_map, + bo->gem_handle) == bo); + util_sparse_array_free_list_push(&pool->free_list[bucket], + &bo->gem_handle, 1); } // Scratch pool diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index a677345c121..de03323fac2 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -308,8 +308,8 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, - ANV_CMD_BUFFER_BATCH_SIZE); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, + ANV_CMD_BUFFER_BATCH_SIZE, &bbo->bo); if (result != VK_SUCCESS) goto fail_alloc; @@ -322,7 +322,7 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; fail_bo_alloc: - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); fail_alloc: vk_free(&cmd_buffer->pool->alloc, bbo); @@ -341,8 +341,8 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, if (bbo == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, - other_bbo->bo.size); + result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, + other_bbo->bo->size, &bbo->bo); if (result != VK_SUCCESS) goto fail_alloc; @@ -352,14 +352,13 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, goto fail_bo_alloc; bbo->length = other_bbo->length; - memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); - + memcpy(bbo->bo->map, other_bbo->bo->map, other_bbo->length); *bbo_out = bbo; return VK_SUCCESS; fail_bo_alloc: - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); fail_alloc: vk_free(&cmd_buffer->pool->alloc, bbo); @@ -370,8 +369,8 @@ static void anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, size_t batch_padding) { - batch->next = batch->start = bbo->bo.map; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->next = batch->start = bbo->bo->map; + batch->end = bbo->bo->map + bbo->bo->size - batch_padding; batch->relocs = &bbo->relocs; bbo->relocs.num_relocs = 0; _mesa_set_clear(bbo->relocs.deps, NULL); @@ -381,16 +380,16 @@ static void anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, size_t batch_padding) { - batch->start = bbo->bo.map; - batch->next = bbo->bo.map + bbo->length; - batch->end = bbo->bo.map + bbo->bo.size - batch_padding; + batch->start = bbo->bo->map; + batch->next = bbo->bo->map + bbo->length; + batch->end = bbo->bo->map + bbo->bo->size - batch_padding; batch->relocs = &bbo->relocs; } static void anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) { - assert(batch->start == bbo->bo.map); + assert(batch->start == bbo->bo->map); bbo->length = batch->next - batch->start; VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); } @@ -400,25 +399,25 @@ anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo, struct anv_batch *batch, size_t aditional, size_t batch_padding) { - assert(batch->start == bbo->bo.map); + assert(batch->start == bbo->bo->map); bbo->length = batch->next - batch->start; - size_t new_size = bbo->bo.size; + size_t new_size = bbo->bo->size; while (new_size <= bbo->length + aditional + batch_padding) new_size *= 2; - if (new_size == bbo->bo.size) + if (new_size == bbo->bo->size) return VK_SUCCESS; - struct anv_bo new_bo; + struct anv_bo *new_bo; VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, - &new_bo, new_size); + new_size, &new_bo); if (result != VK_SUCCESS) return result; - memcpy(new_bo.map, bbo->bo.map, bbo->length); + memcpy(new_bo->map, bbo->bo->map, bbo->length); - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); bbo->bo = new_bo; anv_batch_bo_continue(bbo, batch, batch_padding); @@ -434,24 +433,24 @@ anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer, { const uint32_t bb_start_offset = prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4; - ASSERTED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset; + ASSERTED const uint32_t *bb_start = prev_bbo->bo->map + bb_start_offset; /* Make sure we're looking at a MI_BATCH_BUFFER_START */ assert(((*bb_start >> 29) & 0x07) == 0); assert(((*bb_start >> 23) & 0x3f) == 49); if (cmd_buffer->device->instance->physicalDevice.use_softpin) { - assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED); - assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED); + assert(prev_bbo->bo->flags & EXEC_OBJECT_PINNED); + assert(next_bbo->bo->flags & EXEC_OBJECT_PINNED); write_reloc(cmd_buffer->device, - prev_bbo->bo.map + bb_start_offset + 4, - next_bbo->bo.offset + next_bbo_offset, true); + prev_bbo->bo->map + bb_start_offset + 4, + next_bbo->bo->offset + next_bbo_offset, true); } else { uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1; assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4); - prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo; + prev_bbo->relocs.reloc_bos[reloc_idx] = next_bbo->bo; prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset; /* Use a bogus presumed offset to force a relocation */ @@ -464,7 +463,7 @@ anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_cmd_buffer *cmd_buffer) { anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); - anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); + anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); vk_free(&cmd_buffer->pool->alloc, bbo); } @@ -561,9 +560,9 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, * chaining command, let's set it back where it should go. */ batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); + assert(batch->end == current_bbo->bo->map + current_bbo->bo->size); - emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); + emit_batch_buffer_start(cmd_buffer, bbo->bo, 0); anv_batch_bo_finish(current_bbo, batch); } @@ -870,7 +869,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * with our BATCH_BUFFER_END in another BO. */ cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size); anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe); @@ -911,11 +910,11 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * chaining command, let's set it back where it should go. */ cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; - assert(cmd_buffer->batch.start == batch_bo->bo.map); - assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); + assert(cmd_buffer->batch.start == batch_bo->bo->map); + assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size); - emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0); - assert(cmd_buffer->batch.start == batch_bo->bo.map); + emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0); + assert(cmd_buffer->batch.start == batch_bo->bo->map); } else { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; } @@ -961,10 +960,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *last_bbo = list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - emit_batch_buffer_start(primary, &first_bbo->bo, 0); + emit_batch_buffer_start(primary, first_bbo->bo, 0); struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); - assert(primary->batch.start == this_bbo->bo.map); + assert(primary->batch.start == this_bbo->bo->map); uint32_t offset = primary->batch.next - primary->batch.start; /* Make the tail of the secondary point back to right after the @@ -1383,7 +1382,7 @@ relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo **bbo; u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { anv_reloc_list_apply(cmd_buffer->device, - &(*bbo)->relocs, &(*bbo)->bo, false); + &(*bbo)->relocs, (*bbo)->bo, false); } for (uint32_t i = 0; i < exec->bo_count; i++) @@ -1465,10 +1464,10 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, */ struct anv_batch_bo **bbo; u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { - adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, + adjust_relocations_to_state_pool(ss_pool, (*bbo)->bo, &(*bbo)->relocs, cmd_buffer->last_ss_pool_center); - result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0, + result = anv_execbuf_add_bo(execbuf, (*bbo)->bo, &(*bbo)->relocs, 0, &cmd_buffer->device->alloc); if (result != VK_SUCCESS) return result; @@ -1488,20 +1487,20 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, * corresponding to the first batch_bo in the chain with the last * element in the list. */ - if (first_batch_bo->bo.index != execbuf->bo_count - 1) { - uint32_t idx = first_batch_bo->bo.index; + if (first_batch_bo->bo->index != execbuf->bo_count - 1) { + uint32_t idx = first_batch_bo->bo->index; uint32_t last_idx = execbuf->bo_count - 1; struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; - assert(execbuf->bos[idx] == &first_batch_bo->bo); + assert(execbuf->bos[idx] == first_batch_bo->bo); execbuf->objects[idx] = execbuf->objects[last_idx]; execbuf->bos[idx] = execbuf->bos[last_idx]; execbuf->bos[idx]->index = idx; execbuf->objects[last_idx] = tmp_obj; - execbuf->bos[last_idx] = &first_batch_bo->bo; - first_batch_bo->bo.index = last_idx; + execbuf->bos[last_idx] = first_batch_bo->bo; + first_batch_bo->bo->index = last_idx; } /* If we are pinning our BOs, we shouldn't have to relocate anything */ @@ -1523,7 +1522,7 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, __builtin_ia32_mfence(); u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) - __builtin_ia32_clflush((*bbo)->bo.map + i); + __builtin_ia32_clflush((*bbo)->bo->map + i); } } @@ -1732,7 +1731,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device, switch (impl->type) { case ANV_FENCE_TYPE_BO: assert(!pdevice->has_syncobj_wait); - result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL, + result = anv_execbuf_add_bo(&execbuf, impl->bo.bo, NULL, EXEC_OBJECT_WRITE, &device->alloc); if (result != VK_SUCCESS) return result; @@ -1756,8 +1755,8 @@ anv_cmd_buffer_execbuf(struct anv_device *device, struct anv_batch_bo **bo = u_vector_tail(&cmd_buffer->seen_bbos); device->cmd_buffer_being_decoded = cmd_buffer; - gen_print_batch(&device->decoder_ctx, (*bo)->bo.map, - (*bo)->bo.size, (*bo)->bo.offset, false); + gen_print_batch(&device->decoder_ctx, (*bo)->bo->map, + (*bo)->bo->size, (*bo)->bo->offset, false); device->cmd_buffer_being_decoded = NULL; } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 81cee9fe496..ff524a772b3 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -2373,13 +2373,13 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) { /* The decoder zeroes out the top 16 bits, so we need to as well */ - uint64_t bo_address = (*bo)->bo.offset & (~0ull >> 16); + uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16); - if (address >= bo_address && address < bo_address + (*bo)->bo.size) { + if (address >= bo_address && address < bo_address + (*bo)->bo->size) { return (struct gen_batch_decode_bo) { .addr = bo_address, - .size = (*bo)->bo.size, - .map = (*bo)->bo.map, + .size = (*bo)->bo->size, + .map = (*bo)->bo->map, }; } } @@ -2612,16 +2612,16 @@ VkResult anv_CreateDevice( (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0) | (physical_device->use_softpin ? EXEC_OBJECT_PINNED : 0); - anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags); - result = anv_bo_cache_init(&device->bo_cache); if (result != VK_SUCCESS) - goto fail_batch_bo_pool; + goto fail_queue_cond; + + anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags); result = anv_state_pool_init(&device->dynamic_state_pool, device, DYNAMIC_STATE_POOL_MIN_ADDRESS, 16384); if (result != VK_SUCCESS) - goto fail_bo_cache; + goto fail_batch_bo_pool; result = anv_state_pool_init(&device->instruction_state_pool, device, INSTRUCTION_STATE_POOL_MIN_ADDRESS, 16384); @@ -2727,10 +2727,10 @@ VkResult anv_CreateDevice( anv_state_pool_finish(&device->instruction_state_pool); fail_dynamic_state_pool: anv_state_pool_finish(&device->dynamic_state_pool); - fail_bo_cache: - anv_bo_cache_finish(&device->bo_cache); fail_batch_bo_pool: anv_bo_pool_finish(&device->batch_bo_pool); + anv_bo_cache_finish(&device->bo_cache); + fail_queue_cond: pthread_cond_destroy(&device->queue_submit); fail_mutex: pthread_mutex_destroy(&device->mutex); @@ -2797,10 +2797,10 @@ void anv_DestroyDevice( anv_state_pool_finish(&device->instruction_state_pool); anv_state_pool_finish(&device->dynamic_state_pool); - anv_bo_cache_finish(&device->bo_cache); - anv_bo_pool_finish(&device->batch_bo_pool); + anv_bo_cache_finish(&device->bo_cache); + if (physical_device->use_softpin) { util_vma_heap_finish(&device->vma_hi); util_vma_heap_finish(&device->vma_lo); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 07ef198cb4d..0ddcea69e53 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -608,6 +608,9 @@ struct anv_bo { */ uint32_t index; + /* Index for use with util_sparse_array_free_list */ + uint32_t free_index; + /* Last known offset. This value is provided by the kernel when we * execbuf and is used as the presumed offset for the next bunch of * relocations. @@ -898,15 +901,15 @@ struct anv_bo_pool { uint64_t bo_flags; - void *free_list[16]; + struct util_sparse_array_free_list free_list[16]; }; void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device, uint64_t bo_flags); void anv_bo_pool_finish(struct anv_bo_pool *pool); -VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, - uint32_t size); -void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo); +VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size, + struct anv_bo **bo_out); +void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo); struct anv_scratch_bo { bool exists; @@ -1400,7 +1403,7 @@ struct anv_batch_bo { /* Link in the anv_cmd_buffer.owned_batch_bos list */ struct list_head link; - struct anv_bo bo; + struct anv_bo * bo; /* Bytes actually consumed in this batch BO */ uint32_t length; @@ -2739,7 +2742,7 @@ struct anv_fence_impl { * will say it's idle in this case. */ struct { - struct anv_bo bo; + struct anv_bo *bo; enum anv_bo_fence_state state; } bo; diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index bee92168e90..6dbb50246e7 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -61,27 +61,26 @@ anv_device_submit_simple_batch(struct anv_device *device, { struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 exec2_objects[1]; - struct anv_bo bo, *exec_bos[1]; + struct anv_bo *bo; VkResult result = VK_SUCCESS; uint32_t size; /* Kernel driver requires 8 byte aligned batch length */ size = align_u32(batch->next - batch->start, 8); - result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size); + result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &bo); if (result != VK_SUCCESS) return result; - memcpy(bo.map, batch->start, size); + memcpy(bo->map, batch->start, size); if (!device->info.has_llc) - gen_flush_range(bo.map, size); + gen_flush_range(bo->map, size); - exec_bos[0] = &bo; - exec2_objects[0].handle = bo.gem_handle; + exec2_objects[0].handle = bo->gem_handle; exec2_objects[0].relocation_count = 0; exec2_objects[0].relocs_ptr = 0; exec2_objects[0].alignment = 0; - exec2_objects[0].offset = bo.offset; - exec2_objects[0].flags = bo.flags; + exec2_objects[0].offset = bo->offset; + exec2_objects[0].flags = bo->flags; exec2_objects[0].rsvd1 = 0; exec2_objects[0].rsvd2 = 0; @@ -99,17 +98,19 @@ anv_device_submit_simple_batch(struct anv_device *device, execbuf.rsvd1 = device->context_id; execbuf.rsvd2 = 0; - if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) - gen_print_batch(&device->decoder_ctx, bo.map, bo.size, bo.offset, false); + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { + gen_print_batch(&device->decoder_ctx, bo->map, + bo->size, bo->offset, false); + } - result = anv_device_execbuf(device, &execbuf, exec_bos); + result = anv_device_execbuf(device, &execbuf, &bo); if (result != VK_SUCCESS) goto fail; - result = anv_device_wait(device, &bo, INT64_MAX); + result = anv_device_wait(device, bo, INT64_MAX); fail: - anv_bo_pool_free(&device->batch_bo_pool, &bo); + anv_bo_pool_free(&device->batch_bo_pool, bo); return result; } @@ -288,8 +289,8 @@ VkResult anv_CreateFence( } else { fence->permanent.type = ANV_FENCE_TYPE_BO; - VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, - &fence->permanent.bo.bo, 4096); + VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 4096, + &fence->permanent.bo.bo); if (result != VK_SUCCESS) return result; @@ -315,7 +316,7 @@ anv_fence_impl_cleanup(struct anv_device *device, break; case ANV_FENCE_TYPE_BO: - anv_bo_pool_free(&device->batch_bo_pool, &impl->bo.bo); + anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo); break; case ANV_FENCE_TYPE_SYNCOBJ: @@ -417,7 +418,7 @@ VkResult anv_GetFenceStatus( return VK_SUCCESS; case ANV_BO_FENCE_STATE_SUBMITTED: { - VkResult result = anv_device_bo_busy(device, &impl->bo.bo); + VkResult result = anv_device_bo_busy(device, impl->bo.bo); if (result == VK_SUCCESS) { impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED; return VK_SUCCESS; @@ -591,7 +592,7 @@ anv_wait_for_bo_fences(struct anv_device *device, /* These are the fences we really care about. Go ahead and wait * on it until we hit a timeout. */ - result = anv_device_wait(device, &impl->bo.bo, + result = anv_device_wait(device, impl->bo.bo, anv_get_relative_timeout(abs_timeout_ns)); switch (result) { case VK_SUCCESS: