From cf6835485cd57173b002186a8b3f51239e59e431 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 21 Nov 2018 02:10:14 -0500
Subject: [PATCH] radeonsi: generalize the slab allocator code to allow layered
 slab allocators

There is no change in behavior. It just makes it easier to change the
number of slab allocators.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c     | 60 +++++++++++++++----
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 40 +++++++++----
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h | 10 ++--
 3 files changed, 86 insertions(+), 24 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 1402e5c2ec2..80958d08f74 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -220,7 +220,9 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
 
 static void amdgpu_clean_up_buffer_managers(struct amdgpu_winsys *ws)
 {
-   pb_slabs_reclaim(&ws->bo_slabs);
+   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++)
+      pb_slabs_reclaim(&ws->bo_slabs[i]);
+
    pb_cache_release_all_buffers(&ws->bo_cache);
 }
 
@@ -561,13 +563,27 @@ bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
    return amdgpu_bo_can_reclaim(&bo->base);
 }
 
+static struct pb_slabs *get_slabs(struct amdgpu_winsys *ws, uint64_t size)
+{
+   /* Find the correct slab allocator for the given size. */
+   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
+      struct pb_slabs *slabs = &ws->bo_slabs[i];
+
+      if (size <= 1 << (slabs->min_order + slabs->num_orders - 1))
+         return slabs;
+   }
+
+   assert(0);
+   return NULL;
+}
+
 static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
 {
    struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
 
    assert(!bo->bo);
 
-   pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry);
+   pb_slab_free(get_slabs(bo->ws, bo->base.size), &bo->u.slab.entry);
 }
 
 static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
@@ -584,19 +600,29 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
    enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
    enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
    uint32_t base_id;
+   unsigned slab_size = 0;
 
    if (!slab)
      return NULL;
 
-   unsigned slab_size = 1 << AMDGPU_SLAB_BO_SIZE_LOG2;
+   /* Determine the slab buffer size. */
+   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
+      struct pb_slabs *slabs = &ws->bo_slabs[i];
+      unsigned max_entry_size = 1 << (slabs->min_order + slabs->num_orders - 1);
+
+      if (entry_size <= max_entry_size) {
+         /* The slab size is twice the size of the largest possible entry. */
+         slab_size = max_entry_size * 2;
+      }
+   }
+   assert(slab_size != 0);
+
    slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
                                                     slab_size, slab_size,
                                                     domains, flags));
    if (!slab->buffer)
       goto fail;
 
-   assert(slab->buffer->bo);
-
    slab->base.num_entries = slab->buffer->base.size / entry_size;
    slab->base.num_free = slab->base.num_entries;
    slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
@@ -621,7 +647,15 @@ struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
       bo->unique_id = base_id + i;
       bo->u.slab.entry.slab = &slab->base;
       bo->u.slab.entry.group_index = group_index;
-      bo->u.slab.real = slab->buffer;
+
+      if (slab->buffer->bo) {
+         /* The slab is not suballocated. */
+         bo->u.slab.real = slab->buffer;
+      } else {
+         /* The slab is allocated out of a bigger slab. */
+         bo->u.slab.real = slab->buffer->u.slab.real;
+         assert(bo->u.slab.real->bo);
+      }
 
       LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
    }
@@ -1235,22 +1269,28 @@ amdgpu_bo_create(struct radeon_winsys *rws,
    /* Sparse buffers must have NO_CPU_ACCESS set. */
    assert(!(flags & RADEON_FLAG_SPARSE) || flags & RADEON_FLAG_NO_CPU_ACCESS);
 
+   struct pb_slabs *last_slab = &ws->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
+   unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);
+
    /* Sub-allocate small buffers from slabs. */
    if (!(flags & (RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_SPARSE)) &&
-       size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
-       alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
+       size <= max_slab_entry_size &&
+       /* The alignment must be at most the size of the smallest slab entry or
+        * the next power of two. */
+       alignment <= MAX2(1 << ws->bo_slabs[0].min_order, util_next_power_of_two(size))) {
       struct pb_slab_entry *entry;
       int heap = radeon_get_heap_index(domain, flags);
 
       if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
          goto no_slab;
 
-      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+      struct pb_slabs *slabs = get_slabs(ws, size);
+      entry = pb_slab_alloc(slabs, size, heap);
       if (!entry) {
          /* Clean up buffer managers and try again. */
         amdgpu_clean_up_buffer_managers(ws);
 
-         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+         entry = pb_slab_alloc(slabs, size, heap);
       }
       if (!entry)
          return NULL;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index b20d702670d..91120e3c474 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -95,7 +95,10 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
       util_queue_destroy(&ws->cs_queue);
 
    simple_mtx_destroy(&ws->bo_fence_lock);
-   pb_slabs_deinit(&ws->bo_slabs);
+   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
+      if (ws->bo_slabs[i].groups)
+         pb_slabs_deinit(&ws->bo_slabs[i]);
+   }
    pb_cache_deinit(&ws->bo_cache);
    util_hash_table_destroy(ws->bo_export_table);
    simple_mtx_destroy(&ws->global_bo_list_lock);
@@ -307,16 +310,33 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
                  (ws->info.vram_size + ws->info.gart_size) / 8,
                  amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
 
-   if (!pb_slabs_init(&ws->bo_slabs,
-                      AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2,
-                      RADEON_MAX_SLAB_HEAPS,
-                      ws,
-                      amdgpu_bo_can_reclaim_slab,
-                      amdgpu_bo_slab_alloc,
-                      amdgpu_bo_slab_free))
-      goto fail_cache;
+   unsigned min_slab_order = 9; /* 512 bytes */
+   unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */
+   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
+                                            NUM_SLAB_ALLOCATORS;
+
+   /* Divide the size order range among slab managers. */
+   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
+      unsigned min_order = min_slab_order;
+      unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
+                                max_slab_order);
+
+      if (!pb_slabs_init(&ws->bo_slabs[i],
+                         min_order, max_order,
+                         RADEON_MAX_SLAB_HEAPS,
+                         ws,
+                         amdgpu_bo_can_reclaim_slab,
+                         amdgpu_bo_slab_alloc,
+                         amdgpu_bo_slab_free)) {
+         amdgpu_winsys_destroy(&ws->base);
+         simple_mtx_unlock(&dev_tab_mutex);
+         return NULL;
+      }
+
+      min_slab_order = max_order + 1;
+   }
 
-   ws->info.min_alloc_size = 1 << AMDGPU_SLAB_MIN_SIZE_LOG2;
+   ws->info.min_alloc_size = 1 << ws->bo_slabs[0].min_order;
 
    /* init reference */
    pipe_reference_init(&ws->reference, 1);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index c355eff5262..fc8f04544a9 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -38,15 +38,17 @@
 
 struct amdgpu_cs;
 
-#define AMDGPU_SLAB_MIN_SIZE_LOG2 9 /* 512 bytes */
-#define AMDGPU_SLAB_MAX_SIZE_LOG2 16 /* 64 KB */
-#define AMDGPU_SLAB_BO_SIZE_LOG2 17 /* 128 KB */
+#define NUM_SLAB_ALLOCATORS 1
 
 struct amdgpu_winsys {
    struct radeon_winsys base;
    struct pipe_reference reference;
 
    struct pb_cache bo_cache;
-   struct pb_slabs bo_slabs;
+
+   /* Each slab buffer can only contain suballocations of equal sizes, so we
+    * need to layer the allocators, so that we don't waste too much memory.
+    */
+   struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
 
    amdgpu_device_handle dev;
-- 
2.30.2
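
Note (not part of the patch): the standalone C sketch below only illustrates the arithmetic the patch introduces, i.e. how amdgpu_winsys_create() divides the order range [min_slab_order, max_slab_order] among the allocators and how get_slabs() then picks an allocator for a given buffer size. NUM_ALLOCATORS, struct order_range and main() are illustrative stand-ins rather than Mesa code, and NUM_ALLOCATORS is set to 3 only to make the layering visible; the patch itself defines NUM_SLAB_ALLOCATORS as 1.

/* Standalone illustration only; mirrors the patch's order-range split and
 * size-to-allocator lookup. NUM_ALLOCATORS stands in for NUM_SLAB_ALLOCATORS.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_ALLOCATORS 3

struct order_range {
   unsigned min_order; /* log2 of the smallest entry size */
   unsigned max_order; /* log2 of the largest entry size */
};

int main(void)
{
   unsigned min_slab_order = 9;  /* 512 bytes */
   unsigned max_slab_order = 16; /* 64 KB */
   unsigned orders_per_allocator =
      (max_slab_order - min_slab_order) / NUM_ALLOCATORS;
   struct order_range ranges[NUM_ALLOCATORS];

   /* Same splitting loop as amdgpu_winsys_create() in the patch. */
   for (unsigned i = 0; i < NUM_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order = min_order + orders_per_allocator;

      if (max_order > max_slab_order)
         max_order = max_slab_order;

      ranges[i].min_order = min_order;
      ranges[i].max_order = max_order;

      /* The slab buffer is twice the size of the largest possible entry. */
      printf("allocator %u: entries %u..%u bytes, slab buffer %u bytes\n",
             i, 1u << min_order, 1u << max_order, 2u << max_order);

      min_slab_order = max_order + 1;
   }

   /* Same selection rule as get_slabs(): the first allocator whose largest
    * entry size is big enough for the request.
    */
   uint64_t size = 20000; /* example request */
   for (unsigned i = 0; i < NUM_ALLOCATORS; i++) {
      if (size <= (1u << ranges[i].max_order)) {
         printf("size %llu -> allocator %u\n", (unsigned long long)size, i);
         break;
      }
   }
   return 0;
}

With NUM_ALLOCATORS set to 1, the split reduces to a single allocator covering orders 9..16, which matches the behavior the patch keeps for now.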