From fb827c055cb1bdd2b18d0687c06c56b537d805f3 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Nicolai=20H=C3=A4hnle?=
Date: Mon, 12 Sep 2016 12:19:47 +0200
Subject: [PATCH] winsys/radeon: enable buffer allocation from slabs
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Only enable for chips with GPUVM, because older driver paths do not take the
required offset into account.

Reviewed-by: Marek Olšák
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 170 ++++++++++++++++++
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 12 ++
 .../winsys/radeon/drm/radeon_drm_winsys.c | 24 ++-
 .../winsys/radeon/drm/radeon_drm_winsys.h | 5 +
 4 files changed, 209 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index f9cf2e0d2cd..3af01f82fb6 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -42,6 +42,13 @@
 #include
 #include
 
+static struct pb_buffer *
+radeon_winsys_bo_create(struct radeon_winsys *rws,
+                        uint64_t size,
+                        unsigned alignment,
+                        enum radeon_bo_domain domain,
+                        enum radeon_bo_flag flags);
+
 static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
 {
     return (struct radeon_bo *)bo;
@@ -700,6 +707,120 @@ bool radeon_bo_can_reclaim(struct pb_buffer *_buf)
     return radeon_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
 }
 
+bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
+{
+    struct radeon_bo *bo = NULL; /* fix container_of */
+    bo = container_of(entry, bo, u.slab.entry);
+    /* A slab entry is reclaimable exactly when radeon_bo_can_reclaim() accepts the bo that embeds it. */
+    return radeon_bo_can_reclaim(&bo->base);
+}
+/* Destroy hook for slab entries: asserts the bo has no handle, then returns the entry via pb_slab_free(). */
+static void radeon_bo_slab_destroy(struct pb_buffer *_buf)
+{
+    struct radeon_bo *bo = radeon_bo(_buf);
+
+    assert(!bo->handle);
+
+    pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
+}
+/* vtbl installed on every bo that radeon_bo_slab_alloc() carves out of a slab. */
+static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
+    radeon_bo_slab_destroy
+    /* other functions are never called */
+};
+/* Slab-allocation callback: requests a 64 KiB backing bo and splits it into entry_size-byte entries; NULL on failure. */
+struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
+                                     unsigned entry_size,
+                                     unsigned group_index)
+{
+    struct radeon_drm_winsys *ws = priv;
+    struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
+    enum radeon_bo_domain domains;
+    enum radeon_bo_flag flags = 0;
+    unsigned base_hash;
+
+    if (!slab)
+        return NULL;
+    /* heap bit 0 selects GTT_WC, bit 1 selects CPU_ACCESS, and heap >> 2 selects the domain. */
+    if (heap & 1)
+        flags |= RADEON_FLAG_GTT_WC;
+    if (heap & 2)
+        flags |= RADEON_FLAG_CPU_ACCESS;
+
+    switch (heap >> 2) {
+    case 0:
+        domains = RADEON_DOMAIN_VRAM;
+        break;
+    default:
+    case 1:
+        domains = RADEON_DOMAIN_VRAM_GTT;
+        break;
+    case 2:
+        domains = RADEON_DOMAIN_GTT;
+        break;
+    }
+
+    slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
+                                                     64 * 1024, 64 * 1024,
+                                                     domains, flags));
+    if (!slab->buffer)
+        goto fail;
+
+    assert(slab->buffer->handle);
+
+    slab->base.num_entries = slab->buffer->base.size / entry_size;
+    slab->base.num_free = slab->base.num_entries;
+    slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
+    if (!slab->entries)
+        goto fail_buffer;
+
+    LIST_INITHEAD(&slab->base.free);
+
+    base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);
+
+    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+        struct radeon_bo *bo = &slab->entries[i];
+
+        bo->base.alignment = entry_size;
+        bo->base.usage = slab->buffer->base.usage;
+        bo->base.size = entry_size;
+        bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
+        bo->rws = ws;
+        bo->va = slab->buffer->va + i * entry_size;
+        bo->initial_domain = domains;
+        bo->hash = base_hash + i;
+        bo->u.slab.entry.slab = &slab->base;
+        bo->u.slab.entry.group_index = group_index;
+        bo->u.slab.real = slab->buffer;
+
+        LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
+    }
+
+    return &slab->base;
+
+fail_buffer:
+    radeon_bo_reference(&slab->buffer, NULL);
+fail:
+    FREE(slab);
+    return NULL;
+}
+/* Slab-free callback: drops each entry's fence references, then releases the entry array, the backing bo and the slab. */
+void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
+{
+    struct radeon_slab *slab = (struct radeon_slab *)pslab;
+
+    for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+        struct radeon_bo *bo = &slab->entries[i];
+        for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
+            radeon_bo_reference(&bo->u.slab.fences[j], NULL);
+        FREE(bo->u.slab.fences);
+    }
+
+    FREE(slab->entries);
+    radeon_bo_reference(&slab->buffer, NULL);
+    FREE(slab);
+}
+
 static unsigned eg_tile_split(unsigned tile_split)
 {
     switch (tile_split) {
@@ -823,6 +944,54 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
     if (size > UINT_MAX)
         return NULL;
 
+    /* Sub-allocate small buffers from slabs. */
+    if (!(flags & RADEON_FLAG_HANDLE) &&
+        size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
+        ws->info.has_virtual_memory &&
+        alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
+        struct pb_slab_entry *entry;
+        unsigned heap = 0;
+        /* Build the heap index: bit 0 = GTT_WC, bit 1 = CPU_ACCESS, domain index * 4; other flags/domains skip the slabs. */
+        if (flags & RADEON_FLAG_GTT_WC)
+            heap |= 1;
+        if (flags & RADEON_FLAG_CPU_ACCESS)
+            heap |= 2;
+        if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+            goto no_slab;
+
+        switch (domain) {
+        case RADEON_DOMAIN_VRAM:
+            heap |= 0 * 4;
+            break;
+        case RADEON_DOMAIN_VRAM_GTT:
+            heap |= 1 * 4;
+            break;
+        case RADEON_DOMAIN_GTT:
+            heap |= 2 * 4;
+            break;
+        default:
+            goto no_slab;
+        }
+
+        entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+        if (!entry) {
+            /* Clear the cache and try again. */
+            pb_cache_release_all_buffers(&ws->bo_cache);
+
+            entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+        }
+        if (!entry)
+            return NULL;
+
+        bo = NULL;
+        bo = container_of(entry, bo, u.slab.entry);
+
+        pipe_reference_init(&bo->base.reference, 1);
+
+        return &bo->base;
+    }
+no_slab:
+
     /* This flag is irrelevant for the cache. */
     flags &= ~RADEON_FLAG_HANDLE;
 
@@ -862,6 +1031,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
                               pb_cache_bucket);
     if (!bo) {
         /* Clear the cache and try again. */
+        pb_slabs_reclaim(&ws->bo_slabs);
         pb_cache_release_all_buffers(&ws->bo_cache);
         bo = radeon_create_bo(ws, size, alignment, usage, domain, flags,
                               pb_cache_bucket);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 8f767fd2c73..236e94cbbff 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -74,10 +74,22 @@ struct radeon_bo {
     int num_active_ioctls;
 };
 
+struct radeon_slab {
+    struct pb_slab base;
+    struct radeon_bo *buffer;
+    struct radeon_bo *entries;
+};
+
 void radeon_bo_destroy(struct pb_buffer *_buf);
 bool radeon_bo_can_reclaim(struct pb_buffer *_buf);
 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws);
 
+bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
+struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
+                                     unsigned entry_size,
+                                     unsigned group_index);
+void radeon_bo_slab_free(void *priv, struct pb_slab *slab);
+
 static inline
 void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
 {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index e02f286b0c2..ae55746654b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -545,6 +545,8 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
     pipe_mutex_destroy(ws->hyperz_owner_mutex);
     pipe_mutex_destroy(ws->cmask_owner_mutex);
 
+    if (ws->info.has_virtual_memory)
+        pb_slabs_deinit(&ws->bo_slabs);
     pb_cache_deinit(&ws->bo_cache);
 
     if (ws->gen >= DRV_R600) {
@@ -759,10 +761,25 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
                   radeon_bo_destroy,
                   radeon_bo_can_reclaim);
 
+    if (ws->info.has_virtual_memory) {
+        /* There is no fundamental obstacle to using slab buffer allocation
+         * without GPUVM, but enabling it requires making sure that the drivers
+         * honor the address offset.
+         */
+        if (!pb_slabs_init(&ws->bo_slabs,
+                           RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2,
+                           12,
+                           ws,
+                           radeon_bo_can_reclaim_slab,
+                           radeon_bo_slab_alloc,
+                           radeon_bo_slab_free))
+            goto fail_cache;
+    }
+
     if (ws->gen >= DRV_R600) {
         ws->surf_man = radeon_surface_manager_new(ws->fd);
         if (!ws->surf_man)
-            goto fail;
+            goto fail_slab;
     }
 
     /* init reference */
@@ -819,7 +836,10 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
 
     return &ws->base;
 
-fail:
+fail_slab:
+    if (ws->info.has_virtual_memory)
+        pb_slabs_deinit(&ws->bo_slabs);
+fail_cache:
     pb_cache_deinit(&ws->bo_cache);
 fail1:
     pipe_mutex_unlock(fd_tab_mutex);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index b30055cf976..934cd584f86 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -32,6 +32,7 @@
 
 #include "gallium/drivers/radeon/radeon_winsys.h"
 #include "pipebuffer/pb_cache.h"
+#include "pipebuffer/pb_slab.h"
 #include "util/u_queue.h"
 #include "util/list.h"
 #include
@@ -62,10 +63,14 @@ enum radeon_generation {
     DRV_SI
 };
 
+#define RADEON_SLAB_MIN_SIZE_LOG2 9
+#define RADEON_SLAB_MAX_SIZE_LOG2 14
+
 struct radeon_drm_winsys {
     struct radeon_winsys base;
     struct pipe_reference reference;
     struct pb_cache bo_cache;
+    struct pb_slabs bo_slabs;
 
     int fd; /* DRM file descriptor */
     int num_cs; /* The number of command streams created. */
-- 
2.30.2