From: Marek Olšák Date: Sat, 16 Jul 2016 22:53:55 +0000 (+0200) Subject: gallium/pb_cache: divide the cache into buckets for reducing cache misses X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3cdc0e133f77b5095525d23a30827b40954be1c7;p=mesa.git gallium/pb_cache: divide the cache into buckets for reducing cache misses Reviewed-by: Nicolai Hähnle --- diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 4dbf3ff97be..250f739c8b0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -210,7 +210,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, /* get a buffer from the cache */ buf = (struct pb_cache_buffer *) pb_cache_reclaim_buffer(&mgr->cache, size, desc->alignment, - desc->usage); + desc->usage, 0); if (buf) return &buf->base; @@ -243,7 +243,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, buf->base.vtbl = &pb_cache_buffer_vtbl; buf->mgr = mgr; - pb_cache_init_entry(&mgr->cache, &buf->cache_entry, &buf->base); + pb_cache_init_entry(&mgr->cache, &buf->cache_entry, &buf->base, 0); return &buf->base; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_cache.c b/src/gallium/auxiliary/pipebuffer/pb_cache.c index 6a43cbc85be..664867befef 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_cache.c @@ -53,7 +53,7 @@ destroy_buffer_locked(struct pb_cache_entry *entry) * Free as many cache buffers from the list head as possible. */ static void -release_expired_buffers_locked(struct pb_cache *mgr) +release_expired_buffers_locked(struct list_head *cache) { struct list_head *curr, *next; struct pb_cache_entry *entry; @@ -61,9 +61,9 @@ release_expired_buffers_locked(struct pb_cache *mgr) now = os_time_get(); - curr = mgr->cache.next; + curr = cache->next; next = curr->next; - while (curr != &mgr->cache) { + while (curr != cache) { entry = LIST_ENTRY(struct pb_cache_entry, curr, head); if (!os_time_timeout(entry->start, entry->end, now)) @@ -84,11 +84,14 @@ void pb_cache_add_buffer(struct pb_cache_entry *entry) { struct pb_cache *mgr = entry->mgr; + struct list_head *cache = &mgr->buckets[entry->bucket_index]; + unsigned i; pipe_mutex_lock(mgr->mutex); assert(!pipe_is_referenced(&entry->buffer->reference)); - release_expired_buffers_locked(mgr); + for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++) + release_expired_buffers_locked(&mgr->buckets[i]); /* Directly release any buffer that exceeds the limit. */ if (mgr->cache_size + entry->buffer->size > mgr->max_cache_size) { @@ -99,7 +102,7 @@ pb_cache_add_buffer(struct pb_cache_entry *entry) entry->start = os_time_get(); entry->end = entry->start + mgr->usecs; - LIST_ADDTAIL(&entry->head, &mgr->cache); + LIST_ADDTAIL(&entry->head, cache); ++mgr->num_buffers; mgr->cache_size += entry->buffer->size; pipe_mutex_unlock(mgr->mutex); @@ -140,23 +143,25 @@ pb_cache_is_buffer_compat(struct pb_cache_entry *entry, */ struct pb_buffer * pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size, - unsigned alignment, unsigned usage) + unsigned alignment, unsigned usage, + unsigned bucket_index) { struct pb_cache_entry *entry; struct pb_cache_entry *cur_entry; struct list_head *cur, *next; int64_t now; int ret = 0; + struct list_head *cache = &mgr->buckets[bucket_index]; pipe_mutex_lock(mgr->mutex); entry = NULL; - cur = mgr->cache.next; + cur = cache->next; next = cur->next; /* search in the expired buffers, freeing them in the process */ now = os_time_get(); - while (cur != &mgr->cache) { + while (cur != cache) { cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head); if (!entry && (ret = pb_cache_is_buffer_compat(cur_entry, size, @@ -178,7 +183,7 @@ pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size, /* keep searching in the hot buffers */ if (!entry && ret != -1) { - while (cur != &mgr->cache) { + while (cur != cache) { cur_entry = LIST_ENTRY(struct pb_cache_entry, cur, head); ret = pb_cache_is_buffer_compat(cur_entry, size, alignment, usage); @@ -219,26 +224,32 @@ pb_cache_release_all_buffers(struct pb_cache *mgr) { struct list_head *curr, *next; struct pb_cache_entry *buf; + unsigned i; pipe_mutex_lock(mgr->mutex); - curr = mgr->cache.next; - next = curr->next; - while (curr != &mgr->cache) { - buf = LIST_ENTRY(struct pb_cache_entry, curr, head); - destroy_buffer_locked(buf); - curr = next; + for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++) { + struct list_head *cache = &mgr->buckets[i]; + + curr = cache->next; next = curr->next; + while (curr != cache) { + buf = LIST_ENTRY(struct pb_cache_entry, curr, head); + destroy_buffer_locked(buf); + curr = next; + next = curr->next; + } } pipe_mutex_unlock(mgr->mutex); } void pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry, - struct pb_buffer *buf) + struct pb_buffer *buf, unsigned bucket_index) { memset(entry, 0, sizeof(*entry)); entry->buffer = buf; entry->mgr = mgr; + entry->bucket_index = bucket_index; } /** @@ -262,7 +273,11 @@ pb_cache_init(struct pb_cache *mgr, uint usecs, float size_factor, void (*destroy_buffer)(struct pb_buffer *buf), bool (*can_reclaim)(struct pb_buffer *buf)) { - LIST_INITHEAD(&mgr->cache); + unsigned i; + + for (i = 0; i < ARRAY_SIZE(mgr->buckets); i++) + LIST_INITHEAD(&mgr->buckets[i]); + pipe_mutex_init(mgr->mutex); mgr->cache_size = 0; mgr->max_cache_size = maximum_cache_size; diff --git a/src/gallium/auxiliary/pipebuffer/pb_cache.h b/src/gallium/auxiliary/pipebuffer/pb_cache.h index f0fa0122602..aa83cc8b38c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_cache.h +++ b/src/gallium/auxiliary/pipebuffer/pb_cache.h @@ -42,11 +42,16 @@ struct pb_cache_entry struct pb_buffer *buffer; /**< Pointer to the structure this is part of. */ struct pb_cache *mgr; int64_t start, end; /**< Caching time interval */ + unsigned bucket_index; }; struct pb_cache { - struct list_head cache; + /* The cache is divided into buckets for minimizing cache misses. + * The driver controls which buffer goes into which bucket. + */ + struct list_head buckets[8]; + pipe_mutex mutex; uint64_t cache_size; uint64_t max_cache_size; @@ -61,10 +66,11 @@ struct pb_cache void pb_cache_add_buffer(struct pb_cache_entry *entry); struct pb_buffer *pb_cache_reclaim_buffer(struct pb_cache *mgr, pb_size size, - unsigned alignment, unsigned usage); + unsigned alignment, unsigned usage, + unsigned bucket_index); void pb_cache_release_all_buffers(struct pb_cache *mgr); void pb_cache_init_entry(struct pb_cache *mgr, struct pb_cache_entry *entry, - struct pb_buffer *buf); + struct pb_buffer *buf, unsigned bucket_index); void pb_cache_init(struct pb_cache *mgr, uint usecs, float size_factor, unsigned bypass_usage, uint64_t maximum_cache_size, void (*destroy_buffer)(struct pb_buffer *buf), diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index b4dc6c7901a..3545b78f769 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -303,7 +303,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws, return NULL; } - pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base); + pb_cache_init_entry(&ws->bo_cache, &bo->cache_entry, &bo->base, 0); request.alloc_size = size; request.phys_alignment = alignment; @@ -508,7 +508,7 @@ amdgpu_bo_create(struct radeon_winsys *rws, /* Get a buffer from the cache. */ bo = (struct amdgpu_winsys_bo*) - pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage); + pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage, 0); if (bo) return &bo->base; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 52b1ccd65a7..82dec8ed981 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -551,7 +551,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws, bo->va = 0; bo->initial_domain = initial_domains; pipe_mutex_init(bo->map_mutex); - pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base); + pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base, 0); if (rws->info.has_virtual_memory) { struct drm_radeon_gem_va va; @@ -746,7 +746,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, assert(flags < sizeof(usage) * 8 - 3); usage |= 1 << (flags + 3); - bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage)); + bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage, 0)); if (bo) return &bo->base;