From 0edebde9a48ab609c636050208505885da363593 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Nicolai=20H=C3=A4hnle?=
Date: Mon, 12 Sep 2016 10:52:35 +0200
Subject: [PATCH] winsys/radeon: add slab buffer list
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Introducing radeon_bo::hash will reduce collisions between "real" buffers
and buffers from slabs.

Reviewed-by: Marek Olšák
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |  3 +
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  1 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 98 ++++++++++++++++---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 16 ++-
 .../winsys/radeon/drm/radeon_drm_winsys.h     |  1 +
 5 files changed, 107 insertions(+), 12 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 147a87795f3..3f58b00efdd 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -587,6 +587,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
     bo->handle = args.handle;
     bo->va = 0;
     bo->initial_domain = initial_domains;
+    bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
     pipe_mutex_init(bo->u.real.map_mutex);
     pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                         pb_cache_bucket);
@@ -864,6 +865,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
     bo->user_ptr = pointer;
     bo->va = 0;
     bo->initial_domain = RADEON_DOMAIN_GTT;
+    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
     pipe_mutex_init(bo->u.real.map_mutex);
 
     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
@@ -997,6 +999,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     bo->base.vtbl = &radeon_bo_vtbl;
     bo->rws = ws;
     bo->va = 0;
+    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
     pipe_mutex_init(bo->u.real.map_mutex);
 
     if (bo->flink_name)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index b9a4a05b7c6..8e35a385ed8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -59,6 +59,7 @@ struct radeon_bo {
     uint32_t handle; /* 0 for slab entries */
     uint32_t flink_name;
     uint64_t va;
+    uint32_t hash;
     enum radeon_bo_domain initial_domain;
 
     /* how many command streams is this bo referenced in? */
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 20f90cf7cac..9fbd3783699 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -129,9 +129,14 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
         p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
         radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
     }
+    for (i = 0; i < csc->num_slab_buffers; ++i) {
+        p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references);
+        radeon_bo_reference(&csc->slab_buffers[i].bo, NULL);
+    }
 
     csc->num_relocs = 0;
     csc->num_validated_relocs = 0;
+    csc->num_slab_buffers = 0;
     csc->chunks[0].length_dw = 0;
     csc->chunks[1].length_dw = 0;
 
@@ -143,6 +148,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
 static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
 {
     radeon_cs_context_cleanup(csc);
+    FREE(csc->slab_buffers);
     FREE(csc->relocs_bo);
     FREE(csc->relocs);
 }
@@ -191,16 +197,26 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
 
 int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
 {
-    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    struct radeon_bo_item *buffers;
+    unsigned num_buffers;
     int i = csc->reloc_indices_hashlist[hash];
 
+    if (bo->handle) {
+        buffers = csc->relocs_bo;
+        num_buffers = csc->num_relocs;
+    } else {
+        buffers = csc->slab_buffers;
+        num_buffers = csc->num_slab_buffers;
+    }
+
     /* not found or found */
-    if (i == -1 || csc->relocs_bo[i].bo == bo)
+    if (i == -1 || (i < num_buffers && buffers[i].bo == bo))
         return i;
 
     /* Hash collision, look for the BO in the list of relocs linearly. */
-    for (i = csc->num_relocs - 1; i >= 0; i--) {
-        if (csc->relocs_bo[i].bo == bo) {
+    for (i = num_buffers - 1; i >= 0; i--) {
+        if (buffers[i].bo == bo) {
             /* Put this reloc in the hash list.
              * This will prevent additional hash collisions if there are
              * several consecutive lookup_buffer calls for the same buffer.
@@ -217,12 +233,12 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
     return -1;
 }
 
-static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
-                                            struct radeon_bo *bo)
+static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
+                                                 struct radeon_bo *bo)
 {
     struct radeon_cs_context *csc = cs->csc;
     struct drm_radeon_cs_reloc *reloc;
-    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
     int i = -1;
 
     i = radeon_lookup_buffer(csc, bo);
@@ -259,7 +275,7 @@ static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
 
     /* Initialize the new relocation. */
     csc->relocs_bo[csc->num_relocs].bo = NULL;
-    csc->relocs_bo[csc->num_relocs].priority_usage = 0;
+    csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
     radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
     p_atomic_inc(&bo->num_cs_references);
     reloc = &csc->relocs[csc->num_relocs];
@@ -275,6 +291,53 @@ static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
     return csc->num_relocs++;
 }
 
+static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
+                                            struct radeon_bo *bo)
+{
+    struct radeon_cs_context *csc = cs->csc;
+    unsigned hash;
+    struct radeon_bo_item *item;
+    int idx;
+    int real_idx;
+
+    idx = radeon_lookup_buffer(csc, bo);
+    if (idx >= 0)
+        return idx;
+
+    real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);
+
+    /* Check if the backing array is large enough. */
+    if (csc->num_slab_buffers >= csc->max_slab_buffers) {
+        unsigned new_max = MAX2(csc->max_slab_buffers + 16,
+                                (unsigned)(csc->max_slab_buffers * 1.3));
+        struct radeon_bo_item *new_buffers =
+            REALLOC(csc->slab_buffers,
+                    csc->max_slab_buffers * sizeof(*new_buffers),
+                    new_max * sizeof(*new_buffers));
+        if (!new_buffers) {
+            fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
+            return -1;
+        }
+
+        csc->max_slab_buffers = new_max;
+        csc->slab_buffers = new_buffers;
+    }
+
+    /* Initialize the new relocation. */
+    idx = csc->num_slab_buffers++;
+    item = &csc->slab_buffers[idx];
+
+    item->bo = NULL;
+    item->u.slab.real_idx = real_idx;
+    radeon_bo_reference(&item->bo, bo);
+    p_atomic_inc(&bo->num_cs_references);
+
+    hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    csc->reloc_indices_hashlist[hash] = idx;
+
+    return idx;
+}
+
 static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                          struct pb_buffer *buf,
                                          enum radeon_bo_usage usage,
@@ -287,14 +350,24 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
     enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
     enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
     struct drm_radeon_cs_reloc *reloc;
-    unsigned index = radeon_lookup_or_add_buffer(cs, bo);
+    int index;
+
+    if (!bo->handle) {
+        index = radeon_lookup_or_add_slab_buffer(cs, bo);
+        if (index < 0)
+            return 0;
+
+        index = cs->csc->slab_buffers[index].u.slab.real_idx;
+    } else {
+        index = radeon_lookup_or_add_real_buffer(cs, bo);
+    }
 
     reloc = &cs->csc->relocs[index];
     added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
     reloc->read_domains |= rd;
     reloc->write_domain |= wd;
     reloc->flags = MAX2(reloc->flags, priority);
-    cs->csc->relocs_bo[index].priority_usage |= 1llu << priority;
+    cs->csc->relocs_bo[index].u.real.priority_usage |= 1llu << priority;
 
     if (added_domains & RADEON_DOMAIN_VRAM)
         cs->base.used_vram += bo->base.size;
@@ -366,7 +439,7 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
         for (i = 0; i < cs->csc->num_relocs; i++) {
             list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
             list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
-            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
+            list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage;
         }
     }
     return cs->csc->num_relocs;
@@ -584,6 +657,9 @@ static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
     if (index == -1)
         return false;
 
+    if (!bo->handle)
+        index = cs->csc->slab_buffers[index].u.slab.real_idx;
+
     if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
         return true;
     if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index bd55548e93c..f9b26af28fa 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -31,7 +31,14 @@
 
 struct radeon_bo_item {
     struct radeon_bo *bo;
-    uint64_t priority_usage;
+    union {
+        struct {
+            uint64_t priority_usage;
+        } real;
+        struct {
+            unsigned real_idx;
+        } slab;
+    } u;
 };
 
 struct radeon_cs_context {
@@ -50,6 +57,10 @@ struct radeon_cs_context {
     struct radeon_bo_item *relocs_bo;
     struct drm_radeon_cs_reloc *relocs;
 
+    unsigned num_slab_buffers;
+    unsigned max_slab_buffers;
+    struct radeon_bo_item *slab_buffers;
+
     int reloc_indices_hashlist[4096];
 };
 
@@ -108,6 +119,9 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
     if (index == -1)
         return false;
 
+    if (!bo->handle)
+        index = cs->csc->slab_buffers[index].u.slab.real_idx;
+
     return cs->csc->relocs[index].write_domain != 0;
 }
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index 27fbe906f6a..55149806ae9 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -75,6 +75,7 @@ struct radeon_drm_winsys {
     uint64_t mapped_gtt;
     uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
     uint64_t num_cs_flushes;
+    uint32_t next_bo_hash;
 
     enum radeon_generation gen;
     struct radeon_info info;
-- 
2.30.2
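
Illustration of the lookup scheme this patch relies on (a standalone sketch, not part of the patch): the hashlist only caches a likely index, keyed by the new counter-based radeon_bo::hash rather than the kernel handle, and a miss falls back to a linear scan, as in radeon_lookup_buffer(). The names used here (demo_bo, demo_cs, cs_init, cs_lookup, cs_add, HASHLIST_SIZE) are invented for this example and do not exist in the Mesa tree.

    /* Simplified model of the reloc_indices_hashlist lookup; C99, illustrative only. */
    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define HASHLIST_SIZE 4096            /* power of two, as in the patch */

    struct demo_bo {
        uint32_t hash;                    /* taken from an incrementing counter */
    };

    struct demo_cs {
        struct demo_bo *buffers[64];      /* tracked buffers, fixed capacity for brevity */
        int num_buffers;
        int hashlist[HASHLIST_SIZE];      /* hash slot -> candidate index, -1 if empty */
    };

    static void cs_init(struct demo_cs *cs)
    {
        cs->num_buffers = 0;
        memset(cs->hashlist, -1, sizeof(cs->hashlist));
    }

    /* Loosely mirrors radeon_lookup_buffer(): try the cached slot first, then
     * fall back to a linear scan and re-seed the slot so that repeated lookups
     * of the same buffer stay cheap. */
    static int cs_lookup(struct demo_cs *cs, struct demo_bo *bo)
    {
        unsigned slot = bo->hash & (HASHLIST_SIZE - 1);
        int i = cs->hashlist[slot];

        if (i >= 0 && i < cs->num_buffers && cs->buffers[i] == bo)
            return i;

        for (i = cs->num_buffers - 1; i >= 0; i--) {
            if (cs->buffers[i] == bo) {
                cs->hashlist[slot] = i;   /* cache for the next lookup */
                return i;
            }
        }
        return -1;
    }

    static int cs_add(struct demo_cs *cs, struct demo_bo *bo)
    {
        int i = cs_lookup(cs, bo);
        if (i >= 0)
            return i;

        i = cs->num_buffers++;
        cs->buffers[i] = bo;
        cs->hashlist[bo->hash & (HASHLIST_SIZE - 1)] = i;
        return i;
    }

    int main(void)
    {
        struct demo_cs cs;
        /* 7 and 4103 collide: 4103 & 4095 == 7. */
        struct demo_bo a = { .hash = 7 }, b = { .hash = 4103 };

        cs_init(&cs);
        printf("add a    -> %d\n", cs_add(&cs, &a));    /* 0 */
        printf("add b    -> %d\n", cs_add(&cs, &b));    /* 1, despite the collision */
        printf("lookup a -> %d\n", cs_lookup(&cs, &a)); /* 0, found via the fallback scan */
        return 0;
    }

The reason for keying the table on radeon_bo::hash (assigned from the winsys-wide next_bo_hash counter) instead of the kernel handle is that slab entries all have handle == 0; hashing by handle would funnel every slab buffer into the same slot and turn each lookup into the linear fallback, whereas the counter spreads real and slab buffers evenly across the 4096 slots.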