winsys/radeon: add slab buffer list
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 12 Sep 2016 08:52:35 +0000 (10:52 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 27 Sep 2016 14:45:32 +0000 (16:45 +0200)
Introducing radeon_bo::hash will reduce collisions between "real" buffers
and buffers from slabs.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/winsys/radeon/drm/radeon_drm_bo.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.h
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.h
src/gallium/winsys/radeon/drm/radeon_drm_winsys.h

index 147a87795f3616721c0c45d364f5d6ebe296c5a2..3f58b00efdd012d5f8cf4e2c445ed14a65d94508 100644 (file)
@@ -587,6 +587,7 @@ static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
     bo->handle = args.handle;
     bo->va = 0;
     bo->initial_domain = initial_domains;
+    bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
     pipe_mutex_init(bo->u.real.map_mutex);
     pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                         pb_cache_bucket);
@@ -864,6 +865,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
     bo->user_ptr = pointer;
     bo->va = 0;
     bo->initial_domain = RADEON_DOMAIN_GTT;
+    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
     pipe_mutex_init(bo->u.real.map_mutex);
 
     util_hash_table_set(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
@@ -997,6 +999,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     bo->base.vtbl = &radeon_bo_vtbl;
     bo->rws = ws;
     bo->va = 0;
+    bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
     pipe_mutex_init(bo->u.real.map_mutex);
 
     if (bo->flink_name)
index b9a4a05b7c6490c02f142723dd2500203bcda562..8e35a385ed8e9afa4ea0424d848bc0095bacd224 100644 (file)
@@ -59,6 +59,7 @@ struct radeon_bo {
     uint32_t handle; /* 0 for slab entries */
     uint32_t flink_name;
     uint64_t va;
+    uint32_t hash;
     enum radeon_bo_domain initial_domain;
 
     /* how many command streams is this bo referenced in? */
index 20f90cf7cac375a38500923f0789f136ef7fa94f..9fbd3783699c4618bd649def2a2d100e0e82793d 100644 (file)
@@ -129,9 +129,14 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
         p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
         radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
     }
+    for (i = 0; i < csc->num_slab_buffers; ++i) {
+        p_atomic_dec(&csc->slab_buffers[i].bo->num_cs_references);
+        radeon_bo_reference(&csc->slab_buffers[i].bo, NULL);
+    }
 
     csc->num_relocs = 0;
     csc->num_validated_relocs = 0;
+    csc->num_slab_buffers = 0;
     csc->chunks[0].length_dw = 0;
     csc->chunks[1].length_dw = 0;
 
@@ -143,6 +148,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
 static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
 {
     radeon_cs_context_cleanup(csc);
+    FREE(csc->slab_buffers);
     FREE(csc->relocs_bo);
     FREE(csc->relocs);
 }
@@ -191,16 +197,26 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
 
 int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
 {
-    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    struct radeon_bo_item *buffers;
+    unsigned num_buffers;
     int i = csc->reloc_indices_hashlist[hash];
 
+    if (bo->handle) {
+        buffers = csc->relocs_bo;
+        num_buffers = csc->num_relocs;
+    } else {
+        buffers = csc->slab_buffers;
+        num_buffers = csc->num_slab_buffers;
+    }
+
     /* not found or found */
-    if (i == -1 || csc->relocs_bo[i].bo == bo)
+    if (i == -1 || (i < num_buffers && buffers[i].bo == bo))
         return i;
 
     /* Hash collision, look for the BO in the list of relocs linearly. */
-    for (i = csc->num_relocs - 1; i >= 0; i--) {
-        if (csc->relocs_bo[i].bo == bo) {
+    for (i = num_buffers - 1; i >= 0; i--) {
+        if (buffers[i].bo == bo) {
             /* Put this reloc in the hash list.
              * This will prevent additional hash collisions if there are
              * several consecutive lookup_buffer calls for the same buffer.
@@ -217,12 +233,12 @@ int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
     return -1;
 }
 
-static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
-                                            struct radeon_bo *bo)
+static unsigned radeon_lookup_or_add_real_buffer(struct radeon_drm_cs *cs,
+                                                 struct radeon_bo *bo)
 {
     struct radeon_cs_context *csc = cs->csc;
     struct drm_radeon_cs_reloc *reloc;
-    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    unsigned hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
     int i = -1;
 
     i = radeon_lookup_buffer(csc, bo);
@@ -259,7 +275,7 @@ static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
 
     /* Initialize the new relocation. */
     csc->relocs_bo[csc->num_relocs].bo = NULL;
-    csc->relocs_bo[csc->num_relocs].priority_usage = 0;
+    csc->relocs_bo[csc->num_relocs].u.real.priority_usage = 0;
     radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
     p_atomic_inc(&bo->num_cs_references);
     reloc = &csc->relocs[csc->num_relocs];
@@ -275,6 +291,53 @@ static unsigned radeon_lookup_or_add_buffer(struct radeon_drm_cs *cs,
     return csc->num_relocs++;
 }
 
+static int radeon_lookup_or_add_slab_buffer(struct radeon_drm_cs *cs,
+                                            struct radeon_bo *bo)
+{
+    struct radeon_cs_context *csc = cs->csc;
+    unsigned hash;
+    struct radeon_bo_item *item;
+    int idx;
+    int real_idx;
+
+    idx = radeon_lookup_buffer(csc, bo);
+    if (idx >= 0)
+        return idx;
+
+    real_idx = radeon_lookup_or_add_real_buffer(cs, bo->u.slab.real);
+
+    /* Check if the backing array is large enough. */
+    if (csc->num_slab_buffers >= csc->max_slab_buffers) {
+        unsigned new_max = MAX2(csc->max_slab_buffers + 16,
+                                (unsigned)(csc->max_slab_buffers * 1.3));
+        struct radeon_bo_item *new_buffers =
+            REALLOC(csc->slab_buffers,
+                    csc->max_slab_buffers * sizeof(*new_buffers),
+                    new_max * sizeof(*new_buffers));
+        if (!new_buffers) {
+            fprintf(stderr, "radeon_lookup_or_add_slab_buffer: allocation failure\n");
+            return -1;
+        }
+
+        csc->max_slab_buffers = new_max;
+        csc->slab_buffers = new_buffers;
+    }
+
+    /* Initialize the new slab buffer entry. */
+    idx = csc->num_slab_buffers++;
+    item = &csc->slab_buffers[idx];
+
+    item->bo = NULL;
+    item->u.slab.real_idx = real_idx;
+    radeon_bo_reference(&item->bo, bo);
+    p_atomic_inc(&bo->num_cs_references);
+
+    hash = bo->hash & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
+    csc->reloc_indices_hashlist[hash] = idx;
+
+    return idx;
+}
+
 static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
@@ -287,14 +350,24 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
     enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
     enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
     struct drm_radeon_cs_reloc *reloc;
-    unsigned index = radeon_lookup_or_add_buffer(cs, bo);
+    int index;
+
+    if (!bo->handle) {
+        index = radeon_lookup_or_add_slab_buffer(cs, bo);
+        if (index < 0)
+            return 0;
+
+        index = cs->csc->slab_buffers[index].u.slab.real_idx;
+    } else {
+        index = radeon_lookup_or_add_real_buffer(cs, bo);
+    }
 
     reloc = &cs->csc->relocs[index];
     added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
     reloc->read_domains |= rd;
     reloc->write_domain |= wd;
     reloc->flags = MAX2(reloc->flags, priority);
-    cs->csc->relocs_bo[index].priority_usage |= 1llu << priority;
+    cs->csc->relocs_bo[index].u.real.priority_usage |= 1llu << priority;
 
     if (added_domains & RADEON_DOMAIN_VRAM)
         cs->base.used_vram += bo->base.size;
@@ -366,7 +439,7 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
         for (i = 0; i < cs->csc->num_relocs; i++) {
             list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
             list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
-            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
+            list[i].priority_usage = cs->csc->relocs_bo[i].u.real.priority_usage;
         }
     }
     return cs->csc->num_relocs;
@@ -584,6 +657,9 @@ static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
     if (index == -1)
         return false;
 
+    if (!bo->handle)
+        index = cs->csc->slab_buffers[index].u.slab.real_idx;
+
     if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
         return true;
     if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
index bd55548e93c3366ab5992a31e642a5d2020ec968..f9b26af28fa88a1965b0a5516798518636521b0d 100644 (file)
 
 struct radeon_bo_item {
     struct radeon_bo    *bo;
-    uint64_t            priority_usage;
+    union {
+        struct {
+            uint64_t    priority_usage;
+        } real;
+        struct {
+            unsigned    real_idx;
+        } slab;
+    } u;
 };
 
 struct radeon_cs_context {
@@ -50,6 +57,10 @@ struct radeon_cs_context {
     struct radeon_bo_item       *relocs_bo;
     struct drm_radeon_cs_reloc  *relocs;
 
+    unsigned                    num_slab_buffers;
+    unsigned                    max_slab_buffers;
+    struct radeon_bo_item       *slab_buffers;
+
     int                         reloc_indices_hashlist[4096];
 };
 
@@ -108,6 +119,9 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
     if (index == -1)
         return false;
 
+    if (!bo->handle)
+        index = cs->csc->slab_buffers[index].u.slab.real_idx;
+
     return cs->csc->relocs[index].write_domain != 0;
 }
 
index 27fbe906f6abe59db5d704f5c61025e7cbe27478..55149806ae9d6963d4414b6813eb953421376d23 100644 (file)
@@ -75,6 +75,7 @@ struct radeon_drm_winsys {
     uint64_t mapped_gtt;
     uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
     uint64_t num_cs_flushes;
+    uint32_t next_bo_hash;
 
     enum radeon_generation gen;
     struct radeon_info info;