winsys/amdgpu: enable buffer allocation from slabs
author     Nicolai Hähnle <nicolai.haehnle@amd.com>
           Wed, 7 Sep 2016 08:50:59 +0000 (10:50 +0200)
committer  Nicolai Hähnle <nicolai.haehnle@amd.com>
           Tue, 27 Sep 2016 14:45:23 +0000 (16:45 +0200)
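Sub-allocate buffers of up to 16 KiB from 64 KiB slabs instead of
creating a separate kernel BO for each small allocation. Slabs are
grouped into 12 heaps (3 domains x 4 flag combinations), and buffers
that require a kernel handle (RADEON_FLAG_HANDLE) still take the
regular path.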
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index c367929b5be1a7f9c3d988d7c10f56781f8cc5de..c13dc2b0e90a63b9a0476d85f76f1846d622e3f9 100644
 #include <stdio.h>
 #include <inttypes.h>
 
+static struct pb_buffer *
+amdgpu_bo_create(struct radeon_winsys *rws,
+                 uint64_t size,
+                 unsigned alignment,
+                 enum radeon_bo_domain domain,
+                 enum radeon_bo_flag flags);
+
 static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                            enum radeon_bo_usage usage)
 {
@@ -443,6 +450,116 @@ bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
    return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
 }
 
+bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
+{
+   struct amdgpu_winsys_bo *bo = NULL; /* fix container_of: the NULL sample
+                                        * pointer is only used to compute
+                                        * the member offset */
+   bo = container_of(entry, bo, u.slab.entry);
+
+   return amdgpu_bo_can_reclaim(&bo->base);
+}
+
+static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+
+   assert(!bo->bo);
+
+   pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry);
+}
+
+static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
+   amdgpu_bo_slab_destroy
+   /* other functions are never called */
+};
+
+struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
+                                     unsigned entry_size,
+                                     unsigned group_index)
+{
+   struct amdgpu_winsys *ws = priv;
+   struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
+   enum radeon_bo_domain domains;
+   enum radeon_bo_flag flags = 0;
+   uint32_t base_id;
+
+   if (!slab)
+      return NULL;
+
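+   /* Decode the heap index chosen in amdgpu_bo_create: bit 0 selects
+    * GTT_WC, bit 1 selects CPU_ACCESS, bits 2+ select the domain. */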
+   if (heap & 1)
+      flags |= RADEON_FLAG_GTT_WC;
+   if (heap & 2)
+      flags |= RADEON_FLAG_CPU_ACCESS;
+
+   switch (heap >> 2) {
+   case 0:
+      domains = RADEON_DOMAIN_VRAM;
+      break;
+   case 1:
+   default:
+      domains = RADEON_DOMAIN_VRAM_GTT;
+      break;
+   case 2:
+      domains = RADEON_DOMAIN_GTT;
+      break;
+   }
+
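+   /* Each slab is backed by a single 64 KiB real buffer obtained through
+    * the normal allocation path. */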
+   slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
+                                                    64 * 1024, 64 * 1024,
+                                                    domains, flags));
+   if (!slab->buffer)
+      goto fail;
+
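+   /* 64 KiB is larger than the maximum slab entry size
+    * (1 << AMDGPU_SLAB_MAX_SIZE_LOG2), so the call above cannot itself
+    * have been sub-allocated from a slab. */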
+   assert(slab->buffer->bo);
+
+   slab->base.num_entries = slab->buffer->base.size / entry_size;
+   slab->base.num_free = slab->base.num_entries;
+   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
+   if (!slab->entries)
+      goto fail_buffer;
+
+   LIST_INITHEAD(&slab->base.free);
+
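+   /* Reserve a contiguous block of winsys-wide unique buffer ids for all
+    * entries at once. */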
+   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);
+
+   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
+      struct amdgpu_winsys_bo *bo = &slab->entries[i];
+
+      bo->base.alignment = entry_size;
+      bo->base.usage = slab->buffer->base.usage;
+      bo->base.size = entry_size;
+      bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
+      bo->ws = ws;
+      bo->va = slab->buffer->va + i * entry_size;
+      bo->initial_domain = domains;
+      bo->unique_id = base_id + i;
+      bo->u.slab.entry.slab = &slab->base;
+      bo->u.slab.entry.group_index = group_index;
+      bo->u.slab.real = slab->buffer;
+
+      LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
+   }
+
+   return &slab->base;
+
+fail_buffer:
+   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
+fail:
+   FREE(slab);
+   return NULL;
+}
+
+void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
+{
+   struct amdgpu_slab *slab = amdgpu_slab(pslab);
+
+   for (unsigned i = 0; i < slab->base.num_entries; ++i)
+      amdgpu_bo_remove_fences(&slab->entries[i]);
+
+   FREE(slab->entries);
+   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
+   FREE(slab);
+}
+
 static unsigned eg_tile_split(unsigned tile_split)
 {
    switch (tile_split) {
@@ -555,6 +672,53 @@ amdgpu_bo_create(struct radeon_winsys *rws,
    struct amdgpu_winsys_bo *bo;
    unsigned usage = 0, pb_cache_bucket;
 
+   /* Sub-allocate small buffers from slabs. */
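+   /* Slab entries are aligned to their power-of-two entry size (at least
+    * 1 << AMDGPU_SLAB_MIN_SIZE_LOG2), so the requested alignment is
+    * satisfied implicitly. Buffers that need a kernel handle
+    * (RADEON_FLAG_HANDLE) cannot be sub-allocated. */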
+   if (!(flags & RADEON_FLAG_HANDLE) &&
+       size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
+       alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
+      struct pb_slab_entry *entry;
+      unsigned heap = 0;
+
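+      /* Encode flags and domain into a heap index; this must stay in sync
+       * with the decoding in amdgpu_bo_slab_alloc. */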
+      if (flags & RADEON_FLAG_GTT_WC)
+         heap |= 1;
+      if (flags & RADEON_FLAG_CPU_ACCESS)
+         heap |= 2;
+      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
+         goto no_slab;
+
+      switch (domain) {
+      case RADEON_DOMAIN_VRAM:
+         heap |= 0 * 4;
+         break;
+      case RADEON_DOMAIN_VRAM_GTT:
+         heap |= 1 * 4;
+         break;
+      case RADEON_DOMAIN_GTT:
+         heap |= 2 * 4;
+         break;
+      default:
+         goto no_slab;
+      }
+
+      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+      if (!entry) {
+         /* Clear the cache and try again. */
+         pb_cache_release_all_buffers(&ws->bo_cache);
+
+         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
+      }
+      if (!entry)
+         return NULL;
+
+      bo = NULL; /* fix container_of: the NULL sample pointer is only used
+                  * to compute the member offset */
+      bo = container_of(entry, bo, u.slab.entry);
+
+      pipe_reference_init(&bo->base.reference, 1);
+
+      return &bo->base;
+   }
+no_slab:
+
    /* This flag is irrelevant for the cache. */
    flags &= ~RADEON_FLAG_HANDLE;
 
@@ -597,6 +761,7 @@ amdgpu_bo_create(struct radeon_winsys *rws,
                          pb_cache_bucket);
    if (!bo) {
       /* Clear the cache and try again. */
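+      /* Reclaiming idle slab entries lets fully idle slabs return their
+       * backing buffers to the cache before it is cleared. */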
+      pb_slabs_reclaim(&ws->bo_slabs);
       pb_cache_release_all_buffers(&ws->bo_cache);
       bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                             pb_cache_bucket);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
index e5b5cf538a8064c5336f01f8ca01bff6fec0d330..1e25897b6c1baafabef8cca95047c620a3b32645 100644
@@ -81,16 +81,34 @@ struct amdgpu_winsys_bo {
    struct pipe_fence_handle **fences;
 };
 
+struct amdgpu_slab {
+   struct pb_slab base;
+   struct amdgpu_winsys_bo *buffer;
+   struct amdgpu_winsys_bo *entries;
+};
+
 bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf);
 void amdgpu_bo_destroy(struct pb_buffer *_buf);
 void amdgpu_bo_init_functions(struct amdgpu_winsys *ws);
 
+bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
+struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
+                                     unsigned entry_size,
+                                     unsigned group_index);
+void amdgpu_bo_slab_free(void *priv, struct pb_slab *slab);
+
 static inline
 struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
 {
    return (struct amdgpu_winsys_bo *)bo;
 }
 
+static inline
+struct amdgpu_slab *amdgpu_slab(struct pb_slab *slab)
+{
+   return (struct amdgpu_slab *)slab;
+}
+
 static inline
 void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
                                 struct amdgpu_winsys_bo *src)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 3961ee384400dabf286275e4899dbbde40fb5135..c83489d8820d6f9d41ebb739fdc4f68abee5e9c0 100644
@@ -382,6 +382,7 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
       util_queue_destroy(&ws->cs_queue);
 
    pipe_mutex_destroy(ws->bo_fence_lock);
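+   /* Deinitialize slabs before the cache: destroying a slab returns its
+    * backing buffer to the cache. */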
+   pb_slabs_deinit(&ws->bo_slabs);
    pb_cache_deinit(&ws->bo_cache);
    pipe_mutex_destroy(ws->global_bo_list_lock);
    do_winsys_deinit(ws);
@@ -547,6 +548,15 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
                  (ws->info.vram_size + ws->info.gart_size) / 8,
                  amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
 
+   if (!pb_slabs_init(&ws->bo_slabs,
+                      AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2,
+                      12, /* number of heaps: 3 domains x 4 flag combinations */
+                      ws,
+                      amdgpu_bo_can_reclaim_slab,
+                      amdgpu_bo_slab_alloc,
+                      amdgpu_bo_slab_free))
+      goto fail_cache;
+
    /* init reference */
    pipe_reference_init(&ws->reference, 1);
 
@@ -590,6 +600,9 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
 
    return &ws->base;
 
+fail_cache:
+   pb_cache_deinit(&ws->bo_cache);
+   do_winsys_deinit(ws);
 fail_alloc:
    FREE(ws);
 fail:
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 96d4e6d05674eb3d9f1a2cb40b899a6e9178ac81..69c6638075282211eef718b172c3d4a7261857d0 100644
@@ -33,6 +33,7 @@
 #define AMDGPU_WINSYS_H
 
 #include "pipebuffer/pb_cache.h"
+#include "pipebuffer/pb_slab.h"
 #include "gallium/drivers/radeon/radeon_winsys.h"
 #include "addrlib/addrinterface.h"
 #include "util/u_queue.h"
 
 struct amdgpu_cs;
 
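+/* Slab entry sizes range from 1 << 9 = 512 bytes to 1 << 14 = 16 KiB. */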
+#define AMDGPU_SLAB_MIN_SIZE_LOG2 9
+#define AMDGPU_SLAB_MAX_SIZE_LOG2 14
+
 struct amdgpu_winsys {
    struct radeon_winsys base;
    struct pipe_reference reference;
    struct pb_cache bo_cache;
+   struct pb_slabs bo_slabs;
 
    amdgpu_device_handle dev;