radv/amdgpu: Add winsys implementation of virtual buffers.
authorBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Sat, 4 Feb 2017 00:18:05 +0000 (01:18 +0100)
committerBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Wed, 29 Mar 2017 06:50:17 +0000 (08:50 +0200)
v2: - Added comments.
    - Fixed a double unmap bug.
    - Actually unmap the non-edge old ranges.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_radeon_winsys.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c

index a8a1d2e369fb2cf2d3da366fef6213e143de6705..9f2430f75debf5762d49b5dc6a6d55efedbab0ee 100644 (file)
@@ -47,6 +47,7 @@ enum radeon_bo_flag { /* bitfield */
        RADEON_FLAG_GTT_WC =        (1 << 0),
        RADEON_FLAG_CPU_ACCESS =    (1 << 1),
        RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+       RADEON_FLAG_VIRTUAL =       (1 << 3)
 };
 
 enum radeon_bo_usage { /* bitfield */
@@ -289,6 +290,10 @@ struct radeon_winsys {
 
        void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
                                    struct radeon_bo_metadata *md);
+
+       void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
+                                   uint64_t offset, uint64_t size,
+                                   struct radeon_winsys_bo *bo, uint64_t bo_offset);
        struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
        void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
 
index 7f7fc7ac1ad3005ca433ee03bfe68c23cf51b214..8e78ef75233cc4a68b2dd84914c03a7e52d15239 100644 (file)
 #include <amdgpu_drm.h>
 #include <inttypes.h>
 
+static void
+radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
+                               const struct radv_amdgpu_map_range *range)
+{
+       assert(range->size);
+
+       if (!range->bo)
+               return; /* TODO: PRT mapping */
+
+       int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
+                               range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
+       if (r)
+               abort();
+}
+
+static void
+radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
+                                 const struct radv_amdgpu_map_range *range)
+{
+       assert(range->size);
+
+       if (!range->bo)
+               return; /* TODO: PRT mapping */
+
+       int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
+                               range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
+       if (r)
+               abort();
+}
+
+static void
+radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
+{
+       bo->bo_count = 0;
+       for (uint32_t i = 0; i < bo->range_count; ++i) {
+               bool found = false;
+               if (!bo->ranges[i].bo)
+                       continue;
+
+               for(uint32_t j = 0; j <  bo->bo_count; ++j) {
+                       if (bo->bos[j] == bo->ranges[i].bo) {
+                               found = true;
+                               break;
+                       }
+               }
+
+               if (!found) {
+                       if (bo->bo_capacity == bo->bo_count) {
+                               bo->bos = realloc(bo->bos,
+                                                 (bo->bo_capacity + 1) * sizeof(struct radv_amdgpu_bo *));
+                               ++bo->bo_capacity;
+                       }
+                       bo->bos[bo->bo_count++] = bo->ranges[i].bo;
+               }
+       }
+}
+
+static void
+radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
+                                   uint64_t offset, uint64_t size,
+                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
+{
+       struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
+       struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
+       int range_count_delta, new_idx;
+       int first = 0, last;
+       struct radv_amdgpu_map_range new_first, new_last;
+
+       assert(parent->is_virtual);
+       assert(!bo || !bo->is_virtual);
+
+       if (!size)
+               return;
+
+       /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
+       if (parent->range_capacity - parent->range_count < 2) {
+               parent->range_capacity += 2;
+               parent->ranges = realloc(parent->ranges,
+                                        parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
+       }
+
+       /*
+        * [first, last] is exactly the range of ranges that either overlap the
+        * new parent, or are adjacent to it. This corresponds to the bind ranges
+        * that may change.
+        */
+       while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
+               ++first;
+
+       last = first;
+       while(last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
+               ++last;
+
+       /* Whether the first or last range are going to be totally removed or just
+        * resized/left alone. Note that in the case of first == last, we will split
+        * this into a part before and after the new range. The remove flag is then
+        * whether to not create the corresponding split part. */
+       bool remove_first = parent->ranges[first].offset == offset;
+       bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
+       bool unmapped_first = false;
+
+       assert(parent->ranges[first].offset <= offset);
+       assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
+
+       /* Try to merge the new range with the first range. */
+       if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
+               size += offset - parent->ranges[first].offset;
+               offset = parent->ranges[first].offset;
+               remove_first = true;
+       }
+
+       /* Try to merge the new range with the last range. */
+       if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
+               size = parent->ranges[last].offset + parent->ranges[last].size - offset;
+               remove_last = true;
+       }
+
+       range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
+       new_idx = first + !remove_first;
+
+       /* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
+       for (int i = first + 1; i < last; ++i)
+               radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);
+
+       /* If the first/last range are not left alone we unmap then and optionally map
+        * them again after modifications. Not that this implicitly can do the splitting
+        * if first == last. */
+       new_first = parent->ranges[first];
+       new_last = parent->ranges[last];
+
+       if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
+               radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
+               unmapped_first = true;
+
+               if (!remove_first) {
+                       new_first.size = offset - new_first.offset;
+                       radv_amdgpu_winsys_virtual_map(parent, &new_first);
+               }
+       }
+
+       if (parent->ranges[last].offset < offset + size || remove_last) {
+               if (first != last || !unmapped_first)
+                       radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);
+
+               if (!remove_last) {
+                       new_last.size -= offset + size - new_last.offset;
+                       new_last.offset = offset + size;
+                       radv_amdgpu_winsys_virtual_map(parent, &new_last);
+               }
+       }
+
+       /* Moves the range list after last to account for the changed number of ranges. */
+       memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
+               sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
+
+       if (!remove_first)
+               parent->ranges[first] = new_first;
+
+       if (!remove_last)
+               parent->ranges[new_idx + 1] = new_last;
+
+       /* Actually set up the new range. */
+       parent->ranges[new_idx].offset = offset;
+       parent->ranges[new_idx].size = size;
+       parent->ranges[new_idx].bo = bo;
+       parent->ranges[new_idx].bo_offset = bo_offset;
+
+       radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);
+
+       parent->range_count += range_count_delta;
+
+       radv_amdgpu_winsys_rebuild_bo_list(parent);
+}
+
 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 {
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
 
-       if (bo->ws->debug_all_bos) {
-               pthread_mutex_lock(&bo->ws->global_bo_list_lock);
-               LIST_DEL(&bo->global_list_item);
-               bo->ws->num_buffers--;
-               pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
+       if (bo->is_virtual) {
+               for (uint32_t i = 0; i < bo->range_count; ++i) {
+                       radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
+               }
+               free(bo->bos);
+               free(bo->ranges);
+       } else {
+               if (bo->ws->debug_all_bos) {
+                       pthread_mutex_lock(&bo->ws->global_bo_list_lock);
+                       LIST_DEL(&bo->global_list_item);
+                       bo->ws->num_buffers--;
+                       pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
+               }
+               amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
+               amdgpu_bo_free(bo->bo);
        }
-       amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
        amdgpu_va_range_free(bo->va_handle);
-       amdgpu_bo_free(bo->bo);
        FREE(bo);
 }
 
@@ -81,6 +263,31 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                return NULL;
        }
 
+       r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+                                 size, alignment, 0, &va, &va_handle, 0);
+       if (r)
+               goto error_va_alloc;
+
+       bo->va = va;
+       bo->va_handle = va_handle;
+       bo->size = size;
+       bo->ws = ws;
+       bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
+
+       if (flags & RADEON_FLAG_VIRTUAL) {
+               bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
+               bo->range_count = 1;
+               bo->range_capacity = 1;
+
+               bo->ranges[0].offset = 0;
+               bo->ranges[0].size = size;
+               bo->ranges[0].bo = NULL;
+               bo->ranges[0].bo_offset = 0;
+
+               radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
+               return (struct radeon_winsys_bo *)bo;
+       }
+
        request.alloc_size = size;
        request.phys_alignment = alignment;
 
@@ -105,31 +312,22 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
                goto error_bo_alloc;
        }
 
-       r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
-                                 size, alignment, 0, &va, &va_handle, 0);
-       if (r)
-               goto error_va_alloc;
-
        r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
        if (r)
                goto error_va_map;
 
        bo->bo = buf_handle;
-       bo->va = va;
-       bo->va_handle = va_handle;
        bo->initial_domain = initial_domain;
-       bo->size = size;
        bo->is_shared = false;
-       bo->ws = ws;
        radv_amdgpu_add_buffer_to_global_list(bo);
        return (struct radeon_winsys_bo *)bo;
 error_va_map:
-       amdgpu_va_range_free(va_handle);
-
-error_va_alloc:
        amdgpu_bo_free(buf_handle);
 
 error_bo_alloc:
+       amdgpu_va_range_free(va_handle);
+
+error_va_alloc:
        FREE(bo);
        return NULL;
 }
@@ -296,4 +494,5 @@ void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
        ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
        ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
        ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
+       ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
 }
index 499b063d56fb6db554a4ddcdc15689030b07972a..c7d484bc8dd31ea2fcd8db77182bbc1e012aa51d 100644 (file)
 
 #include "radv_amdgpu_winsys.h"
 
+
+struct radv_amdgpu_map_range {
+       uint64_t offset;
+       uint64_t size;
+       struct radv_amdgpu_winsys_bo *bo;
+       uint64_t bo_offset;
+};
+
 struct radv_amdgpu_winsys_bo {
-       amdgpu_bo_handle bo;
        amdgpu_va_handle va_handle;
-
        uint64_t va;
-       enum radeon_bo_domain initial_domain;
        uint64_t size;
-       bool is_shared;
-
        struct radv_amdgpu_winsys *ws;
-       struct list_head global_list_item;
+       bool is_virtual;
+
+       union {
+               /* physical bo */
+               struct {
+                       amdgpu_bo_handle bo;
+                       enum radeon_bo_domain initial_domain;
+                       bool is_shared;
+                       struct list_head global_list_item;
+               };
+               /* virtual bo */
+               struct {
+                       struct radv_amdgpu_map_range *ranges;
+                       uint32_t range_count;
+                       uint32_t range_capacity;
+
+                       struct radv_amdgpu_winsys_bo **bos;
+                       uint32_t bo_count;
+                       uint32_t bo_capacity;
+               };
+       };
 };
 
 static inline
index 80100e6eabb42fb809f6ff2c4d1088dce44b0c47..8430df4d0fec9517714ef8c0061045bb52d3e856 100644 (file)
 #include "radv_amdgpu_bo.h"
 #include "sid.h"
 
+
+enum {
+       VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
+};
+
 struct radv_amdgpu_cs {
        struct radeon_winsys_cs base;
        struct radv_amdgpu_winsys *ws;
@@ -56,6 +61,12 @@ struct radv_amdgpu_cs {
 
        int                         buffer_hash_table[1024];
        unsigned                    hw_ip;
+
+       unsigned                    num_virtual_buffers;
+       unsigned                    max_num_virtual_buffers;
+       struct radeon_winsys_bo     **virtual_buffers;
+       uint8_t                     *virtual_buffer_priorities;
+       int                         *virtual_buffer_hash_table;
 };
 
 static inline struct radv_amdgpu_cs *
@@ -141,6 +152,9 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
                cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
 
        free(cs->old_ib_buffers);
+       free(cs->virtual_buffers);
+       free(cs->virtual_buffer_priorities);
+       free(cs->virtual_buffer_hash_table);
        free(cs->handles);
        free(cs->priorities);
        free(cs);
@@ -315,7 +329,13 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
                cs->buffer_hash_table[hash] = -1;
        }
 
+       for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+               unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+               cs->virtual_buffer_hash_table[hash] = -1;
+       }
+
        cs->num_buffers = 0;
+       cs->num_virtual_buffers = 0;
 
        if (cs->ws->use_ib_bos) {
                cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
@@ -380,6 +400,49 @@ static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
        ++cs->num_buffers;
 }
 
+static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_winsys_cs *_cs,
+                                              struct radeon_winsys_bo *bo,
+                                              uint8_t priority)
+{
+       struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+       unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+
+
+       if (!cs->virtual_buffer_hash_table) {
+               cs->virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
+               for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
+                       cs->virtual_buffer_hash_table[i] = -1;
+       }
+
+       if (cs->virtual_buffer_hash_table[hash] >= 0) {
+               int idx = cs->virtual_buffer_hash_table[hash];
+               if (cs->virtual_buffers[idx] == bo) {
+                       cs->virtual_buffer_priorities[idx] = MAX2(cs->virtual_buffer_priorities[idx], priority);
+                       return;
+               }
+               for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+                       if (cs->virtual_buffers[i] == bo) {
+                               cs->virtual_buffer_priorities[i] = MAX2(cs->virtual_buffer_priorities[i], priority);
+                               cs->virtual_buffer_hash_table[hash] = i;
+                               return;
+                       }
+               }
+       }
+
+       if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
+               cs->max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
+               cs->virtual_buffers = realloc(cs->virtual_buffers, sizeof(struct radv_amdgpu_virtual_virtual_buffer*) * cs->max_num_virtual_buffers);
+               cs->virtual_buffer_priorities = realloc(cs->virtual_buffer_priorities, sizeof(uint8_t) * cs->max_num_virtual_buffers);
+       }
+
+       cs->virtual_buffers[cs->num_virtual_buffers] = bo;
+       cs->virtual_buffer_priorities[cs->num_virtual_buffers] = priority;
+
+       cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
+       ++cs->num_virtual_buffers;
+
+}
+
 static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
                                 struct radeon_winsys_bo *_bo,
                                 uint8_t priority)
@@ -387,6 +450,11 @@ static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
        struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
        struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
 
+       if (bo->is_virtual)  {
+               radv_amdgpu_cs_add_virtual_buffer(_cs, _bo, priority);
+               return;
+       }
+
        radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
 }
 
@@ -401,6 +469,11 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
                                                   child->priorities[i]);
        }
 
+       for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
+               radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i],
+                                         child->virtual_buffer_priorities[i]);
+       }
+
        if (parent->ws->use_ib_bos) {
                if (parent->base.cdw + 4 > parent->base.max_dw)
                        radv_amdgpu_cs_grow(&parent->base, 4);
@@ -449,7 +522,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
                                          bo_list);
                free(handles);
                pthread_mutex_unlock(&ws->global_bo_list_lock);
-       } else if (count == 1 && !extra_bo && !extra_cs) {
+       } else if (count == 1 && !extra_bo && !extra_cs &&
+                  !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
                struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
                r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
                                          cs->priorities, bo_list);
@@ -459,6 +533,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
                for (unsigned i = 0; i < count; ++i) {
                        struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
                        total_buffer_count += cs->num_buffers;
+                       for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
+                               total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
                }
 
                if (extra_cs) {
@@ -512,6 +588,26 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
                                        ++unique_bo_count;
                                }
                        }
+                       for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
+                               struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
+                               for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
+                                       struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
+                                       bool found = false;
+                                       for (unsigned m = 0; m < unique_bo_count; ++m) {
+                                               if (handles[m] == bo->bo) {
+                                                       found = true;
+                                                       priorities[m] = MAX2(priorities[m],
+                                                                       cs->virtual_buffer_priorities[j]);
+                                                       break;
+                                               }
+                                       }
+                                       if (!found) {
+                                               handles[unique_bo_count] = bo->bo;
+                                               priorities[unique_bo_count] = cs->virtual_buffer_priorities[j];
+                                               ++unique_bo_count;
+                                       }
+                               }
+                       }
                }
                r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
                                          priorities, bo_list);