r600g/compute: Add an intermediate resource for OpenCL buffers
author Bruno Jiménez <brunojimen@gmail.com>
Wed, 18 Jun 2014 15:01:51 +0000 (17:01 +0200)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 20 Jun 2014 17:43:28 +0000 (13:43 -0400)
This patch completely changes the way buffers are added to the
compute_memory_pool. Previously, whenever we were going to map a
buffer, or read from or write to it, it would get placed into the
pool. Now, every unallocated buffer keeps its own r600_resource
until it is allocated in the pool.
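
In outline, the new flow looks like this. The following is a toy,
stand-alone C model of the lifecycle, not the driver code itself:
malloc()/memcpy() stand in for r600_compute_buffer_alloc_vram() and
resource_copy_region(), and struct item is a stripped-down
compute_memory_item.

    #include <stdlib.h>
    #include <string.h>

    struct item {
            long start_in_dw;  /* -1 while not yet placed in the pool */
            long size_in_dw;   /* size in dwords (4 bytes each) */
            void *real_buffer; /* private intermediate resource */
    };

    /* Allocation: the item gets its own buffer, not a pool slot. */
    static struct item *alloc_item(long size_in_dw)
    {
            struct item *it = calloc(1, sizeof(*it));
            it->start_in_dw = -1;
            it->size_in_dw = size_in_dw;
            it->real_buffer = malloc(size_in_dw * 4);
            return it;
    }

    /* Mapping: while unallocated, transfers touch the private buffer
     * directly; the pool does not have to grow or defragment first. */
    static void *map_item(struct item *it, char *pool)
    {
            if (it->real_buffer)
                    return it->real_buffer;
            return pool + it->start_in_dw * 4;
    }

    /* Finalize (run before a kernel launch): copy the contents into
     * the pool slot found for the item and drop the intermediate. */
    static void finalize_item(struct item *it, char *pool, long start_in_dw)
    {
            memcpy(pool + start_in_dw * 4, it->real_buffer,
                   it->size_in_dw * 4);
            it->start_in_dw = start_in_dw;
            free(it->real_buffer);
            it->real_buffer = NULL;
    }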

NOTE: This patch also increases GPU memory usage at the moment every
buffer is put into its place in the pool: until an item's copy
completes, both the pool and the item's intermediate resource exist
at once, so peak memory usage is roughly 2x (the sum of all buffer
sizes).
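
For example (illustrative numbers only): with four pending 1 MiB
buffers, finalize holds both the ~4 MiB pool allocation and the
~4 MiB of intermediate resources until each copy finishes and the
intermediate is destroyed, for a peak of about 8 MiB.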

v2: Cleanup

v3: Use temporary variables to avoid so many casts in functions,
    as suggested by Tom Stellard

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/r600/compute_memory_pool.c
src/gallium/drivers/r600/compute_memory_pool.h
src/gallium/drivers/r600/evergreen_compute.c

index ec8c470fc65c1f8241c54ee6a91c4e17c6aaed07..0b126a80191e9e1e0ed4aee4cb8c746c12d14fc9 100644 (file)
@@ -71,7 +71,6 @@ static void compute_memory_pool_init(struct compute_memory_pool * pool,
        if (pool->shadow == NULL)
                return;
 
-       pool->next_id = 1;
        pool->size_in_dw = initial_size_in_dw;
        pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
                                                        pool->size_in_dw * 4);
@@ -314,6 +313,14 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool,
        for (item = pending_list; item; item = next) {
                next = item->next;
 
+               struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+               struct r600_context *rctx = (struct r600_context *)pipe;
+               struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
+               struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
+               struct pipe_box box;
+
+               u_box_1d(0, item->size_in_dw * 4, &box);
+
                /* Search for free space in the pool for this item. */
                while ((start_in_dw=compute_memory_prealloc_chunk(pool,
                                                item->size_in_dw)) == -1) {
@@ -365,6 +372,14 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool,
                        pool->item_list = item;
                }
 
+               rctx->b.b.resource_copy_region(pipe,
+                               dst, 0, item->start_in_dw * 4, 0, 0,
+                               src, 0, &box);
+
+               pool->screen->b.b.resource_destroy(
+                       screen, src);
+               item->real_buffer = NULL;
+
                allocated += item->size_in_dw;
        }
 
@@ -375,6 +390,8 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool,
 void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 {
        struct compute_memory_item *item, *next;
+       struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+       struct pipe_resource *res;
 
        COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %ld \n", id);
 
@@ -393,6 +410,12 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
                                item->next->prev = item->prev;
                        }
 
+                       if (item->real_buffer) {
+                               res = (struct pipe_resource *)item->real_buffer;
+                               pool->screen->b.b.resource_destroy(
+                                               screen, res);
+                       }
+
                        free(item);
 
                        return;
@@ -426,6 +449,8 @@ struct compute_memory_item* compute_memory_alloc(
        new_item->start_in_dw = -1; /* mark pending */
        new_item->id = pool->next_id++;
        new_item->pool = pool;
+       new_item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
+                                                       pool->screen, size_in_dw * 4);
 
        if (pool->item_list) {
                for (last_item = pool->item_list; last_item->next;
index c711c59bcfacfffb4bf6617491852f4c597dc4ed..e94159cedaca31c2d6e963795c419f04224b9b1e 100644 (file)
@@ -38,6 +38,8 @@ struct compute_memory_item
        int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo
        int64_t size_in_dw; ///Size of the chunk in dwords
 
+       struct r600_resource *real_buffer;
+
        struct compute_memory_pool* pool;
 
        struct compute_memory_item* prev;
index a2abf1546dd5fe5099a83d8ec485c3ac837db91f..c152e54146ac4e203ba09d8b57cebd170a842089 100644 (file)
@@ -958,6 +958,17 @@ void *r600_compute_global_transfer_map(
        struct r600_resource_global* buffer =
                (struct r600_resource_global*)resource;
 
+       struct pipe_resource *dst;
+       unsigned offset = box->x;
+
+       if (buffer->chunk->real_buffer) {
+               dst = (struct pipe_resource*)buffer->chunk->real_buffer;
+       }
+       else {
+               dst = (struct pipe_resource*)buffer->chunk->pool->bo;
+               offset += (buffer->chunk->start_in_dw * 4);
+       }
+
        COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
                        "level = %u, usage = %u, box(x = %u, y = %u, z = %u "
                        "width = %u, height = %u, depth = %u)\n", level, usage,
@@ -967,8 +978,6 @@ void *r600_compute_global_transfer_map(
                "%u (box.x)\n", buffer->chunk->id, box->x);
 
 
-       compute_memory_finalize_pending(pool, ctx_);
-
        assert(resource->target == PIPE_BUFFER);
        assert(resource->bind & PIPE_BIND_GLOBAL);
        assert(box->x >= 0);
@@ -976,9 +985,8 @@ void *r600_compute_global_transfer_map(
        assert(box->z == 0);
 
        ///TODO: do it better, mapping is not possible if the pool is too big
-       return pipe_buffer_map_range(ctx_, (struct pipe_resource*)buffer->chunk->pool->bo,
-                       box->x + (buffer->chunk->start_in_dw * 4),
-                       box->width, usage, ptransfer);
+       return pipe_buffer_map_range(ctx_, dst,
+                       offset, box->width, usage, ptransfer);
 }
 
 void r600_compute_global_transfer_unmap(