From: Bruno Jiménez Date: Wed, 18 Jun 2014 15:01:51 +0000 (+0200) Subject: r600g/compute: Add an intermediate resource for OpenCL buffers X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0038402753b6a5c2a56f66ae0ffe25a9eb1d1b63;p=mesa.git r600g/compute: Add an intermediate resource for OpenCL buffers This patch completely changes the way buffers are added to the compute_memory_pool. Before this, whenever we were going to map a buffer or write to or read from it, it would get placed into the pool. Now, every unallocated buffer has its own r600_resource until it is allocated in the pool. NOTE: This patch also increases the GPU memory usage at the moment of putting every buffer in its place. More or less, the memory usage is ~2x(sum of every buffer size) v2: Cleanup v3: Use temporary variables to avoid so many castings in functions, as suggested by Tom Stellard Reviewed-by: Tom Stellard --- diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c index ec8c470fc65..0b126a80191 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.c +++ b/src/gallium/drivers/r600/compute_memory_pool.c @@ -71,7 +71,6 @@ static void compute_memory_pool_init(struct compute_memory_pool * pool, if (pool->shadow == NULL) return; - pool->next_id = 1; pool->size_in_dw = initial_size_in_dw; pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4); @@ -314,6 +313,14 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, for (item = pending_list; item; item = next) { next = item->next; + struct pipe_screen *screen = (struct pipe_screen *)pool->screen; + struct r600_context *rctx = (struct r600_context *)pipe; + struct pipe_resource *dst = (struct pipe_resource *)pool->bo; + struct pipe_resource *src = (struct pipe_resource *)item->real_buffer; + struct pipe_box box; + + u_box_1d(0, item->size_in_dw * 4, &box); + /* Search for free space in the pool for this item. 
*/ while ((start_in_dw=compute_memory_prealloc_chunk(pool, item->size_in_dw)) == -1) { @@ -365,6 +372,14 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, pool->item_list = item; } + rctx->b.b.resource_copy_region(pipe, + dst, 0, item->start_in_dw * 4, 0 ,0, + src, 0, &box); + + pool->screen->b.b.resource_destroy( + screen, src); + item->real_buffer = NULL; + allocated += item->size_in_dw; } @@ -375,6 +390,8 @@ int compute_memory_finalize_pending(struct compute_memory_pool* pool, void compute_memory_free(struct compute_memory_pool* pool, int64_t id) { struct compute_memory_item *item, *next; + struct pipe_screen *screen = (struct pipe_screen *)pool->screen; + struct pipe_resource *res; COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %ld \n", id); @@ -393,6 +410,12 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id) item->next->prev = item->prev; } + if (item->real_buffer) { + res = (struct pipe_resource *)item->real_buffer; + pool->screen->b.b.resource_destroy( + screen, res); + } + free(item); return; @@ -426,6 +449,8 @@ struct compute_memory_item* compute_memory_alloc( new_item->start_in_dw = -1; /* mark pending */ new_item->id = pool->next_id++; new_item->pool = pool; + new_item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram( + pool->screen, size_in_dw * 4); if (pool->item_list) { for (last_item = pool->item_list; last_item->next; diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h index c711c59bcfa..e94159cedac 100644 --- a/src/gallium/drivers/r600/compute_memory_pool.h +++ b/src/gallium/drivers/r600/compute_memory_pool.h @@ -38,6 +38,8 @@ struct compute_memory_item int64_t start_in_dw; ///Start pointer in dwords relative in the pool bo int64_t size_in_dw; ///Size of the chunk in dwords + struct r600_resource *real_buffer; + struct compute_memory_pool* pool; struct compute_memory_item* prev; diff --git 
a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a2abf1546dd..c152e54146a 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -958,6 +958,17 @@ void *r600_compute_global_transfer_map( struct r600_resource_global* buffer = (struct r600_resource_global*)resource; + struct pipe_resource *dst; + unsigned offset = box->x; + + if (buffer->chunk->real_buffer) { + dst = (struct pipe_resource*)buffer->chunk->real_buffer; + } + else { + dst = (struct pipe_resource*)buffer->chunk->pool->bo; + offset += (buffer->chunk->start_in_dw * 4); + } + COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n" "level = %u, usage = %u, box(x = %u, y = %u, z = %u " "width = %u, height = %u, depth = %u)\n", level, usage, @@ -967,8 +978,6 @@ void *r600_compute_global_transfer_map( "%u (box.x)\n", buffer->chunk->id, box->x); - compute_memory_finalize_pending(pool, ctx_); - assert(resource->target == PIPE_BUFFER); assert(resource->bind & PIPE_BIND_GLOBAL); assert(box->x >= 0); @@ -976,9 +985,8 @@ void *r600_compute_global_transfer_map( assert(box->z == 0); ///TODO: do it better, mapping is not possible if the pool is too big - return pipe_buffer_map_range(ctx_, (struct pipe_resource*)buffer->chunk->pool->bo, - box->x + (buffer->chunk->start_in_dw * 4), - box->width, usage, ptransfer); + return pipe_buffer_map_range(ctx_, dst, + offset, box->width, usage, ptransfer); } void r600_compute_global_transfer_unmap(