X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fcompute_memory_pool.c;h=bcda155c71aa5c227ffd70051c0a98a0a284bdb2;hb=cbb0d4ad75e6309932af7995ca80fa5ff5db7c70;hp=5e25a1da35d329bc8c1c1e2f04d1c0d423af026e;hpb=5cd1c65dc1eeab9915303085e8f77209b7cd6b16;p=mesa.git

diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c
index 5e25a1da35d..bcda155c71a 100644
--- a/src/gallium/drivers/r600/compute_memory_pool.c
+++ b/src/gallium/drivers/r600/compute_memory_pool.c
@@ -26,123 +26,109 @@
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "util/u_blitter.h"
-#include "util/u_double_list.h"
+#include "util/list.h"
 #include "util/u_transfer.h"
 #include "util/u_surface.h"
 #include "util/u_pack_color.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_framebuffer.h"
-#include "r600.h"
-#include "r600_resource.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
 #include "r600_formats.h"
 #include "compute_memory_pool.h"
+#include "evergreen_compute.h"
 #include "evergreen_compute_internal.h"
 #include <inttypes.h>
 
-static struct r600_texture * create_pool_texture(struct r600_screen * screen,
-		unsigned size_in_dw)
-{
-
-	struct pipe_resource templ;
-	struct r600_texture * tex;
-
-	if (size_in_dw == 0) {
-		return NULL;
-	}
-	memset(&templ, 0, sizeof(templ));
-	templ.target = PIPE_TEXTURE_1D;
-	templ.format = PIPE_FORMAT_R32_UINT;
-	templ.bind = PIPE_BIND_CUSTOM;
-	templ.usage = PIPE_USAGE_IMMUTABLE;
-	templ.flags = 0;
-	templ.width0 = size_in_dw;
-	templ.height0 = 1;
-	templ.depth0 = 1;
-	templ.array_size = 1;
-
-	tex = (struct r600_texture *)r600_texture_create(
-						&screen->screen, &templ);
-	/* XXX: Propagate this error */
-	assert(tex && "Out of memory");
-	tex->is_rat = 1;
-	return tex;
-}
-
+#define ITEM_ALIGNMENT 1024
 /**
- * Creates a new pool
+ * Creates a new pool.
  */
 struct compute_memory_pool* compute_memory_pool_new(
 	struct r600_screen * rscreen)
 {
 	struct compute_memory_pool* pool = (struct compute_memory_pool*)
 				CALLOC(sizeof(struct compute_memory_pool), 1);
+	if (!pool)
+		return NULL;
 
-	COMPUTE_DBG("* compute_memory_pool_new()\n");
+	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");
 
 	pool->screen = rscreen;
+	pool->item_list = (struct list_head *)
+				CALLOC(sizeof(struct list_head), 1);
+	pool->unallocated_list = (struct list_head *)
+				CALLOC(sizeof(struct list_head), 1);
+	list_inithead(pool->item_list);
+	list_inithead(pool->unallocated_list);
 	return pool;
 }
 
+/**
+ * Initializes the pool with a size of \a initial_size_in_dw.
+ * \param pool			The pool to be initialized.
+ * \param initial_size_in_dw	The initial size.
+ * \see compute_memory_grow_defrag_pool
+ */
 static void compute_memory_pool_init(struct compute_memory_pool * pool,
 	unsigned initial_size_in_dw)
 {
 
-	COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %ld\n",
+	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
 		initial_size_in_dw);
 
-	/* XXX: pool->shadow is used when the buffer needs to be resized, but
-	 * resizing does not work at the moment.
-	 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
-	 */
-	pool->next_id = 1;
 	pool->size_in_dw = initial_size_in_dw;
-	pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
-							pool->size_in_dw);
+	pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
+						pool->size_in_dw * 4);
 }
 
 /**
- * Frees all stuff in the pool and the pool struct itself too
+ * Frees all stuff in the pool and the pool struct itself too.
  */
 void compute_memory_pool_delete(struct compute_memory_pool* pool)
 {
-	COMPUTE_DBG("* compute_memory_pool_delete()\n");
+	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
 	free(pool->shadow);
 	if (pool->bo) {
-		pool->screen->screen.resource_destroy((struct pipe_screen *)
+		pool->screen->b.b.resource_destroy((struct pipe_screen *)
 			pool->screen, (struct pipe_resource *)pool->bo);
 	}
+	/* In theory, all of the items were freed in compute_memory_free.
+	 * Just delete the list heads
+	 */
+	free(pool->item_list);
+	free(pool->unallocated_list);
+	/* And then the pool itself */
 	free(pool);
 }
 
 /**
  * Searches for an empty space in the pool, return with the pointer to the
- * allocatable space in the pool, returns -1 on failure.
+ * allocatable space in the pool.
+ * \param size_in_dw	The size of the space we are looking for.
+ * \return -1 on failure
  */
 int64_t compute_memory_prealloc_chunk(
 	struct compute_memory_pool* pool,
 	int64_t size_in_dw)
 {
-	assert(size_in_dw <= pool->size_in_dw);
-
 	struct compute_memory_item *item;
 
 	int last_end = 0;
 
-	COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
-		size_in_dw);
+	assert(size_in_dw <= pool->size_in_dw);
 
-	for (item = pool->item_list; item; item = item->next) {
-		if (item->start_in_dw > -1) {
-			if (item->start_in_dw-last_end > size_in_dw) {
-				return last_end;
-			}
+	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
+		size_in_dw);
 
-			last_end = item->start_in_dw + item->size_in_dw;
-			last_end += (1024 - last_end % 1024);
+	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+		if (last_end + size_in_dw <= item->start_in_dw) {
+			return last_end;
 		}
+
+		last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
 	}
 
 	if (pool->size_in_dw - last_end < size_in_dw) {
@@ -154,27 +140,40 @@ int64_t compute_memory_prealloc_chunk(
 
 /**
  * Search for the chunk where we can link our new chunk after it.
+ * \param start_in_dw	The position of the item we want to add to the pool.
+ * \return The item that is just before the passed position
  */
-struct compute_memory_item* compute_memory_postalloc_chunk(
+struct list_head *compute_memory_postalloc_chunk(
 	struct compute_memory_pool* pool,
 	int64_t start_in_dw)
 {
-	struct compute_memory_item* item;
+	struct compute_memory_item *item;
+	struct compute_memory_item *next;
+	struct list_head *next_link;
 
-	COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
+	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunck() start_in_dw = %"PRIi64"\n",
 		start_in_dw);
 
-	for (item = pool->item_list; item; item = item->next) {
-		if (item->next) {
+	/* Check if we can insert it in the front of the list */
+	item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link);
+	if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) {
+		return pool->item_list;
+	}
+
+	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+		next_link = item->link.next;
+
+		if (next_link != pool->item_list) {
+			next = container_of(next_link, item, link);
 			if (item->start_in_dw < start_in_dw
-				&& item->next->start_in_dw > start_in_dw) {
-				return item;
+				&& next->start_in_dw > start_in_dw) {
+				return &item->link;
 			}
 		}
 		else {
 			/* end of chain */
 			assert(item->start_in_dw < start_in_dw);
-			return item;
+			return &item->link;
 		}
 	}
 
@@ -183,224 +182,422 @@ struct compute_memory_item* compute_memory_postalloc_chunk(
 }
 
 /**
- * Reallocates pool, conserves data
+ * Reallocates and defragments the pool, conserves data.
+ * \returns -1 if it fails, 0 otherwise
+ * \see compute_memory_finalize_pending
  */
-void compute_memory_grow_pool(struct compute_memory_pool* pool,
-	struct pipe_context * pipe, int new_size_in_dw)
+int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
+	struct pipe_context *pipe, int new_size_in_dw)
 {
-	COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
-		new_size_in_dw);
+	new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
 
-	assert(new_size_in_dw >= pool->size_in_dw);
+	COMPUTE_DBG(pool->screen, "* compute_memory_grow_defrag_pool() "
+		"new_size_in_dw = %d (%d bytes)\n",
+		new_size_in_dw, new_size_in_dw * 4);
 
-	assert(!pool->bo && "Growing the global memory pool is not yet "
-		"supported. You will see this message if you are trying to"
-		"use more than 64 kb of memory");
+	assert(new_size_in_dw >= pool->size_in_dw);
 
 	if (!pool->bo) {
 		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
 	} else {
-		/* XXX: Growing memory pools does not work at the moment. I think
-		 * it is because we are using fragment shaders to copy data to
-		 * the new texture and some of the compute registers are being
-		 * included in the 3D command stream.
-		 */
-		fprintf(stderr, "Warning: growing the global memory pool to"
-			"more than 64 kb is not yet supported\n");
-		new_size_in_dw += 1024 - (new_size_in_dw % 1024);
-
-		COMPUTE_DBG("  Aligned size = %d\n", new_size_in_dw);
-
-		compute_memory_shadow(pool, pipe, 1);
-		pool->shadow = realloc(pool->shadow, new_size_in_dw*4);
-		pool->size_in_dw = new_size_in_dw;
-		pool->screen->screen.resource_destroy(
-			(struct pipe_screen *)pool->screen,
-			(struct pipe_resource *)pool->bo);
-		pool->bo = (struct r600_resource*)create_pool_texture(
-							pool->screen,
-							pool->size_in_dw);
-		compute_memory_shadow(pool, pipe, 0);
+		struct r600_resource *temp = NULL;
+
+		temp = r600_compute_buffer_alloc_vram(pool->screen, new_size_in_dw * 4);
+
+		if (temp != NULL) {
+			struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+			struct pipe_resource *dst = (struct pipe_resource *)temp;
+
+			COMPUTE_DBG(pool->screen, "  Growing and defragmenting the pool "
+				"using a temporary resource\n");
+
+			compute_memory_defrag(pool, src, dst, pipe);
+
+			pool->screen->b.b.resource_destroy(
+					(struct pipe_screen *)pool->screen,
+					src);
+
+			pool->bo = temp;
+			pool->size_in_dw = new_size_in_dw;
+		}
+		else {
+			COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
+				"  Falling back to using 'shadow'\n");
+
+			compute_memory_shadow(pool, pipe, 1);
+			pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
+			if (pool->shadow == NULL)
+				return -1;
+
+			pool->size_in_dw = new_size_in_dw;
+			pool->screen->b.b.resource_destroy(
+					(struct pipe_screen *)pool->screen,
+					(struct pipe_resource *)pool->bo);
+			pool->bo = r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4);
+			compute_memory_shadow(pool, pipe, 0);
+
+			if (pool->status & POOL_FRAGMENTED) {
+				struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+				compute_memory_defrag(pool, src, src, pipe);
+			}
+		}
 	}
+
+	return 0;
 }
 
 /**
  * Copy pool from device to host, or host to device.
+ * \param device_to_host 1 for device->host, 0 for host->device
+ * \see compute_memory_grow_defrag_pool
  */
 void compute_memory_shadow(struct compute_memory_pool* pool,
 	struct pipe_context * pipe, int device_to_host)
 {
 	struct compute_memory_item chunk;
 
-	COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
+	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
 		device_to_host);
 
 	chunk.id = 0;
 	chunk.start_in_dw = 0;
 	chunk.size_in_dw = pool->size_in_dw;
-	chunk.prev = chunk.next = NULL;
 	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
 				pool->shadow, 0, pool->size_in_dw*4);
 }
 
 /**
- * Allocates pending allocations in the pool
+ * Moves all the items marked for promotion from the \a unallocated_list
+ * to the \a item_list.
+ * \return -1 if it fails, 0 otherwise
+ * \see evergreen_set_global_binding
  */
-void compute_memory_finalize_pending(struct compute_memory_pool* pool,
+int compute_memory_finalize_pending(struct compute_memory_pool* pool,
 	struct pipe_context * pipe)
 {
-	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
 	struct compute_memory_item *item, *next;
 
 	int64_t allocated = 0;
 	int64_t unallocated = 0;
+	int64_t last_pos;
+
+	int err = 0;
 
-	COMPUTE_DBG("* compute_memory_finalize_pending()\n");
+	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");
 
-	for (item = pool->item_list; item; item = item->next) {
-		COMPUTE_DBG("  + list: offset = %i id = %i size = %i "
-			"(%i bytes)\n",item->start_in_dw, item->id,
+	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+		COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
+			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
 			item->size_in_dw, item->size_in_dw * 4);
 	}
 
-	/* Search through the list of memory items in the pool */
-	for (item = pool->item_list; item; item = next) {
-		next = item->next;
+	/* Calculate the total allocated size */
+	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
+	}
 
-		/* Check if the item is pending. */
-		if (item->start_in_dw == -1) {
-			/* It is pending, so add it to the pending_list... */
-			if (end_p) {
-				end_p->next = item;
-			}
-			else {
-				pending_list = item;
-			}
+	/* Calculate the total unallocated size of the items that
+	 * will be promoted to the pool */
+	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
+		if (item->status & ITEM_FOR_PROMOTING)
+			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
+	}
 
-			/* ... and then remove it from the item list. */
-			if (item->prev) {
-				item->prev->next = next;
-			}
-			else {
-				pool->item_list = next;
-			}
+	if (unallocated == 0) {
+		return 0;
+	}
 
-			if (next) {
-				next->prev = item->prev;
-			}
+	if (pool->size_in_dw < allocated + unallocated) {
+		err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated);
+		if (err == -1)
+			return -1;
+	}
+	else if (pool->status & POOL_FRAGMENTED) {
+		struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+		compute_memory_defrag(pool, src, src, pipe);
+	}
 
-			/* This sequence makes the item be at the end of the list */
-			item->prev = end_p;
-			item->next = NULL;
-			end_p = item;
+	/* After defragmenting the pool, allocated is equal to the first available
+	 * position for new items in the pool */
+	last_pos = allocated;
 
-			/* Update the amount of space we will need to allocate. */
-			unallocated += item->size_in_dw+1024;
-		}
-		else {
-			/* The item is not pendng, so update the amount of space
-			 * that has already been allocated. */
-			allocated += item->size_in_dw;
+	/* Loop through all the unallocated items, check if they are marked
+	 * for promoting, allocate space for them and add them to the item_list. */
+	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
+		if (item->status & ITEM_FOR_PROMOTING) {
+			err = compute_memory_promote_item(pool, item, pipe, last_pos);
+			item->status &= ~ITEM_FOR_PROMOTING;
+
+			last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
+
+			if (err == -1)
+				return -1;
 		}
 	}
 
-	/* If we require more space than the size of the pool, then grow the
-	 * pool.
-	 *
-	 * XXX: I'm pretty sure this won't work.  Imagine this scenario:
-	 *
-	 * Offset Item Size
-	 *   0    A    50
-	 * 200    B    50
-	 * 400    C    50
-	 *
-	 * Total size = 450
-	 * Allocated size = 150
-	 * Pending Item D Size = 200
-	 *
-	 * In this case, there are 300 units of free space in the pool, but
-	 * they aren't contiguous, so it will be impossible to allocate Item D.
-	 */
-	if (pool->size_in_dw < allocated+unallocated) {
-		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
-	}
+	return 0;
+}
+
+/**
+ * Defragments the pool, so that there's no gap between items.
+ * \param pool	The pool to be defragmented
+ * \param src	The origin resource
+ * \param dst	The destination resource
+ * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
+ */
+void compute_memory_defrag(struct compute_memory_pool *pool,
+	struct pipe_resource *src, struct pipe_resource *dst,
+	struct pipe_context *pipe)
+{
+	struct compute_memory_item *item;
+	int64_t last_pos;
 
-	/* Loop through all the pending items, allocate space for them and
-	 * add them back to the item_list. */
-	for (item = pending_list; item; item = next) {
-		next = item->next;
+	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");
 
-		int64_t start_in_dw;
+	last_pos = 0;
+	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+		if (src != dst || item->start_in_dw != last_pos) {
+			assert(last_pos <= item->start_in_dw);
+
+			compute_memory_move_item(pool, src, dst,
+					item, last_pos, pipe);
+		}
 
-		/* Search for free space in the pool for this item. */
-		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
-						item->size_in_dw)) == -1) {
-			int64_t need = item->size_in_dw+2048 -
-						(pool->size_in_dw - allocated);
+		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
+	}
 
-			need += 1024 - (need % 1024);
+	pool->status &= ~POOL_FRAGMENTED;
+}
 
-			if (need > 0) {
-				compute_memory_grow_pool(pool,
-						pipe,
-						pool->size_in_dw + need);
-			}
-			else {
-				need = pool->size_in_dw / 10;
-				need += 1024 - (need % 1024);
-				compute_memory_grow_pool(pool,
-						pipe,
-						pool->size_in_dw + need);
-			}
+/**
+ * Moves an item from the \a unallocated_list to the \a item_list.
+ * \param item	The item that will be promoted.
+ * \return -1 if it fails, 0 otherwise
+ * \see compute_memory_finalize_pending
+ */
+int compute_memory_promote_item(struct compute_memory_pool *pool,
+	struct compute_memory_item *item, struct pipe_context *pipe,
+	int64_t start_in_dw)
+{
+	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+	struct r600_context *rctx = (struct r600_context *)pipe;
+	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
+	struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
+	struct pipe_box box;
+
+	COMPUTE_DBG(pool->screen, "* compute_memory_promote_item()\n"
+			"  + Promoting Item: %"PRIi64" , starting at: %"PRIi64" (%"PRIi64" bytes) "
+			"size: %"PRIi64" (%"PRIi64" bytes)\n\t\t\tnew start: %"PRIi64" (%"PRIi64" bytes)\n",
+			item->id, item->start_in_dw, item->start_in_dw * 4,
+			item->size_in_dw, item->size_in_dw * 4,
+			start_in_dw, start_in_dw * 4);
+
+	/* Remove the item from the unallocated list */
+	list_del(&item->link);
+
+	/* Add it back to the item_list */
+	list_addtail(&item->link, pool->item_list);
+	item->start_in_dw = start_in_dw;
+
+	if (src) {
+		u_box_1d(0, item->size_in_dw * 4, &box);
+
+		rctx->b.b.resource_copy_region(pipe,
+				dst, 0, item->start_in_dw * 4, 0 ,0,
+				src, 0, &box);
+
+		/* We check if the item is mapped for reading.
+		 * In this case, we need to keep the temporary buffer 'alive'
+		 * because it is possible to keep a map active for reading
+		 * while a kernel (that reads from it) executes */
+		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
+			pool->screen->b.b.resource_destroy(screen, src);
+			item->real_buffer = NULL;
 		}
-		COMPUTE_DBG("  + Found space for Item %p id = %u "
-			"start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
-			item, item->id, start_in_dw, start_in_dw * 4,
-			item->size_in_dw, item->size_in_dw * 4);
+	}
+
+	return 0;
+}
+
+/**
+ * Moves an item from the \a item_list to the \a unallocated_list.
+ * \param item	The item that will be demoted
+ * \see r600_compute_global_transfer_map
+ */
+void compute_memory_demote_item(struct compute_memory_pool *pool,
+	struct compute_memory_item *item, struct pipe_context *pipe)
+{
+	struct r600_context *rctx = (struct r600_context *)pipe;
+	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+	struct pipe_resource *dst;
+	struct pipe_box box;
+
+	COMPUTE_DBG(pool->screen, "* compute_memory_demote_item()\n"
+			"  + Demoting Item: %"PRIi64", starting at: %"PRIi64" (%"PRIi64" bytes) "
+			"size: %"PRIi64" (%"PRIi64" bytes)\n", item->id, item->start_in_dw,
+			item->start_in_dw * 4, item->size_in_dw, item->size_in_dw * 4);
+
+	/* First, we remove the item from the item_list */
+	list_del(&item->link);
+
+	/* Now we add it to the unallocated list */
+	list_addtail(&item->link, pool->unallocated_list);
+
+	/* We check if the intermediate buffer exists, and if it
+	 * doesn't, we create it again */
+	if (item->real_buffer == NULL) {
+		item->real_buffer = r600_compute_buffer_alloc_vram(
+				pool->screen, item->size_in_dw * 4);
+	}
 
-		item->start_in_dw = start_in_dw;
-		item->next = NULL;
-		item->prev = NULL;
+	dst = (struct pipe_resource *)item->real_buffer;
 
-		if (pool->item_list) {
-			struct compute_memory_item *pos;
+	/* We transfer the memory from the item in the pool to the
+	 * temporary buffer */
+	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);
 
-			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
-			item->prev = pos;
-			item->next = pos->next;
-			pos->next = item;
+	rctx->b.b.resource_copy_region(pipe,
+			dst, 0, 0, 0, 0,
+			src, 0, &box);
 
-			if (item->next) {
-				item->next->prev = item;
-			}
-		}
-		else {
-			pool->item_list = item;
-		}
+	/* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
+	item->start_in_dw = -1;
 
-		allocated += item->size_in_dw;
+	if (item->link.next != pool->item_list) {
+		pool->status |= POOL_FRAGMENTED;
 	}
 }
 
+/**
+ * Moves the item \a item forward from the resource \a src to the
+ * resource \a dst at \a new_start_in_dw
+ *
+ * This function assumes two things:
+ * 1) The item is \b only moved forward, unless src is different from dst
+ * 2) The item \b won't change it's position inside the \a item_list
+ *
+ * \param item			The item that will be moved
+ * \param new_start_in_dw	The new position of the item in \a item_list
+ * \see compute_memory_defrag
+ */
+void compute_memory_move_item(struct compute_memory_pool *pool,
+	struct pipe_resource *src, struct pipe_resource *dst,
+	struct compute_memory_item *item, uint64_t new_start_in_dw,
+	struct pipe_context *pipe)
+{
+	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+	struct r600_context *rctx = (struct r600_context *)pipe;
+	struct pipe_box box;
+
+	MAYBE_UNUSED struct compute_memory_item *prev;
+
+	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
+			"  + Moving item %"PRIi64" from %"PRIi64" (%"PRIi64" bytes) to %"PRIu64" (%"PRIu64" bytes)\n",
+			item->id, item->start_in_dw, item->start_in_dw * 4,
+			new_start_in_dw, new_start_in_dw * 4);
+
+	if (pool->item_list != item->link.prev) {
+		prev = container_of(item->link.prev, item, link);
+		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
+	}
+
+	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);
+
+	/* If the ranges don't overlap, or we are copying from one resource
+	 * to another, we can just copy the item directly */
+	if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {
+
+		rctx->b.b.resource_copy_region(pipe,
+			dst, 0, new_start_in_dw * 4, 0, 0,
+			src, 0, &box);
+	} else {
+		/* The ranges overlap, we will try first to use an intermediate
+		 * resource to move the item */
+		struct pipe_resource *tmp = (struct pipe_resource *)
+			r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);
+
+		if (tmp != NULL) {
+			rctx->b.b.resource_copy_region(pipe,
+				tmp, 0, 0, 0, 0,
+				src, 0, &box);
+
+			box.x = 0;
+
+			rctx->b.b.resource_copy_region(pipe,
+				dst, 0, new_start_in_dw * 4, 0, 0,
+				tmp, 0, &box);
+			pool->screen->b.b.resource_destroy(screen, tmp);
+
+		} else {
+			/* The allocation of the temporary resource failed,
+			 * falling back to use mappings */
+			uint32_t *map;
+			int64_t offset;
+			struct pipe_transfer *trans;
+
+			offset = item->start_in_dw - new_start_in_dw;
+
+			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);
+
+			map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
+				&box, &trans);
+
+			assert(map);
+			assert(trans);
+
+			memmove(map, map + offset, item->size_in_dw * 4);
+
+			pipe->transfer_unmap(pipe, trans);
+		}
+	}
+
+	item->start_in_dw = new_start_in_dw;
+}
+
+/**
+ * Frees the memory asociated to the item with id \a id from the pool.
+ * \param id	The id of the item to be freed.
+ */
 void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 {
 	struct compute_memory_item *item, *next;
+	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+	struct pipe_resource *res;
 
-	COMPUTE_DBG("* compute_memory_free() id + %ld \n", id);
+	COMPUTE_DBG(pool->screen, "* compute_memory_free() id + %"PRIi64" \n", id);
 
-	for (item = pool->item_list; item; item = next) {
-		next = item->next;
+	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {
 
 		if (item->id == id) {
-			if (item->prev) {
-				item->prev->next = item->next;
+
+			if (item->link.next != pool->item_list) {
+				pool->status |= POOL_FRAGMENTED;
 			}
-			else {
-				pool->item_list = item->next;
+
+			list_del(&item->link);
+
+			if (item->real_buffer) {
+				res = (struct pipe_resource *)item->real_buffer;
+				pool->screen->b.b.resource_destroy(
+						screen, res);
 			}
 
-			if (item->next) {
-				item->next->prev = item->prev;
+			free(item);
+
+			return;
+		}
+	}
+
+	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
+
+		if (item->id == id) {
+			list_del(&item->link);
+
+			if (item->real_buffer) {
+				res = (struct pipe_resource *)item->real_buffer;
+				pool->screen->b.b.resource_destroy(
+						screen, res);
 			}
 
 			free(item);
@@ -416,45 +613,44 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 }
 
 /**
- * Creates pending allocations
+ * Creates pending allocations for new items, these items are
+ * placed in the unallocated_list.
+ * \param size_in_dw	The size, in double words, of the new item.
+ * \return The new item
+ * \see r600_compute_global_buffer_create
  */
 struct compute_memory_item* compute_memory_alloc(
 	struct compute_memory_pool* pool,
 	int64_t size_in_dw)
 {
-	struct compute_memory_item *new_item;
+	struct compute_memory_item *new_item = NULL;
 
-	COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
+	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
 		size_in_dw, 4 * size_in_dw);
 
 	new_item = (struct compute_memory_item *)
 		CALLOC(sizeof(struct compute_memory_item), 1);
+	if (!new_item)
+		return NULL;
+
 	new_item->size_in_dw = size_in_dw;
 	new_item->start_in_dw = -1; /* mark pending */
 	new_item->id = pool->next_id++;
 	new_item->pool = pool;
+	new_item->real_buffer = NULL;
 
-	struct compute_memory_item *last_item;
-
-	if (pool->item_list) {
-		for (last_item = pool->item_list; last_item->next;
-						last_item = last_item->next);
-
-		last_item->next = new_item;
-		new_item->prev = last_item;
-	}
-	else {
-		pool->item_list = new_item;
-	}
+	list_addtail(&new_item->link, pool->unallocated_list);
 
-	COMPUTE_DBG("  + Adding item %p id = %u size = %u (%u bytes)\n",
+	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
 		new_item, new_item->id, new_item->size_in_dw,
 		new_item->size_in_dw * 4);
 
 	return new_item;
 }
 
 /**
- * Transfer data host<->device, offset and size is in bytes
+ * Transfer data host<->device, offset and size is in bytes.
+ * \param device_to_host 1 for device->host, 0 for host->device.
+ * \see compute_memory_shadow
  */
 void compute_memory_transfer(
 	struct compute_memory_pool* pool,
@@ -474,31 +670,26 @@ void compute_memory_transfer(
 
 	assert(gart);
 
-	COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
+	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
 		"offset_in_chunk = %d, size = %d\n", device_to_host,
 		offset_in_chunk, size);
 
-	if (device_to_host)
-	{
-		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
-			&(struct pipe_box) { .width = aligned_size,
-			.height = 1, .depth = 1 });
+	if (device_to_host) {
+		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
+			&(struct pipe_box) { .width = aligned_size * 4,
+			.height = 1, .depth = 1 }, &xfer);
 		assert(xfer);
-		map = pipe->transfer_map(pipe, xfer);
 		assert(map);
 		memcpy(data, map + internal_offset, size);
 		pipe->transfer_unmap(pipe, xfer);
-		pipe->transfer_destroy(pipe, xfer);
 	} else {
-		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
-			&(struct pipe_box) { .width = aligned_size,
-			.height = 1, .depth = 1 });
+		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
+			&(struct pipe_box) { .width = aligned_size * 4,
+			.height = 1, .depth = 1 }, &xfer);
 		assert(xfer);
-		map = pipe->transfer_map(pipe, xfer);
 		assert(map);
 		memcpy(map + internal_offset, data, size);
 		pipe->transfer_unmap(pipe, xfer);
-		pipe->transfer_destroy(pipe, xfer);
 	}
 }