gallium: fix warnings in release build
[mesa.git] / src / gallium / drivers / r600 / compute_memory_pool.c
index 04d24f6cbd33354c4f71ef3dd8734e6b8c66f3ab..bcda155c71aa5c227ffd70051c0a98a0a284bdb2 100644
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "util/u_blitter.h"
-#include "util/u_double_list.h"
+#include "util/list.h"
 #include "util/u_transfer.h"
 #include "util/u_surface.h"
 #include "util/u_pack_color.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_framebuffer.h"
-#include "r600.h"
-#include "r600_resource.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
 #include "r600_formats.h"
 #include "compute_memory_pool.h"
+#include "evergreen_compute.h"
 #include "evergreen_compute_internal.h"
 #include <inttypes.h>
 
+#define ITEM_ALIGNMENT 1024
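+/* Items are placed in the pool at this granularity: 1024 dwords = 4 KiB. */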
 /**
- * Creates a new pool
+ * Creates a new pool.
  */
 struct compute_memory_pool* compute_memory_pool_new(
        struct r600_screen * rscreen)
 {
        struct compute_memory_pool* pool = (struct compute_memory_pool*)
                                CALLOC(sizeof(struct compute_memory_pool), 1);
+       if (!pool)
+               return NULL;
 
-       COMPUTE_DBG("* compute_memory_pool_new()\n");
+       COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");
 
        pool->screen = rscreen;
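+       /* item_list holds the items that have been given space in the pool;
+        * unallocated_list holds pending items that have not been placed yet. */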
+       pool->item_list = (struct list_head *)
+                               CALLOC(sizeof(struct list_head), 1);
+       pool->unallocated_list = (struct list_head *)
+                               CALLOC(sizeof(struct list_head), 1);
+       list_inithead(pool->item_list);
+       list_inithead(pool->unallocated_list);
        return pool;
 }
 
+/**
+ * Initializes the pool with a size of \a initial_size_in_dw.
+ * \param pool                 The pool to be initialized.
+ * \param initial_size_in_dw   The initial size.
+ * \see compute_memory_grow_defrag_pool
+ */
 static void compute_memory_pool_init(struct compute_memory_pool * pool,
        unsigned initial_size_in_dw)
 {
 
-       COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %ld\n",
+       COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
                initial_size_in_dw);
 
-       /* XXX: pool->shadow is used when the buffer needs to be resized, but
-        * resizing does not work at the moment.
-        * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
-        */
-       pool->next_id = 1;
        pool->size_in_dw = initial_size_in_dw;
-       pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
-                                                       pool->size_in_dw * 4);
+       pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
+                                                 pool->size_in_dw * 4);
 }
 
 /**
- * Frees all stuff in the pool and the pool struct itself too
+ * Frees all stuff in the pool and the pool struct itself too.
  */
 void compute_memory_pool_delete(struct compute_memory_pool* pool)
 {
-       COMPUTE_DBG("* compute_memory_pool_delete()\n");
+       COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
        free(pool->shadow);
        if (pool->bo) {
-               pool->screen->screen.resource_destroy((struct pipe_screen *)
+               pool->screen->b.b.resource_destroy((struct pipe_screen *)
                        pool->screen, (struct pipe_resource *)pool->bo);
        }
+       /* In theory, all of the items were freed in compute_memory_free.
+        * Just delete the list heads.
+        */
+       free(pool->item_list);
+       free(pool->unallocated_list);
+       /* And then the pool itself */
        free(pool);
 }
 
 /**
 * Searches for an empty space in the pool, returns the offset of the
- * allocatable space in the pool, returns -1 on failure.
+ * allocatable space in the pool.
+ * \param size_in_dw   The size of the space we are looking for.
+ * \return -1 on failure
  */
 int64_t compute_memory_prealloc_chunk(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
 {
-       assert(size_in_dw <= pool->size_in_dw);
-
        struct compute_memory_item *item;
 
        int last_end = 0;
 
-       COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
-               size_in_dw);
+       assert(size_in_dw <= pool->size_in_dw);
 
-       for (item = pool->item_list; item; item = item->next) {
-               if (item->start_in_dw > -1) {
-                       if (item->start_in_dw-last_end > size_in_dw) {
-                               return last_end;
-                       }
+       COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
+               size_in_dw);
 
-                       last_end = item->start_in_dw + item->size_in_dw;
-                       last_end += (1024 - last_end % 1024);
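+       /* Walk the items (kept sorted by start offset) and return the first gap
+        * that is large enough; every item's footprint is padded to ITEM_ALIGNMENT. */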
+       LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+               if (last_end + size_in_dw <= item->start_in_dw) {
+                       return last_end;
                }
+
+               last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
        }
 
        if (pool->size_in_dw - last_end < size_in_dw) {
@@ -125,32 +140,40 @@ int64_t compute_memory_prealloc_chunk(
 
 /**
  *  Search for the chunk where we can link our new chunk after it.
+ *  \param start_in_dw The position of the item we want to add to the pool.
+ *  \return The item that is just before the passed position
  */
-struct compute_memory_item* compute_memory_postalloc_chunk(
+struct list_head *compute_memory_postalloc_chunk(
        struct compute_memory_pool* pool,
        int64_t start_in_dw)
 {
-       struct compute_memory_item* item;
+       struct compute_memory_item *item;
+       struct compute_memory_item *next;
+       struct list_head *next_link;
 
-       COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
+       COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
                start_in_dw);
 
        /* Check if we can insert it in the front of the list */
-       if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
-               return NULL;
+       item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link);
+       if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) {
+               return pool->item_list;
        }
 
-       for (item = pool->item_list; item; item = item->next) {
-               if (item->next) {
+       LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+               next_link = item->link.next;
+
+               if (next_link != pool->item_list) {
+                       next = container_of(next_link, item, link);
                        if (item->start_in_dw < start_in_dw
-                               && item->next->start_in_dw > start_in_dw) {
-                               return item;
+                               && next->start_in_dw > start_in_dw) {
+                               return &item->link;
                        }
                }
                else {
                        /* end of chain */
                        assert(item->start_in_dw < start_in_dw);
-                       return item;
+                       return &item->link;
                }
        }
 
@@ -159,236 +182,422 @@ struct compute_memory_item* compute_memory_postalloc_chunk(
 }
 
 /**
- * Reallocates pool, conserves data
+ * Reallocates and defragments the pool, conserves data.
+ * \returns -1 if it fails, 0 otherwise
+ * \see compute_memory_finalize_pending
  */
-void compute_memory_grow_pool(struct compute_memory_pool* pool,
-       struct pipe_context * pipe, int new_size_in_dw)
+int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
+       struct pipe_context *pipe, int new_size_in_dw)
 {
-       COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
-               new_size_in_dw);
+       new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);
 
-       assert(new_size_in_dw >= pool->size_in_dw);
+       COMPUTE_DBG(pool->screen, "* compute_memory_grow_defrag_pool() "
+               "new_size_in_dw = %d (%d bytes)\n",
+               new_size_in_dw, new_size_in_dw * 4);
 
-       assert(!pool->bo && "Growing the global memory pool is not yet "
-               "supported.  You will see this message if you are trying to"
-               "use more than 64 kb of memory");
+       assert(new_size_in_dw >= pool->size_in_dw);
 
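+       /* The pool buffer is created lazily: the first allocation sizes it to
+        * at least 16384 dwords (64 KiB). */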
        if (!pool->bo) {
                compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
        } else {
-               /* XXX: Growing memory pools does not work at the moment.  I think
-                * it is because we are using fragment shaders to copy data to
-                * the new texture and some of the compute registers are being
-                * included in the 3D command stream. */
-               fprintf(stderr, "Warning: growing the global memory pool to"
-                               "more than 64 kb is not yet supported\n");
-               new_size_in_dw += 1024 - (new_size_in_dw % 1024);
-
-               COMPUTE_DBG("  Aligned size = %d\n", new_size_in_dw);
-
-               compute_memory_shadow(pool, pipe, 1);
-               pool->shadow = realloc(pool->shadow, new_size_in_dw*4);
-               pool->size_in_dw = new_size_in_dw;
-               pool->screen->screen.resource_destroy(
-                       (struct pipe_screen *)pool->screen,
-                       (struct pipe_resource *)pool->bo);
-               pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
-                                                       pool->screen,
-                                                       pool->size_in_dw * 4);
-               compute_memory_shadow(pool, pipe, 0);
+               struct r600_resource *temp = NULL;
+
+               temp = r600_compute_buffer_alloc_vram(pool->screen, new_size_in_dw * 4);
+
+               if (temp != NULL) {
+                       struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+                       struct pipe_resource *dst = (struct pipe_resource *)temp;
+
+                       COMPUTE_DBG(pool->screen, "  Growing and defragmenting the pool "
+                                       "using a temporary resource\n");
+
+                       compute_memory_defrag(pool, src, dst, pipe);
+
+                       pool->screen->b.b.resource_destroy(
+                                       (struct pipe_screen *)pool->screen,
+                                       src);
+
+                       pool->bo = temp;
+                       pool->size_in_dw = new_size_in_dw;
+               }
+               else {
+                       COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
+                               "  Falling back to using 'shadow'\n");
+
+                       compute_memory_shadow(pool, pipe, 1);
+                       pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
+                       if (pool->shadow == NULL)
+                               return -1;
+
+                       pool->size_in_dw = new_size_in_dw;
+                       pool->screen->b.b.resource_destroy(
+                                       (struct pipe_screen *)pool->screen,
+                                       (struct pipe_resource *)pool->bo);
+                       pool->bo = r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4);
+                       compute_memory_shadow(pool, pipe, 0);
+
+                       if (pool->status & POOL_FRAGMENTED) {
+                               struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+                               compute_memory_defrag(pool, src, src, pipe);
+                       }
+               }
        }
+
+       return 0;
 }
 
 /**
  * Copy pool from device to host, or host to device.
+ * \param device_to_host 1 for device->host, 0 for host->device
+ * \see compute_memory_grow_defrag_pool
  */
 void compute_memory_shadow(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int device_to_host)
 {
        struct compute_memory_item chunk;
 
-       COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
+       COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
                device_to_host);
 
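+       /* Describe the whole pool as a single chunk so that it can be copied
+        * to/from the shadow buffer in one transfer. */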
        chunk.id = 0;
        chunk.start_in_dw = 0;
        chunk.size_in_dw = pool->size_in_dw;
-       chunk.prev = chunk.next = NULL;
        compute_memory_transfer(pool, pipe, device_to_host, &chunk,
                                pool->shadow, 0, pool->size_in_dw*4);
 }
 
 /**
- * Allocates pending allocations in the pool
+ * Moves all the items marked for promotion from the \a unallocated_list
+ * to the \a item_list.
+ * \return -1 if it fails, 0 otherwise
+ * \see evergreen_set_global_binding
  */
-void compute_memory_finalize_pending(struct compute_memory_pool* pool,
+int compute_memory_finalize_pending(struct compute_memory_pool* pool,
        struct pipe_context * pipe)
 {
-       struct compute_memory_item *pending_list = NULL, *end_p = NULL;
        struct compute_memory_item *item, *next;
 
        int64_t allocated = 0;
        int64_t unallocated = 0;
+       int64_t last_pos;
 
-       COMPUTE_DBG("* compute_memory_finalize_pending()\n");
+       int err = 0;
 
-       for (item = pool->item_list; item; item = item->next) {
-               COMPUTE_DBG("  + list: offset = %i id = %i size = %i "
-                       "(%i bytes)\n",item->start_in_dw, item->id,
+       COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");
+
+       LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+               COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
+                       "(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
                        item->size_in_dw, item->size_in_dw * 4);
        }
 
-       /* Search through the list of memory items in the pool */
-       for (item = pool->item_list; item; item = next) {
-               next = item->next;
+       /* Calculate the total allocated size */
+       LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+               allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
+       }
 
-               /* Check if the item is pending. */
-               if (item->start_in_dw == -1) {
-                       /* It is pending, so add it to the pending_list... */
-                       if (end_p) {
-                               end_p->next = item;
-                       }
-                       else {
-                               pending_list = item;
-                       }
+       /* Calculate the total unallocated size of the items that
+        * will be promoted to the pool */
+       LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
+               if (item->status & ITEM_FOR_PROMOTING)
+                       unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
+       }
 
-                       /* ... and then remove it from the item list. */
-                       if (item->prev) {
-                               item->prev->next = next;
-                       }
-                       else {
-                               pool->item_list = next;
-                       }
+       if (unallocated == 0) {
+               return 0;
+       }
 
-                       if (next) {
-                               next->prev = item->prev;
-                       }
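+       /* Grow the pool (which also defragments it) if the items to be promoted
+        * will not fit; otherwise just defragment in place when there are gaps. */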
+       if (pool->size_in_dw < allocated + unallocated) {
+               err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated);
+               if (err == -1)
+                       return -1;
+       }
+       else if (pool->status & POOL_FRAGMENTED) {
+               struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+               compute_memory_defrag(pool, src, src, pipe);
+       }
 
-                       /* This sequence makes the item be at the end of the list */
-                       item->prev = end_p;
-                       item->next = NULL;
-                       end_p = item;
+       /* After defragmenting the pool, allocated is equal to the first available
+        * position for new items in the pool */
+       last_pos = allocated;
 
-                       /* Update the amount of space we will need to allocate. */
-                       unallocated += item->size_in_dw+1024;
+       /* Loop through all the unallocated items, check if they are marked
+        * for promoting, allocate space for them and add them to the item_list. */
+       LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
+               if (item->status & ITEM_FOR_PROMOTING) {
+                       err = compute_memory_promote_item(pool, item, pipe, last_pos);
+                       item->status &= ~ITEM_FOR_PROMOTING;
+
+                       last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
+
+                       if (err == -1)
+                               return -1;
                }
-               else {
-                       /* The item is not pendng, so update the amount of space
-                        * that has already been allocated. */
-                       allocated += item->size_in_dw;
+       }
+
+       return 0;
+}
+
+/**
+ * Defragments the pool, so that there are no gaps between items.
+ * \param pool The pool to be defragmented
+ * \param src  The origin resource
+ * \param dst  The destination resource
+ * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
+ */
+void compute_memory_defrag(struct compute_memory_pool *pool,
+       struct pipe_resource *src, struct pipe_resource *dst,
+       struct pipe_context *pipe)
+{
+       struct compute_memory_item *item;
+       int64_t last_pos;
+
+       COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");
+
+       last_pos = 0;
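+       /* Pack the items towards the start of the pool, keeping their order;
+        * last_pos is the next aligned free offset. */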
+       LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
+               if (src != dst || item->start_in_dw != last_pos) {
+                       assert(last_pos <= item->start_in_dw);
+
+                       compute_memory_move_item(pool, src, dst,
+                                       item, last_pos, pipe);
                }
+
+               last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
        }
 
-       /* If we require more space than the size of the pool, then grow the
-        * pool.
-        *
-        * XXX: I'm pretty sure this won't work.  Imagine this scenario:
-        *
-        * Offset Item Size
-        *   0    A    50
-        * 200    B    50
-        * 400    C    50
-        *
-        * Total size = 450
-        * Allocated size = 150
-        * Pending Item D Size = 200
-        *
-        * In this case, there are 300 units of free space in the pool, but
-        * they aren't contiguous, so it will be impossible to allocate Item D.
-        */
-       if (pool->size_in_dw < allocated+unallocated) {
-               compute_memory_grow_pool(pool, pipe, allocated+unallocated);
+       pool->status &= ~POOL_FRAGMENTED;
+}
+
+/**
+ * Moves an item from the \a unallocated_list to the \a item_list.
+ * \param item The item that will be promoted.
+ * \return -1 if it fails, 0 otherwise
+ * \see compute_memory_finalize_pending
+ */
+int compute_memory_promote_item(struct compute_memory_pool *pool,
+               struct compute_memory_item *item, struct pipe_context *pipe,
+               int64_t start_in_dw)
+{
+       struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+       struct r600_context *rctx = (struct r600_context *)pipe;
+       struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
+       struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
+       struct pipe_box box;
+
+       COMPUTE_DBG(pool->screen, "* compute_memory_promote_item()\n"
+                       "  + Promoting Item: %"PRIi64" , starting at: %"PRIi64" (%"PRIi64" bytes) "
+                       "size: %"PRIi64" (%"PRIi64" bytes)\n\t\t\tnew start: %"PRIi64" (%"PRIi64" bytes)\n",
+                       item->id, item->start_in_dw, item->start_in_dw * 4,
+                       item->size_in_dw, item->size_in_dw * 4,
+                       start_in_dw, start_in_dw * 4);
+
+       /* Remove the item from the unallocated list */
+       list_del(&item->link);
+
+       /* Add it back to the item_list */
+       list_addtail(&item->link, pool->item_list);
+       item->start_in_dw = start_in_dw;
+
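+       /* If the item has a temporary buffer, copy its contents into the
+        * pool at the item's new offset. */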
+       if (src) {
+               u_box_1d(0, item->size_in_dw * 4, &box);
+
+               rctx->b.b.resource_copy_region(pipe,
+                               dst, 0, item->start_in_dw * 4, 0 ,0,
+                               src, 0, &box);
+
+               /* We check if the item is mapped for reading.
+                * In this case, we need to keep the temporary buffer 'alive'
+                * because it is possible to keep a map active for reading
+                * while a kernel (that reads from it) executes */
+               if (!(item->status & ITEM_MAPPED_FOR_READING)) {
+                       pool->screen->b.b.resource_destroy(screen, src);
+                       item->real_buffer = NULL;
+               }
        }
 
-       /* Loop through all the pending items, allocate space for them and
-        * add them back to the item_list. */
-       for (item = pending_list; item; item = next) {
-               next = item->next;
+       return 0;
+}
 
-               int64_t start_in_dw;
+/**
+ * Moves an item from the \a item_list to the \a unallocated_list.
+ * \param item The item that will be demoted
+ * \see r600_compute_global_transfer_map
+ */
+void compute_memory_demote_item(struct compute_memory_pool *pool,
+       struct compute_memory_item *item, struct pipe_context *pipe)
+{
+       struct r600_context *rctx = (struct r600_context *)pipe;
+       struct pipe_resource *src = (struct pipe_resource *)pool->bo;
+       struct pipe_resource *dst;
+       struct pipe_box box;
+
+       COMPUTE_DBG(pool->screen, "* compute_memory_demote_item()\n"
+                       "  + Demoting Item: %"PRIi64", starting at: %"PRIi64" (%"PRIi64" bytes) "
+                       "size: %"PRIi64" (%"PRIi64" bytes)\n", item->id, item->start_in_dw,
+                       item->start_in_dw * 4, item->size_in_dw, item->size_in_dw * 4);
+
+       /* First, we remove the item from the item_list */
+       list_del(&item->link);
+
+       /* Now we add it to the unallocated list */
+       list_addtail(&item->link, pool->unallocated_list);
+
+       /* We check if the intermediate buffer exists, and if it
+        * doesn't, we create it again */
+       if (item->real_buffer == NULL) {
+               item->real_buffer = r600_compute_buffer_alloc_vram(
+                               pool->screen, item->size_in_dw * 4);
+       }
 
-               /* Search for free space in the pool for this item. */
-               while ((start_in_dw=compute_memory_prealloc_chunk(pool,
-                                               item->size_in_dw)) == -1) {
-                       int64_t need = item->size_in_dw+2048 -
-                                               (pool->size_in_dw - allocated);
+       dst = (struct pipe_resource *)item->real_buffer;
 
-                       need += 1024 - (need % 1024);
+       /* We transfer the memory from the item in the pool to the
+        * temporary buffer */
+       u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);
 
-                       if (need > 0) {
-                               compute_memory_grow_pool(pool,
-                                               pipe,
-                                               pool->size_in_dw + need);
-                       }
-                       else {
-                               need = pool->size_in_dw / 10;
-                               need += 1024 - (need % 1024);
-                               compute_memory_grow_pool(pool,
-                                               pipe,
-                                               pool->size_in_dw + need);
-                       }
-               }
-               COMPUTE_DBG("  + Found space for Item %p id = %u "
-                       "start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
-                       item, item->id, start_in_dw, start_in_dw * 4,
-                       item->size_in_dw, item->size_in_dw * 4);
+       rctx->b.b.resource_copy_region(pipe,
+               dst, 0, 0, 0, 0,
+               src, 0, &box);
 
-               item->start_in_dw = start_in_dw;
-               item->next = NULL;
-               item->prev = NULL;
-
-               if (pool->item_list) {
-                       struct compute_memory_item *pos;
-
-                       pos = compute_memory_postalloc_chunk(pool, start_in_dw);
-                       if (pos) {
-                               item->prev = pos;
-                               item->next = pos->next;
-                               pos->next = item;
-                               if (item->next) {
-                                       item->next->prev = item;
-                               }
-                       } else {
-                               /* Add item to the front of the list */
-                               item->next = pool->item_list->next;
-                               if (pool->item_list->next) {
-                                       pool->item_list->next->prev = item;
-                               }
-                               item->prev = pool->item_list->prev;
-                               if (pool->item_list->prev) {
-                                       pool->item_list->prev->next = item;
-                               }
-                               pool->item_list = item;
-                       }
-               }
-               else {
-                       pool->item_list = item;
-               }
+       /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
+       item->start_in_dw = -1;
 
-               allocated += item->size_in_dw;
+       if (item->link.next != pool->item_list) {
+               pool->status |= POOL_FRAGMENTED;
        }
 }
 
+/**
+ * Moves the item \a item forward from the resource \a src to the
+ * resource \a dst at \a new_start_in_dw
+ *
+ * This function assumes two things:
+ * 1) The item is \b only moved forward, unless src is different from dst
+ * 2) The item \b won't change its position inside the \a item_list
+ *
+ * \param item                 The item that will be moved
+ * \param new_start_in_dw      The new position of the item inside the pool, in dwords
+ * \see compute_memory_defrag
+ */
+void compute_memory_move_item(struct compute_memory_pool *pool,
+       struct pipe_resource *src, struct pipe_resource *dst,
+       struct compute_memory_item *item, uint64_t new_start_in_dw,
+       struct pipe_context *pipe)
+{
+       struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+       struct r600_context *rctx = (struct r600_context *)pipe;
+       struct pipe_box box;
+
+       MAYBE_UNUSED struct compute_memory_item *prev;
+
+       COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
+                       "  + Moving item %"PRIi64" from %"PRIi64" (%"PRIi64" bytes) to %"PRIu64" (%"PRIu64" bytes)\n",
+                       item->id, item->start_in_dw, item->start_in_dw * 4,
+                       new_start_in_dw, new_start_in_dw * 4);
+
+       if (pool->item_list != item->link.prev) {
+               prev = container_of(item->link.prev, item, link);
+               assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
+       }
+
+       u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);
+
+       /* If the ranges don't overlap, or we are copying from one resource
+        * to another, we can just copy the item directly */
+       if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {
+
+               rctx->b.b.resource_copy_region(pipe,
+                       dst, 0, new_start_in_dw * 4, 0, 0,
+                       src, 0, &box);
+       } else {
+               /* The ranges overlap, we will first try to use an intermediate
+                * resource to move the item */
+               struct pipe_resource *tmp = (struct pipe_resource *)
+                       r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);
+
+               if (tmp != NULL) {
+                       rctx->b.b.resource_copy_region(pipe,
+                               tmp, 0, 0, 0, 0,
+                               src, 0, &box);
+
+                       box.x = 0;
+
+                       rctx->b.b.resource_copy_region(pipe,
+                               dst, 0, new_start_in_dw * 4, 0, 0,
+                               tmp, 0, &box);
+
+                       pool->screen->b.b.resource_destroy(screen, tmp);
+
+               } else {
+                       /* The allocation of the temporary resource failed,
+                        * so fall back to using mappings */
+                       uint32_t *map;
+                       int64_t offset;
+                       struct pipe_transfer *trans;
 
+                       offset = item->start_in_dw - new_start_in_dw;
+
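+                       /* Map the range from the new position up to the end of
+                        * the old copy and shift the data down by 'offset' dwords. */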
+                       u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);
+
+                       map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
+                               &box, &trans);
+
+                       assert(map);
+                       assert(trans);
+
+                       memmove(map, map + offset, item->size_in_dw * 4);
+
+                       pipe->transfer_unmap(pipe, trans);
+               }
+       }
+
+       item->start_in_dw = new_start_in_dw;
+}
+
+/**
+ * Frees the memory associated with the item with id \a id from the pool.
+ * \param id   The id of the item to be freed.
+ */
 void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 {
        struct compute_memory_item *item, *next;
+       struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
+       struct pipe_resource *res;
 
-       COMPUTE_DBG("* compute_memory_free() id + %ld \n", id);
+       COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);
 
-       for (item = pool->item_list; item; item = next) {
-               next = item->next;
+       LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {
 
                if (item->id == id) {
-                       if (item->prev) {
-                               item->prev->next = item->next;
+
+                       if (item->link.next != pool->item_list) {
+                               pool->status |= POOL_FRAGMENTED;
                        }
-                       else {
-                               pool->item_list = item->next;
+
+                       list_del(&item->link);
+
+                       if (item->real_buffer) {
+                               res = (struct pipe_resource *)item->real_buffer;
+                               pool->screen->b.b.resource_destroy(
+                                               screen, res);
                        }
 
-                       if (item->next) {
-                               item->next->prev = item->prev;
+                       free(item);
+
+                       return;
+               }
+       }
+
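+       /* The item may not have been promoted to the pool yet,
+        * so also search the unallocated list. */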
+       LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
+
+               if (item->id == id) {
+                       list_del(&item->link);
+
+                       if (item->real_buffer) {
+                               res = (struct pipe_resource *)item->real_buffer;
+                               pool->screen->b.b.resource_destroy(
+                                               screen, res);
                        }
 
                        free(item);
@@ -404,45 +613,44 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 }
 
 /**
- * Creates pending allocations
+ * Creates pending allocations for new items; these items are
+ * placed in the unallocated_list.
+ * \param size_in_dw   The size, in double words, of the new item.
+ * \return The new item
+ * \see r600_compute_global_buffer_create
  */
 struct compute_memory_item* compute_memory_alloc(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
 {
-       struct compute_memory_item *new_item;
+       struct compute_memory_item *new_item = NULL;
 
-       COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
+       COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
                        size_in_dw, 4 * size_in_dw);
 
        new_item = (struct compute_memory_item *)
                                CALLOC(sizeof(struct compute_memory_item), 1);
+       if (!new_item)
+               return NULL;
+
        new_item->size_in_dw = size_in_dw;
        new_item->start_in_dw = -1; /* mark pending */
        new_item->id = pool->next_id++;
        new_item->pool = pool;
+       new_item->real_buffer = NULL;
 
-       struct compute_memory_item *last_item;
-
-       if (pool->item_list) {
-               for (last_item = pool->item_list; last_item->next;
-                                               last_item = last_item->next);
-
-               last_item->next = new_item;
-               new_item->prev = last_item;
-       }
-       else {
-               pool->item_list = new_item;
-       }
+       list_addtail(&new_item->link, pool->unallocated_list);
 
-       COMPUTE_DBG("  + Adding item %p id = %u size = %u (%u bytes)\n",
+       COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
                        new_item, new_item->id, new_item->size_in_dw,
                        new_item->size_in_dw * 4);
        return new_item;
 }
 
 /**
- * Transfer data host<->device, offset and size is in bytes
+ * Transfer data host<->device, offset and size is in bytes.
+ * \param device_to_host 1 for device->host, 0 for host->device.
+ * \see compute_memory_shadow
  */
 void compute_memory_transfer(
        struct compute_memory_pool* pool,
@@ -462,31 +670,26 @@ void compute_memory_transfer(
 
        assert(gart);
 
-       COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
+       COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
                "offset_in_chunk = %d, size = %d\n", device_to_host,
                offset_in_chunk, size);
 
-       if (device_to_host)
-       {
-               xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
-                       &(struct pipe_box) { .width = aligned_size,
-                       .height = 1, .depth = 1 });
+       if (device_to_host) {
+               map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
+                       &(struct pipe_box) { .width = aligned_size * 4,
+                       .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
-               map = pipe->transfer_map(pipe, xfer);
                assert(map);
                memcpy(data, map + internal_offset, size);
                pipe->transfer_unmap(pipe, xfer);
-               pipe->transfer_destroy(pipe, xfer);
        } else {
-               xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
-                       &(struct pipe_box) { .width = aligned_size,
-                       .height = 1, .depth = 1 });
+               map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
+                       &(struct pipe_box) { .width = aligned_size * 4,
+                       .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
-               map = pipe->transfer_map(pipe, xfer);
                assert(map);
                memcpy(map + internal_offset, data, size);
                pipe->transfer_unmap(pipe, xfer);
-               pipe->transfer_destroy(pipe, xfer);
        }
 }