r600g: fix RSQ of negative value on Cayman
[mesa.git] / src / gallium / drivers / r600 / compute_memory_pool.c
index 648219b9eb93337335654c11cb9f49924f222832..bd3c14d83ba04c43c17c9e244ae16e526f6d2cd1 100644 (file)
 #include "r600_formats.h"
 #include "compute_memory_pool.h"
 #include "evergreen_compute_internal.h"
+#include <inttypes.h>
 
-static struct r600_resource_texture * create_pool_texture(struct r600_screen * screen,
+static struct r600_texture * create_pool_texture(struct r600_screen * screen,
                unsigned size_in_dw)
 {
 
        struct pipe_resource templ;
-       struct r600_resource_texture * tex;
+       struct r600_texture * tex;
 
+       if (size_in_dw == 0) {
+               return NULL;
+       }
        memset(&templ, 0, sizeof(templ));
        templ.target = PIPE_TEXTURE_1D;
        templ.format = PIPE_FORMAT_R32_UINT;
@@ -59,7 +63,7 @@ static struct r600_resource_texture * create_pool_texture(struct r600_screen * s
        templ.depth0 = 1;
        templ.array_size = 1;
 
-       tex = (struct r600_resource_texture *)r600_texture_create(
+       tex = (struct r600_texture *)r600_texture_create(
                                                &screen->screen, &templ);
        /* XXX: Propagate this error */
        assert(tex && "Out of memory");
@@ -71,20 +75,32 @@ static struct r600_resource_texture * create_pool_texture(struct r600_screen * s
  * Creates a new pool
  */
 struct compute_memory_pool* compute_memory_pool_new(
-       int64_t initial_size_in_dw,
        struct r600_screen * rscreen)
 {
        struct compute_memory_pool* pool = (struct compute_memory_pool*)
                                CALLOC(sizeof(struct compute_memory_pool), 1);
 
+       COMPUTE_DBG("* compute_memory_pool_new()\n");
+
+       pool->screen = rscreen;
+       return pool;
+}
+
+static void compute_memory_pool_init(struct compute_memory_pool * pool,
+       unsigned initial_size_in_dw)
+{
+       /* Resets next_id, records the size and creates the backing texture. */
+       COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %u\n",
+               initial_size_in_dw);
+
+       /* XXX: pool->shadow is used when the buffer needs to be resized, but
+        * resizing does not work at the moment.
+        * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
+        */
        pool->next_id = 1;
        pool->size_in_dw = initial_size_in_dw;
-       pool->screen = rscreen;
        pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
                                                        pool->size_in_dw);
-       pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
-
-       return pool;
 }
 
 /**
@@ -92,9 +108,12 @@ struct compute_memory_pool* compute_memory_pool_new(
  */
 void compute_memory_pool_delete(struct compute_memory_pool* pool)
 {
+       COMPUTE_DBG("* compute_memory_pool_delete()\n");
        free(pool->shadow);
-       pool->screen->screen.resource_destroy((struct pipe_screen *)
+       if (pool->bo) { /* bo is created in compute_memory_pool_init(), which may not have run */
+               pool->screen->screen.resource_destroy((struct pipe_screen *)
                        pool->screen, (struct pipe_resource *)pool->bo);
+       }
        free(pool);
 }
 
@@ -112,6 +131,9 @@ int64_t compute_memory_prealloc_chunk(
 
        int last_end = 0;
 
+       COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
+               size_in_dw);
+
        for (item = pool->item_list; item; item = item->next) {
                if (item->start_in_dw > -1) {
                        if (item->start_in_dw-last_end > size_in_dw) {
@@ -139,6 +161,9 @@ struct compute_memory_item* compute_memory_postalloc_chunk(
 {
        struct compute_memory_item* item;
 
+       COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
+               start_in_dw);
+
        for (item = pool->item_list; item; item = item->next) {
                if (item->next) {
                        if (item->start_in_dw < start_in_dw
@@ -163,19 +188,39 @@ struct compute_memory_item* compute_memory_postalloc_chunk(
 void compute_memory_grow_pool(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int new_size_in_dw)
 {
+       COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
+               new_size_in_dw);
+
        assert(new_size_in_dw >= pool->size_in_dw);
 
-       new_size_in_dw += 1024 - (new_size_in_dw % 1024);
+       assert(!pool->bo && "Growing the global memory pool is not yet "
+               "supported.  You will see this message if you are trying to"
+               "use more than 64 kb of memory");
 
-       compute_memory_shadow(pool, pipe, 1);
-       pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
-       pool->size_in_dw = new_size_in_dw;
-       pool->screen->screen.resource_destroy(
-               (struct pipe_screen *)pool->screen,
-               (struct pipe_resource *)pool->bo);
-       pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
+       if (!pool->bo) {
+               compute_memory_pool_init(pool, 1024 * 16);
+       } else {
+               /* XXX: Growing memory pools does not work at the moment.  I think
+                * it is because we are using fragment shaders to copy data to
+                * the new texture and some of the compute registers are being
+                * included in the 3D command stream. */
+               fprintf(stderr, "Warning: growing the global memory pool to"
+                               "more than 64 kb is not yet supported\n");
+               new_size_in_dw += 1024 - (new_size_in_dw % 1024);
+
+               COMPUTE_DBG("  Aligned size = %d\n", new_size_in_dw);
+
+               compute_memory_shadow(pool, pipe, 1);
+               pool->shadow = realloc(pool->shadow, new_size_in_dw*4);
+               pool->size_in_dw = new_size_in_dw;
+               pool->screen->screen.resource_destroy(
+                       (struct pipe_screen *)pool->screen,
+                       (struct pipe_resource *)pool->bo);
+               pool->bo = (struct r600_resource*)create_pool_texture(
+                                                       pool->screen,
                                                        pool->size_in_dw);
-       compute_memory_shadow(pool, pipe, 0);
+               compute_memory_shadow(pool, pipe, 0);
+       }
 }
 
 /**
@@ -186,6 +231,9 @@ void compute_memory_shadow(struct compute_memory_pool* pool,
 {
        struct compute_memory_item chunk;
 
+       COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
+               device_to_host);
+
        chunk.id = 0;
        chunk.start_in_dw = 0;
        chunk.size_in_dw = pool->size_in_dw;
@@ -206,15 +254,21 @@ void compute_memory_finalize_pending(struct compute_memory_pool* pool,
        int64_t allocated = 0;
        int64_t unallocated = 0;
 
+       COMPUTE_DBG("* compute_memory_finalize_pending()\n");
+
        for (item = pool->item_list; item; item = item->next) {
-               COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next);
+               COMPUTE_DBG("  + list: offset = %i id = %i size = %i "
+                       "(%i bytes)\n",item->start_in_dw, item->id,
+                       item->size_in_dw, item->size_in_dw * 4);
        }
 
+       /* Search through the list of memory items in the pool */
        for (item = pool->item_list; item; item = next) {
                next = item->next;
 
-
+               /* Check if the item is pending. */
                if (item->start_in_dw == -1) {
+                       /* It is pending, so add it to the pending_list... */
                        if (end_p) {
                                end_p->next = item;
                        }
@@ -222,6 +276,7 @@ void compute_memory_finalize_pending(struct compute_memory_pool* pool,
                                pending_list = item;
                        }
 
+                       /* ... and then remove it from the item list. */
                        if (item->prev) {
                                item->prev->next = next;
                        }
@@ -233,26 +288,50 @@ void compute_memory_finalize_pending(struct compute_memory_pool* pool,
                                next->prev = item->prev;
                        }
 
+                       /* This sequence makes the item be at the end of the list */
                        item->prev = end_p;
                        item->next = NULL;
                        end_p = item;
 
+                       /* Update the amount of space we will need to allocate. */
                        unallocated += item->size_in_dw+1024;
                }
                else {
+                       /* The item is not pending, so update the amount of space
+                        * that has already been allocated. */
                        allocated += item->size_in_dw;
                }
        }
 
+       /* If we require more space than the size of the pool, then grow the
+        * pool.
+        *
+        * XXX: I'm pretty sure this won't work.  Imagine this scenario:
+        *
+        * Offset Item Size
+        *   0    A    50
+        * 200    B    50
+        * 400    C    50
+        *
+        * Total size = 450
+        * Allocated size = 150
+        * Pending Item D Size = 200
+        *
+        * In this case, there are 300 units of free space in the pool, but
+        * they aren't contiguous, so it will be impossible to allocate Item D.
+        */
        if (pool->size_in_dw < allocated+unallocated) {
                compute_memory_grow_pool(pool, pipe, allocated+unallocated);
        }
 
+       /* Loop through all the pending items, allocate space for them and
+        * add them back to the item_list. */
        for (item = pending_list; item; item = next) {
                next = item->next;
 
                int64_t start_in_dw;
 
+               /* Search for free space in the pool for this item. */
                while ((start_in_dw=compute_memory_prealloc_chunk(pool,
                                                item->size_in_dw)) == -1) {
                        int64_t need = item->size_in_dw+2048 -
@@ -273,6 +352,10 @@ void compute_memory_finalize_pending(struct compute_memory_pool* pool,
                                                pool->size_in_dw + need);
                        }
                }
+               COMPUTE_DBG("  + Found space for Item %p id = %u "
+                       "start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
+                       item, item->id, start_in_dw, start_in_dw * 4,
+                       item->size_in_dw, item->size_in_dw * 4);
 
                item->start_in_dw = start_in_dw;
                item->next = NULL;
@@ -303,6 +386,8 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 {
        struct compute_memory_item *item, *next;
 
+       COMPUTE_DBG("* compute_memory_free() id + %ld \n", id);
+
        for (item = pool->item_list; item; item = next) {
                next = item->next;
 
@@ -324,7 +409,7 @@ void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
                }
        }
 
-       fprintf(stderr, "Internal error, invalid id %ld "
+       fprintf(stderr, "Internal error, invalid id %"PRIi64" "
                "for compute_memory_free\n", id);
 
        assert(0 && "error");
@@ -339,7 +424,8 @@ struct compute_memory_item* compute_memory_alloc(
 {
        struct compute_memory_item *new_item;
 
-       COMPUTE_DBG("Alloc: %i\n", size_in_dw);
+       COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
+                       size_in_dw, 4 * size_in_dw);
 
        new_item = (struct compute_memory_item *)
                                CALLOC(sizeof(struct compute_memory_item), 1);
@@ -361,6 +447,9 @@ struct compute_memory_item* compute_memory_alloc(
                pool->item_list = new_item;
        }
 
+       COMPUTE_DBG("  + Adding item %p id = %u size = %u (%u bytes)\n",
+                       new_item, new_item->id, new_item->size_in_dw,
+                       new_item->size_in_dw * 4);
        return new_item;
 }
 
@@ -383,6 +472,12 @@ void compute_memory_transfer(
        struct pipe_transfer *xfer;
        uint32_t *map;
 
+       assert(gart);
+
+       COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
+               "offset_in_chunk = %d, size = %d\n", device_to_host,
+               offset_in_chunk, size);
+
        if (device_to_host)
        {
                xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,