/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>
/**
 * Creates a new pool.
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen *rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (!pool)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}
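
/* Illustrative usage sketch (not part of this file; `rscreen` and `pipe`
 * are assumed to come from the caller's context). The expected lifecycle
 * is roughly:
 *
 *	struct compute_memory_pool *pool = compute_memory_pool_new(rscreen);
 *	struct compute_memory_item *item = compute_memory_alloc(pool, 4096);
 *	compute_memory_finalize_pending(pool, pipe); // assigns a real offset
 *	...
 *	compute_memory_free(pool, item->id);
 *	compute_memory_pool_delete(pool);
 */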
static void compute_memory_pool_init(struct compute_memory_pool *pool,
	unsigned initial_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
	pool->next_id = 1;
	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}
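
/* Note: pool sizes are tracked in dwords (4 bytes each), so for example
 * compute_memory_pool_init(pool, 16384) backs the pool with a
 * 16384 * 4 = 64 KiB VRAM buffer, plus a host shadow copy of the same
 * size allocated above. */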
/**
 * Frees all stuff in the pool and the pool struct itself too.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}
/**
 * Searches for an empty space in the pool. Returns the offset (in dwords)
 * of the allocatable space in the pool, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int64_t last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			/* The gap before this allocated item is big enough. */
			if (item->start_in_dw - last_end > size_in_dw) {
				return last_end;
			}

			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	/* Check the tail of the pool after the last allocated item. */
	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
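
/* Worked example of the first-fit scan above (illustrative values): with
 * allocated items at [0, 100) and [1024, 1524) and size_in_dw = 500:
 *
 *	item A: start 0, gap 0 - 0 = 0, not enough;
 *	        last_end = 100, rounded up to 1024.
 *	item B: start 1024, gap 1024 - 1024 = 0, not enough;
 *	        last_end = 1524, rounded up to 2048.
 *
 * The loop ends and 2048 is returned if the tail still has 500 dwords.
 * Note the idiom `x += 1024 - (x % 1024)` bumps a value that is already a
 * multiple of 1024 by a full extra 1024. */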
/**
 * Search for the chunk where we can link our new chunk after it.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
		return NULL;
	}

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		} else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}
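
/* Illustrative: with items starting at 0 and 2048, a new chunk placed at
 * start_in_dw = 1024 falls between them, so the loop returns the item at
 * offset 0 and the new chunk is linked after it. A start below the first
 * item makes the front-of-list check return NULL instead. */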
/**
 * Reallocates the pool, conserving its contents.
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context *pipe, int new_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		/* Round up to the next multiple of 1024 dwords. */
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG(pool->screen, "  Aligned size = %d (%d bytes)\n",
			new_size_in_dw, new_size_in_dw * 4);

		/* Save the pool to host memory, replace the buffer object
		 * and copy the contents back. */
		compute_memory_shadow(pool, pipe, 1);
		pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
		pool->size_in_dw = new_size_in_dw;
		pool->screen->b.b.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
							pool->screen,
							pool->size_in_dw * 4);
		compute_memory_shadow(pool, pipe, 0);
	}
}
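
/* Example of the rounding above: growing to new_size_in_dw = 5000 gives
 * 5000 + (1024 - 5000 % 1024) = 5000 + 120 = 5120 dwords (20 KiB); an
 * already-aligned request such as 4096 also becomes 5120, since the idiom
 * always adds at least one dword. Growing is expensive: the whole pool
 * round-trips through the host shadow copy, once out and once back. */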
/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context *pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	/* Describe the whole pool as one temporary chunk. */
	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw * 4);
}
/**
 * Allocates pending allocations in the pool.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context *pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	int64_t start_in_dw = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Search through the list of memory items in the pool */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		/* Check if the item is pending. */
		if (item->start_in_dw == -1) {
			/* It is pending, so add it to the pending_list... */
			if (end_p) {
				end_p->next = item;
			} else {
				pending_list = item;
			}

			/* ... and then remove it from the item list. */
			if (item->prev) {
				item->prev->next = next;
			} else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			/* This sequence makes the item be at the end of the list */
			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* Update the amount of space we will need to allocate. */
			unallocated += item->size_in_dw + 1024;
		} else {
			/* The item is not pending, so update the amount of space
			 * that has already been allocated. */
			allocated += item->size_in_dw;
		}
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work.  Imagine this scenario:
	 *
	 * Offset Item Size
	 *   0    A    50
	 * 200    B    50
	 * 400    C    50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated + unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated + unallocated);
	}

	/* Loop through all the pending items, allocate space for them and
	 * add them back to the item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		/* Search for free space in the pool for this item. */
		while ((start_in_dw = compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
			int64_t need = item->size_in_dw + 2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			} else {
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}
		COMPUTE_DBG(pool->screen, "  + Found space for Item %p id = %"PRIi64" "
			"start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			if (pos) {
				/* Link the item after pos. */
				item->prev = pos;
				item->next = pos->next;
				pos->next = item;
				if (item->next) {
					item->next->prev = item;
				}
			} else {
				/* Add item to the front of the list */
				item->next = pool->item_list;
				item->prev = pool->item_list->prev;
				pool->item_list->prev = item;
				pool->item_list = item;
			}
		} else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}
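
/* Worked example of the grow heuristic above (illustrative numbers): with
 * pool->size_in_dw = 1024, allocated = 900 and a pending item of
 * size_in_dw = 100, need = 100 + 2048 - (1024 - 900) = 2024, rounded up
 * to 2048, so the pool grows to 1024 + 2048 = 3072 dwords before the
 * prealloc scan is retried. The negative-need branch instead grows the
 * pool by roughly 10%, rounded up to a multiple of 1024 dwords. */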
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			/* Unlink the item and release it. */
			if (item->prev) {
				item->prev->next = item->next;
			} else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}
/**
 * Creates pending allocations.
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL, *last_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	if (!new_item)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	if (pool->item_list) {
		/* Append the new item at the end of the list. */
		for (last_item = pool->item_list; last_item->next;
						last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;
	} else {
		pool->item_list = new_item;
	}

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);

	return new_item;
}
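
/* Illustrative: compute_memory_alloc(pool, 1024) returns an item with
 * start_in_dw == -1 (pending) and a fresh id; it occupies no pool space
 * until compute_memory_finalize_pending() assigns it a real offset. */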
/**
 * Transfer data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context *pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void *data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw * 4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	char *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}
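
/* Illustrative: reading the first 256 bytes of a finalized chunk back to
 * the host (`buf` is a caller-provided staging buffer, assumed here):
 *
 *	uint32_t buf[64];
 *	compute_memory_transfer(pool, pipe, 1, item, buf, 0, 256);
 *
 * Writing host data into the chunk is the same call with
 * device_to_host = 0. */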
/**
 * Transfer data between chunk<->data, it is for VRAM<->GART transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,