[mesa.git] / src / gallium / drivers / r600 / compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	/* Bail out early if the host allocation failed. */
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG("* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}
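
/* Note: compute_memory_pool_new() does not create the GPU buffer itself;
 * pool->bo stays NULL until compute_memory_grow_pool() calls
 * compute_memory_pool_init() on first use. */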

static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{

	COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	/* XXX: pool->shadow is used when the buffer needs to be resized, but
	 * resizing does not work at the moment.
	 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
	 */
	pool->next_id = 1;
	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}
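
/* All sizes and offsets in this pool are counted in dwords (4-byte units),
 * so the underlying buffer object is size_in_dw * 4 bytes long. */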

/**
 * Frees the memory pool's contents and the pool structure itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG("* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->screen.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for a gap of free space in the pool that is large enough for the
 * request. Returns the offset of the gap in dwords, or -1 if no suitable
 * gap exists.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	assert(size_in_dw <= pool->size_in_dw);

	struct compute_memory_item *item;

	int last_end = 0;

	COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			if (item->start_in_dw-last_end > size_in_dw) {
				return last_end;
			}

			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
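
/* For instance, with allocated items occupying [0, 50) and [2048, 2100) in
 * dwords, a request for 100 dwords scans past the first item, rounds its end
 * (50) up to the next 1024-dword boundary, finds that 2048 - 1024 > 100, and
 * returns offset 1024. */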

/**
 * Searches for the item after which the newly allocated chunk should be
 * linked. Returns NULL when the new chunk belongs at the front of the list.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG("* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
		return NULL;
	}

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}

/**
 * Reallocates the pool, preserving its contents.
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
		new_size_in_dw);

	assert(new_size_in_dw >= pool->size_in_dw);

	assert(!pool->bo && "Growing the global memory pool is not yet "
		"supported. You will see this message if you are trying to "
		"use more than 64 kb of memory");

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		/* XXX: Growing memory pools does not work at the moment. I think
		 * it is because we are using fragment shaders to copy data to
		 * the new texture and some of the compute registers are being
		 * included in the 3D command stream. */
		fprintf(stderr, "Warning: growing the global memory pool to "
			"more than 64 kb is not yet supported\n");
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG(" Aligned size = %d\n", new_size_in_dw);

		compute_memory_shadow(pool, pipe, 1);
		pool->shadow = realloc(pool->shadow, new_size_in_dw*4);
		pool->size_in_dw = new_size_in_dw;
		pool->screen->screen.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
			pool->screen,
			pool->size_in_dw * 4);
		compute_memory_shadow(pool, pipe, 0);
	}
}
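
/* The (currently unreachable) grow path round-trips the pool through the
 * host: the old buffer is copied into pool->shadow, the shadow is enlarged,
 * the buffer object is destroyed and reallocated at the new size, and the
 * shadow is copied back into the new buffer. */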

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates space in the pool for every pending item and links the items
 * back into the pool's item list.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	COMPUTE_DBG("* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG(" + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}
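
	/* First pass: pull every pending item (start_in_dw == -1) out of
	 * item_list and append it to pending_list, while adding up how much
	 * space the already-placed items use and how much the pending ones
	 * will need. */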

	/* Search through the list of memory items in the pool */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		/* Check if the item is pending. */
		if (item->start_in_dw == -1) {
			/* It is pending, so add it to the pending_list... */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* ... and then remove it from the item list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			/* This sequence makes the item be at the end of the list */
			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* Update the amount of space we will need to allocate. */
			unallocated += item->size_in_dw+1024;
		}
		else {
			/* The item is not pending, so update the amount of space
			 * that has already been allocated. */
			allocated += item->size_in_dw;
		}
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work. Imagine this scenario:
	 *
	 * Offset Item Size
	 *   0    A    50
	 * 200    B    50
	 * 400    C    50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated+unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
	}

	/* Loop through all the pending items, allocate space for them and
	 * add them back to the item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		int64_t start_in_dw;

		/* Search for free space in the pool for this item. */
		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
			int64_t need = item->size_in_dw+2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
			else {
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}
		COMPUTE_DBG(" + Found space for Item %p id = %"PRIi64" "
			"start_in_dw = %"PRIi64" (%"PRIi64" bytes) "
			"size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			if (pos) {
				item->prev = pos;
				item->next = pos->next;
				pos->next = item;
				if (item->next) {
					item->next->prev = item;
				}
			} else {
				/* Add item to the front of the list: link the
				 * old head after the new item and make the new
				 * item the head. */
				item->next = pool->item_list;
				pool->item_list->prev = item;
				pool->item_list = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}


void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG("* compute_memory_free() id = %"PRIi64"\n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a pending allocation: the new item is added to the pool's list, but
 * space for it is only assigned later by compute_memory_finalize_pending().
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item;

	COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	struct compute_memory_item *last_item;

	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
			last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;
	}
	else {
		pool->item_list = new_item;
	}

	COMPUTE_DBG(" + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);
	return new_item;
}
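
/* Sketch of the intended calling pattern (the caller-side names are
 * hypothetical):
 *
 *     struct compute_memory_item *item =
 *             compute_memory_alloc(pool, size_in_bytes / 4);
 *     ...
 *     compute_memory_finalize_pending(pool, pipe);
 *
 * Until finalize runs, item->start_in_dw stays at -1 (pending); afterwards
 * it holds the item's offset in the pool, in dwords. */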

/**
 * Transfers data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host)
	{
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	} else {
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	}
}
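
/* Note: both directions map the pool's buffer object through the
 * get_transfer/transfer_map interface (the box width comes from
 * pool->size_in_dw), copy `size` bytes at the chunk's offset with memcpy,
 * and then unmap and destroy the transfer. */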

/**
 * Transfers data between a chunk and a resource; meant for VRAM<->GART
 * transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}