r600g/compute: Tidy up compute_memory_finalize_pending a bit
[mesa.git] src/gallium/drivers/r600/compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}

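/*
 * Rough usage sketch of this pool, using only the helpers defined in this
 * file.  The screen, pipe context and data buffer ("rscreen", "pipe",
 * "data") are placeholders assumed to come from the caller, e.g. the
 * evergreen compute code:
 *
 *   struct compute_memory_pool *pool = compute_memory_pool_new(rscreen);
 *   struct compute_memory_item *item = compute_memory_alloc(pool, size_in_dw);
 *   compute_memory_finalize_pending(pool, pipe); // gives item a start_in_dw
 *   compute_memory_transfer(pool, pipe, 0, item, data, 0, size_in_dw * 4);
 *   ...
 *   compute_memory_free(pool, item->id);
 *   compute_memory_pool_delete(pool);
 */
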
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{

	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);
67
68 pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
69 if (pool->shadow == NULL)
70 return;
71
72 pool->next_id = 1;
73 pool->size_in_dw = initial_size_in_dw;
74 pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
75 pool->size_in_dw * 4);
76 }
77
/**
 * Frees everything in the pool, as well as the pool struct itself
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for empty space in the pool. Returns the offset (in dwords) of
 * allocatable space in the pool, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			if (item->start_in_dw - last_end > size_in_dw) {
				return last_end;
			}

			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}

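/*
 * Worked example of the first-fit search above (illustrative numbers only):
 * with one allocated item at offset 0 of size 50 dw and another at offset
 * 2048 dw, a request for 100 dw first advances last_end to 50, rounds it up
 * to the next 1024-dword boundary (1024), sees that 2048 - 1024 > 100 and
 * returns 1024.  Candidate offsets therefore always start on 1024-dword
 * boundaries after an existing item.
 */
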
/**
 * Searches for the chunk after which the new chunk can be linked.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
		return NULL;
	}

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}

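/*
 * Note: allocated items (start_in_dw >= 0) are kept in item_list in
 * ascending offset order; newly created items are appended as pending
 * (start_in_dw == -1) and only receive an offset in
 * compute_memory_finalize_pending().  Both compute_memory_prealloc_chunk()
 * and the insertion-point search above rely on that ordering.
 */
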
/**
 * Reallocates the pool, preserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
		if (pool->shadow == NULL)
			return -1;
	} else {
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG(pool->screen, " Aligned size = %d (%d bytes)\n",
			new_size_in_dw, new_size_in_dw * 4);

		compute_memory_shadow(pool, pipe, 1);
		/* Keep the old shadow if realloc fails, so the pool stays usable. */
		uint32_t *new_shadow = realloc(pool->shadow, new_size_in_dw * 4);
		if (new_shadow == NULL)
			return -1;
		pool->shadow = new_shadow;

		pool->size_in_dw = new_size_in_dw;
		pool->screen->b.b.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
			pool->screen,
			pool->size_in_dw * 4);
		compute_memory_shadow(pool, pipe, 0);
	}

	return 0;
}

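/*
 * How the grow works: the pool is a single VRAM buffer, so growing it means
 * copying the whole pool to the host-side shadow (compute_memory_shadow(..., 1)),
 * freeing the old buffer, allocating a bigger one and copying the shadow back
 * (compute_memory_shadow(..., 0)).  The new size is rounded up to a
 * 1024-dword multiple, e.g. a request for 5000 dw becomes
 * 5000 + (1024 - 5000 % 1024) = 5120 dw.
 */
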
/**
 * Copies the pool from the device to the host, or vice versa.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates space in the pool for all pending items.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	int64_t start_in_dw = 0;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG(pool->screen, " + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

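	/*
	 * Overview of what follows: pending items (start_in_dw == -1) are
	 * unlinked from item_list onto a temporary pending_list, the pool is
	 * grown if the combined size of allocated and pending items (plus
	 * 1024 dwords of padding per pending item) no longer fits, and each
	 * pending item is then placed back into item_list at the first offset
	 * compute_memory_prealloc_chunk() finds, growing the pool again when
	 * fragmentation prevents a fit.
	 */
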
	/* Search through the list of memory items in the pool */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		/* Check if the item is pending. */
		if (item->start_in_dw == -1) {
			/* It is pending, so add it to the pending_list... */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* ... and then remove it from the item list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			/* This sequence puts the item at the end of the pending list. */
			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* Update the amount of space we will need to allocate. */
			unallocated += item->size_in_dw + 1024;
		}
		else {
			/* The item is not pending, so update the amount of space
			 * that has already been allocated. */
			allocated += item->size_in_dw;
		}
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work.  Imagine this scenario:
	 *
	 * Offset Item Size
	 *    0    A    50
	 *  200    B    50
	 *  400    C    50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}

	/* Loop through all the pending items, allocate space for them and
	 * add them back to the item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		/* Search for free space in the pool for this item. */
		while ((start_in_dw = compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
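			/*
			 * The item did not fit anywhere, so grow the pool: "need"
			 * is the item's size plus 2048 dwords of slack, minus the
			 * space that is nominally still free (size_in_dw -
			 * allocated).  If that comes out negative the item would
			 * fit if the free space were contiguous, so grow by 10%
			 * of the pool instead; either way the increment is rounded
			 * up to a 1024-dword multiple.
			 */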
			int64_t need = item->size_in_dw + 2048 -
						(pool->size_in_dw - allocated);

			if (need < 0) {
				need = pool->size_in_dw / 10;
			}

			need += 1024 - (need % 1024);

			err = compute_memory_grow_pool(pool,
					pipe,
					pool->size_in_dw + need);

			if (err == -1)
				return -1;
		}
		COMPUTE_DBG(pool->screen, " + Found space for Item %p id = %"PRIi64" "
			"start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			if (pos) {
				item->prev = pos;
				item->next = pos->next;
				pos->next = item;
				if (item->next) {
					item->next->prev = item;
				}
			} else {
				/* Add item to the front of the list */
				item->next = pool->item_list;
				item->prev = pool->item_list->prev;
				pool->item_list->prev = item;
				pool->item_list = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}

	return 0;
}


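/**
 * Unlinks and frees the item with the given id. The space it occupied
 * becomes available again to compute_memory_prealloc_chunk().
 */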
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %ld \n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a new item as a pending allocation; it receives an offset in the
 * pool later, in compute_memory_finalize_pending().
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL, *last_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
		size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	if (new_item == NULL)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
						last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;

	}
	else {
		pool->item_list = new_item;
	}
	COMPUTE_DBG(pool->screen, " + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
		new_item, new_item->id, new_item->size_in_dw,
		new_item->size_in_dw * 4);
	return new_item;
}

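/*
 * The returned pointer stays valid until compute_memory_free() is called
 * with the item's id; compute_memory_finalize_pending() only relinks items,
 * it never frees them.
 */
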
/**
 * Transfers data between host and device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}

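/*
 * Note that both directions above map the entire pool buffer
 * (width = aligned_size * 4 bytes) and copy through the CPU;
 * compute_memory_transfer_direct() below is the placeholder for a future
 * DMA path between VRAM and GART resources.
 */
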
/**
 * Transfers data between a chunk and a resource; intended for VRAM<->GART
 * transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}