src/gallium/drivers/r600/compute_memory_pool.c

   1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a
   3  * copy of this software and associated documentation files (the "Software"),
   4  * to deal in the Software without restriction, including without limitation
   5  * on the rights to use, copy, modify, merge, publish, distribute, sub
   6  * license, and/or sell copies of the Software, and to permit persons to whom
   7  * the Software is furnished to do so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice (including the next
  10  * paragraph) shall be included in all copies or substantial portions of the
  11  * Software.
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * Authors:
  22  *      Adam Rak <adam.rak@streamnovation.com>
  23  */
  24
  25 #include "pipe/p_defines.h"
  26 #include "pipe/p_state.h"
  27 #include "pipe/p_context.h"
  28 #include "util/u_blitter.h"
  29 #include "util/u_double_list.h"
  30 #include "util/u_transfer.h"
  31 #include "util/u_surface.h"
  32 #include "util/u_pack_color.h"
  33 #include "util/u_memory.h"
  34 #include "util/u_inlines.h"
  35 #include "util/u_framebuffer.h"
  36 #include "r600.h"
  37 #include "r600_resource.h"
  38 #include "r600_shader.h"
  39 #include "r600_pipe.h"
  40 #include "r600_formats.h"
  41 #include "compute_memory_pool.h"
  42 #include "evergreen_compute_internal.h"
  43
  44 static struct r600_resource_texture * create_pool_texture(struct r600_screen * screen,
  45                 unsigned size_in_dw)
  46 {
  47
  48         struct pipe_resource templ;
  49         struct r600_resource_texture * tex;
  50
  51         if (size_in_dw == 0) {
  52                 return NULL;
  53         }
  54         memset(&templ, 0, sizeof(templ));
  55         templ.target = PIPE_TEXTURE_1D;
  56         templ.format = PIPE_FORMAT_R32_UINT;
  57         templ.bind = PIPE_BIND_CUSTOM;
  58         templ.usage = PIPE_USAGE_IMMUTABLE;
  59         templ.flags = 0;
  60         templ.width0 = size_in_dw;
  61         templ.height0 = 1;
  62         templ.depth0 = 1;
  63         templ.array_size = 1;
  64
  65         tex = (struct r600_resource_texture *)r600_texture_create(
  66                                                 &screen->screen, &templ);
  67         /* XXX: Propagate this error */
  68         assert(tex && "Out of memory");
  69         tex->is_rat = 1;
  70         return tex;
  71 }
  72
  73 /**
  74  * Creates a new pool
  75  */
  76 struct compute_memory_pool* compute_memory_pool_new(
  77         struct r600_screen * rscreen)
  78 {
  79         struct compute_memory_pool* pool = (struct compute_memory_pool*)
  80                                 CALLOC(sizeof(struct compute_memory_pool), 1);
  81
  82         COMPUTE_DBG("* compute_memory_pool_new()\n");
  83
  84         pool->screen = rscreen;
  85         return pool;
  86 }
  87
  88 static void compute_memory_pool_init(struct compute_memory_pool * pool,
  89         unsigned initial_size_in_dw)
  90 {
  91
  92         COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %ld\n",
  93                 initial_size_in_dw);
  94
  95         /* XXX: pool->shadow is used when the buffer needs to be resized, but
  96          * resizing does not work at the moment.
  97          * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
  98          */
  99         pool->next_id = 1;
 100         pool->size_in_dw = initial_size_in_dw;
 101         pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
 102                                                         pool->size_in_dw);
 103 }
 104
 105 /**
 106  * Frees all stuff in the pool and the pool struct itself too
 107  */
 108 void compute_memory_pool_delete(struct compute_memory_pool* pool)
 109 {
 110         COMPUTE_DBG("* compute_memory_pool_delete()\n");
 111         free(pool->shadow);
 112         if (pool->bo) {
 113                 pool->screen->screen.resource_destroy((struct pipe_screen *)
 114                         pool->screen, (struct pipe_resource *)pool->bo);
 115         }
 116         free(pool);
 117 }
 118
 119 /**
 120  * Searches for an empty space in the pool, return with the pointer to the
 121  * allocatable space in the pool, returns -1 on failure.
 122  */
 123 int64_t compute_memory_prealloc_chunk(
 124         struct compute_memory_pool* pool,
 125         int64_t size_in_dw)
 126 {
 127         assert(size_in_dw <= pool->size_in_dw);
 128
 129         struct compute_memory_item *item;
 130
 131         int last_end = 0;
 132
 133         COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
 134                 size_in_dw);
 135
 136         for (item = pool->item_list; item; item = item->next) {
 137                 if (item->start_in_dw > -1) {
 138                         if (item->start_in_dw-last_end > size_in_dw) {
 139                                 return last_end;
 140                         }
 141
 142                         last_end = item->start_in_dw + item->size_in_dw;
 143                         last_end += (1024 - last_end % 1024);
 144                 }
 145         }
 146
 147         if (pool->size_in_dw - last_end < size_in_dw) {
 148                 return -1;
 149         }
 150
 151         return last_end;
 152 }
 153
 154 /**
 155  *  Search for the chunk where we can link our new chunk after it.
 156  */
 157 struct compute_memory_item* compute_memory_postalloc_chunk(
 158         struct compute_memory_pool* pool,
 159         int64_t start_in_dw)
 160 {
 161         struct compute_memory_item* item;
 162
 163         COMPUTE_DBG("* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
 164                 start_in_dw);
 165
 166         for (item = pool->item_list; item; item = item->next) {
 167                 if (item->next) {
 168                         if (item->start_in_dw < start_in_dw
 169                                 && item->next->start_in_dw > start_in_dw) {
 170                                 return item;
 171                         }
 172                 }
 173                 else {
 174                         /* end of chain */
 175                         assert(item->start_in_dw < start_in_dw);
 176                         return item;
 177                 }
 178         }
 179
 180         assert(0 && "unreachable");
 181         return NULL;
 182 }
 183
 184 /**
 185  * Reallocates pool, conserves data
 186  */
 187 void compute_memory_grow_pool(struct compute_memory_pool* pool,
 188         struct pipe_context * pipe, int new_size_in_dw)
 189 {
 190         COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
 191                 new_size_in_dw);
 192
 193         assert(new_size_in_dw >= pool->size_in_dw);
 194
 195         assert(!pool->bo && "Growing the global memory pool is not yet "
 196                 "supported.  You will see this message if you are trying to"
 197                 "use more than 64 kb of memory");
 198
 199         if (!pool->bo) {
 200                 compute_memory_pool_init(pool, 1024 * 16);
 201         } else {
 202                 /* XXX: Growing memory pools does not work at the moment.  I think
 203                  * it is because we are using fragment shaders to copy data to
 204                  * the new texture and some of the compute registers are being
 205                  * included in the 3D command stream. */
 206                 fprintf(stderr, "Warning: growing the global memory pool to"
 207                                 "more than 64 kb is not yet supported\n");
 208                 new_size_in_dw += 1024 - (new_size_in_dw % 1024);
 209
 210                 COMPUTE_DBG("  Aligned size = %d\n", new_size_in_dw);
 211
 212                 compute_memory_shadow(pool, pipe, 1);
 213                 pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4);
 214                 pool->size_in_dw = new_size_in_dw;
 215                 pool->screen->screen.resource_destroy(
 216                         (struct pipe_screen *)pool->screen,
 217                         (struct pipe_resource *)pool->bo);
 218                 pool->bo = (struct r600_resource*)create_pool_texture(
 219                                                         pool->screen,
 220                                                         pool->size_in_dw);
 221                 compute_memory_shadow(pool, pipe, 0);
 222         }
 223 }
 224
 225 /**
 226  * Copy pool from device to host, or host to device.
 227  */
 228 void compute_memory_shadow(struct compute_memory_pool* pool,
 229         struct pipe_context * pipe, int device_to_host)
 230 {
 231         struct compute_memory_item chunk;
 232
 233         COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
 234                 device_to_host);
 235
 236         chunk.id = 0;
 237         chunk.start_in_dw = 0;
 238         chunk.size_in_dw = pool->size_in_dw;
 239         chunk.prev = chunk.next = NULL;
 240         compute_memory_transfer(pool, pipe, device_to_host, &chunk,
 241                                 pool->shadow, 0, pool->size_in_dw*4);
 242 }
 243
 244 /**
 245  * Allocates pending allocations in the pool
 246  */
 247 void compute_memory_finalize_pending(struct compute_memory_pool* pool,
 248         struct pipe_context * pipe)
 249 {
 250         struct compute_memory_item *pending_list = NULL, *end_p = NULL;
 251         struct compute_memory_item *item, *next;
 252
 253         int64_t allocated = 0;
 254         int64_t unallocated = 0;
 255
 256         COMPUTE_DBG("* compute_memory_finalize_pending()\n");
 257
 258         for (item = pool->item_list; item; item = item->next) {
 259                 COMPUTE_DBG("list: %i %p\n", item->start_in_dw, item->next);
 260         }
 261
 262         for (item = pool->item_list; item; item = next) {
 263                 next = item->next;
 264
 265
 266                 if (item->start_in_dw == -1) {
 267                         if (end_p) {
 268                                 end_p->next = item;
 269                         }
 270                         else {
 271                                 pending_list = item;
 272                         }
 273
 274                         if (item->prev) {
 275                                 item->prev->next = next;
 276                         }
 277                         else {
 278                                 pool->item_list = next;
 279                         }
 280
 281                         if (next) {
 282                                 next->prev = item->prev;
 283                         }
 284
 285                         item->prev = end_p;
 286                         item->next = NULL;
 287                         end_p = item;
 288
 289                         unallocated += item->size_in_dw+1024;
 290                 }
 291                 else {
 292                         allocated += item->size_in_dw;
 293                 }
 294         }
 295
 296         if (pool->size_in_dw < allocated+unallocated) {
 297                 compute_memory_grow_pool(pool, pipe, allocated+unallocated);
 298         }
 299
 300         for (item = pending_list; item; item = next) {
 301                 next = item->next;
 302
 303                 int64_t start_in_dw;
 304
 305                 while ((start_in_dw=compute_memory_prealloc_chunk(pool,
 306                                                 item->size_in_dw)) == -1) {
 307                         int64_t need = item->size_in_dw+2048 -
 308                                                 (pool->size_in_dw - allocated);
 309
 310                         need += 1024 - (need % 1024);
 311
 312                         if (need > 0) {
 313                                 compute_memory_grow_pool(pool,
 314                                                 pipe,
 315                                                 pool->size_in_dw + need);
 316                         }
 317                         else {
 318                                 need = pool->size_in_dw / 10;
 319                                 need += 1024 - (need % 1024);
 320                                 compute_memory_grow_pool(pool,
 321                                                 pipe,
 322                                                 pool->size_in_dw + need);
 323                         }
 324                 }
 325
 326                 item->start_in_dw = start_in_dw;
 327                 item->next = NULL;
 328                 item->prev = NULL;
 329
 330                 if (pool->item_list) {
 331                         struct compute_memory_item *pos;
 332
 333                         pos = compute_memory_postalloc_chunk(pool, start_in_dw);
 334                         item->prev = pos;
 335                         item->next = pos->next;
 336                         pos->next = item;
 337
 338                         if (item->next) {
 339                                 item->next->prev = item;
 340                         }
 341                 }
 342                 else {
 343                         pool->item_list = item;
 344                 }
 345
 346                 allocated += item->size_in_dw;
 347         }
 348 }
 349
 350
 351 void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
 352 {
 353         struct compute_memory_item *item, *next;
 354
 355         COMPUTE_DBG("* compute_memory_free() id + %ld \n", id);
 356
 357         for (item = pool->item_list; item; item = next) {
 358                 next = item->next;
 359
 360                 if (item->id == id) {
 361                         if (item->prev) {
 362                                 item->prev->next = item->next;
 363                         }
 364                         else {
 365                                 pool->item_list = item->next;
 366                         }
 367
 368                         if (item->next) {
 369                                 item->next->prev = item->prev;
 370                         }
 371
 372                         free(item);
 373
 374                         return;
 375                 }
 376         }
 377
 378         fprintf(stderr, "Internal error, invalid id %ld "
 379                 "for compute_memory_free\n", id);
 380
 381         assert(0 && "error");
 382 }
 383
 384 /**
 385  * Creates pending allocations
 386  */
 387 struct compute_memory_item* compute_memory_alloc(
 388         struct compute_memory_pool* pool,
 389         int64_t size_in_dw)
 390 {
 391         struct compute_memory_item *new_item;
 392
 393         COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %ld\n", size_in_dw);
 394
 395         new_item = (struct compute_memory_item *)
 396                                 CALLOC(sizeof(struct compute_memory_item), 1);
 397         new_item->size_in_dw = size_in_dw;
 398         new_item->start_in_dw = -1; /* mark pending */
 399         new_item->id = pool->next_id++;
 400         new_item->pool = pool;
 401
 402         struct compute_memory_item *last_item;
 403
 404         if (pool->item_list) {
 405                 for (last_item = pool->item_list; last_item->next;
 406                                                 last_item = last_item->next);
 407
 408                 last_item->next = new_item;
 409                 new_item->prev = last_item;
 410         }
 411         else {
 412                 pool->item_list = new_item;
 413         }
 414
 415         return new_item;
 416 }
 417
 418 /**
 419  * Transfer data host<->device, offset and size is in bytes
 420  */
 421 void compute_memory_transfer(
 422         struct compute_memory_pool* pool,
 423         struct pipe_context * pipe,
 424         int device_to_host,
 425         struct compute_memory_item* chunk,
 426         void* data,
 427         int offset_in_chunk,
 428         int size)
 429 {
 430         int64_t aligned_size = pool->size_in_dw;
 431         struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
 432         int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;
 433
 434         struct pipe_transfer *xfer;
 435         uint32_t *map;
 436
 437         assert(gart);
 438
 439         COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
 440                 "offset_in_chunk = %d, size = %d\n", device_to_host,
 441                 offset_in_chunk, size);
 442
 443         if (device_to_host)
 444         {
 445                 xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
 446                         &(struct pipe_box) { .width = aligned_size,
 447                         .height = 1, .depth = 1 });
 448                 assert(xfer);
 449                 map = pipe->transfer_map(pipe, xfer);
 450                 assert(map);
 451                 memcpy(data, map + internal_offset, size);
 452                 pipe->transfer_unmap(pipe, xfer);
 453                 pipe->transfer_destroy(pipe, xfer);
 454         } else {
 455                 xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
 456                         &(struct pipe_box) { .width = aligned_size,
 457                         .height = 1, .depth = 1 });
 458                 assert(xfer);
 459                 map = pipe->transfer_map(pipe, xfer);
 460                 assert(map);
 461                 memcpy(map + internal_offset, data, size);
 462                 pipe->transfer_unmap(pipe, xfer);
 463                 pipe->transfer_destroy(pipe, xfer);
 464         }
 465 }
 466
 467 /**
 468  * Transfer data between chunk<->data, it is for VRAM<->GART transfers
 469  */
 470 void compute_memory_transfer_direct(
 471         struct compute_memory_pool* pool,
 472         int chunk_to_data,
 473         struct compute_memory_item* chunk,
 474         struct r600_resource* data,
 475         int offset_in_chunk,
 476         int offset_in_data,
 477         int size)
 478 {
 479         ///TODO: DMA
 480 }