r600g/compute: Try to use a temporary resource when growing the pool
src/gallium/drivers/r600/compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

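/* Every item stored in the pool is aligned to this boundary (in dwords). */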
#define ITEM_ALIGNMENT 1024
/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	pool->item_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	pool->unallocated_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	list_inithead(pool->item_list);
	list_inithead(pool->unallocated_list);
	return pool;
}

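/**
 * Initializes the pool with its initial size and allocates the backing
 * buffer in VRAM.
 */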
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{

	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}

/**
 * Frees everything in the pool, and then the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for an empty space in the pool and returns the offset, in dwords,
 * of the allocatable space inside the pool, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
		size_in_dw);

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (last_end + size_in_dw <= item->start_in_dw) {
			return last_end;
		}

		last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}

/**
 * Searches for the chunk after which we can link our new chunk.
 */
struct list_head *compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item *item;
	struct compute_memory_item *next;
	struct list_head *next_link;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link);
	if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) {
		return pool->item_list;
	}

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		next_link = item->link.next;

		if (next_link != pool->item_list) {
			next = container_of(next_link, item, link);
			if (item->start_in_dw < start_in_dw
				&& next->start_in_dw > start_in_dw) {
				return &item->link;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return &item->link;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}

/**
 * Reallocates the pool, preserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		struct r600_resource *temp = NULL;

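		/* Try to grow the pool by allocating a bigger buffer and
		 * copying the current contents into it.  If that allocation
		 * fails, fall back to shadowing the pool in host memory
		 * while its buffer is reallocated. */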
		new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

		COMPUTE_DBG(pool->screen, "  Aligned size = %d (%d bytes)\n",
			new_size_in_dw, new_size_in_dw * 4);

		temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
						pool->screen, new_size_in_dw * 4);

		if (temp != NULL) {
			struct r600_context *rctx = (struct r600_context *)pipe;
			struct pipe_resource *src = (struct pipe_resource *)pool->bo;
			struct pipe_resource *dst = (struct pipe_resource *)temp;
			struct pipe_box box;

			COMPUTE_DBG(pool->screen, "  Growing the pool using a temporary resource\n");

			u_box_1d(0, pool->size_in_dw * 4, &box);

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, 0, 0, 0,
				src, 0, &box);

			pool->screen->b.b.resource_destroy(
				(struct pipe_screen *)pool->screen,
				src);

			pool->bo = temp;
			pool->size_in_dw = new_size_in_dw;
		}
		else {
			void *shadow;

			COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
				"  Falling back to using 'shadow'\n");

			compute_memory_shadow(pool, pipe, 1);
			/* Don't overwrite pool->shadow until realloc succeeds,
			 * so the old shadow isn't leaked on failure. */
			shadow = realloc(pool->shadow, new_size_in_dw * 4);
			if (shadow == NULL)
				return -1;
			pool->shadow = shadow;

			pool->size_in_dw = new_size_in_dw;
			pool->screen->b.b.resource_destroy(
				(struct pipe_screen *)pool->screen,
				(struct pipe_resource *)pool->bo);
			pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
						pool->screen,
						pool->size_in_dw * 4);
			compute_memory_shadow(pool, pipe, 0);
		}
	}

	return 0;
}

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates the pending items in the pool.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;
	int64_t last_pos;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %i id = %i size = %i "
			"(%i bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Calculate the total allocated size */
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* Calculate the total unallocated size of the items that
	 * will be promoted to the pool */
	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING)
			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

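	/* Nothing to do if no item has been marked for promotion. */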
	if (unallocated == 0) {
		return 0;
	}

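	/* If the pool is fragmented, defragment it first so that all the
	 * free space sits at the end of the pool and new items can simply
	 * be appended after the existing ones. */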
	if (pool->status & POOL_FRAGMENTED) {
		struct pipe_resource *src = (struct pipe_resource *)pool->bo;
		compute_memory_defrag(pool, src, src, pipe);
	}

	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}

	/* After defragmenting the pool, allocated is equal to the first available
	 * position for new items in the pool */
	last_pos = allocated;

	/* Loop through all the unallocated items, check if they are marked
	 * for promoting, allocate space for them and add them to the item_list. */
	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING) {
			err = compute_memory_promote_item(pool, item, pipe, last_pos);
			item->status &= ~ITEM_FOR_PROMOTING;

			last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);

			if (err == -1)
				return -1;
		}
	}

	return 0;
}

/**
 * Defragments the pool, so that there's no gap between items.
 * \param pool The pool to be defragmented
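 * \param src The resource where the items are currently stored
 * \param dst The resource where the items will be stored (it may be the
 * same as \a src)
 * \param pipe The context used to issue the copy operations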
 */
void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item;
	int64_t last_pos;

	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");

	last_pos = 0;
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (src != dst || item->start_in_dw != last_pos) {
			assert(last_pos <= item->start_in_dw);

			compute_memory_move_item(pool, src, dst,
					item, last_pos, pipe);
		}

		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	pool->status &= ~POOL_FRAGMENTED;
}

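/**
 * Moves an item from the unallocated list to the pool's item list, copying
 * its contents from its temporary buffer into the pool at \a start_in_dw.
 * @returns 0 on success
 */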
int compute_memory_promote_item(struct compute_memory_pool *pool,
		struct compute_memory_item *item, struct pipe_context *pipe,
		int64_t start_in_dw)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
	struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "  + Found space for Item %p id = %u "
		"start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
		item, item->id, start_in_dw, start_in_dw * 4,
		item->size_in_dw, item->size_in_dw * 4);

	/* Remove the item from the unallocated list */
	list_del(&item->link);

	/* Add it back to the item_list */
	list_addtail(&item->link, pool->item_list);
	item->start_in_dw = start_in_dw;

	if (src != NULL) {
		u_box_1d(0, item->size_in_dw * 4, &box);

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, item->start_in_dw * 4, 0, 0,
			src, 0, &box);

		/* We check if the item is mapped for reading.
		 * In this case, we need to keep the temporary buffer 'alive'
		 * because it is possible to keep a map active for reading
		 * while a kernel (that reads from it) executes */
		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
			pool->screen->b.b.resource_destroy(screen, src);
			item->real_buffer = NULL;
		}
	}

	return 0;
}

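/**
 * Moves an item from the pool's item list back to the unallocated list,
 * copying its contents from the pool into a dedicated temporary buffer
 * and marking it as pending again.
 */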
void compute_memory_demote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe)
{
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	/* First, we remove the item from the item_list */
	list_del(&item->link);

	/* Now we add it to the unallocated list */
	list_addtail(&item->link, pool->unallocated_list);

	/* We check if the intermediate buffer exists, and if it
	 * doesn't, we create it again */
	if (item->real_buffer == NULL) {
		item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);
	}

	dst = (struct pipe_resource *)item->real_buffer;

	/* We transfer the memory from the item in the pool to the
	 * temporary buffer */
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	rctx->b.b.resource_copy_region(pipe,
		dst, 0, 0, 0, 0,
		src, 0, &box);

	/* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
	item->start_in_dw = -1;

	if (item->link.next != pool->item_list) {
		pool->status |= POOL_FRAGMENTED;
	}
}

/**
 * Moves the item \a item forward from the resource \a src to the
 * resource \a dst at \a new_start_in_dw
 *
 * This function assumes two things:
 * 1) The item is \b only moved forward
 * 2) The item \b won't change its position inside the \a item_list
 *
 * \param item The item that will be moved
 * \param new_start_in_dw The new offset, in dwords, of the item inside the pool
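 * \param pool The pool the item belongs to
 * \param src The resource where the item is currently stored
 * \param dst The resource the item will be moved to (it may be the same
 * as \a src)
 * \param pipe The context used to issue the copies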
 * \see compute_memory_defrag
 */
void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_box box;

	struct compute_memory_item *prev;

	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
			"  + Moving item %i from %u (%u bytes) to %u (%u bytes)\n",
			item->id, item->start_in_dw, item->start_in_dw * 4,
			new_start_in_dw, new_start_in_dw * 4);

	if (pool->item_list != item->link.prev) {
		prev = container_of(item->link.prev, item, link);
		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
	}

	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	/* If the ranges don't overlap, or we are copying from one resource
	 * to another, we can just copy the item directly */
	if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, new_start_in_dw * 4, 0, 0,
			src, 0, &box);
	} else {
		/* The ranges overlap, so first try to use an intermediate
		 * resource to move the item */
		struct pipe_resource *tmp = (struct pipe_resource *)
			r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);

		if (tmp != NULL) {
			rctx->b.b.resource_copy_region(pipe,
				tmp, 0, 0, 0, 0,
				src, 0, &box);

			box.x = 0;

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, new_start_in_dw * 4, 0, 0,
				tmp, 0, &box);

			pool->screen->b.b.resource_destroy(screen, tmp);

		} else {
			/* The allocation of the temporary resource failed,
			 * fall back to using a mapping */
			uint32_t *map;
			int64_t offset;
			struct pipe_transfer *trans;

			offset = item->start_in_dw - new_start_in_dw;

			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);

			map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
				&box, &trans);

			assert(map);
			assert(trans);

			memmove(map, map + offset, item->size_in_dw * 4);

			pipe->transfer_unmap(pipe, trans);
		}
	}

	item->start_in_dw = new_start_in_dw;
}

void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct pipe_resource *res;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {

		if (item->id == id) {

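			/* Freeing an item that is not at the end of the pool
			 * leaves a gap behind, so the pool will have to be
			 * defragmented before new items are promoted. */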
			if (item->link.next != pool->item_list) {
				pool->status |= POOL_FRAGMENTED;
			}

			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a new pending item and adds it to the unallocated list.
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	if (new_item == NULL)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;
	new_item->real_buffer = NULL;

	list_addtail(&new_item->link, pool->unallocated_list);

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %u size = %u (%u bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);
	return new_item;
}

/**
 * Transfers data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}

/**
 * Transfers data between chunk<->data; intended for VRAM<->GART transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}