/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

#define ITEM_ALIGNMENT 1024
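
/*
 * Overview of how the pool is organized (summarizing the code below):
 *
 *  - All offsets and sizes named *_in_dw are in 32-bit dwords; multiply by 4
 *    to get bytes.
 *  - pool->item_list holds the items currently placed inside the pool's
 *    buffer object, kept sorted by their start offset.
 *  - pool->unallocated_list holds pending items that only live in temporary
 *    buffers (start_in_dw == -1) until they are promoted into the pool.
 *  - Item sizes are padded up to a multiple of ITEM_ALIGNMENT dwords when
 *    computing placement, so item start offsets stay aligned as well.
 */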

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	pool->item_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	pool->unallocated_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	list_inithead(pool->item_list);
	list_inithead(pool->unallocated_list);
	return pool;
}

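/**
 * Initializes the pool: allocates the host-side shadow copy and the VRAM
 * buffer object that backs it, and records the pool size.
 * On allocation failure pool->shadow is left NULL, which the caller
 * (compute_memory_grow_pool) checks.
 */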
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
	if (pool->shadow == NULL)
		return;

	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}

/**
 * Frees the pool's shadow buffer and buffer object, and the pool struct
 * itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for an empty chunk of at least \a size_in_dw dwords in the pool.
 * Returns the start offset (in dwords) of the allocatable space, or -1 if
 * no large enough gap exists.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
		size_in_dw);

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (last_end + size_in_dw <= item->start_in_dw) {
			return last_end;
		}

		last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
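
/*
 * For illustration: with two items already placed at dword offsets 0
 * (size 1000, padded to 1024) and 2048 (size 512), asking for 1024 dwords
 * returns 1024, because the gap between the first item's padded end (1024)
 * and the start of the second item (2048) is large enough.  Asking for 2048
 * dwords instead falls through to the space after the last item and returns
 * 3072, provided the pool is at least 5120 dwords large.
 */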

/**
 * Searches for the list entry after which a chunk starting at
 * \a start_in_dw has to be linked, so that \a item_list stays sorted by
 * start offset.  Returns the list head itself when the chunk belongs at
 * the front of the list.
 */
struct list_head *compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item *item;
	struct compute_memory_item *next;
	struct list_head *next_link;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link);
	if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) {
		return pool->item_list;
	}

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		next_link = item->link.next;

		if (next_link != pool->item_list) {
			next = container_of(next_link, item, link);
			if (item->start_in_dw < start_in_dw
				&& next->start_in_dw > start_in_dw) {
				return &item->link;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return &item->link;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}
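
/*
 * For example, inserting a chunk at offset 3072 into a list with items at
 * offsets 0 and 2048 returns the link of the item at 2048; list_add() (as
 * used in compute_memory_promote_item) then places the new item right after
 * it, keeping the list sorted by start offset.
 */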

/**
 * Reallocates the pool, conserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
		if (pool->shadow == NULL)
			return -1;
	} else {
		uint32_t *new_shadow;

		new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

		COMPUTE_DBG(pool->screen, "  Aligned size = %d (%d bytes)\n",
			new_size_in_dw, new_size_in_dw * 4);

		compute_memory_shadow(pool, pipe, 1);
		/* Use a temporary so the old shadow isn't leaked if realloc fails */
		new_shadow = realloc(pool->shadow, new_size_in_dw * 4);
		if (new_shadow == NULL)
			return -1;
		pool->shadow = new_shadow;

		pool->size_in_dw = new_size_in_dw;
		pool->screen->b.b.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
			pool->screen,
			pool->size_in_dw * 4);
		compute_memory_shadow(pool, pipe, 0);
	}

	return 0;
}
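
/*
 * Note that growing the pool is a full round trip through host memory:
 * compute_memory_grow_pool() first shadows the buffer contents to the host
 * (device_to_host = 1), then replaces the buffer object with a larger one
 * and uploads the shadow again (device_to_host = 0) using
 * compute_memory_shadow() below.
 */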

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates space in the pool for the pending items that are marked for
 * promotion, growing the pool if needed.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Calculate the total allocated size */
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* Calculate the total unallocated size of the items that
	 * will be promoted to the pool */
	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING)
			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work.  Imagine this scenario:
	 *
	 * Offset   Item   Size
	 *    0      A      50
	 *  200      B      50
	 *  400      C      50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}

	/* Loop through all the unallocated items, check if they are marked
	 * for promoting, allocate space for them and add them to the item_list. */
	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING) {
			err = compute_memory_promote_item(pool, item, pipe, allocated);
			item->status ^= ITEM_FOR_PROMOTING;

			allocated += align(item->size_in_dw, ITEM_ALIGNMENT);

			if (err == -1)
				return -1;
		}
	}

	return 0;
}

/**
 * Defragments the pool, so that there's no gap between items.
 * \param pool The pool to be defragmented
 */
void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item;
	int64_t last_pos;

	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");

	last_pos = 0;
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (item->start_in_dw != last_pos) {
			assert(last_pos < item->start_in_dw);

			compute_memory_move_item(pool, item, last_pos, pipe);
		}

		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
	}
}

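/**
 * Moves an item from the unallocated list into the pool ("promotes" it):
 * finds free space for it (growing the pool if necessary), links it into
 * \a item_list at the position matching its new offset and, if the item
 * already had data in a temporary buffer, copies that data into the pool.
 * \param allocated Total aligned size (in dwords) already allocated in the pool
 * \returns -1 if growing the pool fails, 0 otherwise
 */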
int compute_memory_promote_item(struct compute_memory_pool *pool,
		struct compute_memory_item *item, struct pipe_context *pipe,
		int64_t allocated)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
	struct pipe_resource *dst = NULL;
	struct pipe_box box;

	struct list_head *pos;
	int64_t start_in_dw;
	int err = 0;

	/* Search for free space in the pool for this item. */
	while ((start_in_dw = compute_memory_prealloc_chunk(pool,
					item->size_in_dw)) == -1) {
		int64_t need = item->size_in_dw + 2048 -
			(pool->size_in_dw - allocated);

		if (need <= 0) {
			/* There's enough free space, but it's too
			 * fragmented.  Assume half of the item can fit
			 * in the last chunk */
			need = (item->size_in_dw / 2) + ITEM_ALIGNMENT;
		}

		need = align(need, ITEM_ALIGNMENT);

		err = compute_memory_grow_pool(pool,
				pipe,
				pool->size_in_dw + need);

		if (err == -1)
			return -1;
	}
	dst = (struct pipe_resource *)pool->bo;
	COMPUTE_DBG(pool->screen, "  + Found space for Item %p id = %"PRIi64" "
		"start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
		item, item->id, start_in_dw, start_in_dw * 4,
		item->size_in_dw, item->size_in_dw * 4);

	/* Remove the item from the unallocated list */
	list_del(&item->link);

	/* Add it back to the item_list */
	pos = compute_memory_postalloc_chunk(pool, start_in_dw);
	list_add(&item->link, pos);
	item->start_in_dw = start_in_dw;

	if (src != NULL) {
		u_box_1d(0, item->size_in_dw * 4, &box);

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, item->start_in_dw * 4, 0, 0,
			src, 0, &box);

		/* We check if the item is mapped for reading.
		 * In this case, we need to keep the temporary buffer 'alive'
		 * because it is possible to keep a map active for reading
		 * while a kernel (that reads from it) executes */
		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
			pool->screen->b.b.resource_destroy(screen, src);
			item->real_buffer = NULL;
		}
	}

	return 0;
}

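/**
 * Moves an item out of the pool into its own temporary buffer ("demotes"
 * it): the item goes back onto the unallocated list, its data is copied
 * from the pool into the temporary buffer, and start_in_dw is set to -1
 * to mark it as pending again.
 */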
void compute_memory_demote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe)
{
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	/* First, we remove the item from the item_list */
	list_del(&item->link);

	/* Now we add it to the unallocated list */
	list_addtail(&item->link, pool->unallocated_list);

	/* We check if the intermediate buffer exists, and if it
	 * doesn't, we create it again */
	if (item->real_buffer == NULL) {
		item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);
	}

	dst = (struct pipe_resource *)item->real_buffer;

	/* We transfer the memory from the item in the pool to the
	 * temporary buffer */
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	rctx->b.b.resource_copy_region(pipe,
		dst, 0, 0, 0, 0,
		src, 0, &box);

	/* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
	item->start_in_dw = -1;
}

/**
 * Moves the item \a item forward in the pool to \a new_start_in_dw
 *
 * This function assumes two things:
 * 1) The item is \b only moved forward (to a lower offset)
 * 2) The item \b won't change its position inside the \a item_list
 *
 * \param item The item that will be moved
 * \param new_start_in_dw The new position of the item in \a item_list
 * \see compute_memory_defrag
 */
void compute_memory_move_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	struct compute_memory_item *prev;

	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
		"  + Moving item %"PRIi64" from %"PRIi64" (%"PRIi64" bytes) to %"PRIu64" (%"PRIu64" bytes)\n",
		item->id, item->start_in_dw, item->start_in_dw * 4,
		new_start_in_dw, new_start_in_dw * 4);

	if (pool->item_list != item->link.prev) {
		prev = container_of(item->link.prev, item, link);
		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
	}

	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	/* If the ranges don't overlap, we can just copy the item directly */
	if (new_start_in_dw + item->size_in_dw <= item->start_in_dw) {
		dst = (struct pipe_resource *)pool->bo;

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, new_start_in_dw * 4, 0, 0,
			src, 0, &box);
	} else {
		/* The ranges overlap, we will try first to use an intermediate
		 * resource to move the item */
		dst = (struct pipe_resource *)r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);

		if (dst != NULL) {
			rctx->b.b.resource_copy_region(pipe,
				dst, 0, 0, 0, 0,
				src, 0, &box);

			src = dst;
			dst = (struct pipe_resource *)pool->bo;

			box.x = 0;

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, new_start_in_dw * 4, 0, 0,
				src, 0, &box);

			pool->screen->b.b.resource_destroy(screen, src);

		} else {
			/* The allocation of the temporary resource failed,
			 * falling back to use mappings */
			uint32_t *map;
			int64_t offset;
			struct pipe_transfer *trans;

			offset = item->start_in_dw - new_start_in_dw;

			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);

			map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
				&box, &trans);

			assert(map);
			assert(trans);

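			/* Note the mixed units here: 'map' is a uint32_t pointer,
			 * so 'map + offset' advances by 'offset' dwords, while
			 * the length passed to memmove() is in bytes. */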
			memmove(map, map + offset, item->size_in_dw * 4);

			pipe->transfer_unmap(pipe, trans);
		}
	}

	item->start_in_dw = new_start_in_dw;
}

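/**
 * Frees the item with id \a id, whether it currently lives in the pool
 * (item_list) or on the unallocated list, and destroys its temporary
 * buffer if one exists.
 */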
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct pipe_resource *res;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a new pending allocation: the item starts out on the unallocated
 * list with start_in_dw = -1 and is only placed in the pool once it is
 * promoted (see compute_memory_finalize_pending).
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
		size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	if (new_item == NULL)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;
	new_item->real_buffer = NULL;

	list_addtail(&new_item->link, pool->unallocated_list);

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
		new_item, new_item->id, new_item->size_in_dw,
		new_item->size_in_dw * 4);
	return new_item;
}
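
/*
 * A rough sketch of how the pieces above fit together from a caller's point
 * of view (illustrative only; the real callers, e.g. in evergreen_compute.c,
 * may drive this differently):
 *
 *	item = compute_memory_alloc(pool, size_in_dw);
 *	item->status |= ITEM_FOR_PROMOTING;
 *	compute_memory_finalize_pending(pool, pipe);
 *	(item->start_in_dw now holds the item's dword offset in pool->bo)
 *	...
 *	compute_memory_free(pool, item->id);
 */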

/**
 * Transfers data between host and device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		/* internal_offset is in bytes, so index as a byte pointer */
		memcpy(data, (char *)map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy((char *)map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}

/**
 * Transfers data between chunk<->data; meant for VRAM<->GART transfers
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}