2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * on the rights to use, copy, modify, merge, publish, distribute, sub
6 * license, and/or sell copies of the Software, and to permit persons to whom
7 * the Software is furnished to do so, subject to the following conditions:
9 * The above copyright notice and this permission notice (including the next
10 * paragraph) shall be included in all copies or substantial portions of the
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * Adam Rak <adam.rak@streamnovation.com>
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "pipe/p_context.h"
28 #include "util/u_blitter.h"
29 #include "util/list.h"
30 #include "util/u_transfer.h"
31 #include "util/u_surface.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_inlines.h"
36 #include "util/u_framebuffer.h"
37 #include "r600_shader.h"
38 #include "r600_pipe.h"
39 #include "r600_formats.h"
40 #include "compute_memory_pool.h"
41 #include "evergreen_compute.h"
42 #include "evergreen_compute_internal.h"
45 #define ITEM_ALIGNMENT 1024
49 struct compute_memory_pool
* compute_memory_pool_new(
50 struct r600_screen
* rscreen
)
52 struct compute_memory_pool
* pool
= (struct compute_memory_pool
*)
53 CALLOC(sizeof(struct compute_memory_pool
), 1);
57 COMPUTE_DBG(rscreen
, "* compute_memory_pool_new()\n");
59 pool
->screen
= rscreen
;
60 pool
->item_list
= (struct list_head
*)
61 CALLOC(sizeof(struct list_head
), 1);
62 pool
->unallocated_list
= (struct list_head
*)
63 CALLOC(sizeof(struct list_head
), 1);
64 list_inithead(pool
->item_list
);
65 list_inithead(pool
->unallocated_list
);
70 * Initializes the pool with a size of \a initial_size_in_dw.
71 * \param pool The pool to be initialized.
72 * \param initial_size_in_dw The initial size.
73 * \see compute_memory_grow_defrag_pool
75 static void compute_memory_pool_init(struct compute_memory_pool
* pool
,
76 unsigned initial_size_in_dw
)
79 COMPUTE_DBG(pool
->screen
, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
82 pool
->size_in_dw
= initial_size_in_dw
;
83 pool
->bo
= r600_compute_buffer_alloc_vram(pool
->screen
,
84 pool
->size_in_dw
* 4);
88 * Frees all stuff in the pool and the pool struct itself too.
90 void compute_memory_pool_delete(struct compute_memory_pool
* pool
)
92 COMPUTE_DBG(pool
->screen
, "* compute_memory_pool_delete()\n");
95 pool
->screen
->b
.b
.resource_destroy((struct pipe_screen
*)
96 pool
->screen
, (struct pipe_resource
*)pool
->bo
);
98 /* In theory, all of the items were freed in compute_memory_free.
99 * Just delete the list heads
101 free(pool
->item_list
);
102 free(pool
->unallocated_list
);
103 /* And then the pool itself */
108 * Searches for an empty space in the pool, return with the pointer to the
109 * allocatable space in the pool.
110 * \param size_in_dw The size of the space we are looking for.
111 * \return -1 on failure
113 int64_t compute_memory_prealloc_chunk(
114 struct compute_memory_pool
* pool
,
117 struct compute_memory_item
*item
;
121 assert(size_in_dw
<= pool
->size_in_dw
);
123 COMPUTE_DBG(pool
->screen
, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64
"\n",
126 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
127 if (last_end
+ size_in_dw
<= item
->start_in_dw
) {
131 last_end
= item
->start_in_dw
+ align(item
->size_in_dw
, ITEM_ALIGNMENT
);
134 if (pool
->size_in_dw
- last_end
< size_in_dw
) {
142 * Search for the chunk where we can link our new chunk after it.
143 * \param start_in_dw The position of the item we want to add to the pool.
144 * \return The item that is just before the passed position
146 struct list_head
*compute_memory_postalloc_chunk(
147 struct compute_memory_pool
* pool
,
150 struct compute_memory_item
*item
;
151 struct compute_memory_item
*next
;
152 struct list_head
*next_link
;
154 COMPUTE_DBG(pool
->screen
, "* compute_memory_postalloc_chunck() start_in_dw = %"PRIi64
"\n",
157 /* Check if we can insert it in the front of the list */
158 item
= LIST_ENTRY(struct compute_memory_item
, pool
->item_list
->next
, link
);
159 if (LIST_IS_EMPTY(pool
->item_list
) || item
->start_in_dw
> start_in_dw
) {
160 return pool
->item_list
;
163 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
164 next_link
= item
->link
.next
;
166 if (next_link
!= pool
->item_list
) {
167 next
= container_of(next_link
, item
, link
);
168 if (item
->start_in_dw
< start_in_dw
169 && next
->start_in_dw
> start_in_dw
) {
175 assert(item
->start_in_dw
< start_in_dw
);
180 assert(0 && "unreachable");
185 * Reallocates and defragments the pool, conserves data.
186 * \returns -1 if it fails, 0 otherwise
187 * \see compute_memory_finalize_pending
189 int compute_memory_grow_defrag_pool(struct compute_memory_pool
*pool
,
190 struct pipe_context
*pipe
, int new_size_in_dw
)
192 new_size_in_dw
= align(new_size_in_dw
, ITEM_ALIGNMENT
);
194 COMPUTE_DBG(pool
->screen
, "* compute_memory_grow_defrag_pool() "
195 "new_size_in_dw = %d (%d bytes)\n",
196 new_size_in_dw
, new_size_in_dw
* 4);
198 assert(new_size_in_dw
>= pool
->size_in_dw
);
201 compute_memory_pool_init(pool
, MAX2(new_size_in_dw
, 1024 * 16));
203 struct r600_resource
*temp
= NULL
;
205 temp
= r600_compute_buffer_alloc_vram(pool
->screen
, new_size_in_dw
* 4);
208 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
209 struct pipe_resource
*dst
= (struct pipe_resource
*)temp
;
211 COMPUTE_DBG(pool
->screen
, " Growing and defragmenting the pool "
212 "using a temporary resource\n");
214 compute_memory_defrag(pool
, src
, dst
, pipe
);
216 pool
->screen
->b
.b
.resource_destroy(
217 (struct pipe_screen
*)pool
->screen
,
221 pool
->size_in_dw
= new_size_in_dw
;
224 COMPUTE_DBG(pool
->screen
, " The creation of the temporary resource failed\n"
225 " Falling back to using 'shadow'\n");
227 compute_memory_shadow(pool
, pipe
, 1);
228 pool
->shadow
= realloc(pool
->shadow
, new_size_in_dw
* 4);
229 if (pool
->shadow
== NULL
)
232 pool
->size_in_dw
= new_size_in_dw
;
233 pool
->screen
->b
.b
.resource_destroy(
234 (struct pipe_screen
*)pool
->screen
,
235 (struct pipe_resource
*)pool
->bo
);
236 pool
->bo
= r600_compute_buffer_alloc_vram(pool
->screen
, pool
->size_in_dw
* 4);
237 compute_memory_shadow(pool
, pipe
, 0);
239 if (pool
->status
& POOL_FRAGMENTED
) {
240 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
241 compute_memory_defrag(pool
, src
, src
, pipe
);
250 * Copy pool from device to host, or host to device.
251 * \param device_to_host 1 for device->host, 0 for host->device
252 * \see compute_memory_grow_defrag_pool
254 void compute_memory_shadow(struct compute_memory_pool
* pool
,
255 struct pipe_context
* pipe
, int device_to_host
)
257 struct compute_memory_item chunk
;
259 COMPUTE_DBG(pool
->screen
, "* compute_memory_shadow() device_to_host = %d\n",
263 chunk
.start_in_dw
= 0;
264 chunk
.size_in_dw
= pool
->size_in_dw
;
265 compute_memory_transfer(pool
, pipe
, device_to_host
, &chunk
,
266 pool
->shadow
, 0, pool
->size_in_dw
*4);
270 * Moves all the items marked for promotion from the \a unallocated_list
271 * to the \a item_list.
272 * \return -1 if it fails, 0 otherwise
273 * \see evergreen_set_global_binding
275 int compute_memory_finalize_pending(struct compute_memory_pool
* pool
,
276 struct pipe_context
* pipe
)
278 struct compute_memory_item
*item
, *next
;
280 int64_t allocated
= 0;
281 int64_t unallocated
= 0;
286 COMPUTE_DBG(pool
->screen
, "* compute_memory_finalize_pending()\n");
288 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
289 COMPUTE_DBG(pool
->screen
, " + list: offset = %"PRIi64
" id = %"PRIi64
" size = %"PRIi64
" "
290 "(%"PRIi64
" bytes)\n", item
->start_in_dw
, item
->id
,
291 item
->size_in_dw
, item
->size_in_dw
* 4);
294 /* Calculate the total allocated size */
295 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
296 allocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
299 /* Calculate the total unallocated size of the items that
300 * will be promoted to the pool */
301 LIST_FOR_EACH_ENTRY(item
, pool
->unallocated_list
, link
) {
302 if (item
->status
& ITEM_FOR_PROMOTING
)
303 unallocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
306 if (unallocated
== 0) {
310 if (pool
->size_in_dw
< allocated
+ unallocated
) {
311 err
= compute_memory_grow_defrag_pool(pool
, pipe
, allocated
+ unallocated
);
315 else if (pool
->status
& POOL_FRAGMENTED
) {
316 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
317 compute_memory_defrag(pool
, src
, src
, pipe
);
320 /* After defragmenting the pool, allocated is equal to the first available
321 * position for new items in the pool */
322 last_pos
= allocated
;
324 /* Loop through all the unallocated items, check if they are marked
325 * for promoting, allocate space for them and add them to the item_list. */
326 LIST_FOR_EACH_ENTRY_SAFE(item
, next
, pool
->unallocated_list
, link
) {
327 if (item
->status
& ITEM_FOR_PROMOTING
) {
328 err
= compute_memory_promote_item(pool
, item
, pipe
, last_pos
);
329 item
->status
&= ~ITEM_FOR_PROMOTING
;
331 last_pos
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
342 * Defragments the pool, so that there's no gap between items.
343 * \param pool The pool to be defragmented
344 * \param src The origin resource
345 * \param dst The destination resource
346 * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
348 void compute_memory_defrag(struct compute_memory_pool
*pool
,
349 struct pipe_resource
*src
, struct pipe_resource
*dst
,
350 struct pipe_context
*pipe
)
352 struct compute_memory_item
*item
;
355 COMPUTE_DBG(pool
->screen
, "* compute_memory_defrag()\n");
358 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
359 if (src
!= dst
|| item
->start_in_dw
!= last_pos
) {
360 assert(last_pos
<= item
->start_in_dw
);
362 compute_memory_move_item(pool
, src
, dst
,
363 item
, last_pos
, pipe
);
366 last_pos
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
369 pool
->status
&= ~POOL_FRAGMENTED
;
373 * Moves an item from the \a unallocated_list to the \a item_list.
374 * \param item The item that will be promoted.
375 * \return -1 if it fails, 0 otherwise
376 * \see compute_memory_finalize_pending
378 int compute_memory_promote_item(struct compute_memory_pool
*pool
,
379 struct compute_memory_item
*item
, struct pipe_context
*pipe
,
382 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
383 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
384 struct pipe_resource
*src
= (struct pipe_resource
*)item
->real_buffer
;
385 struct pipe_resource
*dst
= (struct pipe_resource
*)pool
->bo
;
388 COMPUTE_DBG(pool
->screen
, "* compute_memory_promote_item()\n"
389 " + Promoting Item: %"PRIi64
" , starting at: %"PRIi64
" (%"PRIi64
" bytes) "
390 "size: %"PRIi64
" (%"PRIi64
" bytes)\n\t\t\tnew start: %"PRIi64
" (%"PRIi64
" bytes)\n",
391 item
->id
, item
->start_in_dw
, item
->start_in_dw
* 4,
392 item
->size_in_dw
, item
->size_in_dw
* 4,
393 start_in_dw
, start_in_dw
* 4);
395 /* Remove the item from the unallocated list */
396 list_del(&item
->link
);
398 /* Add it back to the item_list */
399 list_addtail(&item
->link
, pool
->item_list
);
400 item
->start_in_dw
= start_in_dw
;
403 u_box_1d(0, item
->size_in_dw
* 4, &box
);
405 rctx
->b
.b
.resource_copy_region(pipe
,
406 dst
, 0, item
->start_in_dw
* 4, 0 ,0,
409 /* We check if the item is mapped for reading.
410 * In this case, we need to keep the temporary buffer 'alive'
411 * because it is possible to keep a map active for reading
412 * while a kernel (that reads from it) executes */
413 if (!(item
->status
& ITEM_MAPPED_FOR_READING
)) {
414 pool
->screen
->b
.b
.resource_destroy(screen
, src
);
415 item
->real_buffer
= NULL
;
423 * Moves an item from the \a item_list to the \a unallocated_list.
424 * \param item The item that will be demoted
425 * \see r600_compute_global_transfer_map
427 void compute_memory_demote_item(struct compute_memory_pool
*pool
,
428 struct compute_memory_item
*item
, struct pipe_context
*pipe
)
430 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
431 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
432 struct pipe_resource
*dst
;
435 COMPUTE_DBG(pool
->screen
, "* compute_memory_demote_item()\n"
436 " + Demoting Item: %"PRIi64
", starting at: %"PRIi64
" (%"PRIi64
" bytes) "
437 "size: %"PRIi64
" (%"PRIi64
" bytes)\n", item
->id
, item
->start_in_dw
,
438 item
->start_in_dw
* 4, item
->size_in_dw
, item
->size_in_dw
* 4);
440 /* First, we remove the item from the item_list */
441 list_del(&item
->link
);
443 /* Now we add it to the unallocated list */
444 list_addtail(&item
->link
, pool
->unallocated_list
);
446 /* We check if the intermediate buffer exists, and if it
447 * doesn't, we create it again */
448 if (item
->real_buffer
== NULL
) {
449 item
->real_buffer
= r600_compute_buffer_alloc_vram(
450 pool
->screen
, item
->size_in_dw
* 4);
453 dst
= (struct pipe_resource
*)item
->real_buffer
;
455 /* We transfer the memory from the item in the pool to the
456 * temporary buffer */
457 u_box_1d(item
->start_in_dw
* 4, item
->size_in_dw
* 4, &box
);
459 rctx
->b
.b
.resource_copy_region(pipe
,
463 /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
464 item
->start_in_dw
= -1;
466 if (item
->link
.next
!= pool
->item_list
) {
467 pool
->status
|= POOL_FRAGMENTED
;
472 * Moves the item \a item forward from the resource \a src to the
473 * resource \a dst at \a new_start_in_dw
475 * This function assumes two things:
476 * 1) The item is \b only moved forward, unless src is different from dst
477 * 2) The item \b won't change its position inside the \a item_list
479 * \param item The item that will be moved
480 * \param new_start_in_dw The new position of the item in \a item_list
481 * \see compute_memory_defrag
483 void compute_memory_move_item(struct compute_memory_pool
*pool
,
484 struct pipe_resource
*src
, struct pipe_resource
*dst
,
485 struct compute_memory_item
*item
, uint64_t new_start_in_dw
,
486 struct pipe_context
*pipe
)
488 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
489 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
492 struct compute_memory_item
*prev
;
494 COMPUTE_DBG(pool
->screen
, "* compute_memory_move_item()\n"
495 " + Moving item %"PRIi64
" from %"PRIi64
" (%"PRIi64
" bytes) to %"PRIu64
" (%"PRIu64
" bytes)\n",
496 item
->id
, item
->start_in_dw
, item
->start_in_dw
* 4,
497 new_start_in_dw
, new_start_in_dw
* 4);
499 if (pool
->item_list
!= item
->link
.prev
) {
500 prev
= container_of(item
->link
.prev
, item
, link
);
501 assert(prev
->start_in_dw
+ prev
->size_in_dw
<= new_start_in_dw
);
504 u_box_1d(item
->start_in_dw
* 4, item
->size_in_dw
* 4, &box
);
506 /* If the ranges don't overlap, or we are copying from one resource
507 * to another, we can just copy the item directly */
508 if (src
!= dst
|| new_start_in_dw
+ item
->size_in_dw
<= item
->start_in_dw
) {
510 rctx
->b
.b
.resource_copy_region(pipe
,
511 dst
, 0, new_start_in_dw
* 4, 0, 0,
514 /* The ranges overlap, we will try first to use an intermediate
515 * resource to move the item */
516 struct pipe_resource
*tmp
= (struct pipe_resource
*)
517 r600_compute_buffer_alloc_vram(pool
->screen
, item
->size_in_dw
* 4);
520 rctx
->b
.b
.resource_copy_region(pipe
,
526 rctx
->b
.b
.resource_copy_region(pipe
,
527 dst
, 0, new_start_in_dw
* 4, 0, 0,
530 pool
->screen
->b
.b
.resource_destroy(screen
, tmp
);
533 /* The allocation of the temporary resource failed,
534 * falling back to use mappings */
537 struct pipe_transfer
*trans
;
539 offset
= item
->start_in_dw
- new_start_in_dw
;
541 u_box_1d(new_start_in_dw
* 4, (offset
+ item
->size_in_dw
) * 4, &box
);
543 map
= pipe
->transfer_map(pipe
, src
, 0, PIPE_TRANSFER_READ_WRITE
,
549 memmove(map
, map
+ offset
, item
->size_in_dw
* 4);
551 pipe
->transfer_unmap(pipe
, trans
);
555 item
->start_in_dw
= new_start_in_dw
;
559 * Frees the memory associated with the item with id \a id from the pool.
560 * \param id The id of the item to be freed.
562 void compute_memory_free(struct compute_memory_pool
* pool
, int64_t id
)
564 struct compute_memory_item
*item
, *next
;
565 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
566 struct pipe_resource
*res
;
568 COMPUTE_DBG(pool
->screen
, "* compute_memory_free() id + %"PRIi64
" \n", id
);
570 LIST_FOR_EACH_ENTRY_SAFE(item
, next
, pool
->item_list
, link
) {
572 if (item
->id
== id
) {
574 if (item
->link
.next
!= pool
->item_list
) {
575 pool
->status
|= POOL_FRAGMENTED
;
578 list_del(&item
->link
);
580 if (item
->real_buffer
) {
581 res
= (struct pipe_resource
*)item
->real_buffer
;
582 pool
->screen
->b
.b
.resource_destroy(
592 LIST_FOR_EACH_ENTRY_SAFE(item
, next
, pool
->unallocated_list
, link
) {
594 if (item
->id
== id
) {
595 list_del(&item
->link
);
597 if (item
->real_buffer
) {
598 res
= (struct pipe_resource
*)item
->real_buffer
;
599 pool
->screen
->b
.b
.resource_destroy(
609 fprintf(stderr
, "Internal error, invalid id %"PRIi64
" "
610 "for compute_memory_free\n", id
);
612 assert(0 && "error");
616 * Creates pending allocations for new items, these items are
617 * placed in the unallocated_list.
618 * \param size_in_dw The size, in double words, of the new item.
619 * \return The new item
620 * \see r600_compute_global_buffer_create
622 struct compute_memory_item
* compute_memory_alloc(
623 struct compute_memory_pool
* pool
,
626 struct compute_memory_item
*new_item
= NULL
;
628 COMPUTE_DBG(pool
->screen
, "* compute_memory_alloc() size_in_dw = %"PRIi64
" (%"PRIi64
" bytes)\n",
629 size_in_dw
, 4 * size_in_dw
);
631 new_item
= (struct compute_memory_item
*)
632 CALLOC(sizeof(struct compute_memory_item
), 1);
636 new_item
->size_in_dw
= size_in_dw
;
637 new_item
->start_in_dw
= -1; /* mark pending */
638 new_item
->id
= pool
->next_id
++;
639 new_item
->pool
= pool
;
640 new_item
->real_buffer
= NULL
;
642 list_addtail(&new_item
->link
, pool
->unallocated_list
);
644 COMPUTE_DBG(pool
->screen
, " + Adding item %p id = %"PRIi64
" size = %"PRIi64
" (%"PRIi64
" bytes)\n",
645 new_item
, new_item
->id
, new_item
->size_in_dw
,
646 new_item
->size_in_dw
* 4);
651 * Transfer data host<->device, offset and size is in bytes.
652 * \param device_to_host 1 for device->host, 0 for host->device.
653 * \see compute_memory_shadow
655 void compute_memory_transfer(
656 struct compute_memory_pool
* pool
,
657 struct pipe_context
* pipe
,
659 struct compute_memory_item
* chunk
,
664 int64_t aligned_size
= pool
->size_in_dw
;
665 struct pipe_resource
* gart
= (struct pipe_resource
*)pool
->bo
;
666 int64_t internal_offset
= chunk
->start_in_dw
*4 + offset_in_chunk
;
668 struct pipe_transfer
*xfer
;
673 COMPUTE_DBG(pool
->screen
, "* compute_memory_transfer() device_to_host = %d, "
674 "offset_in_chunk = %d, size = %d\n", device_to_host
,
675 offset_in_chunk
, size
);
677 if (device_to_host
) {
678 map
= pipe
->transfer_map(pipe
, gart
, 0, PIPE_TRANSFER_READ
,
679 &(struct pipe_box
) { .width
= aligned_size
* 4,
680 .height
= 1, .depth
= 1 }, &xfer
);
683 memcpy(data
, map
+ internal_offset
, size
);
684 pipe
->transfer_unmap(pipe
, xfer
);
686 map
= pipe
->transfer_map(pipe
, gart
, 0, PIPE_TRANSFER_WRITE
,
687 &(struct pipe_box
) { .width
= aligned_size
* 4,
688 .height
= 1, .depth
= 1 }, &xfer
);
691 memcpy(map
+ internal_offset
, data
, size
);
692 pipe
->transfer_unmap(pipe
, xfer
);
697 * Transfer data between chunk<->data, it is for VRAM<->GART transfers
699 void compute_memory_transfer_direct(
700 struct compute_memory_pool
* pool
,
702 struct compute_memory_item
* chunk
,
703 struct r600_resource
* data
,