 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"

#define ITEM_ALIGNMENT 1024
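/* Every item in the pool is padded to ITEM_ALIGNMENT dwords
 * (1024 dw = 4096 bytes): a 100 dw item still reserves 1024 dw of pool
 * space. The offset arithmetic below relies on this through align(). */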
/**
 * Creates a new pool.
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	pool->item_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	pool->unallocated_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	list_inithead(pool->item_list);
	list_inithead(pool->unallocated_list);

	return pool;
}
/**
 * Initializes the pool with a size of \a initial_size_in_dw.
 * \param pool			The pool to be initialized.
 * \param initial_size_in_dw	The initial size.
 * \see compute_memory_grow_defrag_pool
 */
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}
/**
 * Frees everything in the pool, and the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	/* The items themselves are freed through compute_memory_free(),
	 * only the list heads remain here */
	free(pool->item_list);
	free(pool->unallocated_list);
	free(pool);
}
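/* Rough lifecycle of the pool, as seen from the rest of the driver
 * (illustrative sketch, not verbatim driver code):
 *
 *	pool = compute_memory_pool_new(rscreen);
 *	item = compute_memory_alloc(pool, size_in_dw);   // pending, start_in_dw == -1
 *	item->status |= ITEM_FOR_PROMOTING;              // done when the buffer is bound
 *	compute_memory_finalize_pending(pool, pipe);     // grows/defrags and promotes
 *	...
 *	compute_memory_free(pool, item->id);
 *	compute_memory_pool_delete(pool);
 */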
/**
 * Searches for an empty space in the pool and returns the offset of the
 * first gap that can hold the requested chunk.
 * \param size_in_dw	The size of the space we are looking for.
 * \return -1 on failure
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
		size_in_dw);

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (last_end + size_in_dw <= item->start_in_dw) {
			return last_end;
		}

		last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
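/* Worked example of the first-fit scan above (hypothetical layout):
 * with items at [0, 100) dw and [2048, 2560) dw, a request for 900 dw
 * first advances last_end to align(100, 1024) = 1024; since
 * 1024 + 900 <= 2048, the gap before the second item is used and the
 * function returns 1024. */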
/**
 * Searches for the chunk after which our new chunk should be linked.
 * \param start_in_dw	The position of the item we want to add to the pool.
 * \return The item that is just before the passed position
 */
struct list_head *compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item *item;
	struct compute_memory_item *next;
	struct list_head *next_link;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link);
	if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) {
		return pool->item_list;
	}

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		next_link = item->link.next;

		if (next_link != pool->item_list) {
			next = container_of(next_link, item, link);
			if (item->start_in_dw < start_in_dw
				&& next->start_in_dw > start_in_dw) {
				return &item->link;
			}
		}
		else {
			/* end of the chain */
			assert(item->start_in_dw < start_in_dw);
			return &item->link;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}
/**
 * Reallocates and defragments the pool; the data it holds is preserved.
 * \returns -1 if it fails, 0 otherwise
 * \see compute_memory_finalize_pending
 */
int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
	struct pipe_context *pipe, int new_size_in_dw)
{
	new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

	COMPUTE_DBG(pool->screen, "* compute_memory_grow_defrag_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		struct r600_resource *temp = NULL;

		temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
						pool->screen, new_size_in_dw * 4);

		if (temp != NULL) {
			struct pipe_resource *src = (struct pipe_resource *)pool->bo;
			struct pipe_resource *dst = (struct pipe_resource *)temp;

			COMPUTE_DBG(pool->screen, "  Growing and defragmenting the pool "
				"using a temporary resource\n");

			compute_memory_defrag(pool, src, dst, pipe);

			pool->screen->b.b.resource_destroy(
				(struct pipe_screen *)pool->screen,
				src);

			pool->bo = temp;
			pool->size_in_dw = new_size_in_dw;
		}
		else {
			COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
				"  Falling back to using 'shadow'\n");

			compute_memory_shadow(pool, pipe, 1);
			pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
			if (pool->shadow == NULL)
				return -1;

			pool->size_in_dw = new_size_in_dw;
			pool->screen->b.b.resource_destroy(
				(struct pipe_screen *)pool->screen,
				(struct pipe_resource *)pool->bo);
			pool->bo = (struct r600_resource *)r600_compute_buffer_alloc_vram(
				pool->screen,
				pool->size_in_dw * 4);
			compute_memory_shadow(pool, pipe, 0);

			if (pool->status & POOL_FRAGMENTED) {
				struct pipe_resource *src = (struct pipe_resource *)pool->bo;
				compute_memory_defrag(pool, src, src, pipe);
			}
		}
	}

	return 0;
}
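/* Sizing note for the function above: the requested size is always
 * rounded up to the item alignment, e.g. a request of 17000 dw becomes
 * align(17000, 1024) = 17408 dw (69632 bytes); and on the very first
 * grow (no pool->bo yet) the pool is created with at least
 * 1024 * 16 = 16384 dw. */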
/**
 * Copies the pool from the device to the host shadow buffer, or from the
 * host shadow buffer back to the device.
 * \param device_to_host 1 for device->host, 0 for host->device
 * \see compute_memory_grow_defrag_pool
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}
/**
 * Moves all the items marked for promotion from the \a unallocated_list
 * to the \a item_list.
 * \return -1 if it fails, 0 otherwise
 * \see evergreen_set_global_binding
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;
	int64_t last_pos;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %i id = %i size = %i "
			"(%i bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Calculate the total allocated size */
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* Calculate the total unallocated size of the items that
	 * will be promoted to the pool */
	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING)
			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (unallocated == 0) {
		return 0;
	}

	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}
	else if (pool->status & POOL_FRAGMENTED) {
		struct pipe_resource *src = (struct pipe_resource *)pool->bo;
		compute_memory_defrag(pool, src, src, pipe);
	}

	/* After defragmenting the pool, allocated is equal to the first available
	 * position for new items in the pool */
	last_pos = allocated;

	/* Loop through all the unallocated items, check if they are marked
	 * for promoting, allocate space for them and add them to the item_list. */
	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING) {
			err = compute_memory_promote_item(pool, item, pipe, last_pos);
			item->status &= ~ITEM_FOR_PROMOTING;

			last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);

			if (err == -1)
				return -1;
		}
	}

	return 0;
}
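/* Example of the bookkeeping above (hypothetical sizes): two resident
 * items of 100 dw and 2000 dw give allocated = 1024 + 2048 = 3072 dw,
 * and one pending item of 300 dw marked ITEM_FOR_PROMOTING gives
 * unallocated = 1024 dw. If 3072 + 1024 exceeds pool->size_in_dw the
 * pool is grown and defragmented, and the pending item is then promoted
 * at start_in_dw = 3072. */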
/**
 * Defragments the pool, so that there's no gap between items.
 * \param pool	The pool to be defragmented
 * \param src	The origin resource
 * \param dst	The destination resource
 * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
 */
void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item;
	int64_t last_pos;

	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");

	last_pos = 0;
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (src != dst || item->start_in_dw != last_pos) {
			assert(last_pos <= item->start_in_dw);

			compute_memory_move_item(pool, src, dst,
					item, last_pos, pipe);
		}

		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	pool->status &= ~POOL_FRAGMENTED;
}
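/* Invariant after the loop above: every item starts exactly at the
 * running last_pos, i.e. items are packed back-to-back (each padded to
 * ITEM_ALIGNMENT), which is why the POOL_FRAGMENTED flag can be
 * cleared. */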
/**
 * Moves an item from the \a unallocated_list to the \a item_list.
 * \param item	The item that will be promoted.
 * \return -1 if it fails, 0 otherwise
 * \see compute_memory_finalize_pending
 */
int compute_memory_promote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe,
	int64_t start_in_dw)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
	struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "  + Found space for Item %p id = %u "
		"start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
		item, item->id, start_in_dw, start_in_dw * 4,
		item->size_in_dw, item->size_in_dw * 4);

	/* Remove the item from the unallocated list */
	list_del(&item->link);

	/* Add it back to the item_list */
	list_addtail(&item->link, pool->item_list);
	item->start_in_dw = start_in_dw;

	if (src != NULL) {
		u_box_1d(0, item->size_in_dw * 4, &box);

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, item->start_in_dw * 4, 0, 0,
			src, 0, &box);

		/* We check if the item is mapped for reading.
		 * In this case, we need to keep the temporary buffer 'alive'
		 * because it is possible to keep a map active for reading
		 * while a kernel (that reads from it) executes */
		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
			pool->screen->b.b.resource_destroy(screen, src);
			item->real_buffer = NULL;
		}
	}

	return 0;
}
/**
 * Moves an item from the \a item_list to the \a unallocated_list.
 * \param item	The item that will be demoted
 * \see r600_compute_global_transfer_map
 */
void compute_memory_demote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe)
{
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	/* First, we remove the item from the item_list */
	list_del(&item->link);

	/* Now we add it to the unallocated list */
	list_addtail(&item->link, pool->unallocated_list);

	/* We check if the intermediate buffer exists, and if it
	 * doesn't, we create it again */
	if (item->real_buffer == NULL) {
		item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);
	}

	dst = (struct pipe_resource *)item->real_buffer;

	/* We transfer the memory from the item in the pool to the
	 * temporary buffer */
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	rctx->b.b.resource_copy_region(pipe,
		dst, 0, 0, 0, 0,
		src, 0, &box);

	/* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
	item->start_in_dw = -1;

	if (item->link.next != pool->item_list) {
		pool->status |= POOL_FRAGMENTED;
	}
}
/**
 * Moves the item \a item forward from the resource \a src to the
 * resource \a dst at \a new_start_in_dw
 *
 * This function assumes two things:
 * 1) The item is \b only moved forward, unless src is different from dst
 * 2) The item \b won't change its position inside the \a item_list
 *
 * \param item			The item that will be moved
 * \param new_start_in_dw	The new position of the item in \a item_list
 * \see compute_memory_defrag
 */
void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_box box;

	struct compute_memory_item *prev;

	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
		"  + Moving item %i from %u (%u bytes) to %u (%u bytes)\n",
		item->id, item->start_in_dw, item->start_in_dw * 4,
		new_start_in_dw, new_start_in_dw * 4);

	if (pool->item_list != item->link.prev) {
		prev = container_of(item->link.prev, item, link);
		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
	}

	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	/* If the ranges don't overlap, or we are copying from one resource
	 * to another, we can just copy the item directly */
	if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, new_start_in_dw * 4, 0, 0,
			src, 0, &box);
	} else {
		/* The ranges overlap, we will try first to use an intermediate
		 * resource to move the item */
		struct pipe_resource *tmp = (struct pipe_resource *)
			r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);

		if (tmp != NULL) {
			rctx->b.b.resource_copy_region(pipe,
				tmp, 0, 0, 0, 0,
				src, 0, &box);

			box.x = 0;

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, new_start_in_dw * 4, 0, 0,
				tmp, 0, &box);

			pool->screen->b.b.resource_destroy(screen, tmp);
		} else {
			/* The allocation of the temporary resource failed,
			 * falling back to use mappings */
			uint32_t *map;
			int64_t offset;
			struct pipe_transfer *trans;

			offset = item->start_in_dw - new_start_in_dw;

			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);

			map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
				&box, &trans);

			assert(map);
			assert(trans);

			memmove(map, map + offset, item->size_in_dw * 4);

			pipe->transfer_unmap(pipe, trans);
		}
	}

	item->start_in_dw = new_start_in_dw;
}
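/* Mapping-fallback math for the branch above (hypothetical numbers):
 * moving an item from start_in_dw = 3072 to new_start_in_dw = 1024 gives
 * offset = 2048 dw; the mapped box starts at the new position and spans
 * (offset + size_in_dw) dwords, so memmove(map, map + offset, size * 4)
 * slides the item to the head of the mapping. map is dword-typed, hence
 * the dword pointer offset combined with a byte count. */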
/**
 * Frees the memory associated to the item with id \a id from the pool.
 * \param id	The id of the item to be freed.
 */
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct pipe_resource *res;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %ld \n", id);

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {

		if (item->id == id) {

			if (item->link.next != pool->item_list) {
				pool->status |= POOL_FRAGMENTED;
			}

			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}
/**
 * Creates a pending allocation for a new item; the item is placed in
 * the unallocated_list.
 * \param size_in_dw	The size, in double words, of the new item.
 * \return The new item
 * \see r600_compute_global_buffer_create
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
		size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	if (new_item == NULL)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;
	new_item->real_buffer = NULL;

	list_addtail(&new_item->link, pool->unallocated_list);

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %u size = %u (%u bytes)\n",
		new_item, new_item->id, new_item->size_in_dw,
		new_item->size_in_dw * 4);

	return new_item;
}
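/* Typical call site (sketch): r600_compute_global_buffer_create() asks
 * for size_in_dw dwords here; the returned item stays on the
 * unallocated_list with start_in_dw == -1 until it is bound as a global
 * buffer and compute_memory_finalize_pending() promotes it into the
 * pool. */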
/**
 * Transfers data host<->device; offset and size are in bytes.
 * \param device_to_host 1 for device->host, 0 for host->device.
 * \see compute_memory_shadow
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	char *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}
/**
 * Transfers data between chunk<->data; used for VRAM<->GART transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,