2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * on the rights to use, copy, modify, merge, publish, distribute, sub
6 * license, and/or sell copies of the Software, and to permit persons to whom
7 * the Software is furnished to do so, subject to the following conditions:
9 * The above copyright notice and this permission notice (including the next
10 * paragraph) shall be included in all copies or substantial portions of the
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * Adam Rak <adam.rak@streamnovation.com>
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "pipe/p_context.h"
28 #include "util/u_blitter.h"
29 #include "util/u_double_list.h"
30 #include "util/u_transfer.h"
31 #include "util/u_surface.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_inlines.h"
36 #include "util/u_framebuffer.h"
37 #include "r600_shader.h"
38 #include "r600_pipe.h"
39 #include "r600_formats.h"
40 #include "compute_memory_pool.h"
41 #include "evergreen_compute.h"
42 #include "evergreen_compute_internal.h"
/* Granularity, in dwords, to which item sizes are rounded up whenever an
 * item is placed in the pool (used as align(size_in_dw, ITEM_ALIGNMENT)). */
#define ITEM_ALIGNMENT 1024
49 struct compute_memory_pool
* compute_memory_pool_new(
50 struct r600_screen
* rscreen
)
52 struct compute_memory_pool
* pool
= (struct compute_memory_pool
*)
53 CALLOC(sizeof(struct compute_memory_pool
), 1);
57 COMPUTE_DBG(rscreen
, "* compute_memory_pool_new()\n");
59 pool
->screen
= rscreen
;
60 pool
->item_list
= (struct list_head
*)
61 CALLOC(sizeof(struct list_head
), 1);
62 pool
->unallocated_list
= (struct list_head
*)
63 CALLOC(sizeof(struct list_head
), 1);
64 list_inithead(pool
->item_list
);
65 list_inithead(pool
->unallocated_list
);
70 * Initializes the pool with a size of \a initial_size_in_dw.
71 * \param pool The pool to be initialized.
72 * \param initial_size_in_dw The initial size.
73 * \see compute_memory_grow_defrag_pool
75 static void compute_memory_pool_init(struct compute_memory_pool
* pool
,
76 unsigned initial_size_in_dw
)
79 COMPUTE_DBG(pool
->screen
, "* compute_memory_pool_init() initial_size_in_dw = %ld\n",
82 pool
->size_in_dw
= initial_size_in_dw
;
83 pool
->bo
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(pool
->screen
,
84 pool
->size_in_dw
* 4);
88 * Frees all stuff in the pool and the pool struct itself too.
90 void compute_memory_pool_delete(struct compute_memory_pool
* pool
)
92 COMPUTE_DBG(pool
->screen
, "* compute_memory_pool_delete()\n");
95 pool
->screen
->b
.b
.resource_destroy((struct pipe_screen
*)
96 pool
->screen
, (struct pipe_resource
*)pool
->bo
);
102 * Searches for an empty space in the pool, return with the pointer to the
103 * allocatable space in the pool.
104 * \param size_in_dw The size of the space we are looking for.
105 * \return -1 on failure
107 int64_t compute_memory_prealloc_chunk(
108 struct compute_memory_pool
* pool
,
111 struct compute_memory_item
*item
;
115 assert(size_in_dw
<= pool
->size_in_dw
);
117 COMPUTE_DBG(pool
->screen
, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
120 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
121 if (last_end
+ size_in_dw
<= item
->start_in_dw
) {
125 last_end
= item
->start_in_dw
+ align(item
->size_in_dw
, ITEM_ALIGNMENT
);
128 if (pool
->size_in_dw
- last_end
< size_in_dw
) {
136 * Search for the chunk where we can link our new chunk after it.
137 * \param start_in_dw The position of the item we want to add to the pool.
138 * \return The item that is just before the passed position
140 struct list_head
*compute_memory_postalloc_chunk(
141 struct compute_memory_pool
* pool
,
144 struct compute_memory_item
*item
;
145 struct compute_memory_item
*next
;
146 struct list_head
*next_link
;
148 COMPUTE_DBG(pool
->screen
, "* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
151 /* Check if we can insert it in the front of the list */
152 item
= LIST_ENTRY(struct compute_memory_item
, pool
->item_list
->next
, link
);
153 if (LIST_IS_EMPTY(pool
->item_list
) || item
->start_in_dw
> start_in_dw
) {
154 return pool
->item_list
;
157 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
158 next_link
= item
->link
.next
;
160 if (next_link
!= pool
->item_list
) {
161 next
= container_of(next_link
, item
, link
);
162 if (item
->start_in_dw
< start_in_dw
163 && next
->start_in_dw
> start_in_dw
) {
169 assert(item
->start_in_dw
< start_in_dw
);
174 assert(0 && "unreachable");
179 * Reallocates and defragments the pool, conserves data.
180 * \returns -1 if it fails, 0 otherwise
181 * \see compute_memory_finalize_pending
183 int compute_memory_grow_defrag_pool(struct compute_memory_pool
*pool
,
184 struct pipe_context
*pipe
, int new_size_in_dw
)
186 new_size_in_dw
= align(new_size_in_dw
, ITEM_ALIGNMENT
);
188 COMPUTE_DBG(pool
->screen
, "* compute_memory_grow_defrag_pool() "
189 "new_size_in_dw = %d (%d bytes)\n",
190 new_size_in_dw
, new_size_in_dw
* 4);
192 assert(new_size_in_dw
>= pool
->size_in_dw
);
195 compute_memory_pool_init(pool
, MAX2(new_size_in_dw
, 1024 * 16));
197 struct r600_resource
*temp
= NULL
;
199 temp
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(
200 pool
->screen
, new_size_in_dw
* 4);
203 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
204 struct pipe_resource
*dst
= (struct pipe_resource
*)temp
;
206 COMPUTE_DBG(pool
->screen
, " Growing and defragmenting the pool "
207 "using a temporary resource\n");
209 compute_memory_defrag(pool
, src
, dst
, pipe
);
211 pool
->screen
->b
.b
.resource_destroy(
212 (struct pipe_screen
*)pool
->screen
,
216 pool
->size_in_dw
= new_size_in_dw
;
219 COMPUTE_DBG(pool
->screen
, " The creation of the temporary resource failed\n"
220 " Falling back to using 'shadow'\n");
222 compute_memory_shadow(pool
, pipe
, 1);
223 pool
->shadow
= realloc(pool
->shadow
, new_size_in_dw
* 4);
224 if (pool
->shadow
== NULL
)
227 pool
->size_in_dw
= new_size_in_dw
;
228 pool
->screen
->b
.b
.resource_destroy(
229 (struct pipe_screen
*)pool
->screen
,
230 (struct pipe_resource
*)pool
->bo
);
231 pool
->bo
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(
233 pool
->size_in_dw
* 4);
234 compute_memory_shadow(pool
, pipe
, 0);
236 if (pool
->status
& POOL_FRAGMENTED
) {
237 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
238 compute_memory_defrag(pool
, src
, src
, pipe
);
247 * Copy pool from device to host, or host to device.
248 * \param device_to_host 1 for device->host, 0 for host->device
249 * \see compute_memory_grow_defrag_pool
251 void compute_memory_shadow(struct compute_memory_pool
* pool
,
252 struct pipe_context
* pipe
, int device_to_host
)
254 struct compute_memory_item chunk
;
256 COMPUTE_DBG(pool
->screen
, "* compute_memory_shadow() device_to_host = %d\n",
260 chunk
.start_in_dw
= 0;
261 chunk
.size_in_dw
= pool
->size_in_dw
;
262 compute_memory_transfer(pool
, pipe
, device_to_host
, &chunk
,
263 pool
->shadow
, 0, pool
->size_in_dw
*4);
267 * Moves all the items marked for promotion from the \a unallocated_list
268 * to the \a item_list.
269 * \return -1 if it fails, 0 otherwise
270 * \see evergreen_set_global_binding
272 int compute_memory_finalize_pending(struct compute_memory_pool
* pool
,
273 struct pipe_context
* pipe
)
275 struct compute_memory_item
*item
, *next
;
277 int64_t allocated
= 0;
278 int64_t unallocated
= 0;
283 COMPUTE_DBG(pool
->screen
, "* compute_memory_finalize_pending()\n");
285 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
286 COMPUTE_DBG(pool
->screen
, " + list: offset = %i id = %i size = %i "
287 "(%i bytes)\n",item
->start_in_dw
, item
->id
,
288 item
->size_in_dw
, item
->size_in_dw
* 4);
291 /* Calculate the total allocated size */
292 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
293 allocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
296 /* Calculate the total unallocated size of the items that
297 * will be promoted to the pool */
298 LIST_FOR_EACH_ENTRY(item
, pool
->unallocated_list
, link
) {
299 if (item
->status
& ITEM_FOR_PROMOTING
)
300 unallocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
303 if (unallocated
== 0) {
307 if (pool
->size_in_dw
< allocated
+ unallocated
) {
308 err
= compute_memory_grow_defrag_pool(pool
, pipe
, allocated
+ unallocated
);
312 else if (pool
->status
& POOL_FRAGMENTED
) {
313 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
314 compute_memory_defrag(pool
, src
, src
, pipe
);
317 /* After defragmenting the pool, allocated is equal to the first available
318 * position for new items in the pool */
319 last_pos
= allocated
;
321 /* Loop through all the unallocated items, check if they are marked
322 * for promoting, allocate space for them and add them to the item_list. */
323 LIST_FOR_EACH_ENTRY_SAFE(item
, next
, pool
->unallocated_list
, link
) {
324 if (item
->status
& ITEM_FOR_PROMOTING
) {
325 err
= compute_memory_promote_item(pool
, item
, pipe
, last_pos
);
326 item
->status
&= ~ITEM_FOR_PROMOTING
;
328 last_pos
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
339 * Defragments the pool, so that there's no gap between items.
340 * \param pool The pool to be defragmented
341 * \param src The origin resource
342 * \param dst The destination resource
343 * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
345 void compute_memory_defrag(struct compute_memory_pool
*pool
,
346 struct pipe_resource
*src
, struct pipe_resource
*dst
,
347 struct pipe_context
*pipe
)
349 struct compute_memory_item
*item
;
352 COMPUTE_DBG(pool
->screen
, "* compute_memory_defrag()\n");
355 LIST_FOR_EACH_ENTRY(item
, pool
->item_list
, link
) {
356 if (src
!= dst
|| item
->start_in_dw
!= last_pos
) {
357 assert(last_pos
<= item
->start_in_dw
);
359 compute_memory_move_item(pool
, src
, dst
,
360 item
, last_pos
, pipe
);
363 last_pos
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
366 pool
->status
&= ~POOL_FRAGMENTED
;
370 * Moves an item from the \a unallocated_list to the \a item_list.
371 * \param item The item that will be promoted.
372 * \return -1 if it fails, 0 otherwise
373 * \see compute_memory_finalize_pending
375 int compute_memory_promote_item(struct compute_memory_pool
*pool
,
376 struct compute_memory_item
*item
, struct pipe_context
*pipe
,
379 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
380 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
381 struct pipe_resource
*src
= (struct pipe_resource
*)item
->real_buffer
;
382 struct pipe_resource
*dst
= (struct pipe_resource
*)pool
->bo
;
385 COMPUTE_DBG(pool
->screen
, "* compute_memory_promote_item()\n"
386 " + Promoting Item: %i , starting at: %u (%u bytes) "
387 "size: %u (%u bytes)\n\t\t\tnew start: %u (%u bytes)\n",
388 item
->id
, item
->start_in_dw
, item
->start_in_dw
* 4,
389 item
->size_in_dw
, item
->size_in_dw
* 4,
390 start_in_dw
, start_in_dw
* 4);
392 /* Remove the item from the unallocated list */
393 list_del(&item
->link
);
395 /* Add it back to the item_list */
396 list_addtail(&item
->link
, pool
->item_list
);
397 item
->start_in_dw
= start_in_dw
;
400 u_box_1d(0, item
->size_in_dw
* 4, &box
);
402 rctx
->b
.b
.resource_copy_region(pipe
,
403 dst
, 0, item
->start_in_dw
* 4, 0 ,0,
406 /* We check if the item is mapped for reading.
407 * In this case, we need to keep the temporary buffer 'alive'
408 * because it is possible to keep a map active for reading
409 * while a kernel (that reads from it) executes */
410 if (!(item
->status
& ITEM_MAPPED_FOR_READING
)) {
411 pool
->screen
->b
.b
.resource_destroy(screen
, src
);
412 item
->real_buffer
= NULL
;
420 * Moves an item from the \a item_list to the \a unallocated_list.
421 * \param item The item that will be demoted
422 * \see r600_compute_global_transfer_map
424 void compute_memory_demote_item(struct compute_memory_pool
*pool
,
425 struct compute_memory_item
*item
, struct pipe_context
*pipe
)
427 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
428 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
429 struct pipe_resource
*dst
;
432 COMPUTE_DBG(pool
->screen
, "* compute_memory_demote_item()\n"
433 " + Demoting Item: %i, starting at: %u (%u bytes) "
434 "size: %u (%u bytes)\n", item
->id
, item
->start_in_dw
,
435 item
->start_in_dw
* 4, item
->size_in_dw
, item
->size_in_dw
* 4);
437 /* First, we remove the item from the item_list */
438 list_del(&item
->link
);
440 /* Now we add it to the unallocated list */
441 list_addtail(&item
->link
, pool
->unallocated_list
);
443 /* We check if the intermediate buffer exists, and if it
444 * doesn't, we create it again */
445 if (item
->real_buffer
== NULL
) {
446 item
->real_buffer
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(
447 pool
->screen
, item
->size_in_dw
* 4);
450 dst
= (struct pipe_resource
*)item
->real_buffer
;
452 /* We transfer the memory from the item in the pool to the
453 * temporary buffer */
454 u_box_1d(item
->start_in_dw
* 4, item
->size_in_dw
* 4, &box
);
456 rctx
->b
.b
.resource_copy_region(pipe
,
460 /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
461 item
->start_in_dw
= -1;
463 if (item
->link
.next
!= pool
->item_list
) {
464 pool
->status
|= POOL_FRAGMENTED
;
469 * Moves the item \a item forward from the resource \a src to the
470 * resource \a dst at \a new_start_in_dw
472 * This function assumes two things:
473 * 1) The item is \b only moved forward, unless src is different from dst
474 * 2) The item \b won't change it's position inside the \a item_list
476 * \param item The item that will be moved
477 * \param new_start_in_dw The new position of the item in \a item_list
478 * \see compute_memory_defrag
480 void compute_memory_move_item(struct compute_memory_pool
*pool
,
481 struct pipe_resource
*src
, struct pipe_resource
*dst
,
482 struct compute_memory_item
*item
, uint64_t new_start_in_dw
,
483 struct pipe_context
*pipe
)
485 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
486 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
489 struct compute_memory_item
*prev
;
491 COMPUTE_DBG(pool
->screen
, "* compute_memory_move_item()\n"
492 " + Moving item %i from %u (%u bytes) to %u (%u bytes)\n",
493 item
->id
, item
->start_in_dw
, item
->start_in_dw
* 4,
494 new_start_in_dw
, new_start_in_dw
* 4);
496 if (pool
->item_list
!= item
->link
.prev
) {
497 prev
= container_of(item
->link
.prev
, item
, link
);
498 assert(prev
->start_in_dw
+ prev
->size_in_dw
<= new_start_in_dw
);
501 u_box_1d(item
->start_in_dw
* 4, item
->size_in_dw
* 4, &box
);
503 /* If the ranges don't overlap, or we are copying from one resource
504 * to another, we can just copy the item directly */
505 if (src
!= dst
|| new_start_in_dw
+ item
->size_in_dw
<= item
->start_in_dw
) {
507 rctx
->b
.b
.resource_copy_region(pipe
,
508 dst
, 0, new_start_in_dw
* 4, 0, 0,
511 /* The ranges overlap, we will try first to use an intermediate
512 * resource to move the item */
513 struct pipe_resource
*tmp
= (struct pipe_resource
*)
514 r600_compute_buffer_alloc_vram(pool
->screen
, item
->size_in_dw
* 4);
517 rctx
->b
.b
.resource_copy_region(pipe
,
523 rctx
->b
.b
.resource_copy_region(pipe
,
524 dst
, 0, new_start_in_dw
* 4, 0, 0,
527 pool
->screen
->b
.b
.resource_destroy(screen
, tmp
);
530 /* The allocation of the temporary resource failed,
531 * falling back to use mappings */
534 struct pipe_transfer
*trans
;
536 offset
= item
->start_in_dw
- new_start_in_dw
;
538 u_box_1d(new_start_in_dw
* 4, (offset
+ item
->size_in_dw
) * 4, &box
);
540 map
= pipe
->transfer_map(pipe
, src
, 0, PIPE_TRANSFER_READ_WRITE
,
546 memmove(map
, map
+ offset
, item
->size_in_dw
* 4);
548 pipe
->transfer_unmap(pipe
, trans
);
552 item
->start_in_dw
= new_start_in_dw
;
556 * Frees the memory asociated to the item with id \a id from the pool.
557 * \param id The id of the item to be freed.
559 void compute_memory_free(struct compute_memory_pool
* pool
, int64_t id
)
561 struct compute_memory_item
*item
, *next
;
562 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
563 struct pipe_resource
*res
;
565 COMPUTE_DBG(pool
->screen
, "* compute_memory_free() id + %ld \n", id
);
567 LIST_FOR_EACH_ENTRY_SAFE(item
, next
, pool
->item_list
, link
) {
569 if (item
->id
== id
) {
571 if (item
->link
.next
!= pool
->item_list
) {
572 pool
->status
|= POOL_FRAGMENTED
;
575 list_del(&item
->link
);
577 if (item
->real_buffer
) {
578 res
= (struct pipe_resource
*)item
->real_buffer
;
579 pool
->screen
->b
.b
.resource_destroy(
589 LIST_FOR_EACH_ENTRY_SAFE(item
, next
, pool
->unallocated_list
, link
) {
591 if (item
->id
== id
) {
592 list_del(&item
->link
);
594 if (item
->real_buffer
) {
595 res
= (struct pipe_resource
*)item
->real_buffer
;
596 pool
->screen
->b
.b
.resource_destroy(
606 fprintf(stderr
, "Internal error, invalid id %"PRIi64
" "
607 "for compute_memory_free\n", id
);
609 assert(0 && "error");
613 * Creates pending allocations for new items, these items are
614 * placed in the unallocated_list.
615 * \param size_in_dw The size, in double words, of the new item.
616 * \return The new item
617 * \see r600_compute_global_buffer_create
619 struct compute_memory_item
* compute_memory_alloc(
620 struct compute_memory_pool
* pool
,
623 struct compute_memory_item
*new_item
= NULL
;
625 COMPUTE_DBG(pool
->screen
, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
626 size_in_dw
, 4 * size_in_dw
);
628 new_item
= (struct compute_memory_item
*)
629 CALLOC(sizeof(struct compute_memory_item
), 1);
630 if (new_item
== NULL
)
633 new_item
->size_in_dw
= size_in_dw
;
634 new_item
->start_in_dw
= -1; /* mark pending */
635 new_item
->id
= pool
->next_id
++;
636 new_item
->pool
= pool
;
637 new_item
->real_buffer
= NULL
;
639 list_addtail(&new_item
->link
, pool
->unallocated_list
);
641 COMPUTE_DBG(pool
->screen
, " + Adding item %p id = %u size = %u (%u bytes)\n",
642 new_item
, new_item
->id
, new_item
->size_in_dw
,
643 new_item
->size_in_dw
* 4);
648 * Transfer data host<->device, offset and size is in bytes.
649 * \param device_to_host 1 for device->host, 0 for host->device.
650 * \see compute_memory_shadow
652 void compute_memory_transfer(
653 struct compute_memory_pool
* pool
,
654 struct pipe_context
* pipe
,
656 struct compute_memory_item
* chunk
,
661 int64_t aligned_size
= pool
->size_in_dw
;
662 struct pipe_resource
* gart
= (struct pipe_resource
*)pool
->bo
;
663 int64_t internal_offset
= chunk
->start_in_dw
*4 + offset_in_chunk
;
665 struct pipe_transfer
*xfer
;
670 COMPUTE_DBG(pool
->screen
, "* compute_memory_transfer() device_to_host = %d, "
671 "offset_in_chunk = %d, size = %d\n", device_to_host
,
672 offset_in_chunk
, size
);
674 if (device_to_host
) {
675 map
= pipe
->transfer_map(pipe
, gart
, 0, PIPE_TRANSFER_READ
,
676 &(struct pipe_box
) { .width
= aligned_size
* 4,
677 .height
= 1, .depth
= 1 }, &xfer
);
680 memcpy(data
, map
+ internal_offset
, size
);
681 pipe
->transfer_unmap(pipe
, xfer
);
683 map
= pipe
->transfer_map(pipe
, gart
, 0, PIPE_TRANSFER_WRITE
,
684 &(struct pipe_box
) { .width
= aligned_size
* 4,
685 .height
= 1, .depth
= 1 }, &xfer
);
688 memcpy(map
+ internal_offset
, data
, size
);
689 pipe
->transfer_unmap(pipe
, xfer
);
694 * Transfer data between chunk<->data, it is for VRAM<->GART transfers
696 void compute_memory_transfer_direct(
697 struct compute_memory_pool
* pool
,
699 struct compute_memory_item
* chunk
,
700 struct r600_resource
* data
,