/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#define ITEM_ALIGNMENT 1024
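
/* Illustrative note, not from the original source: every item placed in
 * the pool is padded to a multiple of ITEM_ALIGNMENT dwords, so a
 * 1500 dw item, for example, occupies align(1500, 1024) = 2048 dw:
 *
 *	int64_t padded_size = align(item->size_in_dw, ITEM_ALIGNMENT);
 */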
/**
 * Creates a new pool.
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	pool->item_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	pool->unallocated_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	list_inithead(pool->item_list);
	list_inithead(pool->unallocated_list);
	return pool;
}
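
/* Illustrative sketch, not from the original source: the expected life
 * cycle of a pool.  'screen' and 'ctx' stand for a caller-owned
 * r600_screen and pipe_context, and the 4096 dw size is arbitrary.
 *
 *	struct compute_memory_pool *pool = compute_memory_pool_new(screen);
 *	struct compute_memory_item *item = compute_memory_alloc(pool, 4096);
 *	item->status |= ITEM_FOR_PROMOTING;
 *	compute_memory_finalize_pending(pool, ctx);
 *	...
 *	compute_memory_free(pool, item->id);
 *	compute_memory_pool_delete(pool);
 */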
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}
/**
 * Frees everything in the pool, then the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}
/**
 * Searches for an empty space in the pool. Returns the offset of the
 * allocatable space in the pool, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
		size_in_dw);

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (last_end + size_in_dw <= item->start_in_dw) {
			return last_end;
		}

		last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
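
/* Illustrative example, not from the original source: with a 1024 dw
 * item at offset 0 and a 3072 dw item at offset 2048, the first-fit
 * scan above places a 1024 dw request in the gap at offset 1024
 * (1024 + 1024 <= 2048), while a 2048 dw request skips the gap and
 * lands at offset 5120, just past the aligned end of the last item. */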
/**
 * Searches for the chunk after which the new chunk should be linked.
 */
struct list_head *compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item *item;
	struct compute_memory_item *next;
	struct list_head *next_link;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	item = LIST_ENTRY(struct compute_memory_item, pool->item_list->next, link);
	if (LIST_IS_EMPTY(pool->item_list) || item->start_in_dw > start_in_dw) {
		return pool->item_list;
	}

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		next_link = item->link.next;

		if (next_link != pool->item_list) {
			next = container_of(next_link, item, link);
			if (item->start_in_dw < start_in_dw
				&& next->start_in_dw > start_in_dw) {
				return &item->link;
			}
		} else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return &item->link;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}
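
/* Illustrative example, not from the original source: with items at
 * offsets 0 and 2048, a new chunk at start_in_dw = 1024 is linked after
 * the item at offset 0, while start_in_dw = 4096 falls off the end of
 * the chain and is linked after the tail item. */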
/**
 * Reallocates the pool, conserving its data.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		struct r600_resource *temp = NULL;

		new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

		COMPUTE_DBG(pool->screen, "  Aligned size = %d (%d bytes)\n",
			new_size_in_dw, new_size_in_dw * 4);

		temp = (struct r600_resource *)r600_compute_buffer_alloc_vram(
						pool->screen, new_size_in_dw * 4);

		if (temp != NULL) {
			struct r600_context *rctx = (struct r600_context *)pipe;
			struct pipe_resource *src = (struct pipe_resource *)pool->bo;
			struct pipe_resource *dst = (struct pipe_resource *)temp;
			struct pipe_box box;

			COMPUTE_DBG(pool->screen, "  Growing the pool using a temporary resource\n");

			u_box_1d(0, pool->size_in_dw * 4, &box);

			rctx->b.b.resource_copy_region(pipe,
					dst, 0, 0, 0, 0,
					src, 0, &box);

			pool->screen->b.b.resource_destroy(
					(struct pipe_screen *)pool->screen,
					src);

			pool->bo = temp;
			pool->size_in_dw = new_size_in_dw;
		} else {
			COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
				"  Falling back to using 'shadow'\n");

			compute_memory_shadow(pool, pipe, 1);
			pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
			if (pool->shadow == NULL)
				return -1;

			pool->size_in_dw = new_size_in_dw;
			pool->screen->b.b.resource_destroy(
					(struct pipe_screen *)pool->screen,
					(struct pipe_resource *)pool->bo);
			pool->bo = (struct r600_resource *)r600_compute_buffer_alloc_vram(
					pool->screen,
					pool->size_in_dw * 4);
			compute_memory_shadow(pool, pipe, 0);
		}
	}

	return 0;
}
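
/* Illustrative note, not from the original source: a request to grow to
 * 5000 dw is first padded to align(5000, ITEM_ALIGNMENT) = 5120 dw
 * (20480 bytes).  The fast path above copies the old buffer into a new
 * VRAM buffer of that size; only if that allocation fails does the pool
 * round-trip its contents through the host 'shadow' copy. */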
/**
 * Copies the pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw * 4);
}
/**
 * Allocates the pending items in the pool.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;
	int64_t last_pos;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %i id = %i size = %i "
			"(%i bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Calculate the total allocated size */
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* Calculate the total unallocated size of the items that
	 * will be promoted to the pool */
	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING)
			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (unallocated == 0) {
		return 0;
	}

	if (pool->status & POOL_FRAGMENTED) {
		struct pipe_resource *src = (struct pipe_resource *)pool->bo;
		compute_memory_defrag(pool, src, src, pipe);
	}

	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}

	/* After defragmenting the pool, allocated is equal to the first available
	 * position for new items in the pool */
	last_pos = allocated;

	/* Loop through all the unallocated items, check if they are marked
	 * for promoting, allocate space for them and add them to the item_list. */
	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING) {
			err = compute_memory_promote_item(pool, item, pipe, last_pos);
			item->status &= ~ITEM_FOR_PROMOTING;

			last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);

			if (err == -1)
				return -1;
		}
	}

	return 0;
}
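
/* Illustrative summary, not from the original source: items begin life
 * on unallocated_list, the caller flags the ones a kernel launch needs
 * with ITEM_FOR_PROMOTING, and the function above defragments or grows
 * the pool as needed, then packs the flagged items contiguously
 * starting at last_pos. */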
/**
 * Defragments the pool, so that there's no gap between items.
 * \param pool The pool to be defragmented
 */
void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item;
	int64_t last_pos;

	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");

	last_pos = 0;
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (src != dst || item->start_in_dw != last_pos) {
			assert(last_pos <= item->start_in_dw);

			compute_memory_move_item(pool, src, dst,
					item, last_pos, pipe);
		}

		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	pool->status &= ~POOL_FRAGMENTED;
}
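
/* Illustrative walk-through, not from the original source: with 1024 dw
 * items at offsets 0, 3072 and 5120, the loop above leaves the first
 * item in place and moves the other two to offsets 1024 and 2048,
 * then clears POOL_FRAGMENTED. */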
int compute_memory_promote_item(struct compute_memory_pool *pool,
		struct compute_memory_item *item, struct pipe_context *pipe,
		int64_t start_in_dw)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
	struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "  + Found space for Item %p id = %u "
			"start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

	/* Remove the item from the unallocated list */
	list_del(&item->link);

	/* Add it back to the item_list */
	list_addtail(&item->link, pool->item_list);
	item->start_in_dw = start_in_dw;

	if (src != NULL) {
		u_box_1d(0, item->size_in_dw * 4, &box);

		rctx->b.b.resource_copy_region(pipe,
				dst, 0, item->start_in_dw * 4, 0, 0,
				src, 0, &box);

		/* We check if the item is mapped for reading.
		 * In this case, we need to keep the temporary buffer 'alive'
		 * because it is possible to keep a map active for reading
		 * while a kernel (that reads from it) executes */
		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
			pool->screen->b.b.resource_destroy(screen, src);
			item->real_buffer = NULL;
		}
	}

	return 0;
}
void compute_memory_demote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe)
{
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	/* First, we remove the item from the item_list */
	list_del(&item->link);

	/* Now we add it to the unallocated list */
	list_addtail(&item->link, pool->unallocated_list);

	/* We check if the intermediate buffer exists, and if it
	 * doesn't, we create it again */
	if (item->real_buffer == NULL) {
		item->real_buffer = (struct r600_resource *)r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);
	}

	dst = (struct pipe_resource *)item->real_buffer;

	/* We transfer the memory from the item in the pool to the
	 * temporary buffer */
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	rctx->b.b.resource_copy_region(pipe,
		dst, 0, 0, 0, 0,
		src, 0, &box);

	/* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
	item->start_in_dw = -1;

	if (item->link.next != pool->item_list) {
		pool->status |= POOL_FRAGMENTED;
	}
}
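
/* Illustrative note, not from the original source: demoting the tail
 * item of item_list leaves no hole behind, which is why POOL_FRAGMENTED
 * is only set above when the demoted item had a successor. */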
/**
 * Moves the item \a item forward from the resource \a src to the
 * resource \a dst at \a new_start_in_dw
 *
 * This function assumes two things:
 * 1) The item is \b only moved forward
 * 2) The item \b won't change its position inside the \a item_list
 *
 * \param item	The item that will be moved
 * \param new_start_in_dw	The new position of the item in the pool
 * \see compute_memory_defrag
 */
void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_box box;

	struct compute_memory_item *prev;

	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
			"  + Moving item %i from %u (%u bytes) to %u (%u bytes)\n",
			item->id, item->start_in_dw, item->start_in_dw * 4,
			new_start_in_dw, new_start_in_dw * 4);

	if (pool->item_list != item->link.prev) {
		prev = container_of(item->link.prev, item, link);
		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
	}

	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	/* If the ranges don't overlap, or we are copying from one resource
	 * to another, we can just copy the item directly */
	if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, new_start_in_dw * 4, 0, 0,
			src, 0, &box);
	} else {
		/* The ranges overlap, so first try to use an intermediate
		 * resource to move the item */
		struct pipe_resource *tmp = (struct pipe_resource *)
			r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);

		if (tmp != NULL) {
			rctx->b.b.resource_copy_region(pipe,
				tmp, 0, 0, 0, 0,
				src, 0, &box);

			box.x = 0;

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, new_start_in_dw * 4, 0, 0,
				tmp, 0, &box);

			pool->screen->b.b.resource_destroy(screen, tmp);
		} else {
			/* The allocation of the temporary resource failed,
			 * fall back to using mappings */
			uint32_t *map;
			int64_t offset;
			struct pipe_transfer *trans;

			offset = item->start_in_dw - new_start_in_dw;

			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4, &box);

			map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
				&box, &trans);

			assert(map);
			assert(trans);

			/* 'map' is a dword pointer and 'offset' is in dwords,
			 * so this shifts the item down to its new position */
			memmove(map, map + offset, item->size_in_dw * 4);

			pipe->transfer_unmap(pipe, trans);
		}
	}

	item->start_in_dw = new_start_in_dw;
}
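
/* Illustrative example, not from the original source: moving a 2048 dw
 * item from offset 1024 to offset 0 within the same resource overlaps
 * itself (0 + 2048 > 1024), so the copy goes through a temporary
 * buffer, or, failing that, a mapped memmove() over the
 * (1024 + 2048) * 4 byte window. */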
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct pipe_resource *res;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %ld\n", id);

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {

		if (item->id == id) {

			if (item->link.next != pool->item_list) {
				pool->status |= POOL_FRAGMENTED;
			}

			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}
/**
 * Creates a new pending allocation on the unallocated list.
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	if (new_item == NULL)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;
	new_item->real_buffer = NULL;

	list_addtail(&new_item->link, pool->unallocated_list);

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %u size = %u (%u bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);

	return new_item;
}
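
/* Illustrative note, not from the original source: the returned item is
 * only a reservation; start_in_dw stays -1 until
 * compute_memory_finalize_pending() promotes the item into the pool.
 *
 *	struct compute_memory_item *item = compute_memory_alloc(pool, 256);
 *	assert(item == NULL || item->start_in_dw == -1);
 */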
/**
 * Transfers data between host and device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	char *map;	/* internal_offset is in bytes, so address the map as bytes */

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}
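
/* Illustrative usage, not from the original source: reading back the
 * first 16 bytes of a promoted chunk into a host buffer.  'pool',
 * 'pipe' and 'chunk' are assumed to exist.
 *
 *	uint32_t out[4];
 *	compute_memory_transfer(pool, pipe, 1, chunk, out, 0, 16);
 */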
/**
 * Transfers data between a chunk and a resource; used for VRAM<->GART
 * transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,