/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"

#define ITEM_ALIGNMENT 1024

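/* Items in the pool are laid out on ITEM_ALIGNMENT-dword boundaries.  As an
 * illustration (not extra code; align() is the round-up helper from
 * "util/u_math.h"):
 *
 *	align(1, ITEM_ALIGNMENT)    == 1024
 *	align(1024, ITEM_ALIGNMENT) == 1024
 *	align(1500, ITEM_ALIGNMENT) == 2048
 */
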
/* A few forward declarations of static functions */
static void compute_memory_shadow(struct compute_memory_pool *pool,
	struct pipe_context *pipe, int device_to_host);

static void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe);

static int compute_memory_promote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe,
	int64_t start_in_dw);

static void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe);

static void compute_memory_transfer(struct compute_memory_pool *pool,
	struct pipe_context *pipe, int device_to_host,
	struct compute_memory_item *chunk, void *data,
	int offset_in_chunk, int size);

/**
 * Creates a new pool.
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen *rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (!pool)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	pool->item_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	pool->unallocated_list = (struct list_head *)
				CALLOC(sizeof(struct list_head), 1);
	list_inithead(pool->item_list);
	list_inithead(pool->unallocated_list);
	return pool;
}

/**
 * Initializes the pool with a size of \a initial_size_in_dw.
 * \param pool			The pool to be initialized.
 * \param initial_size_in_dw	The initial size.
 * \see compute_memory_grow_defrag_pool
 */
static void compute_memory_pool_init(struct compute_memory_pool *pool,
		unsigned initial_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	pool->size_in_dw = initial_size_in_dw;
	pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}

/**
 * Frees everything in the pool, and the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool *pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	r600_resource_reference(&pool->bo, NULL);
	/* In theory, all of the items were freed in compute_memory_free.
	 * Just delete the list heads. */
	free(pool->item_list);
	free(pool->unallocated_list);
	/* And then the pool itself */
	free(pool);
}

/**
 * Reallocates and defragments the pool; data is preserved.
 * \returns -1 if it fails, 0 otherwise
 * \see compute_memory_finalize_pending
 */
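/* A sketch of the growth strategy implemented below (pseudocode of the
 * function that follows, not an additional code path):
 *
 *	if the pool has no buffer yet:
 *		allocate a fresh buffer of at least the requested size
 *	else if a temporary VRAM resource can be allocated:
 *		defrag-copy every item into it and adopt it as the new pool
 *	else:
 *		shadow the pool to host memory, reallocate the buffer,
 *		copy the shadow back, then defrag in place if still needed
 */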
static int compute_memory_grow_defrag_pool(struct compute_memory_pool *pool,
	struct pipe_context *pipe, int new_size_in_dw)
{
	new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

	COMPUTE_DBG(pool->screen, "* compute_memory_grow_defrag_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		struct r600_resource *temp = NULL;

		temp = r600_compute_buffer_alloc_vram(pool->screen, new_size_in_dw * 4);

		if (temp != NULL) {
			struct pipe_resource *src = (struct pipe_resource *)pool->bo;
			struct pipe_resource *dst = (struct pipe_resource *)temp;

			COMPUTE_DBG(pool->screen, "  Growing and defragmenting the pool "
					"using a temporary resource\n");

			compute_memory_defrag(pool, src, dst, pipe);

			/* Release the old buffer */
			r600_resource_reference(&pool->bo, NULL);
			pool->bo = temp;
			pool->size_in_dw = new_size_in_dw;
		} else {
			COMPUTE_DBG(pool->screen, "  The creation of the temporary resource failed\n"
				"  Falling back to using 'shadow'\n");

			compute_memory_shadow(pool, pipe, 1);
			pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
			if (pool->shadow == NULL)
				return -1;

			pool->size_in_dw = new_size_in_dw;
			/* Release the old buffer */
			r600_resource_reference(&pool->bo, NULL);
			pool->bo = r600_compute_buffer_alloc_vram(pool->screen, pool->size_in_dw * 4);
			compute_memory_shadow(pool, pipe, 0);

			if (pool->status & POOL_FRAGMENTED) {
				struct pipe_resource *src = (struct pipe_resource *)pool->bo;
				compute_memory_defrag(pool, src, src, pipe);
			}
		}
	}

	return 0;
}

/**
 * Copy pool from device to host, or host to device.
 * \param device_to_host 1 for device->host, 0 for host->device
 * \see compute_memory_grow_defrag_pool
 */
static void compute_memory_shadow(struct compute_memory_pool *pool,
	struct pipe_context *pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
			device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw * 4);
}

/**
 * Moves all the items marked for promotion from the \a unallocated_list
 * to the \a item_list.
 * \return -1 if it fails, 0 otherwise
 * \see evergreen_set_global_binding
 */
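/* A minimal caller's-eye sketch (illustrative only; the real caller is
 * evergreen_set_global_binding, and error handling is trimmed):
 *
 *	struct compute_memory_item *item = compute_memory_alloc(pool, 1024);
 *	item->status |= ITEM_FOR_PROMOTING;
 *	if (compute_memory_finalize_pending(pool, pipe) == -1)
 *		return;		// the pool could not be grown
 *	// item->start_in_dw is now the item's offset inside pool->bo
 */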
int compute_memory_finalize_pending(struct compute_memory_pool *pool,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;
	int64_t last_pos;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Calculate the total allocated size */
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	/* Calculate the total unallocated size of the items that
	 * will be promoted to the pool */
	LIST_FOR_EACH_ENTRY(item, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING)
			unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	if (unallocated == 0) {
		return 0;
	}

	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_defrag_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	} else if (pool->status & POOL_FRAGMENTED) {
		/* We are not growing the pool, but it is fragmented;
		 * defrag it in place */
		struct pipe_resource *src = (struct pipe_resource *)pool->bo;
		compute_memory_defrag(pool, src, src, pipe);
	}

	/* After defragmenting the pool, allocated is equal to the first available
	 * position for new items in the pool */
	last_pos = allocated;

	/* Loop through all the unallocated items, check if they are marked
	 * for promoting, allocate space for them and add them to the item_list. */
	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {
		if (item->status & ITEM_FOR_PROMOTING) {
			err = compute_memory_promote_item(pool, item, pipe, last_pos);
			item->status &= ~ITEM_FOR_PROMOTING;

			last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);

			if (err == -1)
				return -1;
		}
	}

	return 0;
}

/**
 * Defragments the pool, so that there's no gap between items.
 * \param pool	The pool to be defragmented
 * \param src	The origin resource
 * \param dst	The destination resource
 * \see compute_memory_grow_defrag_pool and compute_memory_finalize_pending
 */
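/* Illustrative layout, with three 1024-dw items (assumed sizes).  Before
 * compaction the pool might look like
 *
 *	[A: 0..1023] [gap] [B: 3072..4095] [gap] [C: 6144..7167]
 *
 * and afterwards every item starts at the aligned end of its predecessor:
 *
 *	[A: 0..1023] [B: 1024..2047] [C: 2048..3071]
 */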
static void compute_memory_defrag(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct pipe_context *pipe)
{
	struct compute_memory_item *item;
	int64_t last_pos;

	COMPUTE_DBG(pool->screen, "* compute_memory_defrag()\n");

	last_pos = 0;
	LIST_FOR_EACH_ENTRY(item, pool->item_list, link) {
		if (src != dst || item->start_in_dw != last_pos) {
			assert(last_pos <= item->start_in_dw);

			compute_memory_move_item(pool, src, dst,
					item, last_pos, pipe);
		}

		last_pos += align(item->size_in_dw, ITEM_ALIGNMENT);
	}

	pool->status &= ~POOL_FRAGMENTED;
}

/**
 * Moves an item from the \a unallocated_list to the \a item_list.
 * \param item	The item that will be promoted.
 * \return -1 if it fails, 0 otherwise
 * \see compute_memory_finalize_pending
 */
static int compute_memory_promote_item(struct compute_memory_pool *pool,
		struct compute_memory_item *item, struct pipe_context *pipe,
		int64_t start_in_dw)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
	struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "* compute_memory_promote_item()\n"
			"  + Promoting Item: %"PRIi64" , starting at: %"PRIi64" (%"PRIi64" bytes) "
			"size: %"PRIi64" (%"PRIi64" bytes)\n\t\t\tnew start: %"PRIi64" (%"PRIi64" bytes)\n",
			item->id, item->start_in_dw, item->start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4,
			start_in_dw, start_in_dw * 4);

	/* Remove the item from the unallocated list */
	list_del(&item->link);

	/* Add it back to the item_list */
	list_addtail(&item->link, pool->item_list);
	item->start_in_dw = start_in_dw;

	if (src) {
		u_box_1d(0, item->size_in_dw * 4, &box);

		rctx->b.b.resource_copy_region(pipe,
				dst, 0, item->start_in_dw * 4, 0, 0,
				src, 0, &box);

		/* We check if the item is mapped for reading.
		 * In this case, we need to keep the temporary buffer 'alive'
		 * because it is possible to keep a map active for reading
		 * while a kernel (that reads from it) executes */
		if (!(item->status & ITEM_MAPPED_FOR_READING)) {
			pool->screen->b.b.resource_destroy(screen, src);
			item->real_buffer = NULL;
		}
	}

	return 0;
}

/**
 * Moves an item from the \a item_list to the \a unallocated_list.
 * \param item	The item that will be demoted
 * \see r600_compute_global_transfer_map
 */
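/* Sketch of the intended use (mirroring the transfer-map path): demoting
 * gives the item a private buffer again, so it can stay mapped while the
 * pool is reorganized or grown underneath it.
 *
 *	compute_memory_demote_item(pool, item, pipe);
 *	// item is pending again (start_in_dw == -1) and is now backed
 *	// by item->real_buffer instead of pool->bo
 */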
void compute_memory_demote_item(struct compute_memory_pool *pool,
	struct compute_memory_item *item, struct pipe_context *pipe)
{
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_resource *src = (struct pipe_resource *)pool->bo;
	struct pipe_resource *dst;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "* compute_memory_demote_item()\n"
			"  + Demoting Item: %"PRIi64", starting at: %"PRIi64" (%"PRIi64" bytes) "
			"size: %"PRIi64" (%"PRIi64" bytes)\n", item->id, item->start_in_dw,
			item->start_in_dw * 4, item->size_in_dw, item->size_in_dw * 4);

	/* First, we remove the item from the item_list */
	list_del(&item->link);

	/* Now we add it to the unallocated list */
	list_addtail(&item->link, pool->unallocated_list);

	/* We check if the intermediate buffer exists, and if it
	 * doesn't, we create it again */
	if (item->real_buffer == NULL) {
		item->real_buffer = r600_compute_buffer_alloc_vram(
				pool->screen, item->size_in_dw * 4);
	}

	dst = (struct pipe_resource *)item->real_buffer;

	/* We transfer the memory from the item in the pool to the
	 * temporary buffer */
	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	rctx->b.b.resource_copy_region(pipe,
			dst, 0, 0, 0, 0,
			src, 0, &box);

	/* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
	item->start_in_dw = -1;

	if (item->link.next != pool->item_list) {
		pool->status |= POOL_FRAGMENTED;
	}
}

/**
 * Moves the item \a item forward from the resource \a src to the
 * resource \a dst at \a new_start_in_dw.
 *
 * This function assumes two things:
 * 1) The item is \b only moved forward, unless src is different from dst
 * 2) The item \b won't change its position inside the \a item_list
 *
 * \param item			The item that will be moved
 * \param new_start_in_dw	The new position of the item in \a item_list
 * \see compute_memory_defrag
 */
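/* Worked example of the overlap handling below (numbers are assumed):
 * moving a 2048-dw item from start_in_dw = 1024 to new_start_in_dw = 0
 * overlaps, since 0 + 2048 > 1024.  If the intermediate resource cannot
 * be allocated, the mapped fallback maps the merged range
 * [0 .. (1024 + 2048) * 4) bytes and slides the data down with
 *
 *	memmove(map, map + 1024, 2048 * 4);
 *
 * where map is a dword pointer, so the 1024 offset is in dwords while
 * the size is in bytes.
 */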
static void compute_memory_move_item(struct compute_memory_pool *pool,
	struct pipe_resource *src, struct pipe_resource *dst,
	struct compute_memory_item *item, uint64_t new_start_in_dw,
	struct pipe_context *pipe)
{
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct r600_context *rctx = (struct r600_context *)pipe;
	struct pipe_box box;

	COMPUTE_DBG(pool->screen, "* compute_memory_move_item()\n"
			"  + Moving item %"PRIi64" from %"PRIi64" (%"PRIi64" bytes) to %"PRIu64" (%"PRIu64" bytes)\n",
			item->id, item->start_in_dw, item->start_in_dw * 4,
			new_start_in_dw, new_start_in_dw * 4);

	if (pool->item_list != item->link.prev) {
		ASSERTED struct compute_memory_item *prev;
		prev = container_of(item->link.prev, item, link);
		assert(prev->start_in_dw + prev->size_in_dw <= new_start_in_dw);
	}

	u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

	/* If the ranges don't overlap, or we are copying from one resource
	 * to another, we can just copy the item directly */
	if (src != dst || new_start_in_dw + item->size_in_dw <= item->start_in_dw) {

		rctx->b.b.resource_copy_region(pipe,
			dst, 0, new_start_in_dw * 4, 0, 0,
			src, 0, &box);
	} else {
		/* The ranges overlap, we will try first to use an intermediate
		 * resource to move the item */
		struct pipe_resource *tmp = (struct pipe_resource *)
			r600_compute_buffer_alloc_vram(pool->screen, item->size_in_dw * 4);

		if (tmp != NULL) {
			rctx->b.b.resource_copy_region(pipe,
				tmp, 0, 0, 0, 0,
				src, 0, &box);

			box.x = 0;

			rctx->b.b.resource_copy_region(pipe,
				dst, 0, new_start_in_dw * 4, 0, 0,
				tmp, 0, &box);

			pool->screen->b.b.resource_destroy(screen, tmp);
		} else {
			/* The allocation of the temporary resource failed,
			 * falling back to use mappings */
			uint32_t *map;
			int64_t offset;
			struct pipe_transfer *trans;

			offset = item->start_in_dw - new_start_in_dw;

			u_box_1d(new_start_in_dw * 4, (offset + item->size_in_dw) * 4,
				&box);

			map = pipe->transfer_map(pipe, src, 0, PIPE_TRANSFER_READ_WRITE,
				&box, &trans);

			assert(map);
			assert(trans);

			memmove(map, map + offset, item->size_in_dw * 4);

			pipe->transfer_unmap(pipe, trans);
		}
	}

	item->start_in_dw = new_start_in_dw;
}

/**
 * Frees the memory associated to the item with id \a id from the pool.
 * \param id	The id of the item to be freed.
 */
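/* Typical pairing (a sketch; ids come from compute_memory_alloc):
 *
 *	struct compute_memory_item *item = compute_memory_alloc(pool, 256);
 *	int64_t id = item->id;
 *	...
 *	compute_memory_free(pool, id);	// valid whether or not the item
 *					// was ever promoted into the pool
 */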
void compute_memory_free(struct compute_memory_pool *pool, int64_t id)
{
	struct compute_memory_item *item, *next;
	struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
	struct pipe_resource *res;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64" \n", id);

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->item_list, link) {

		if (item->id == id) {

			if (item->link.next != pool->item_list) {
				pool->status |= POOL_FRAGMENTED;
			}

			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	LIST_FOR_EACH_ENTRY_SAFE(item, next, pool->unallocated_list, link) {

		if (item->id == id) {
			list_del(&item->link);

			if (item->real_buffer) {
				res = (struct pipe_resource *)item->real_buffer;
				pool->screen->b.b.resource_destroy(
						screen, res);
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates pending allocations for new items; these items are
 * placed in the unallocated_list.
 * \param size_in_dw	The size, in double words, of the new item.
 * \return The new item
 * \see r600_compute_global_buffer_create
 */
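/* Sizes are given in dwords.  A hypothetical caller wanting a 4 KiB
 * buffer would therefore ask for 1024 dwords:
 *
 *	struct compute_memory_item *item =
 *		compute_memory_alloc(pool, 4096 / 4);
 *	// item->start_in_dw == -1 until it is promoted into the pool
 */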
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool *pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
				CALLOC(sizeof(struct compute_memory_item), 1);
	if (!new_item)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;
	new_item->real_buffer = NULL;

	list_addtail(&new_item->link, pool->unallocated_list);

	COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);
	return new_item;
}

/**
 * Transfers data host<->device; offset and size are in bytes.
 * \param device_to_host 1 for device->host, 0 for host->device.
 * \see compute_memory_shadow
 */
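/* Offset arithmetic, with assumed numbers: for a chunk that starts at
 * start_in_dw = 512 and a transfer at offset_in_chunk = 16 bytes, the
 * copy touches the mapped pool buffer at
 *
 *	internal_offset = 512 * 4 + 16 = 2064 bytes
 */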
static void compute_memory_transfer(
	struct compute_memory_pool *pool,
	struct pipe_context *pipe,
	int device_to_host,
	struct compute_memory_item *chunk,
	void *data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource *gart = (struct pipe_resource *)pool->bo;
	int64_t internal_offset = chunk->start_in_dw * 4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	/* internal_offset is in bytes, so use a byte pointer for the map */
	char *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}