2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * on the rights to use, copy, modify, merge, publish, distribute, sub
6 * license, and/or sell copies of the Software, and to permit persons to whom
7 * the Software is furnished to do so, subject to the following conditions:
9 * The above copyright notice and this permission notice (including the next
10 * paragraph) shall be included in all copies or substantial portions of the
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * Adam Rak <adam.rak@streamnovation.com>
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "pipe/p_context.h"
28 #include "util/u_blitter.h"
29 #include "util/u_double_list.h"
30 #include "util/u_transfer.h"
31 #include "util/u_surface.h"
32 #include "util/u_pack_color.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "util/u_inlines.h"
36 #include "util/u_framebuffer.h"
37 #include "r600_shader.h"
38 #include "r600_pipe.h"
39 #include "r600_formats.h"
40 #include "compute_memory_pool.h"
41 #include "evergreen_compute.h"
42 #include "evergreen_compute_internal.h"
/* Alignment granule, in dwords. Item sizes are rounded up to a multiple of
 * this value with align() wherever the space an item occupies is computed,
 * and compute_memory_grow_pool() rounds the new pool size the same way. */
45 #define ITEM_ALIGNMENT 1024
/*
 * compute_memory_pool_new: creates a pool object bound to a screen.
 *
 * Allocates one struct compute_memory_pool with CALLOC, emits a debug trace
 * and stores rscreen in pool->screen. No buffer is allocated here.
 *
 * NOTE(review): neither an allocation-failure check nor the return statement
 * is visible in this listing; presumably the new pool is returned - confirm.
 */
49 struct compute_memory_pool
* compute_memory_pool_new(
50 struct r600_screen
* rscreen
)
52 struct compute_memory_pool
* pool
= (struct compute_memory_pool
*)
53 CALLOC(sizeof(struct compute_memory_pool
), 1);
57 COMPUTE_DBG(rscreen
, "* compute_memory_pool_new()\n");
59 pool
->screen
= rscreen
;
/*
 * compute_memory_pool_init: gives a pool its initial storage.
 *
 * pool               - pool to initialise.
 * initial_size_in_dw - requested size in dwords (4-byte units).
 *
 * Allocates the host-side shadow copy (initial_size_in_dw elements of
 * 4 bytes), records the size in pool->size_in_dw and allocates the buffer
 * object pool->bo of size_in_dw * 4 bytes.
 *
 * The function returns void; compute_memory_grow_pool() detects failure by
 * testing pool->shadow for NULL after calling it.
 */
63 static void compute_memory_pool_init(struct compute_memory_pool
* pool
,
64 unsigned initial_size_in_dw
)
/* NOTE(review): the format uses the %ld conversion while the parameter is
 * declared unsigned; the argument line is not visible here - confirm that
 * the specifier matches what is actually passed. */
67 COMPUTE_DBG(pool
->screen
, "* compute_memory_pool_init() initial_size_in_dw = %ld\n",
70 pool
->shadow
= (uint32_t*)CALLOC(initial_size_in_dw
, 4);
/* The statement executed on allocation failure is not visible in this
 * listing; presumably an early return - confirm. */
71 if (pool
->shadow
== NULL
)
74 pool
->size_in_dw
= initial_size_in_dw
;
/* NOTE(review): the result of the buffer allocation is not checked in the
 * visible code. */
75 pool
->bo
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(pool
->screen
,
76 pool
->size_in_dw
* 4);
80 * Frees all stuff in the pool and the pool struct itself too
/*
 * compute_memory_pool_delete: tears a pool down.
 *
 * The visible code emits a debug trace and destroys pool->bo through the
 * resource_destroy hook of the screen. Releasing pool->shadow and the pool
 * struct itself is not visible in this listing - confirm against the full
 * source before relying on it.
 */
82 void compute_memory_pool_delete(struct compute_memory_pool
* pool
)
84 COMPUTE_DBG(pool
->screen
, "* compute_memory_pool_delete()\n");
87 pool
->screen
->b
.b
.resource_destroy((struct pipe_screen
*)
88 pool
->screen
, (struct pipe_resource
*)pool
->bo
);
94 * Searches the pool for a free region large enough for the request and
95 * returns its start offset in dwords, or -1 if no such region exists.
/*
 * compute_memory_prealloc_chunk: first-fit search for free space.
 *
 * Walks pool->item_list while tracking last_end, the end of the previous
 * item with its size rounded up to ITEM_ALIGNMENT. A gap is accepted when
 * last_end + size_in_dw does not pass the start of the next item. After the
 * walk, the space between last_end and the end of the pool is checked.
 *
 * The caller compute_memory_promote_item() treats a result of -1 as
 * no space found and grows the pool.
 *
 * NOTE(review): the walk only works if item_list is ordered by ascending
 * start_in_dw - presumably maintained by compute_memory_promote_item();
 * confirm. The declarations of size_in_dw and last_end, and every return
 * statement, are not visible in this listing.
 */
97 int64_t compute_memory_prealloc_chunk(
98 struct compute_memory_pool
* pool
,
101 struct compute_memory_item
*item
;
/* A request larger than the whole pool can never fit. */
105 assert(size_in_dw
<= pool
->size_in_dw
);
107 COMPUTE_DBG(pool
->screen
, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
110 for (item
= pool
->item_list
; item
; item
= item
->next
) {
/* Does the request fit in the gap before this item? */
111 if (last_end
+ size_in_dw
<= item
->start_in_dw
) {
/* No fit: move the candidate start past this item. */
115 last_end
= item
->start_in_dw
+ align(item
->size_in_dw
, ITEM_ALIGNMENT
);
/* Tail check: is the remainder of the pool too small? */
118 if (pool
->size_in_dw
- last_end
< size_in_dw
) {
126 * Searches for the chunk after which the new chunk can be linked.
/*
 * compute_memory_postalloc_chunk: finds the list position for a new item.
 *
 * Given the start offset chosen for a new item, looks for the element of
 * pool->item_list that should precede it: an item that starts before
 * start_in_dw and whose successor starts after it. The case where the new
 * item belongs in front of the current head is tested first.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * The caller compute_memory_promote_item() reads pos->next from the result
 * and also has a branch that inserts at the front of the list, so the front
 * case presumably returns NULL - confirm.
 */
128 struct compute_memory_item
* compute_memory_postalloc_chunk(
129 struct compute_memory_pool
* pool
,
132 struct compute_memory_item
* item
;
/* NOTE(review): the message spells the function name as chunck. */
134 COMPUTE_DBG(pool
->screen
, "* compute_memory_postalloc_chunck() start_in_dw = %ld\n",
137 /* Check if we can insert it in the front of the list */
138 if (pool
->item_list
&& pool
->item_list
->start_in_dw
> start_in_dw
) {
142 for (item
= pool
->item_list
; item
; item
= item
->next
) {
/* NOTE(review): item->next is dereferenced below. A guard for the last
 * element is not visible in this listing; the assert further down suggests
 * a separate end-of-list branch exists - confirm. */
144 if (item
->start_in_dw
< start_in_dw
145 && item
->next
->start_in_dw
> start_in_dw
) {
151 assert(item
->start_in_dw
< start_in_dw
);
156 assert(0 && "unreachable");
161 * Reallocates pool, conserves data.
162 * @returns -1 if it fails, 0 otherwise
/*
 * compute_memory_grow_pool: enlarges the pool while keeping its contents.
 *
 * pool           - pool to grow.
 * pipe           - context used for the device/host copies.
 * new_size_in_dw - requested size in dwords; must not be smaller than the
 *                  current size (asserted).
 *
 * Two paths are visible:
 *  - first-time setup through compute_memory_pool_init() with at least
 *    1024 * 16 dwords (the condition selecting this path is not visible;
 *    presumably the pool has no buffer yet - confirm);
 *  - resize: round the size up to ITEM_ALIGNMENT, copy the device buffer to
 *    the shadow, realloc the shadow, destroy the old buffer, allocate a new
 *    one and copy the shadow back to the device.
 *
 * The comment preceding this function documents -1 on failure and
 * 0 otherwise; the return statements themselves are not visible here.
 */
164 int compute_memory_grow_pool(struct compute_memory_pool
* pool
,
165 struct pipe_context
* pipe
, int new_size_in_dw
)
167 COMPUTE_DBG(pool
->screen
, "* compute_memory_grow_pool() "
168 "new_size_in_dw = %d (%d bytes)\n",
169 new_size_in_dw
, new_size_in_dw
* 4);
171 assert(new_size_in_dw
>= pool
->size_in_dw
);
174 compute_memory_pool_init(pool
, MAX2(new_size_in_dw
, 1024 * 16));
175 if (pool
->shadow
== NULL
)
178 new_size_in_dw
= align(new_size_in_dw
, ITEM_ALIGNMENT
);
180 COMPUTE_DBG(pool
->screen
, " Aligned size = %d (%d bytes)\n",
181 new_size_in_dw
, new_size_in_dw
* 4);
/* Save the device contents into the shadow before the buffer is replaced. */
183 compute_memory_shadow(pool
, pipe
, 1);
/* NOTE(review): the realloc result overwrites pool->shadow directly. If
 * realloc fails, the old block is no longer referenced (leak) and the data
 * just saved is lost; a temporary pointer would avoid both. */
184 pool
->shadow
= realloc(pool
->shadow
, new_size_in_dw
*4);
185 if (pool
->shadow
== NULL
)
188 pool
->size_in_dw
= new_size_in_dw
;
189 pool
->screen
->b
.b
.resource_destroy(
190 (struct pipe_screen
*)pool
->screen
,
191 (struct pipe_resource
*)pool
->bo
);
/* NOTE(review): the result of the new buffer allocation is not checked in
 * the visible code before it is used by the copy below. */
192 pool
->bo
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(
194 pool
->size_in_dw
* 4);
/* Restore the saved contents into the new, larger buffer. */
195 compute_memory_shadow(pool
, pipe
, 0);
202 * Copy pool from device to host, or host to device.
/*
 * compute_memory_shadow: copies the whole pool between device and host.
 *
 * pool           - pool whose buffer and shadow are synchronised.
 * pipe           - context used for the transfer.
 * device_to_host - non-zero copies the buffer into pool->shadow, zero copies
 *                  pool->shadow into the buffer (direction is decided in
 *                  compute_memory_transfer()).
 *
 * Builds a temporary item describing the range [0, pool->size_in_dw) and
 * passes it to compute_memory_transfer() with offset 0 and a size of
 * pool->size_in_dw * 4 bytes.
 */
204 void compute_memory_shadow(struct compute_memory_pool
* pool
,
205 struct pipe_context
* pipe
, int device_to_host
)
/* Only the four fields assigned below are set; the rest of chunk is left
 * uninitialised. The visible part of compute_memory_transfer() reads
 * start_in_dw only. */
207 struct compute_memory_item chunk
;
209 COMPUTE_DBG(pool
->screen
, "* compute_memory_shadow() device_to_host = %d\n",
213 chunk
.start_in_dw
= 0;
214 chunk
.size_in_dw
= pool
->size_in_dw
;
215 chunk
.prev
= chunk
.next
= NULL
;
216 compute_memory_transfer(pool
, pipe
, device_to_host
, &chunk
,
217 pool
->shadow
, 0, pool
->size_in_dw
*4);
221 * Allocates pending allocations in the pool
222 * @returns -1 if it fails, 0 otherwise
/*
 * compute_memory_finalize_pending: places pending items into the pool.
 *
 * Visible steps:
 *  1. log every item currently in pool->item_list;
 *  2. sum the ITEM_ALIGNMENT-rounded sizes of those items (allocated);
 *  3. sum the rounded sizes of the items in pool->unallocated_list that are
 *     flagged ITEM_FOR_PROMOTING (unallocated);
 *  4. grow the pool when size_in_dw < allocated + unallocated;
 *  5. for each flagged item call compute_memory_promote_item(), flip the
 *     flag with XOR (which clears it, since it was just tested as set) and
 *     add the rounded size of the item to allocated.
 *
 * NOTE(review): several loops advance with item = next; the assignment of
 * next, the declaration of err and all error checks and return statements
 * are not visible in this listing.
 * NOTE(review): allocated + unallocated is an int64_t sum passed to
 * compute_memory_grow_pool(), whose size parameter is int - an implicit
 * narrowing for very large pools.
 */
224 int compute_memory_finalize_pending(struct compute_memory_pool
* pool
,
225 struct pipe_context
* pipe
)
227 struct compute_memory_item
*item
, *next
;
229 int64_t allocated
= 0;
230 int64_t unallocated
= 0;
234 COMPUTE_DBG(pool
->screen
, "* compute_memory_finalize_pending()\n");
/* Debug dump of the items already placed in the pool. */
236 for (item
= pool
->item_list
; item
; item
= item
->next
) {
237 COMPUTE_DBG(pool
->screen
, " + list: offset = %i id = %i size = %i "
238 "(%i bytes)\n",item
->start_in_dw
, item
->id
,
239 item
->size_in_dw
, item
->size_in_dw
* 4);
242 /* Calculate the total allocated size */
243 for (item
= pool
->item_list
; item
; item
= next
) {
245 allocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
248 /* Calculate the total unallocated size of the items that
249 * will be promoted to the pool */
250 for (item
= pool
->unallocated_list
; item
; item
= next
) {
252 if (item
->status
& ITEM_FOR_PROMOTING
)
253 unallocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
256 /* If we require more space than the size of the pool, then grow the
259 * XXX: I'm pretty sure this won't work. Imagine this scenario:
267 * Allocated size = 150
268 * Pending Item D Size = 200
270 * In this case, there are 300 units of free space in the pool, but
271 * they aren't contiguous, so it will be impossible to allocate Item D.
273 if (pool
->size_in_dw
< allocated
+ unallocated
) {
274 err
= compute_memory_grow_pool(pool
, pipe
, allocated
+ unallocated
);
279 /* Loop through all the unallocated items, check if they are marked
280 * for promoting, allocate space for them and add them to the item_list. */
281 for (item
= pool
->unallocated_list
; item
; item
= next
) {
284 if (item
->status
& ITEM_FOR_PROMOTING
) {
285 err
= compute_memory_promote_item(pool
, item
, pipe
, allocated
);
286 item
->status
^= ITEM_FOR_PROMOTING
;
288 allocated
+= align(item
->size_in_dw
, ITEM_ALIGNMENT
);
/*
 * compute_memory_promote_item: moves one pending item into the pool.
 *
 * Visible steps:
 *  1. ask compute_memory_prealloc_chunk() for space; while it answers -1,
 *     grow the pool and retry;
 *  2. unlink the item from pool->unallocated_list;
 *  3. store the chosen offset in item->start_in_dw and link the item into
 *     pool->item_list: after the element found by
 *     compute_memory_postalloc_chunk(), at the front of the list, or as the
 *     only element when the list is empty;
 *  4. copy the contents of the temporary buffer item->real_buffer into the
 *     pool buffer at byte offset item->start_in_dw * 4;
 *  5. destroy the temporary buffer unless the item is mapped for reading.
 *
 * NOTE(review): the last parameter (used below as allocated), the locals
 * start_in_dw, err and box, the conditions selecting between the list
 * insertion branches and all return statements are not visible in this
 * listing.
 */
298 int compute_memory_promote_item(struct compute_memory_pool
*pool
,
299 struct compute_memory_item
*item
, struct pipe_context
*pipe
,
302 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
303 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
/* dst is the pool buffer, src the temporary buffer of the item. */
304 struct pipe_resource
*dst
= (struct pipe_resource
*)pool
->bo
;
305 struct pipe_resource
*src
= (struct pipe_resource
*)item
->real_buffer
;
312 /* Search for free space in the pool for this item. */
313 while ((start_in_dw
=compute_memory_prealloc_chunk(pool
,
314 item
->size_in_dw
)) == -1) {
/* Growth amount: the item size plus 2048 dwords of slack, minus the space
 * not yet accounted as allocated. */
315 int64_t need
= item
->size_in_dw
+ 2048 -
316 (pool
->size_in_dw
- allocated
);
/* Alternative growth of one tenth of the pool; the condition guarding this
 * assignment is not visible in this listing. */
319 need
= pool
->size_in_dw
/ 10;
322 need
= align(need
, ITEM_ALIGNMENT
);
324 err
= compute_memory_grow_pool(pool
,
326 pool
->size_in_dw
+ need
);
331 COMPUTE_DBG(pool
->screen
, " + Found space for Item %p id = %u "
332 "start_in_dw = %u (%u bytes) size_in_dw = %u (%u bytes)\n",
333 item
, item
->id
, start_in_dw
, start_in_dw
* 4,
334 item
->size_in_dw
, item
->size_in_dw
* 4);
336 /* Remove the item from the unallocated list */
337 if (item
->prev
== NULL
)
338 pool
->unallocated_list
= item
->next
;
340 item
->prev
->next
= item
->next
;
342 if (item
->next
!= NULL
)
343 item
->next
->prev
= item
->prev
;
345 item
->start_in_dw
= start_in_dw
;
349 if (pool
->item_list
) {
350 struct compute_memory_item
*pos
;
352 pos
= compute_memory_postalloc_chunk(pool
, start_in_dw
);
355 item
->next
= pos
->next
;
358 item
->next
->prev
= item
;
361 /* Add item to the front of the list */
362 item
->next
= pool
->item_list
;
363 item
->prev
= pool
->item_list
->prev
;
364 pool
->item_list
->prev
= item
;
365 pool
->item_list
= item
;
/* Empty list: the item becomes the only element. */
369 pool
->item_list
= item
;
/* The source box starts at 0 and covers the whole item, in bytes. */
372 u_box_1d(0, item
->size_in_dw
* 4, &box
);
374 rctx
->b
.b
.resource_copy_region(pipe
,
375 dst
, 0, item
->start_in_dw
* 4, 0 ,0,
378 /* We check if the item is mapped for reading.
379 * In this case, we need to keep the temporary buffer 'alive'
380 * because it is possible to keep a map active for reading
381 * while a kernel (that reads from it) executes */
382 if (!(item
->status
& ITEM_MAPPED_FOR_READING
)) {
383 pool
->screen
->b
.b
.resource_destroy(screen
, src
);
384 item
->real_buffer
= NULL
;
/*
 * compute_memory_demote_item: moves an item out of the pool.
 *
 * Reverse of compute_memory_promote_item(). Visible steps:
 *  1. unlink the item from pool->item_list;
 *  2. push it onto the front of pool->unallocated_list;
 *  3. allocate item->real_buffer again if it is NULL;
 *  4. copy the byte range of the item (start_in_dw * 4, size_in_dw * 4) from
 *     the pool buffer into that temporary buffer;
 *  5. set item->start_in_dw to -1, which marks the item as pending.
 *
 * NOTE(review): the declaration of box, the else branches of the list
 * handling and the arguments of resource_copy_region are not visible in
 * this listing.
 */
390 void compute_memory_demote_item(struct compute_memory_pool
*pool
,
391 struct compute_memory_item
*item
, struct pipe_context
*pipe
)
393 struct r600_context
*rctx
= (struct r600_context
*)pipe
;
/* src is the pool buffer; dst is assigned once the temporary buffer is
 * known to exist. */
394 struct pipe_resource
*src
= (struct pipe_resource
*)pool
->bo
;
395 struct pipe_resource
*dst
;
398 /* First, we remove the item from the item_list */
399 if (item
->prev
== NULL
)
400 pool
->item_list
= item
->next
;
402 item
->prev
->next
= item
->next
;
404 if (item
->next
!= NULL
)
405 item
->next
->prev
= item
->prev
;
408 /* Now we add it to the beginning of the unallocated list
409 * NOTE: we could also add it to the end, but this is easier */
412 if (pool
->unallocated_list
) {
413 item
->next
= pool
->unallocated_list
;
414 item
->next
->prev
= item
;
415 pool
->unallocated_list
= item
;
418 pool
->unallocated_list
= item
;
420 /* We check if the intermediate buffer exists, and if it
421 * doesn't, we create it again */
422 if (item
->real_buffer
== NULL
) {
/* NOTE(review): the allocation result is not checked in the visible code
 * before it is used as the copy destination. */
423 item
->real_buffer
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(
424 pool
->screen
, item
->size_in_dw
* 4);
427 dst
= (struct pipe_resource
*)item
->real_buffer
;
429 /* We transfer the memory from the item in the pool to the
430 * temporary buffer */
431 u_box_1d(item
->start_in_dw
* 4, item
->size_in_dw
* 4, &box
);
433 rctx
->b
.b
.resource_copy_region(pipe
,
437 /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
438 item
->start_in_dw
= -1;
/*
 * compute_memory_free: removes the item with the given id.
 *
 * Searches pool->item_list first and pool->unallocated_list second. For the
 * matching item the visible code unlinks it from its list (updating the
 * list head when needed) and destroys item->real_buffer if one exists.
 * When no item matches, an error is printed to stderr and an assert fires.
 *
 * NOTE(review): the conditions around the unlink statements, the assignment
 * of next, the release of the item struct and the return after a match are
 * not visible in this listing.
 */
441 void compute_memory_free(struct compute_memory_pool
* pool
, int64_t id
)
443 struct compute_memory_item
*item
, *next
;
444 struct pipe_screen
*screen
= (struct pipe_screen
*)pool
->screen
;
445 struct pipe_resource
*res
;
/* NOTE(review): id is int64_t but is printed with the %ld conversion; the
 * fprintf further down uses PRIi64 for the same value, which is the portable
 * choice. The text id + also looks like a typo for id =. */
447 COMPUTE_DBG(pool
->screen
, "* compute_memory_free() id + %ld \n", id
);
/* First pass: items that currently live in the pool. */
449 for (item
= pool
->item_list
; item
; item
= next
) {
452 if (item
->id
== id
) {
454 item
->prev
->next
= item
->next
;
457 pool
->item_list
= item
->next
;
461 item
->next
->prev
= item
->prev
;
464 if (item
->real_buffer
) {
465 res
= (struct pipe_resource
*)item
->real_buffer
;
466 pool
->screen
->b
.b
.resource_destroy(
/* Second pass: pending items that were never placed in the pool. */
476 for (item
= pool
->unallocated_list
; item
; item
= next
) {
479 if (item
->id
== id
) {
481 item
->prev
->next
= item
->next
;
484 pool
->unallocated_list
= item
->next
;
488 item
->next
->prev
= item
->prev
;
491 if (item
->real_buffer
) {
492 res
= (struct pipe_resource
*)item
->real_buffer
;
493 pool
->screen
->b
.b
.resource_destroy(
/* Reached only when neither list contained the id. */
503 fprintf(stderr
, "Internal error, invalid id %"PRIi64
" "
504 "for compute_memory_free\n", id
);
506 assert(0 && "error");
510 * Creates pending allocations
/*
 * compute_memory_alloc: creates a pending item.
 *
 * Allocates a struct compute_memory_item with CALLOC and fills it in:
 * the requested size, start_in_dw = -1 (pending), an id taken from
 * pool->next_id (post-incremented), a back-pointer to the pool and a
 * temporary buffer of size_in_dw * 4 bytes in real_buffer. The item is then
 * appended to the tail of pool->unallocated_list, or becomes the head when
 * that list is empty.
 *
 * NOTE(review): the declaration of the size_in_dw parameter and the return
 * statements are not visible in this listing; presumably NULL is returned
 * when CALLOC fails and new_item otherwise - confirm.
 */
512 struct compute_memory_item
* compute_memory_alloc(
513 struct compute_memory_pool
* pool
,
516 struct compute_memory_item
*new_item
= NULL
, *last_item
= NULL
;
518 COMPUTE_DBG(pool
->screen
, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
519 size_in_dw
, 4 * size_in_dw
);
521 new_item
= (struct compute_memory_item
*)
522 CALLOC(sizeof(struct compute_memory_item
), 1);
523 if (new_item
== NULL
)
526 new_item
->size_in_dw
= size_in_dw
;
527 new_item
->start_in_dw
= -1; /* mark pending */
528 new_item
->id
= pool
->next_id
++;
529 new_item
->pool
= pool
;
/* NOTE(review): the allocation result is not checked in the visible code. */
530 new_item
->real_buffer
= (struct r600_resource
*)r600_compute_buffer_alloc_vram(
531 pool
->screen
, size_in_dw
* 4);
533 if (pool
->unallocated_list
) {
/* Walk to the last element; the loop body is empty. */
534 for (last_item
= pool
->unallocated_list
; last_item
->next
;
535 last_item
= last_item
->next
);
537 last_item
->next
= new_item
;
538 new_item
->prev
= last_item
;
/* Empty list: the new item becomes the head. */
541 pool
->unallocated_list
= new_item
;
544 COMPUTE_DBG(pool
->screen
, " + Adding item %p id = %u size = %u (%u bytes)\n",
545 new_item
, new_item
->id
, new_item
->size_in_dw
,
546 new_item
->size_in_dw
* 4);
551 * Transfers data host<->device; offset and size are in bytes.
/*
 * compute_memory_transfer: copies bytes between host memory and the pool.
 *
 * The byte position inside the pool buffer is
 * chunk->start_in_dw * 4 + offset_in_chunk. The whole buffer
 * (pool->size_in_dw * 4 bytes) is mapped for reading or for writing
 * depending on device_to_host, size bytes are copied with memcpy between
 * data and that position, and the mapping is released.
 *
 * NOTE(review): the declarations of device_to_host, data, offset_in_chunk,
 * size and map are not visible in this listing, and no check of the
 * transfer_map result is visible before memcpy uses it - confirm.
 * NOTE(review): the full pool is mapped even when size is small.
 */
553 void compute_memory_transfer(
554 struct compute_memory_pool
* pool
,
555 struct pipe_context
* pipe
,
557 struct compute_memory_item
* chunk
,
/* Despite its name, aligned_size is simply the pool size in dwords. */
562 int64_t aligned_size
= pool
->size_in_dw
;
563 struct pipe_resource
* gart
= (struct pipe_resource
*)pool
->bo
;
564 int64_t internal_offset
= chunk
->start_in_dw
*4 + offset_in_chunk
;
566 struct pipe_transfer
*xfer
;
571 COMPUTE_DBG(pool
->screen
, "* compute_memory_transfer() device_to_host = %d, "
572 "offset_in_chunk = %d, size = %d\n", device_to_host
,
573 offset_in_chunk
, size
);
/* Device to host: map for reading and copy out of the buffer. */
575 if (device_to_host
) {
576 map
= pipe
->transfer_map(pipe
, gart
, 0, PIPE_TRANSFER_READ
,
577 &(struct pipe_box
) { .width
= aligned_size
* 4,
578 .height
= 1, .depth
= 1 }, &xfer
);
581 memcpy(data
, map
+ internal_offset
, size
);
582 pipe
->transfer_unmap(pipe
, xfer
);
/* Host to device: map for writing and copy into the buffer. */
584 map
= pipe
->transfer_map(pipe
, gart
, 0, PIPE_TRANSFER_WRITE
,
585 &(struct pipe_box
) { .width
= aligned_size
* 4,
586 .height
= 1, .depth
= 1 }, &xfer
);
589 memcpy(map
+ internal_offset
, data
, size
);
590 pipe
->transfer_unmap(pipe
, xfer
);
595 * Transfers data between a chunk and the data resource; used for VRAM<->GART transfers.
597 void compute_memory_transfer_direct(
598 struct compute_memory_pool
* pool
,
600 struct compute_memory_item
* chunk
,
601 struct r600_resource
* data
,