/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

#define ITEM_ALIGNMENT 1024
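
/* All item sizes and offsets in this file are expressed in 32-bit dwords
 * (multiply by 4 for bytes).  Items placed in the pool are aligned to
 * ITEM_ALIGNMENT dwords, i.e. 1024 dw = 4 KiB.
 */
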
/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
        struct r600_screen * rscreen)
{
        struct compute_memory_pool* pool = (struct compute_memory_pool*)
                                CALLOC(sizeof(struct compute_memory_pool), 1);
        if (pool == NULL)
                return NULL;

        COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

        pool->screen = rscreen;
        return pool;
}

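/**
 * Initializes the pool: allocates the host-side shadow copy and the
 * buffer object that backs the pool.  On failure pool->shadow is left
 * NULL, which the caller (compute_memory_grow_pool()) checks.
 */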
static void compute_memory_pool_init(struct compute_memory_pool * pool,
        unsigned initial_size_in_dw)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
                initial_size_in_dw);

        pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
        if (pool->shadow == NULL)
                return;

        pool->size_in_dw = initial_size_in_dw;
        pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
                                                        pool->size_in_dw * 4);
}

/**
 * Frees the pool's shadow copy, its buffer object and the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
        free(pool->shadow);
        if (pool->bo) {
                pool->screen->b.b.resource_destroy((struct pipe_screen *)
                        pool->screen, (struct pipe_resource *)pool->bo);
        }
        free(pool);
}

/**
 * Searches for an empty space in the pool; returns the offset (in dwords)
 * where a chunk of size_in_dw can be placed, or -1 if there is no gap
 * large enough.
 */
int64_t compute_memory_prealloc_chunk(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
{
        struct compute_memory_item *item;

        int64_t last_end = 0;

        assert(size_in_dw <= pool->size_in_dw);

        COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
                size_in_dw);

        for (item = pool->item_list; item; item = item->next) {
                if (last_end + size_in_dw <= item->start_in_dw) {
                        return last_end;
                }

                last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
        }

        if (pool->size_in_dw - last_end < size_in_dw) {
                return -1;
        }

        return last_end;
}
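
/* Example of the first-fit scan above (alignment to ITEM_ALIGNMENT ignored
 * for brevity, all numbers in dwords):
 *
 *     item_list: [A: start 0, size 100] -> [B: start 300, size 100]
 *
 *     compute_memory_prealloc_chunk(pool, 150) returns 100 (the gap between
 *     A and B is large enough), while
 *     compute_memory_prealloc_chunk(pool, 250) returns 400 (right after B),
 *     or -1 if the pool is smaller than 650 dw.
 */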

/**
 * Searches for the item after which a new chunk starting at start_in_dw
 * should be linked.  Returns NULL when the chunk belongs at the front of
 * the list.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
        struct compute_memory_pool* pool,
        int64_t start_in_dw)
{
        struct compute_memory_item* item;

        COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
                start_in_dw);

        /* Check if we can insert it in the front of the list */
        if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
                return NULL;
        }

        for (item = pool->item_list; item; item = item->next) {
                if (item->next) {
                        if (item->start_in_dw < start_in_dw
                                && item->next->start_in_dw > start_in_dw) {
                                return item;
                        }
                }
                else {
                        /* end of chain */
                        assert(item->start_in_dw < start_in_dw);
                        return item;
                }
        }

        assert(0 && "unreachable");
        return NULL;
}

/**
 * Grows the pool to at least new_size_in_dw dwords, preserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int new_size_in_dw)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
                "new_size_in_dw = %d (%d bytes)\n",
                new_size_in_dw, new_size_in_dw * 4);

        assert(new_size_in_dw >= pool->size_in_dw);

        if (!pool->bo) {
                compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
                if (pool->shadow == NULL)
                        return -1;
        } else {
                uint32_t *new_shadow;

                new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

                COMPUTE_DBG(pool->screen, " Aligned size = %d (%d bytes)\n",
                        new_size_in_dw, new_size_in_dw * 4);

                /* Save the current pool contents to the host-side shadow. */
                compute_memory_shadow(pool, pipe, 1);

                /* Use a temporary pointer so the old shadow is not leaked
                 * if realloc() fails. */
                new_shadow = realloc(pool->shadow, new_size_in_dw * 4);
                if (new_shadow == NULL)
                        return -1;
                pool->shadow = new_shadow;

                /* Replace the buffer object with a bigger one... */
                pool->size_in_dw = new_size_in_dw;
                pool->screen->b.b.resource_destroy(
                        (struct pipe_screen *)pool->screen,
                        (struct pipe_resource *)pool->bo);
                pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
                        pool->screen,
                        pool->size_in_dw * 4);

                /* ...and upload the saved contents back to the device. */
                compute_memory_shadow(pool, pipe, 0);
        }

        return 0;
}

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int device_to_host)
{
        struct compute_memory_item chunk;

        COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
                device_to_host);

        chunk.id = 0;
        chunk.start_in_dw = 0;
        chunk.size_in_dw = pool->size_in_dw;
        chunk.prev = chunk.next = NULL;
        compute_memory_transfer(pool, pipe, device_to_host, &chunk,
                pool->shadow, 0, pool->size_in_dw*4);
}
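
/* pool->shadow is a host-side copy of the whole pool.  Its only user in
 * this file is compute_memory_grow_pool(): the pool contents are shadowed
 * to the host, the buffer object is reallocated, and the shadow is copied
 * back, so the items' data survives the resize.
 */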

/**
 * Allocates space in the pool for all pending items that are marked for
 * promotion.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
        struct pipe_context * pipe)
{
        struct compute_memory_item *item, *next;

        int64_t allocated = 0;
        int64_t unallocated = 0;

        int err = 0;

        COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

        for (item = pool->item_list; item; item = item->next) {
                COMPUTE_DBG(pool->screen, " + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
                        "(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
                        item->size_in_dw, item->size_in_dw * 4);
        }

        /* Calculate the total allocated size */
        for (item = pool->item_list; item; item = next) {
                next = item->next;
                allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
        }

        /* Calculate the total unallocated size of the items that
         * will be promoted to the pool */
        for (item = pool->unallocated_list; item; item = next) {
                next = item->next;
                if (item->status & ITEM_FOR_PROMOTING)
                        unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
        }

        /* If we require more space than the size of the pool, then grow the
         * pool.
         *
         * XXX: I'm pretty sure this won't work.  Imagine this scenario:
         *
         * Offset  Item  Size
         *    0     A     50
         *  200     B     50
         *  400     C     50
         *
         * Total size = 450
         * Allocated size = 150
         * Pending Item D Size = 200
         *
         * In this case, there are 300 units of free space in the pool, but
         * they aren't contiguous, so it will be impossible to allocate Item D.
         */
        if (pool->size_in_dw < allocated + unallocated) {
                err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
                if (err == -1)
                        return -1;
        }

        /* Loop through all the unallocated items, check if they are marked
         * for promoting, allocate space for them and add them to the item_list. */
        for (item = pool->unallocated_list; item; item = next) {
                next = item->next;

                if (item->status & ITEM_FOR_PROMOTING) {
                        err = compute_memory_promote_item(pool, item, pipe, allocated);
                        item->status ^= ITEM_FOR_PROMOTING;

                        allocated += align(item->size_in_dw, ITEM_ALIGNMENT);

                        if (err == -1)
                                return -1;
                }
        }

        return 0;
}

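/* A minimal sketch of how a caller is expected to drive this pool (setting
 * ITEM_FOR_PROMOTING happens outside this file, so the exact call site is
 * an assumption, not something defined here):
 *
 *     struct compute_memory_item *item =
 *             compute_memory_alloc(pool, size_in_dw); // pending, start_in_dw == -1
 *     item->status |= ITEM_FOR_PROMOTING;             // request space in the pool
 *     compute_memory_finalize_pending(pool, pipe);    // promotes it, start_in_dw >= 0
 *     ...
 *     compute_memory_free(pool, item->id);            // release the item by id
 */
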
/**
 * Moves a pending item into the pool: finds free space for it, links it
 * into item_list and copies its contents from the temporary buffer into
 * the pool's buffer.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_promote_item(struct compute_memory_pool *pool,
        struct compute_memory_item *item, struct pipe_context *pipe,
        int64_t allocated)
{
        struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
        struct r600_context *rctx = (struct r600_context *)pipe;
        struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
        struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
        struct pipe_box box;

        int64_t start_in_dw;
        int err = 0;

        /* Search for free space in the pool for this item. */
        while ((start_in_dw = compute_memory_prealloc_chunk(pool,
                        item->size_in_dw)) == -1) {
                int64_t need = item->size_in_dw + 2048 -
                        (pool->size_in_dw - allocated);

                if (need < 0) {
                        need = pool->size_in_dw / 10;
                }

                need = align(need, ITEM_ALIGNMENT);

                err = compute_memory_grow_pool(pool,
                        pipe,
                        pool->size_in_dw + need);

                if (err == -1)
                        return -1;
        }
        COMPUTE_DBG(pool->screen, " + Found space for Item %p id = %"PRIi64" "
                "start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
                item, item->id, start_in_dw, start_in_dw * 4,
                item->size_in_dw, item->size_in_dw * 4);

        /* Remove the item from the unallocated list */
        if (item->prev == NULL)
                pool->unallocated_list = item->next;
        else
                item->prev->next = item->next;

        if (item->next != NULL)
                item->next->prev = item->prev;

        item->start_in_dw = start_in_dw;
        item->next = NULL;
        item->prev = NULL;

        if (pool->item_list) {
                struct compute_memory_item *pos;

                pos = compute_memory_postalloc_chunk(pool, start_in_dw);
                if (pos) {
                        item->prev = pos;
                        item->next = pos->next;
                        pos->next = item;
                        if (item->next) {
                                item->next->prev = item;
                        }
                } else {
                        /* Add item to the front of the list */
                        item->next = pool->item_list;
                        item->prev = pool->item_list->prev;
                        pool->item_list->prev = item;
                        pool->item_list = item;
                }
        }
        else {
                pool->item_list = item;
        }

        u_box_1d(0, item->size_in_dw * 4, &box);

        rctx->b.b.resource_copy_region(pipe,
                dst, 0, item->start_in_dw * 4, 0, 0,
                src, 0, &box);

        /* We check if the item is mapped for reading.
         * In this case, we need to keep the temporary buffer 'alive'
         * because it is possible to keep a map active for reading
         * while a kernel (that reads from it) executes */
        if (!(item->status & ITEM_MAPPED_FOR_READING)) {
                pool->screen->b.b.resource_destroy(screen, src);
                item->real_buffer = NULL;
        }

        return 0;
}

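/**
 * Moves an item out of the pool and back into its own temporary buffer:
 * the item's data is copied out of the pool, it is put on the unallocated
 * list and becomes pending again (start_in_dw == -1).  When the driver
 * chooses to demote an item is decided by callers outside this file.
 */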
void compute_memory_demote_item(struct compute_memory_pool *pool,
        struct compute_memory_item *item, struct pipe_context *pipe)
{
        struct r600_context *rctx = (struct r600_context *)pipe;
        struct pipe_resource *src = (struct pipe_resource *)pool->bo;
        struct pipe_resource *dst;
        struct pipe_box box;

        /* First, we remove the item from the item_list */
        if (item->prev == NULL)
                pool->item_list = item->next;
        else
                item->prev->next = item->next;

        if (item->next != NULL)
                item->next->prev = item->prev;

        /* Now we add it to the beginning of the unallocated list
         * NOTE: we could also add it to the end, but this is easier */
        item->next = NULL;
        item->prev = NULL;
        if (pool->unallocated_list) {
                item->next = pool->unallocated_list;
                item->next->prev = item;
                pool->unallocated_list = item;
        }
        else
                pool->unallocated_list = item;

        /* We check if the intermediate buffer exists, and if it
         * doesn't, we create it again */
        if (item->real_buffer == NULL) {
                item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
                        pool->screen, item->size_in_dw * 4);
        }

        dst = (struct pipe_resource *)item->real_buffer;

        /* We transfer the memory from the item in the pool to the
         * temporary buffer */
        u_box_1d(item->start_in_dw * 4, item->size_in_dw * 4, &box);

        rctx->b.b.resource_copy_region(pipe,
                dst, 0, 0, 0, 0,
                src, 0, &box);

        /* Remember to mark the buffer as 'pending' by setting start_in_dw to -1 */
        item->start_in_dw = -1;
}

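/**
 * Frees the item with the given id, together with its temporary buffer if
 * it still has one.  Both the allocated items (item_list) and the pending
 * ones (unallocated_list) are searched.
 */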
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
        struct compute_memory_item *item, *next;
        struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
        struct pipe_resource *res;

        COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);

        for (item = pool->item_list; item; item = next) {
                next = item->next;

                if (item->id == id) {
                        if (item->prev) {
                                item->prev->next = item->next;
                        }
                        else {
                                pool->item_list = item->next;
                        }

                        if (item->next) {
                                item->next->prev = item->prev;
                        }

                        if (item->real_buffer) {
                                res = (struct pipe_resource *)item->real_buffer;
                                pool->screen->b.b.resource_destroy(
                                        screen, res);
                        }

                        free(item);

                        return;
                }
        }

        for (item = pool->unallocated_list; item; item = next) {
                next = item->next;

                if (item->id == id) {
                        if (item->prev) {
                                item->prev->next = item->next;
                        }
                        else {
                                pool->unallocated_list = item->next;
                        }

                        if (item->next) {
                                item->next->prev = item->prev;
                        }

                        if (item->real_buffer) {
                                res = (struct pipe_resource *)item->real_buffer;
                                pool->screen->b.b.resource_destroy(
                                        screen, res);
                        }

                        free(item);

                        return;
                }
        }

        fprintf(stderr, "Internal error, invalid id %"PRIi64" "
                "for compute_memory_free\n", id);

        assert(0 && "error");
}

/**
 * Creates a new pending item.  The item is added to the unallocated list
 * and only gets space inside the pool once it is promoted by
 * compute_memory_finalize_pending().
 */
struct compute_memory_item* compute_memory_alloc(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
{
        struct compute_memory_item *new_item = NULL, *last_item = NULL;

        COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
                size_in_dw, 4 * size_in_dw);

        new_item = (struct compute_memory_item *)
                CALLOC(sizeof(struct compute_memory_item), 1);
        if (new_item == NULL)
                return NULL;

        new_item->size_in_dw = size_in_dw;
        new_item->start_in_dw = -1; /* mark pending */
        new_item->id = pool->next_id++;
        new_item->pool = pool;
        new_item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
                pool->screen, size_in_dw * 4);

        if (pool->unallocated_list) {
                for (last_item = pool->unallocated_list; last_item->next;
                        last_item = last_item->next);

                last_item->next = new_item;
                new_item->prev = last_item;
        }
        else {
                pool->unallocated_list = new_item;
        }

        COMPUTE_DBG(pool->screen, " + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
                new_item, new_item->id, new_item->size_in_dw,
                new_item->size_in_dw * 4);
        return new_item;
}
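
/* Note that compute_memory_alloc() never touches the pool's buffer: the new
 * item is immediately backed by its own real_buffer, where its data lives
 * until (and unless) the item is promoted into the pool.
 */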

/**
 * Transfers data host <-> device; offset and size are in bytes.
 */
void compute_memory_transfer(
        struct compute_memory_pool* pool,
        struct pipe_context * pipe,
        int device_to_host,
        struct compute_memory_item* chunk,
        void* data,
        int offset_in_chunk,
        int size)
{
        int64_t aligned_size = pool->size_in_dw;
        struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
        /* Offset of the requested range inside the pool, in bytes. */
        int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

        struct pipe_transfer *xfer;
        uint32_t *map;

        assert(gart);

        COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
                "offset_in_chunk = %d, size = %d\n", device_to_host,
                offset_in_chunk, size);

        if (device_to_host) {
                map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
                        &(struct pipe_box) { .width = aligned_size * 4,
                        .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
                assert(map);
                /* internal_offset is a byte offset, so index the mapping as bytes. */
                memcpy(data, (char *)map + internal_offset, size);
                pipe->transfer_unmap(pipe, xfer);
        } else {
                map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
                        &(struct pipe_box) { .width = aligned_size * 4,
                        .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
                assert(map);
                memcpy((char *)map + internal_offset, data, size);
                pipe->transfer_unmap(pipe, xfer);
        }
}
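
/* Within this file the only caller of compute_memory_transfer() is
 * compute_memory_shadow(), which always transfers the whole pool starting
 * at offset 0.
 */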

/**
 * Transfers data between a chunk and an r600_resource; intended for
 * VRAM <-> GART transfers.
 */
void compute_memory_transfer_direct(
        struct compute_memory_pool* pool,
        int chunk_to_data,
        struct compute_memory_item* chunk,
        struct r600_resource* data,
        int offset_in_chunk,
        int offset_in_data,
        int size)
{
        /* TODO: DMA */
}