/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

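/* Items in the pool are padded to this many dwords (1024 dw = 4 KiB);
 * see the align() calls on item->size_in_dw below. */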
#define ITEM_ALIGNMENT 1024

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
        struct r600_screen * rscreen)
{
        struct compute_memory_pool* pool = (struct compute_memory_pool*)
                CALLOC(sizeof(struct compute_memory_pool), 1);
        if (pool == NULL)
                return NULL;

        COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

        pool->screen = rscreen;
        return pool;
}

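/**
 * Initializes the pool: allocates the host-side shadow buffer and the
 * VRAM buffer that backs the pool. If the shadow allocation fails, the
 * function returns early and pool->shadow is left NULL.
 */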
static void compute_memory_pool_init(struct compute_memory_pool * pool,
        unsigned initial_size_in_dw)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
                initial_size_in_dw);

        pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
        if (pool->shadow == NULL)
                return;

        pool->size_in_dw = initial_size_in_dw;
        pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
                pool->size_in_dw * 4);
}

/**
 * Frees the pool's shadow buffer and VRAM buffer, and the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
        free(pool->shadow);
        if (pool->bo) {
                pool->screen->b.b.resource_destroy((struct pipe_screen *)
                        pool->screen, (struct pipe_resource *)pool->bo);
        }
        free(pool);
}

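/* Illustrative lifecycle sketch (not compiled; the real caller is the
 * evergreen compute state code, and 'rscreen'/'pipe' stand for a valid
 * r600_screen and pipe_context). Items are created as "pending" on the
 * unallocated list and only get an offset in the pool once they are
 * marked for promotion and compute_memory_finalize_pending() runs:
 *
 *   struct compute_memory_pool *pool = compute_memory_pool_new(rscreen);
 *   struct compute_memory_item *item = compute_memory_alloc(pool, 1024);
 *   item->status |= ITEM_FOR_PROMOTING;
 *   compute_memory_finalize_pending(pool, pipe); // item->start_in_dw now valid
 *   ...
 *   compute_memory_free(pool, item->id);
 *   compute_memory_pool_delete(pool);
 */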
/**
 * Searches for an empty space in the pool. Returns the offset (in dwords)
 * of the allocatable space within the pool, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
{
        struct compute_memory_item *item;

        int64_t last_end = 0;

        assert(size_in_dw <= pool->size_in_dw);

        COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
                size_in_dw);

        for (item = pool->item_list; item; item = item->next) {
                if (last_end + size_in_dw <= item->start_in_dw) {
                        return last_end;
                }

                last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
        }

        if (pool->size_in_dw - last_end < size_in_dw) {
                return -1;
        }

        return last_end;
}

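/* Worked example of the first-fit scan above (offsets in dwords, items kept
 * sorted by start_in_dw, each item padded to ITEM_ALIGNMENT):
 *
 *   item A: start = 0,    size = 100  -> occupies [0, 1024)
 *   item B: start = 4096, size = 2000 -> occupies [4096, 6144)
 *
 * prealloc_chunk(pool, 3000) returns 1024 (the gap between A and B fits);
 * prealloc_chunk(pool, 4000) skips that gap and returns 6144, provided the
 * pool is at least 6144 + 4000 dwords large, otherwise it returns -1.
 */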
/**
 * Searches for the chunk after which the new chunk can be linked, i.e. the
 * last item that starts before start_in_dw. Returns NULL if the new chunk
 * belongs at the front of the list.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
        struct compute_memory_pool* pool,
        int64_t start_in_dw)
{
        struct compute_memory_item* item;

        COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
                start_in_dw);

        /* Check if we can insert it in the front of the list */
        if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
                return NULL;
        }

        for (item = pool->item_list; item; item = item->next) {
                if (item->next) {
                        if (item->start_in_dw < start_in_dw
                                && item->next->start_in_dw > start_in_dw) {
                                return item;
                        }
                }
                else {
                        /* end of chain */
                        assert(item->start_in_dw < start_in_dw);
                        return item;
                }
        }

        assert(0 && "unreachable");
        return NULL;
}

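/* For example, with items starting at 2048 and 4096 on item_list,
 * compute_memory_postalloc_chunk(pool, 0) returns NULL (the new chunk goes
 * to the front of the list), while compute_memory_postalloc_chunk(pool, 3072)
 * returns the item at 2048, after which the new chunk gets linked.
 */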
/**
 * Reallocates the pool, preserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int new_size_in_dw)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
                "new_size_in_dw = %d (%d bytes)\n",
                new_size_in_dw, new_size_in_dw * 4);

        assert(new_size_in_dw >= pool->size_in_dw);

        if (!pool->bo) {
                compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
                if (pool->shadow == NULL)
                        return -1;
        } else {
                new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

                COMPUTE_DBG(pool->screen, "  Aligned size = %d (%d bytes)\n",
                        new_size_in_dw, new_size_in_dw * 4);

                compute_memory_shadow(pool, pipe, 1);
                pool->shadow = realloc(pool->shadow, new_size_in_dw * 4);
                if (pool->shadow == NULL)
                        return -1;

                pool->size_in_dw = new_size_in_dw;
                pool->screen->b.b.resource_destroy(
                        (struct pipe_screen *)pool->screen,
                        (struct pipe_resource *)pool->bo);
                pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
                        pool->screen,
                        pool->size_in_dw * 4);
                compute_memory_shadow(pool, pipe, 0);
        }

        return 0;
}

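/* Note on the grow path above: the requested size is only ever rounded up.
 * For example, asking for 45000 dw gives align(45000, ITEM_ALIGNMENT) =
 * 45056 dw (176 KiB of VRAM), and the host shadow buffer grows to match.
 */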
/**
 * Copies the whole pool between the device BO and the host-side shadow
 * buffer (pool->shadow). device_to_host selects the direction.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int device_to_host)
{
        struct compute_memory_item chunk;

        COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
                device_to_host);

        chunk.id = 0;
        chunk.start_in_dw = 0;
        chunk.size_in_dw = pool->size_in_dw;
        chunk.prev = chunk.next = NULL;
        compute_memory_transfer(pool, pipe, device_to_host, &chunk,
                pool->shadow, 0, pool->size_in_dw * 4);
}

/**
 * Allocates the pending items (those on the unallocated list that are
 * marked ITEM_FOR_PROMOTING) into the pool.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
        struct pipe_context * pipe)
{
        struct compute_memory_item *item, *next;

        int64_t allocated = 0;
        int64_t unallocated = 0;

        int err = 0;

        COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

        for (item = pool->item_list; item; item = item->next) {
                COMPUTE_DBG(pool->screen, "  + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
                        "(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
                        item->size_in_dw, item->size_in_dw * 4);
        }

        /* Calculate the total allocated size */
        for (item = pool->item_list; item; item = next) {
                next = item->next;
                allocated += align(item->size_in_dw, ITEM_ALIGNMENT);
        }

        /* Calculate the total unallocated size of the items that
         * will be promoted to the pool */
        for (item = pool->unallocated_list; item; item = next) {
                next = item->next;
                if (item->status & ITEM_FOR_PROMOTING)
                        unallocated += align(item->size_in_dw, ITEM_ALIGNMENT);
        }

        /* If we require more space than the size of the pool, then grow the
         * pool.
         *
         * XXX: I'm pretty sure this won't work.  Imagine this scenario:
         *
         * Offset Item Size
         *   0    A    50
         * 200    B    50
         * 400    C    50
         *
         * Total size = 450
         * Allocated size = 150
         * Pending Item D Size = 200
         *
         * In this case, there are 300 units of free space in the pool, but
         * they aren't contiguous, so it will be impossible to allocate Item D.
         */
        if (pool->size_in_dw < allocated + unallocated) {
                err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
                if (err == -1)
                        return -1;
        }

        /* Loop through all the unallocated items, check if they are marked
         * for promoting, allocate space for them and add them to the item_list. */
        for (item = pool->unallocated_list; item; item = next) {
                next = item->next;

                if (item->status & ITEM_FOR_PROMOTING) {
                        err = compute_memory_promote_item(pool, item, pipe, allocated);
                        item->status ^= ITEM_FOR_PROMOTING;

                        allocated += align(item->size_in_dw, ITEM_ALIGNMENT);

                        if (err == -1)
                                return -1;
                }
        }

        return 0;
}

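/**
 * Moves an item from the unallocated list into the pool proper: finds (or
 * grows the pool to create) a free chunk, links the item into item_list at
 * that offset, and copies the item's temporary buffer into the pool BO.
 * 'allocated' is the total aligned size already accounted for in the pool.
 * @returns -1 if growing the pool fails, 0 otherwise
 */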
int compute_memory_promote_item(struct compute_memory_pool *pool,
        struct compute_memory_item *item, struct pipe_context *pipe,
        int64_t allocated)
{
        struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
        struct r600_context *rctx = (struct r600_context *)pipe;
        struct pipe_resource *dst = (struct pipe_resource *)pool->bo;
        struct pipe_resource *src = (struct pipe_resource *)item->real_buffer;
        struct pipe_box box;

        int64_t start_in_dw;
        int err = 0;

        /* Search for free space in the pool for this item. */
        while ((start_in_dw = compute_memory_prealloc_chunk(pool,
                        item->size_in_dw)) == -1) {
                int64_t need = item->size_in_dw + 2048 -
                        (pool->size_in_dw - allocated);

                if (need < 0) {
                        need = pool->size_in_dw / 10;
                }

                need = align(need, ITEM_ALIGNMENT);

                err = compute_memory_grow_pool(pool,
                        pipe,
                        pool->size_in_dw + need);

                if (err == -1)
                        return -1;
        }
        COMPUTE_DBG(pool->screen, "  + Found space for Item %p id = %"PRIi64" "
                "start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
                item, item->id, start_in_dw, start_in_dw * 4,
                item->size_in_dw, item->size_in_dw * 4);

        /* Remove the item from the unallocated list */
        if (item->prev == NULL)
                pool->unallocated_list = item->next;
        else
                item->prev->next = item->next;

        if (item->next != NULL)
                item->next->prev = item->prev;

        item->start_in_dw = start_in_dw;
        item->next = NULL;
        item->prev = NULL;

        if (pool->item_list) {
                struct compute_memory_item *pos;

                pos = compute_memory_postalloc_chunk(pool, start_in_dw);
                if (pos) {
                        item->prev = pos;
                        item->next = pos->next;
                        pos->next = item;
                        if (item->next) {
                                item->next->prev = item;
                        }
                } else {
                        /* Add item to the front of the list */
                        item->next = pool->item_list;
                        item->prev = pool->item_list->prev;
                        pool->item_list->prev = item;
                        pool->item_list = item;
                }
        }
        else {
                pool->item_list = item;
        }

        u_box_1d(0, item->size_in_dw * 4, &box);

        rctx->b.b.resource_copy_region(pipe,
                dst, 0, item->start_in_dw * 4, 0, 0,
                src, 0, &box);

        /* We check if the item is mapped for reading.
         * In this case, we need to keep the temporary buffer 'alive'
         * because it is possible to keep a map active for reading
         * while a kernel (that reads from it) executes */
        if (!(item->status & ITEM_MAPPED_FOR_READING)) {
                pool->screen->b.b.resource_destroy(screen, src);
                item->real_buffer = NULL;
        }

        return 0;
}

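/**
 * Frees the item with the given id: unlinks it from whichever list it is on
 * (item_list or unallocated_list), destroys its temporary buffer if it still
 * has one, and frees the item struct. Asserts if the id is not found.
 */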
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
        struct compute_memory_item *item, *next;
        struct pipe_screen *screen = (struct pipe_screen *)pool->screen;
        struct pipe_resource *res;

        COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %"PRIi64"\n", id);

        for (item = pool->item_list; item; item = next) {
                next = item->next;

                if (item->id == id) {
                        if (item->prev) {
                                item->prev->next = item->next;
                        }
                        else {
                                pool->item_list = item->next;
                        }

                        if (item->next) {
                                item->next->prev = item->prev;
                        }

                        if (item->real_buffer) {
                                res = (struct pipe_resource *)item->real_buffer;
                                pool->screen->b.b.resource_destroy(
                                        screen, res);
                        }

                        free(item);

                        return;
                }
        }

        for (item = pool->unallocated_list; item; item = next) {
                next = item->next;

                if (item->id == id) {
                        if (item->prev) {
                                item->prev->next = item->next;
                        }
                        else {
                                pool->unallocated_list = item->next;
                        }

                        if (item->next) {
                                item->next->prev = item->prev;
                        }

                        if (item->real_buffer) {
                                res = (struct pipe_resource *)item->real_buffer;
                                pool->screen->b.b.resource_destroy(
                                        screen, res);
                        }

                        free(item);

                        return;
                }
        }

        fprintf(stderr, "Internal error, invalid id %"PRIi64" "
                "for compute_memory_free\n", id);

        assert(0 && "error");
}

/**
 * Creates a new item and adds it to the unallocated list as a pending
 * allocation; it only gets an offset in the pool once it is promoted.
 */
struct compute_memory_item* compute_memory_alloc(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
{
        struct compute_memory_item *new_item = NULL, *last_item = NULL;

        COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
                size_in_dw, 4 * size_in_dw);

        new_item = (struct compute_memory_item *)
                CALLOC(sizeof(struct compute_memory_item), 1);
        if (new_item == NULL)
                return NULL;

        new_item->size_in_dw = size_in_dw;
        new_item->start_in_dw = -1; /* mark pending */
        new_item->id = pool->next_id++;
        new_item->pool = pool;
        new_item->real_buffer = (struct r600_resource*)r600_compute_buffer_alloc_vram(
                pool->screen, size_in_dw * 4);

        if (pool->unallocated_list) {
                for (last_item = pool->unallocated_list; last_item->next;
                        last_item = last_item->next);

                last_item->next = new_item;
                new_item->prev = last_item;
        }
        else {
                pool->unallocated_list = new_item;
        }

        COMPUTE_DBG(pool->screen, "  + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
                new_item, new_item->id, new_item->size_in_dw,
                new_item->size_in_dw * 4);
        return new_item;
}

/**
 * Transfers data between the host and the device; offset and size are
 * given in bytes.
 */
void compute_memory_transfer(
        struct compute_memory_pool* pool,
        struct pipe_context * pipe,
        int device_to_host,
        struct compute_memory_item* chunk,
        void* data,
        int offset_in_chunk,
        int size)
{
        int64_t aligned_size = pool->size_in_dw;
        struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
        int64_t internal_offset = chunk->start_in_dw * 4 + offset_in_chunk;

        struct pipe_transfer *xfer;
        /* internal_offset is in bytes, so address the mapping bytewise */
        char *map;

        assert(gart);

        COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
                "offset_in_chunk = %d, size = %d\n", device_to_host,
                offset_in_chunk, size);

        if (device_to_host) {
                map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
                        &(struct pipe_box) { .width = aligned_size * 4,
                        .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
                assert(map);
                memcpy(data, map + internal_offset, size);
                pipe->transfer_unmap(pipe, xfer);
        } else {
                map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
                        &(struct pipe_box) { .width = aligned_size * 4,
                        .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
                assert(map);
                memcpy(map + internal_offset, data, size);
                pipe->transfer_unmap(pipe, xfer);
        }
}

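/* Illustrative use of compute_memory_transfer (not compiled): uploading
 * 256 bytes of host data into an already-promoted chunk and reading them
 * back. 'pipe' and 'data' are assumed to be a valid pipe_context and a
 * 256-byte host buffer:
 *
 *   compute_memory_transfer(pool, pipe, 0, item, data, 0, 256); // host -> device
 *   compute_memory_transfer(pool, pipe, 1, item, data, 0, 256); // device -> host
 */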
/**
 * Transfers data between a chunk and an r600_resource directly; intended
 * for VRAM<->GART transfers. Not implemented yet.
 */
void compute_memory_transfer_direct(
        struct compute_memory_pool* pool,
        int chunk_to_data,
        struct compute_memory_item* chunk,
        struct r600_resource* data,
        int offset_in_chunk,
        int offset_in_data,
        int size)
{
        ///TODO: DMA
}