r600g/compute: Tidy up compute_memory_finalize_pending a bit
[mesa.git] src/gallium/drivers/r600/compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}

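/*
 * Rough usage sketch of this pool, using only the helpers defined in this
 * file.  The screen, pipe context and data buffer ("rscreen", "pipe",
 * "data") are placeholders assumed to come from the caller, e.g. the
 * evergreen compute code:
 *
 *   struct compute_memory_pool *pool = compute_memory_pool_new(rscreen);
 *   struct compute_memory_item *item = compute_memory_alloc(pool, size_in_dw);
 *   compute_memory_finalize_pending(pool, pipe); // gives item a start_in_dw
 *   compute_memory_transfer(pool, pipe, 0, item, data, 0, size_in_dw * 4);
 *   ...
 *   compute_memory_free(pool, item->id);
 *   compute_memory_pool_delete(pool);
 */
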
static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{

	COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);
67
68 pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
69 if (pool->shadow == NULL)
70 return;
71
72 pool->next_id = 1;
73 pool->size_in_dw = initial_size_in_dw;
74 pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
75 pool->size_in_dw * 4);
76 }
77
/**
 * Frees everything in the pool, as well as the pool struct itself
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->b.b.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for empty space in the pool. Returns the offset (in dwords) of
 * allocatable space in the pool, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *item;

	int last_end = 0;

	assert(size_in_dw <= pool->size_in_dw);

	COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			if (item->start_in_dw - last_end > size_in_dw) {
				return last_end;
			}

			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}

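/*
 * Worked example of the first-fit search above (illustrative numbers only):
 * with one allocated item at offset 0 of size 50 dw and another at offset
 * 2048 dw, a request for 100 dw first advances last_end to 50, rounds it up
 * to the next 1024-dword boundary (1024), sees that 2048 - 1024 > 100 and
 * returns 1024.  Candidate offsets therefore always start on 1024-dword
 * boundaries after an existing item.
 */
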
/**
 * Searches for the chunk after which the new chunk can be linked.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
		return NULL;
	}

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}

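/*
 * Note: allocated items (start_in_dw >= 0) are kept in item_list in
 * ascending offset order; newly created items are appended as pending
 * (start_in_dw == -1) and only receive an offset in
 * compute_memory_finalize_pending().  Both compute_memory_prealloc_chunk()
 * and the insertion-point search above rely on that ordering.
 */
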
/**
 * Reallocates the pool, preserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
		"new_size_in_dw = %d (%d bytes)\n",
		new_size_in_dw, new_size_in_dw * 4);

	assert(new_size_in_dw >= pool->size_in_dw);

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
		if (pool->shadow == NULL)
			return -1;
	} else {
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG(pool->screen, " Aligned size = %d (%d bytes)\n",
			new_size_in_dw, new_size_in_dw * 4);

		compute_memory_shadow(pool, pipe, 1);
		/* Keep the old shadow if realloc fails, so the pool stays usable. */
		uint32_t *new_shadow = realloc(pool->shadow, new_size_in_dw * 4);
		if (new_shadow == NULL)
			return -1;
		pool->shadow = new_shadow;

		pool->size_in_dw = new_size_in_dw;
		pool->screen->b.b.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
			pool->screen,
			pool->size_in_dw * 4);
		compute_memory_shadow(pool, pipe, 0);
	}

	return 0;
}

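/*
 * How the grow works: the pool is a single VRAM buffer, so growing it means
 * copying the whole pool to the host-side shadow (compute_memory_shadow(..., 1)),
 * freeing the old buffer, allocating a bigger one and copying the shadow back
 * (compute_memory_shadow(..., 0)).  The new size is rounded up to a
 * 1024-dword multiple, e.g. a request for 5000 dw becomes
 * 5000 + (1024 - 5000 % 1024) = 5120 dw.
 */
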
/**
 * Copies the pool from the device to the host, or vice versa.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates space in the pool for all pending items.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	int64_t start_in_dw = 0;

	int err = 0;

	COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG(pool->screen, " + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

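	/*
	 * Overview of what follows: pending items (start_in_dw == -1) are
	 * unlinked from item_list onto a temporary pending_list, the pool is
	 * grown if the combined size of allocated and pending items (plus
	 * 1024 dwords of padding per pending item) no longer fits, and each
	 * pending item is then placed back into item_list at the first offset
	 * compute_memory_prealloc_chunk() finds, growing the pool again when
	 * fragmentation prevents a fit.
	 */
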
	/* Search through the list of memory items in the pool */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		/* Check if the item is pending. */
		if (item->start_in_dw == -1) {
			/* It is pending, so add it to the pending_list... */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* ... and then remove it from the item list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			/* This sequence puts the item at the end of the pending list. */
			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* Update the amount of space we will need to allocate. */
			unallocated += item->size_in_dw + 1024;
		}
		else {
			/* The item is not pending, so update the amount of space
			 * that has already been allocated. */
			allocated += item->size_in_dw;
		}
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work.  Imagine this scenario:
	 *
	 * Offset Item Size
	 *    0    A    50
	 *  200    B    50
	 *  400    C    50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated + unallocated) {
		err = compute_memory_grow_pool(pool, pipe, allocated + unallocated);
		if (err == -1)
			return -1;
	}

	/* Loop through all the pending items, allocate space for them and
	 * add them back to the item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		/* Search for free space in the pool for this item. */
		while ((start_in_dw = compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
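			/*
			 * The item did not fit anywhere, so grow the pool: "need"
			 * is the item's size plus 2048 dwords of slack, minus the
			 * space that is nominally still free (size_in_dw -
			 * allocated).  If that comes out negative the item would
			 * fit if the free space were contiguous, so grow by 10%
			 * of the pool instead; either way the increment is rounded
			 * up to a 1024-dword multiple.
			 */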
			int64_t need = item->size_in_dw + 2048 -
						(pool->size_in_dw - allocated);

			if (need < 0) {
				need = pool->size_in_dw / 10;
			}

			need += 1024 - (need % 1024);

			err = compute_memory_grow_pool(pool,
					pipe,
					pool->size_in_dw + need);

			if (err == -1)
				return -1;
		}
		COMPUTE_DBG(pool->screen, " + Found space for Item %p id = %"PRIi64" "
			"start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			if (pos) {
				item->prev = pos;
				item->next = pos->next;
				pos->next = item;
				if (item->next) {
					item->next->prev = item;
				}
			} else {
				/* Add item to the front of the list */
				item->next = pool->item_list;
				item->prev = pool->item_list->prev;
				pool->item_list->prev = item;
				pool->item_list = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}

	return 0;
}


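/**
 * Unlinks and frees the item with the given id. The space it occupied
 * becomes available again to compute_memory_prealloc_chunk().
 */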
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %ld \n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a new item as a pending allocation; it receives an offset in the
 * pool later, in compute_memory_finalize_pending().
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item = NULL, *last_item = NULL;

	COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
		size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	if (new_item == NULL)
		return NULL;

	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
						last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;

	}
	else {
		pool->item_list = new_item;
	}
	COMPUTE_DBG(pool->screen, " + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
		new_item, new_item->id, new_item->size_in_dw,
		new_item->size_in_dw * 4);
	return new_item;
}

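/*
 * The returned pointer stays valid until compute_memory_free() is called
 * with the item's id; compute_memory_finalize_pending() only relinks items,
 * it never frees them.
 */
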
/**
 * Transfers data between host and device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host) {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
	} else {
		map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size * 4,
			.height = 1, .depth = 1 }, &xfer);
		assert(xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
	}
}

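/*
 * Note that both directions above map the entire pool buffer
 * (width = aligned_size * 4 bytes) and copy through the CPU;
 * compute_memory_transfer_direct() below is the placeholder for a future
 * DMA path between VRAM and GART resources.
 */
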
/**
 * Transfers data between a chunk and a resource; intended for VRAM<->GART
 * transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}