r600g/compute: align items correctly
[mesa.git] src/gallium/drivers/r600/compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

/* Every item placed in the pool is aligned to this many dwords. */
#define ITEM_ALIGNMENT 1024

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
        struct r600_screen * rscreen)
{
        struct compute_memory_pool* pool = (struct compute_memory_pool*)
                CALLOC(sizeof(struct compute_memory_pool), 1);
        if (pool == NULL)
                return NULL;

        COMPUTE_DBG(rscreen, "* compute_memory_pool_new()\n");

        pool->screen = rscreen;
        return pool;
}

static void compute_memory_pool_init(struct compute_memory_pool * pool,
        unsigned initial_size_in_dw)
{

        COMPUTE_DBG(pool->screen, "* compute_memory_pool_init() initial_size_in_dw = %u\n",
                initial_size_in_dw);

        pool->shadow = (uint32_t*)CALLOC(initial_size_in_dw, 4);
        if (pool->shadow == NULL)
                return;

        pool->next_id = 1;
        pool->size_in_dw = initial_size_in_dw;
        pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
                        pool->size_in_dw * 4);
}

/**
 * Frees the pool's shadow copy, its backing buffer and the pool struct itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
        COMPUTE_DBG(pool->screen, "* compute_memory_pool_delete()\n");
        free(pool->shadow);
        if (pool->bo) {
                pool->screen->b.b.resource_destroy((struct pipe_screen *)
                        pool->screen, (struct pipe_resource *)pool->bo);
        }
        free(pool);
}

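/* Illustrative lifecycle sketch, not compiled into the driver (#if 0). It
 * assumes a valid "struct r600_screen *rscreen"; note that the pool's backing
 * buffer and shadow copy are allocated lazily, the first time the pool grows. */
#if 0
static void pool_lifecycle_example(struct r600_screen *rscreen)
{
        struct compute_memory_pool *pool = compute_memory_pool_new(rscreen);

        if (pool == NULL)
                return;

        /* ... compute_memory_alloc() / compute_memory_finalize_pending() ... */

        compute_memory_pool_delete(pool);
}
#endif
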
/**
 * Searches for an empty space in the pool. Returns the offset (in dwords) at
 * which a chunk of size_in_dw can be allocated, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
{
        struct compute_memory_item *item;

        int last_end = 0;

        assert(size_in_dw <= pool->size_in_dw);

        COMPUTE_DBG(pool->screen, "* compute_memory_prealloc_chunk() size_in_dw = %ld\n",
                size_in_dw);

        for (item = pool->item_list; item; item = item->next) {
                if (item->start_in_dw > -1) {
                        if (item->start_in_dw-last_end > size_in_dw) {
                                return last_end;
                        }

                        last_end = item->start_in_dw + align(item->size_in_dw, ITEM_ALIGNMENT);
                }
        }

        if (pool->size_in_dw - last_end < size_in_dw) {
                return -1;
        }

        return last_end;
}

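/* Illustrative sketch of the first-fit contract above, not compiled into the
 * driver (#if 0). With ITEM_ALIGNMENT = 1024, an allocated item of 100 dw
 * starting at offset 0 still reserves dwords 0..1023, so the next candidate
 * offset is 1024. A return value of -1 means no hole is big enough and the
 * caller has to grow the pool first. */
#if 0
static void prealloc_chunk_example(struct compute_memory_pool *pool)
{
        int64_t offset = compute_memory_prealloc_chunk(pool, 2048);

        if (offset == -1) {
                /* No contiguous space: grow the pool and retry, as
                 * compute_memory_finalize_pending() does. */
        }
}
#endif
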
/**
 * Searches for the chunk after which the new chunk can be linked.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
        struct compute_memory_pool* pool,
        int64_t start_in_dw)
{
        struct compute_memory_item* item;

        COMPUTE_DBG(pool->screen, "* compute_memory_postalloc_chunk() start_in_dw = %ld\n",
                start_in_dw);

        /* Check if we can insert it at the front of the list */
        if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
                return NULL;
        }

        for (item = pool->item_list; item; item = item->next) {
                if (item->next) {
                        if (item->start_in_dw < start_in_dw
                                && item->next->start_in_dw > start_in_dw) {
                                return item;
                        }
                }
                else {
                        /* end of chain */
                        assert(item->start_in_dw < start_in_dw);
                        return item;
                }
        }

        assert(0 && "unreachable");
        return NULL;
}

/**
 * Reallocates the pool, preserving its contents.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_grow_pool(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int new_size_in_dw)
{
        uint32_t *new_shadow;

        COMPUTE_DBG(pool->screen, "* compute_memory_grow_pool() "
                "new_size_in_dw = %d (%d bytes)\n",
                new_size_in_dw, new_size_in_dw * 4);

        assert(new_size_in_dw >= pool->size_in_dw);

        if (!pool->bo) {
                compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
                if (pool->shadow == NULL)
                        return -1;
        } else {
                new_size_in_dw = align(new_size_in_dw, ITEM_ALIGNMENT);

                COMPUTE_DBG(pool->screen, " Aligned size = %d (%d bytes)\n",
                        new_size_in_dw, new_size_in_dw * 4);

                compute_memory_shadow(pool, pipe, 1);
                /* Use a temporary so the old shadow is not lost if realloc fails. */
                new_shadow = realloc(pool->shadow, new_size_in_dw * 4);
                if (new_shadow == NULL)
                        return -1;
                pool->shadow = new_shadow;

                pool->size_in_dw = new_size_in_dw;
                pool->screen->b.b.resource_destroy(
                        (struct pipe_screen *)pool->screen,
                        (struct pipe_resource *)pool->bo);
                pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
                        pool->screen,
                        pool->size_in_dw * 4);
                compute_memory_shadow(pool, pipe, 0);
        }

        return 0;
}

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
        struct pipe_context * pipe, int device_to_host)
{
        struct compute_memory_item chunk;

        COMPUTE_DBG(pool->screen, "* compute_memory_shadow() device_to_host = %d\n",
                device_to_host);

        chunk.id = 0;
        chunk.start_in_dw = 0;
        chunk.size_in_dw = pool->size_in_dw;
        chunk.prev = chunk.next = NULL;
        compute_memory_transfer(pool, pipe, device_to_host, &chunk,
                                pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates the pending items in the pool.
 * @returns -1 if it fails, 0 otherwise
 */
int compute_memory_finalize_pending(struct compute_memory_pool* pool,
        struct pipe_context * pipe)
{
        struct compute_memory_item *pending_list = NULL, *end_p = NULL;
        struct compute_memory_item *item, *next;

        int64_t allocated = 0;
        int64_t unallocated = 0;

        int64_t start_in_dw = 0;

        int err = 0;

        COMPUTE_DBG(pool->screen, "* compute_memory_finalize_pending()\n");

        for (item = pool->item_list; item; item = item->next) {
                COMPUTE_DBG(pool->screen, " + list: offset = %" PRIi64 " id = %" PRIi64 " "
                        "size = %" PRIi64 " (%" PRIi64 " bytes)\n",
                        item->start_in_dw, item->id,
                        item->size_in_dw, item->size_in_dw * 4);
        }

        /* Search through the list of memory items in the pool */
        for (item = pool->item_list; item; item = next) {
                next = item->next;

                /* Check if the item is pending. */
                if (item->start_in_dw == -1) {
                        /* It is pending, so add it to the pending_list... */
                        if (end_p) {
                                end_p->next = item;
                        }
                        else {
                                pending_list = item;
                        }

                        /* ... and then remove it from the item list. */
                        if (item->prev) {
                                item->prev->next = next;
                        }
                        else {
                                pool->item_list = next;
                        }

                        if (next) {
                                next->prev = item->prev;
                        }

                        /* This sequence places the item at the end of the
                         * pending list. */
                        item->prev = end_p;
                        item->next = NULL;
                        end_p = item;

                        /* Update the amount of space we will need to allocate. */
                        unallocated += item->size_in_dw+1024;
                }
                else {
                        /* The item is not pending, so update the amount of space
                         * that has already been allocated. */
                        allocated += item->size_in_dw;
                }
        }

        /* If we require more space than the size of the pool, then grow the
         * pool.
         *
         * XXX: I'm pretty sure this won't work. Imagine this scenario:
         *
         * Offset   Item   Size
         *   0       A      50
         * 200       B      50
         * 400       C      50
         *
         * Total size = 450
         * Allocated size = 150
         * Pending Item D Size = 200
         *
         * In this case, there are 300 units of free space in the pool, but
         * they aren't contiguous, so it will be impossible to allocate Item D.
         */
        if (pool->size_in_dw < allocated+unallocated) {
                err = compute_memory_grow_pool(pool, pipe, allocated+unallocated);
                if (err == -1)
                        return -1;
        }

        /* Loop through all the pending items, allocate space for them and
         * add them back to the item_list. */
        for (item = pending_list; item; item = next) {
                next = item->next;

                /* Search for free space in the pool for this item. */
                while ((start_in_dw=compute_memory_prealloc_chunk(pool,
                                item->size_in_dw)) == -1) {
                        int64_t need = item->size_in_dw+2048 -
                                (pool->size_in_dw - allocated);

                        if (need < 0) {
                                need = pool->size_in_dw / 10;
                        }

                        need = align(need, ITEM_ALIGNMENT);

                        err = compute_memory_grow_pool(pool,
                                        pipe,
                                        pool->size_in_dw + need);

                        if (err == -1)
                                return -1;
                }
                COMPUTE_DBG(pool->screen, " + Found space for Item %p id = %" PRIi64 " "
                        "start_in_dw = %" PRIi64 " (%" PRIi64 " bytes) "
                        "size_in_dw = %" PRIi64 " (%" PRIi64 " bytes)\n",
                        item, item->id, start_in_dw, start_in_dw * 4,
                        item->size_in_dw, item->size_in_dw * 4);

                item->start_in_dw = start_in_dw;
                item->next = NULL;
                item->prev = NULL;

                if (pool->item_list) {
                        struct compute_memory_item *pos;

                        pos = compute_memory_postalloc_chunk(pool, start_in_dw);
                        if (pos) {
                                item->prev = pos;
                                item->next = pos->next;
                                pos->next = item;
                                if (item->next) {
                                        item->next->prev = item;
                                }
                        } else {
                                /* Add item to the front of the list */
                                item->next = pool->item_list;
                                item->prev = pool->item_list->prev;
                                pool->item_list->prev = item;
                                pool->item_list = item;
                        }
                }
                else {
                        pool->item_list = item;
                }

                allocated += item->size_in_dw;
        }

        return 0;
}

/**
 * Removes the item with the given id from the item list and frees it.
 */
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
        struct compute_memory_item *item, *next;

        COMPUTE_DBG(pool->screen, "* compute_memory_free() id = %" PRIi64 "\n", id);

        for (item = pool->item_list; item; item = next) {
                next = item->next;

                if (item->id == id) {
                        if (item->prev) {
                                item->prev->next = item->next;
                        }
                        else {
                                pool->item_list = item->next;
                        }

                        if (item->next) {
                                item->next->prev = item->prev;
                        }

                        free(item);

                        return;
                }
        }

        fprintf(stderr, "Internal error, invalid id %"PRIi64" "
                "for compute_memory_free\n", id);

        assert(0 && "error");
}

/**
 * Creates a new pending allocation in the pool.
 */
struct compute_memory_item* compute_memory_alloc(
        struct compute_memory_pool* pool,
        int64_t size_in_dw)
{
        struct compute_memory_item *new_item = NULL, *last_item = NULL;

        COMPUTE_DBG(pool->screen, "* compute_memory_alloc() size_in_dw = %ld (%ld bytes)\n",
                size_in_dw, 4 * size_in_dw);

        new_item = (struct compute_memory_item *)
                CALLOC(sizeof(struct compute_memory_item), 1);
        if (new_item == NULL)
                return NULL;

        new_item->size_in_dw = size_in_dw;
        new_item->start_in_dw = -1; /* mark pending */
        new_item->id = pool->next_id++;
        new_item->pool = pool;

        if (pool->item_list) {
                for (last_item = pool->item_list; last_item->next;
                        last_item = last_item->next);

                last_item->next = new_item;
                new_item->prev = last_item;
        }
        else {
                pool->item_list = new_item;
        }

        COMPUTE_DBG(pool->screen, " + Adding item %p id = %" PRIi64 " size = %" PRIi64 " (%" PRIi64 " bytes)\n",
                new_item, new_item->id, new_item->size_in_dw,
                new_item->size_in_dw * 4);
        return new_item;
}

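/* Illustrative usage sketch, not compiled into the driver (#if 0). It assumes
 * a valid "struct pipe_context *ctx". A freshly allocated item is only pending
 * (start_in_dw == -1) until compute_memory_finalize_pending() places it in
 * the pool. */
#if 0
static void alloc_item_example(struct compute_memory_pool *pool,
        struct pipe_context *ctx)
{
        struct compute_memory_item *item = compute_memory_alloc(pool, 4096);

        if (item == NULL)
                return;

        /* Give every pending item an offset inside the pool. */
        if (compute_memory_finalize_pending(pool, ctx) == -1)
                return;

        /* Items are released by id, not by pointer. */
        compute_memory_free(pool, item->id);
}
#endif
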
/**
 * Transfers data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(
        struct compute_memory_pool* pool,
        struct pipe_context * pipe,
        int device_to_host,
        struct compute_memory_item* chunk,
        void* data,
        int offset_in_chunk,
        int size)
{
        int64_t aligned_size = pool->size_in_dw;
        struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
        /* internal_offset is in bytes: start_in_dw is converted from dwords. */
        int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

        struct pipe_transfer *xfer;
        uint32_t *map;

        assert(gart);

        COMPUTE_DBG(pool->screen, "* compute_memory_transfer() device_to_host = %d, "
                "offset_in_chunk = %d, size = %d\n", device_to_host,
                offset_in_chunk, size);

        if (device_to_host) {
                map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_READ,
                        &(struct pipe_box) { .width = aligned_size * 4,
                        .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
                assert(map);
                memcpy(data, (char *)map + internal_offset, size);
                pipe->transfer_unmap(pipe, xfer);
        } else {
                map = pipe->transfer_map(pipe, gart, 0, PIPE_TRANSFER_WRITE,
                        &(struct pipe_box) { .width = aligned_size * 4,
                        .height = 1, .depth = 1 }, &xfer);
                assert(xfer);
                assert(map);
                memcpy((char *)map + internal_offset, data, size);
                pipe->transfer_unmap(pipe, xfer);
        }
}

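/* Illustrative read-back sketch, not compiled into the driver (#if 0). It
 * assumes "item" has already been finalized (so it has a valid offset in the
 * pool) and that "ctx" is a valid pipe_context; offsets and sizes are in
 * bytes. */
#if 0
static void transfer_readback_example(struct compute_memory_pool *pool,
        struct pipe_context *ctx,
        struct compute_memory_item *item)
{
        uint32_t data[16];

        /* device_to_host = 1: copy the first 64 bytes of the item from the
         * pool's buffer into "data". */
        compute_memory_transfer(pool, ctx, 1, item, data, 0, sizeof(data));
}
#endif
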
/**
 * Transfers data between a chunk and a resource; meant for VRAM<->GART transfers.
 */
void compute_memory_transfer_direct(
        struct compute_memory_pool* pool,
        int chunk_to_data,
        struct compute_memory_item* chunk,
        struct r600_resource* data,
        int offset_in_chunk,
        int offset_in_data,
        int size)
{
        ///TODO: DMA
}