[mesa.git] / src / gallium / drivers / r600 / compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);
	/* Bail out early if the host allocation failed. */
	if (pool == NULL)
		return NULL;

	COMPUTE_DBG("* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}
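
/* Note: compute_memory_pool_new() does not create the GPU buffer itself;
 * pool->bo stays NULL until compute_memory_grow_pool() calls
 * compute_memory_pool_init() on first use. */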

static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{

	COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	/* XXX: pool->shadow is used when the buffer needs to be resized, but
	 * resizing does not work at the moment.
	 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
	 */
	pool->next_id = 1;
	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(pool->screen,
						pool->size_in_dw * 4);
}
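
/* All sizes and offsets in this pool are counted in dwords (4-byte units),
 * so the underlying buffer object is size_in_dw * 4 bytes long. */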

/**
 * Frees the memory pool's contents and the pool structure itself.
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG("* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->screen.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for a gap of free space in the pool that is large enough for the
 * request. Returns the offset of the gap in dwords, or -1 if no suitable
 * gap exists.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	assert(size_in_dw <= pool->size_in_dw);

	struct compute_memory_item *item;

	int last_end = 0;

	COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			if (item->start_in_dw-last_end > size_in_dw) {
				return last_end;
			}

			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}
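
/* For instance, with allocated items occupying [0, 50) and [2048, 2100) in
 * dwords, a request for 100 dwords scans past the first item, rounds its end
 * (50) up to the next 1024-dword boundary, finds that 2048 - 1024 > 100, and
 * returns offset 1024. */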

/**
 * Searches for the item after which the newly allocated chunk should be
 * linked. Returns NULL when the new chunk belongs at the front of the list.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG("* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
		start_in_dw);

	/* Check if we can insert it in the front of the list */
	if (pool->item_list && pool->item_list->start_in_dw > start_in_dw) {
		return NULL;
	}

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}

/**
 * Reallocates the pool, preserving its contents.
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
		new_size_in_dw);

	assert(new_size_in_dw >= pool->size_in_dw);

	assert(!pool->bo && "Growing the global memory pool is not yet "
		"supported. You will see this message if you are trying to "
		"use more than 64 kb of memory");

	if (!pool->bo) {
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		/* XXX: Growing memory pools does not work at the moment. I think
		 * it is because we are using fragment shaders to copy data to
		 * the new texture and some of the compute registers are being
		 * included in the 3D command stream. */
		fprintf(stderr, "Warning: growing the global memory pool to "
			"more than 64 kb is not yet supported\n");
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG(" Aligned size = %d\n", new_size_in_dw);

		compute_memory_shadow(pool, pipe, 1);
		pool->shadow = realloc(pool->shadow, new_size_in_dw*4);
		pool->size_in_dw = new_size_in_dw;
		pool->screen->screen.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
			pool->screen,
			pool->size_in_dw * 4);
		compute_memory_shadow(pool, pipe, 0);
	}
}
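
/* The (currently unreachable) grow path round-trips the pool through the
 * host: the old buffer is copied into pool->shadow, the shadow is enlarged,
 * the buffer object is destroyed and reallocated at the new size, and the
 * shadow is copied back into the new buffer. */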

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates space in the pool for every pending item and links the items
 * back into the pool's item list.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	COMPUTE_DBG("* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG(" + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}
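
	/* First pass: pull every pending item (start_in_dw == -1) out of
	 * item_list and append it to pending_list, while adding up how much
	 * space the already-placed items use and how much the pending ones
	 * will need. */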

	/* Search through the list of memory items in the pool */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		/* Check if the item is pending. */
		if (item->start_in_dw == -1) {
			/* It is pending, so add it to the pending_list... */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* ... and then remove it from the item list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			/* This sequence makes the item be at the end of the list */
			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* Update the amount of space we will need to allocate. */
			unallocated += item->size_in_dw+1024;
		}
		else {
			/* The item is not pending, so update the amount of space
			 * that has already been allocated. */
			allocated += item->size_in_dw;
		}
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work. Imagine this scenario:
	 *
	 * Offset Item Size
	 *   0    A    50
	 * 200    B    50
	 * 400    C    50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated+unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
	}

	/* Loop through all the pending items, allocate space for them and
	 * add them back to the item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		int64_t start_in_dw;

		/* Search for free space in the pool for this item. */
		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
			int64_t need = item->size_in_dw+2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
			else {
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}
		COMPUTE_DBG(" + Found space for Item %p id = %"PRIi64" "
			"start_in_dw = %"PRIi64" (%"PRIi64" bytes) "
			"size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			if (pos) {
				item->prev = pos;
				item->next = pos->next;
				pos->next = item;
				if (item->next) {
					item->next->prev = item;
				}
			} else {
				/* Add item to the front of the list: link the
				 * old head after the new item and make the new
				 * item the head. */
				item->next = pool->item_list;
				pool->item_list->prev = item;
				pool->item_list = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}


void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG("* compute_memory_free() id = %"PRIi64"\n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a pending allocation: the new item is added to the pool's list, but
 * space for it is only assigned later by compute_memory_finalize_pending().
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item;

	COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	struct compute_memory_item *last_item;

	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
			last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;
	}
	else {
		pool->item_list = new_item;
	}

	COMPUTE_DBG(" + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
			new_item, new_item->id, new_item->size_in_dw,
			new_item->size_in_dw * 4);
	return new_item;
}
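
/* Sketch of the intended calling pattern (the caller-side names are
 * hypothetical):
 *
 *     struct compute_memory_item *item =
 *             compute_memory_alloc(pool, size_in_bytes / 4);
 *     ...
 *     compute_memory_finalize_pending(pool, pipe);
 *
 * Until finalize runs, item->start_in_dw stays at -1 (pending); afterwards
 * it holds the item's offset in the pool, in dwords. */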

/**
 * Transfers data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host)
	{
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	} else {
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	}
}
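
/* Note: both directions map the pool's buffer object through the
 * get_transfer/transfer_map interface (the box width comes from
 * pool->size_in_dw), copy `size` bytes at the chunk's offset with memcpy,
 * and then unmap and destroy the transfer. */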

/**
 * Transfers data between a chunk and a resource; meant for VRAM<->GART
 * transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}