r600g: Make sure to init the compute memory pool with enough memory
[mesa.git] / src / gallium / drivers / r600 / compute_memory_pool.c
/*
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Adam Rak <adam.rak@streamnovation.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "util/u_blitter.h"
#include "util/u_double_list.h"
#include "util/u_transfer.h"
#include "util/u_surface.h"
#include "util/u_pack_color.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_framebuffer.h"
#include "r600.h"
#include "r600_resource.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "r600_formats.h"
#include "compute_memory_pool.h"
#include "evergreen_compute_internal.h"
#include <inttypes.h>

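/**
 * Creates the 1D R32_UINT texture that backs the pool and marks it as a RAT.
 * size_in_dw is the pool size in dwords (4-byte units).
 */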
static struct r600_texture * create_pool_texture(struct r600_screen * screen,
		unsigned size_in_dw)
{

	struct pipe_resource templ;
	struct r600_texture * tex;

	if (size_in_dw == 0) {
		return NULL;
	}
	memset(&templ, 0, sizeof(templ));
	templ.target = PIPE_TEXTURE_1D;
	templ.format = PIPE_FORMAT_R32_UINT;
	templ.bind = PIPE_BIND_CUSTOM;
	templ.usage = PIPE_USAGE_IMMUTABLE;
	templ.flags = 0;
	templ.width0 = size_in_dw;
	templ.height0 = 1;
	templ.depth0 = 1;
	templ.array_size = 1;

	tex = (struct r600_texture *)r600_texture_create(
						&screen->screen, &templ);
	/* XXX: Propagate this error */
	assert(tex && "Out of memory");
	tex->is_rat = 1;
	return tex;
}

/**
 * Creates a new pool
 */
struct compute_memory_pool* compute_memory_pool_new(
	struct r600_screen * rscreen)
{
	struct compute_memory_pool* pool = (struct compute_memory_pool*)
				CALLOC(sizeof(struct compute_memory_pool), 1);

	COMPUTE_DBG("* compute_memory_pool_new()\n");

	pool->screen = rscreen;
	return pool;
}

static void compute_memory_pool_init(struct compute_memory_pool * pool,
	unsigned initial_size_in_dw)
{

	COMPUTE_DBG("* compute_memory_pool_init() initial_size_in_dw = %u\n",
		initial_size_in_dw);

	/* XXX: pool->shadow is used when the buffer needs to be resized, but
	 * resizing does not work at the moment.
	 * pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw);
	 */
	pool->next_id = 1;
	pool->size_in_dw = initial_size_in_dw;
	pool->bo = (struct r600_resource*)create_pool_texture(pool->screen,
							pool->size_in_dw);
}

/**
 * Frees all stuff in the pool and the pool struct itself too
 */
void compute_memory_pool_delete(struct compute_memory_pool* pool)
{
	COMPUTE_DBG("* compute_memory_pool_delete()\n");
	free(pool->shadow);
	if (pool->bo) {
		pool->screen->screen.resource_destroy((struct pipe_screen *)
			pool->screen, (struct pipe_resource *)pool->bo);
	}
	free(pool);
}

/**
 * Searches for an empty space in the pool. Returns the offset (in dwords) of
 * the allocatable space on success, or -1 on failure.
 */
int64_t compute_memory_prealloc_chunk(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	assert(size_in_dw <= pool->size_in_dw);

	struct compute_memory_item *item;

	int last_end = 0;

	COMPUTE_DBG("* compute_memory_prealloc_chunk() size_in_dw = %"PRIi64"\n",
		size_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->start_in_dw > -1) {
			if (item->start_in_dw-last_end > size_in_dw) {
				return last_end;
			}

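			/* Skip past this item and pad the next candidate
			 * offset up to the next 1024-dword boundary. */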
			last_end = item->start_in_dw + item->size_in_dw;
			last_end += (1024 - last_end % 1024);
		}
	}

	if (pool->size_in_dw - last_end < size_in_dw) {
		return -1;
	}

	return last_end;
}

/**
 * Searches for the chunk after which our new chunk can be linked.
 */
struct compute_memory_item* compute_memory_postalloc_chunk(
	struct compute_memory_pool* pool,
	int64_t start_in_dw)
{
	struct compute_memory_item* item;

	COMPUTE_DBG("* compute_memory_postalloc_chunk() start_in_dw = %"PRIi64"\n",
		start_in_dw);

	for (item = pool->item_list; item; item = item->next) {
		if (item->next) {
			if (item->start_in_dw < start_in_dw
				&& item->next->start_in_dw > start_in_dw) {
				return item;
			}
		}
		else {
			/* end of chain */
			assert(item->start_in_dw < start_in_dw);
			return item;
		}
	}

	assert(0 && "unreachable");
	return NULL;
}

/**
 * Reallocates the pool, conserving its data
 */
void compute_memory_grow_pool(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int new_size_in_dw)
{
	COMPUTE_DBG("* compute_memory_grow_pool() new_size_in_dw = %d\n",
		new_size_in_dw);

	assert(new_size_in_dw >= pool->size_in_dw);

	assert(!pool->bo && "Growing the global memory pool is not yet "
		"supported. You will see this message if you are trying to "
		"use more than 64 kb of memory");

	if (!pool->bo) {
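		/* First allocation: initialize the pool with at least
		 * 16K dwords (64 kb), since growing an existing pool is
		 * not yet supported. */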
		compute_memory_pool_init(pool, MAX2(new_size_in_dw, 1024 * 16));
	} else {
		/* XXX: Growing memory pools does not work at the moment. I think
		 * it is because we are using fragment shaders to copy data to
		 * the new texture and some of the compute registers are being
		 * included in the 3D command stream. */
		fprintf(stderr, "Warning: growing the global memory pool to "
			"more than 64 kb is not yet supported\n");
		new_size_in_dw += 1024 - (new_size_in_dw % 1024);

		COMPUTE_DBG(" Aligned size = %d\n", new_size_in_dw);

		compute_memory_shadow(pool, pipe, 1);
		pool->shadow = realloc(pool->shadow, new_size_in_dw*4);
		pool->size_in_dw = new_size_in_dw;
		pool->screen->screen.resource_destroy(
			(struct pipe_screen *)pool->screen,
			(struct pipe_resource *)pool->bo);
		pool->bo = (struct r600_resource*)create_pool_texture(
							pool->screen,
							pool->size_in_dw);
		compute_memory_shadow(pool, pipe, 0);
	}
}

/**
 * Copy pool from device to host, or host to device.
 */
void compute_memory_shadow(struct compute_memory_pool* pool,
	struct pipe_context * pipe, int device_to_host)
{
	struct compute_memory_item chunk;

	COMPUTE_DBG("* compute_memory_shadow() device_to_host = %d\n",
		device_to_host);

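	/* Build a temporary chunk that spans the whole pool, so the copy
	 * can be done through compute_memory_transfer(). */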
	chunk.id = 0;
	chunk.start_in_dw = 0;
	chunk.size_in_dw = pool->size_in_dw;
	chunk.prev = chunk.next = NULL;
	compute_memory_transfer(pool, pipe, device_to_host, &chunk,
				pool->shadow, 0, pool->size_in_dw*4);
}

/**
 * Allocates space in the pool for all pending items.
 */
void compute_memory_finalize_pending(struct compute_memory_pool* pool,
	struct pipe_context * pipe)
{
	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
	struct compute_memory_item *item, *next;

	int64_t allocated = 0;
	int64_t unallocated = 0;

	COMPUTE_DBG("* compute_memory_finalize_pending()\n");

	for (item = pool->item_list; item; item = item->next) {
		COMPUTE_DBG(" + list: offset = %"PRIi64" id = %"PRIi64" size = %"PRIi64" "
			"(%"PRIi64" bytes)\n", item->start_in_dw, item->id,
			item->size_in_dw, item->size_in_dw * 4);
	}

	/* Search through the list of memory items in the pool */
	for (item = pool->item_list; item; item = next) {
		next = item->next;

		/* Check if the item is pending. */
		if (item->start_in_dw == -1) {
			/* It is pending, so add it to the pending_list... */
			if (end_p) {
				end_p->next = item;
			}
			else {
				pending_list = item;
			}

			/* ... and then remove it from the item list. */
			if (item->prev) {
				item->prev->next = next;
			}
			else {
				pool->item_list = next;
			}

			if (next) {
				next->prev = item->prev;
			}

			/* This sequence makes the item the last element of the
			 * pending list. */
			item->prev = end_p;
			item->next = NULL;
			end_p = item;

			/* Update the amount of space we will need to allocate. */
			unallocated += item->size_in_dw+1024;
		}
		else {
			/* The item is not pending, so update the amount of space
			 * that has already been allocated. */
			allocated += item->size_in_dw;
		}
	}

	/* If we require more space than the size of the pool, then grow the
	 * pool.
	 *
	 * XXX: I'm pretty sure this won't work. Imagine this scenario:
	 *
	 * Offset Item Size
	 *   0    A    50
	 * 200    B    50
	 * 400    C    50
	 *
	 * Total size = 450
	 * Allocated size = 150
	 * Pending Item D Size = 200
	 *
	 * In this case, there are 300 units of free space in the pool, but
	 * they aren't contiguous, so it will be impossible to allocate Item D.
	 */
	if (pool->size_in_dw < allocated+unallocated) {
		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
	}

	/* Loop through all the pending items, allocate space for them and
	 * add them back to the item_list. */
	for (item = pending_list; item; item = next) {
		next = item->next;

		int64_t start_in_dw;

		/* Search for free space in the pool for this item. */
		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
						item->size_in_dw)) == -1) {
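			/* Not enough contiguous space: grow the pool by at
			 * least the item size plus some slack, rounded up to
			 * a multiple of 1024 dwords. */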
			int64_t need = item->size_in_dw+2048 -
						(pool->size_in_dw - allocated);

			need += 1024 - (need % 1024);

			if (need > 0) {
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
			else {
				need = pool->size_in_dw / 10;
				need += 1024 - (need % 1024);
				compute_memory_grow_pool(pool,
						pipe,
						pool->size_in_dw + need);
			}
		}
		COMPUTE_DBG(" + Found space for Item %p id = %"PRIi64" "
			"start_in_dw = %"PRIi64" (%"PRIi64" bytes) size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
			item, item->id, start_in_dw, start_in_dw * 4,
			item->size_in_dw, item->size_in_dw * 4);

		item->start_in_dw = start_in_dw;
		item->next = NULL;
		item->prev = NULL;

		if (pool->item_list) {
			struct compute_memory_item *pos;

			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
			item->prev = pos;
			item->next = pos->next;
			pos->next = item;

			if (item->next) {
				item->next->prev = item;
			}
		}
		else {
			pool->item_list = item;
		}

		allocated += item->size_in_dw;
	}
}


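/**
 * Removes the item with the given id from the item list and frees it.
 */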
void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
{
	struct compute_memory_item *item, *next;

	COMPUTE_DBG("* compute_memory_free() id = %"PRIi64"\n", id);

	for (item = pool->item_list; item; item = next) {
		next = item->next;

		if (item->id == id) {
			if (item->prev) {
				item->prev->next = item->next;
			}
			else {
				pool->item_list = item->next;
			}

			if (item->next) {
				item->next->prev = item->prev;
			}

			free(item);

			return;
		}
	}

	fprintf(stderr, "Internal error, invalid id %"PRIi64" "
		"for compute_memory_free\n", id);

	assert(0 && "error");
}

/**
 * Creates a new pending allocation in the pool.
 */
struct compute_memory_item* compute_memory_alloc(
	struct compute_memory_pool* pool,
	int64_t size_in_dw)
{
	struct compute_memory_item *new_item;

	COMPUTE_DBG("* compute_memory_alloc() size_in_dw = %"PRIi64" (%"PRIi64" bytes)\n",
		size_in_dw, 4 * size_in_dw);

	new_item = (struct compute_memory_item *)
		CALLOC(sizeof(struct compute_memory_item), 1);
	new_item->size_in_dw = size_in_dw;
	new_item->start_in_dw = -1; /* mark pending */
	new_item->id = pool->next_id++;
	new_item->pool = pool;

	struct compute_memory_item *last_item;

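	/* Append the new (pending) item at the end of the item list. */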
	if (pool->item_list) {
		for (last_item = pool->item_list; last_item->next;
						last_item = last_item->next);

		last_item->next = new_item;
		new_item->prev = last_item;
	}
	else {
		pool->item_list = new_item;
	}

	COMPUTE_DBG(" + Adding item %p id = %"PRIi64" size = %"PRIi64" (%"PRIi64" bytes)\n",
		new_item, new_item->id, new_item->size_in_dw,
		new_item->size_in_dw * 4);
	return new_item;
}

/**
 * Transfers data host<->device; offset and size are in bytes.
 */
void compute_memory_transfer(
	struct compute_memory_pool* pool,
	struct pipe_context * pipe,
	int device_to_host,
	struct compute_memory_item* chunk,
	void* data,
	int offset_in_chunk,
	int size)
{
	int64_t aligned_size = pool->size_in_dw;
	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk;

	struct pipe_transfer *xfer;
	uint32_t *map;

	assert(gart);

	COMPUTE_DBG("* compute_memory_transfer() device_to_host = %d, "
		"offset_in_chunk = %d, size = %d\n", device_to_host,
		offset_in_chunk, size);

	if (device_to_host)
	{
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy(data, map + internal_offset, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	} else {
		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
			&(struct pipe_box) { .width = aligned_size,
			.height = 1, .depth = 1 });
		assert(xfer);
		map = pipe->transfer_map(pipe, xfer);
		assert(map);
		memcpy(map + internal_offset, data, size);
		pipe->transfer_unmap(pipe, xfer);
		pipe->transfer_destroy(pipe, xfer);
	}
}

/**
 * Transfers data between a chunk and a resource; intended for VRAM<->GART
 * transfers.
 */
void compute_memory_transfer_direct(
	struct compute_memory_pool* pool,
	int chunk_to_data,
	struct compute_memory_item* chunk,
	struct r600_resource* data,
	int offset_in_chunk,
	int offset_in_data,
	int size)
{
	///TODO: DMA
}