src/gallium/drivers/vc4/vc4_bufmgr.c
/*
 * Copyright © 2014-2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

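/**
 * @file vc4_bufmgr.c
 *
 * Buffer object (BO) management for the VC4 driver: allocation through the
 * kernel's CREATE_BO/CREATE_SHADER_BO ioctls, CPU mapping, sharing through
 * flink names and dmabuf fds, and fencing through the WAIT_SEQNO/WAIT_BO
 * ioctls.
 *
 * Freed private BOs are kept in a per-screen cache, bucketed by size
 * (size_list) and ordered by free time (time_list), so that later
 * allocations of the same size can reuse an idle BO instead of going back
 * to the kernel.  Stale cache entries are trimmed as new BOs are freed.
 */
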
#include <errno.h>
#include <err.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/ralloc.h"

#include "vc4_context.h"
#include "vc4_screen.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

static bool dump_stats = false;

static void
vc4_bo_cache_free_all(struct vc4_bo_cache *cache);

static void
vc4_bo_dump_stats(struct vc4_screen *screen)
{
        struct vc4_bo_cache *cache = &screen->bo_cache;

        fprintf(stderr, " BOs allocated: %d\n", screen->bo_count);
        fprintf(stderr, " BOs size: %dkb\n", screen->bo_size / 1024);
        fprintf(stderr, " BOs cached: %d\n", cache->bo_count);
        fprintf(stderr, " BOs cached size: %dkb\n", cache->bo_size / 1024);

        if (!list_empty(&cache->time_list)) {
                struct vc4_bo *first = LIST_ENTRY(struct vc4_bo,
                                                  cache->time_list.next,
                                                  time_list);
                struct vc4_bo *last = LIST_ENTRY(struct vc4_bo,
                                                 cache->time_list.prev,
                                                 time_list);

                fprintf(stderr, " oldest cache time: %ld\n",
                        (long)first->free_time);
                fprintf(stderr, " newest cache time: %ld\n",
                        (long)last->free_time);

                struct timespec time;
                clock_gettime(CLOCK_MONOTONIC, &time);
                fprintf(stderr, " now: %ld\n",
                        time.tv_sec);
        }
}

static void
vc4_bo_remove_from_cache(struct vc4_bo_cache *cache, struct vc4_bo *bo)
{
        list_del(&bo->time_list);
        list_del(&bo->size_list);
        cache->bo_count--;
        cache->bo_size -= bo->size;
}

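/**
 * Tries to satisfy an allocation of @size (which must already be page
 * aligned) from the screen's BO cache, returning NULL if no idle BO of the
 * matching size bucket is available.
 */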
static struct vc4_bo *
vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
{
        struct vc4_bo_cache *cache = &screen->bo_cache;
        uint32_t page_index = size / 4096 - 1;

        if (cache->size_list_size <= page_index)
                return NULL;

        struct vc4_bo *bo = NULL;
        pipe_mutex_lock(cache->lock);
        if (!list_empty(&cache->size_list[page_index])) {
                bo = LIST_ENTRY(struct vc4_bo, cache->size_list[page_index].next,
                                size_list);

                /* Check that the BO has gone idle. If not, then we want to
                 * allocate something new instead, since we assume that the
                 * user will proceed to CPU map it and fill it with stuff.
                 */
                if (!vc4_bo_wait(bo, 0, NULL)) {
                        pipe_mutex_unlock(cache->lock);
                        return NULL;
                }

                pipe_reference_init(&bo->reference, 1);
                vc4_bo_remove_from_cache(cache, bo);

                bo->name = name;
        }
        pipe_mutex_unlock(cache->lock);
        return bo;
}

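/**
 * Allocates a BO of at least @size bytes (rounded up to a 4096-byte page),
 * reusing an idle cached BO when possible and otherwise asking the kernel
 * for a new one.  If the CREATE_BO ioctl fails, the cache is flushed once
 * and the allocation is retried before giving up.
 *
 * A typical caller looks roughly like this (sketch; assumes the
 * vc4_bo_unreference() release helper declared elsewhere in the driver):
 *
 *     struct vc4_bo *bo = vc4_bo_alloc(screen, len, "temp");
 *     void *map = vc4_bo_map(bo);
 *     memcpy(map, data, len);
 *     vc4_bo_unreference(&bo);
 */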
struct vc4_bo *
vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
{
        struct vc4_bo *bo;
        int ret;

        size = align(size, 4096);

        bo = vc4_bo_from_cache(screen, size, name);
        if (bo) {
                if (dump_stats) {
                        fprintf(stderr, "Allocated %s %dkb from cache:\n",
                                name, size / 1024);
                        vc4_bo_dump_stats(screen);
                }
                return bo;
        }

        bo = CALLOC_STRUCT(vc4_bo);
        if (!bo)
                return NULL;

        pipe_reference_init(&bo->reference, 1);
        bo->screen = screen;
        bo->size = size;
        bo->name = name;
        bo->private = true;

        bool cleared_and_retried = false;
retry:
        ;

        struct drm_vc4_create_bo create = {
                .size = size
        };

        ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_CREATE_BO, &create);
        bo->handle = create.handle;

        if (ret != 0) {
                if (!list_empty(&screen->bo_cache.time_list) &&
                    !cleared_and_retried) {
                        cleared_and_retried = true;
                        vc4_bo_cache_free_all(&screen->bo_cache);
                        goto retry;
                }

                free(bo);
                return NULL;
        }

        screen->bo_count++;
        screen->bo_size += bo->size;
        if (dump_stats) {
                fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
                vc4_bo_dump_stats(screen);
        }

        return bo;
}

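/**
 * Called when the last reference to a BO is dropped; takes the cache lock
 * and hands the BO to vc4_bo_last_unreference_locked_timed() with the
 * current monotonic time.
 */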
void
vc4_bo_last_unreference(struct vc4_bo *bo)
{
        struct vc4_screen *screen = bo->screen;

        struct timespec time;
        clock_gettime(CLOCK_MONOTONIC, &time);
        pipe_mutex_lock(screen->bo_cache.lock);
        vc4_bo_last_unreference_locked_timed(bo, time.tv_sec);
        pipe_mutex_unlock(screen->bo_cache.lock);
}

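/**
 * Actually releases a BO: unmaps any CPU mapping and closes the GEM handle,
 * updating the screen-wide allocation statistics.
 */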
static void
vc4_bo_free(struct vc4_bo *bo)
{
        struct vc4_screen *screen = bo->screen;

        if (bo->map) {
                if (using_vc4_simulator && bo->name &&
                    strcmp(bo->name, "winsys") == 0) {
                        free(bo->map);
                } else {
                        munmap(bo->map, bo->size);
                        VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
                }
        }

        struct drm_gem_close c;
        memset(&c, 0, sizeof(c));
        c.handle = bo->handle;
        int ret = vc4_ioctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
        if (ret != 0)
                fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));

        screen->bo_count--;
        screen->bo_size -= bo->size;

        if (dump_stats) {
                fprintf(stderr, "Freed %s%s%dkb:\n",
                        bo->name ? bo->name : "",
                        bo->name ? " " : "",
                        bo->size / 1024);
                vc4_bo_dump_stats(screen);
        }

        free(bo);
}

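/**
 * Walks the cache's time-ordered list and frees entries that have been
 * sitting in the cache for more than a couple of seconds.
 */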
static void
free_stale_bos(struct vc4_screen *screen, time_t time)
{
        struct vc4_bo_cache *cache = &screen->bo_cache;
        bool freed_any = false;

        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
                                 time_list) {
                if (dump_stats && !freed_any) {
                        fprintf(stderr, "Freeing stale BOs:\n");
                        vc4_bo_dump_stats(screen);
                        freed_any = true;
                }

                /* If it's more than two seconds old, free it. */
                if (time - bo->free_time > 2) {
                        vc4_bo_remove_from_cache(cache, bo);
                        vc4_bo_free(bo);
                } else {
                        break;
                }
        }

        if (dump_stats && freed_any) {
                fprintf(stderr, "Freed stale BOs:\n");
                vc4_bo_dump_stats(screen);
        }
}

static void
vc4_bo_cache_free_all(struct vc4_bo_cache *cache)
{
        pipe_mutex_lock(cache->lock);
        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
                                 time_list) {
                vc4_bo_remove_from_cache(cache, bo);
                vc4_bo_free(bo);
        }
        pipe_mutex_unlock(cache->lock);
}

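/**
 * Drops the last reference to a BO with the cache lock already held.
 *
 * Private BOs are stashed in the cache (growing the per-size bucket array
 * if this is the largest size seen so far); shared (non-private) BOs bypass
 * the cache and are freed immediately.
 */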
void
vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
{
        struct vc4_screen *screen = bo->screen;
        struct vc4_bo_cache *cache = &screen->bo_cache;
        uint32_t page_index = bo->size / 4096 - 1;

        if (!bo->private) {
                vc4_bo_free(bo);
                return;
        }

        if (cache->size_list_size <= page_index) {
                struct list_head *new_list =
                        ralloc_array(screen, struct list_head, page_index + 1);

                /* Move old list contents over (since the array has moved, and
                 * therefore the pointers to the list heads have to change).
                 */
                for (int i = 0; i < cache->size_list_size; i++) {
                        struct list_head *old_head = &cache->size_list[i];
                        if (list_empty(old_head))
                                list_inithead(&new_list[i]);
                        else {
                                new_list[i].next = old_head->next;
                                new_list[i].prev = old_head->prev;
                                new_list[i].next->prev = &new_list[i];
                                new_list[i].prev->next = &new_list[i];
                        }
                }
                for (int i = cache->size_list_size; i < page_index + 1; i++)
                        list_inithead(&new_list[i]);

                cache->size_list = new_list;
                cache->size_list_size = page_index + 1;
        }

        bo->free_time = time;
        list_addtail(&bo->size_list, &cache->size_list[page_index]);
        list_addtail(&bo->time_list, &cache->time_list);
        cache->bo_count++;
        cache->bo_size += bo->size;
        if (dump_stats) {
                fprintf(stderr, "Freed %s %dkb to cache:\n",
                        bo->name, bo->size / 1024);
                vc4_bo_dump_stats(screen);
        }
        bo->name = NULL;

        free_stale_bos(screen, time);
}

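/**
 * Wraps a GEM handle imported from the winsys in a struct vc4_bo, reusing
 * an existing wrapper from the screen's handle table if the same handle has
 * already been opened.
 */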
static struct vc4_bo *
vc4_bo_open_handle(struct vc4_screen *screen,
                   uint32_t winsys_stride,
                   uint32_t handle, uint32_t size)
{
        struct vc4_bo *bo;

        assert(size);

        pipe_mutex_lock(screen->bo_handles_mutex);

        bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
        if (bo) {
                pipe_reference(NULL, &bo->reference);
                goto done;
        }

        bo = CALLOC_STRUCT(vc4_bo);
        pipe_reference_init(&bo->reference, 1);
        bo->screen = screen;
        bo->handle = handle;
        bo->size = size;
        bo->name = "winsys";
        bo->private = false;

#ifdef USE_VC4_SIMULATOR
        vc4_simulator_open_from_handle(screen->fd, winsys_stride,
                                       bo->handle, bo->size);
        bo->map = malloc(bo->size);
#endif

        util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);

done:
        pipe_mutex_unlock(screen->bo_handles_mutex);
        return bo;
}

struct vc4_bo *
vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
                 uint32_t winsys_stride)
{
        struct drm_gem_open o = {
                .name = name
        };
        int ret = vc4_ioctl(screen->fd, DRM_IOCTL_GEM_OPEN, &o);
        if (ret) {
                fprintf(stderr, "Failed to open bo %d: %s\n",
                        name, strerror(errno));
                return NULL;
        }

        return vc4_bo_open_handle(screen, winsys_stride, o.handle, o.size);
}

struct vc4_bo *
vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd, uint32_t winsys_stride)
{
        uint32_t handle;
        int ret = drmPrimeFDToHandle(screen->fd, fd, &handle);
        int size;
        if (ret) {
                fprintf(stderr, "Failed to get vc4 handle for dmabuf %d\n", fd);
                return NULL;
        }

        /* Determine the size of the bo we were handed. */
        size = lseek(fd, 0, SEEK_END);
        if (size == -1) {
                fprintf(stderr, "Couldn't get size of dmabuf fd %d.\n", fd);
                return NULL;
        }

        return vc4_bo_open_handle(screen, winsys_stride, handle, size);
}

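/**
 * Exports a BO as a dmabuf file descriptor.  The BO is marked non-private
 * so it will never be returned to the allocation cache once shared.
 */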
int
vc4_bo_get_dmabuf(struct vc4_bo *bo)
{
        int fd;
        int ret = drmPrimeHandleToFD(bo->screen->fd, bo->handle,
                                     O_CLOEXEC, &fd);
        if (ret != 0) {
                fprintf(stderr, "Failed to export gem bo %d to dmabuf\n",
                        bo->handle);
                return -1;
        }

        pipe_mutex_lock(bo->screen->bo_handles_mutex);
        bo->private = false;
        util_hash_table_set(bo->screen->bo_handles, (void *)(uintptr_t)bo->handle, bo);
        pipe_mutex_unlock(bo->screen->bo_handles_mutex);

        return fd;
}

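/**
 * Allocates a BO containing validated shader code through the
 * CREATE_SHADER_BO ioctl.  Shader BOs are marked non-private so they are
 * never recycled through the cache.
 */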
struct vc4_bo *
vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
{
        struct vc4_bo *bo;
        int ret;

        bo = CALLOC_STRUCT(vc4_bo);
        if (!bo)
                return NULL;

        pipe_reference_init(&bo->reference, 1);
        bo->screen = screen;
        bo->size = align(size, 4096);
        bo->name = "code";
        bo->private = false; /* Make sure it doesn't go back to the cache. */

        struct drm_vc4_create_shader_bo create = {
                .size = size,
                .data = (uintptr_t)data,
        };

        ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_CREATE_SHADER_BO,
                        &create);
        bo->handle = create.handle;

        if (ret != 0) {
                fprintf(stderr, "create shader ioctl failure\n");
                abort();
        }

        screen->bo_count++;
        screen->bo_size += bo->size;
        if (dump_stats) {
                fprintf(stderr, "Allocated shader %dkb:\n", bo->size / 1024);
                vc4_bo_dump_stats(screen);
        }

        return bo;
}

bool
vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
{
        struct drm_gem_flink flink = {
                .handle = bo->handle,
        };
        int ret = vc4_ioctl(bo->screen->fd, DRM_IOCTL_GEM_FLINK, &flink);
        if (ret) {
                fprintf(stderr, "Failed to flink bo %d: %s\n",
                        bo->handle, strerror(errno));
                free(bo);
                return false;
        }

        bo->private = false;
        *name = flink.name;

        return true;
}

static int vc4_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
{
        struct drm_vc4_wait_seqno wait = {
                .seqno = seqno,
                .timeout_ns = timeout_ns,
        };
        int ret = vc4_ioctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
        if (ret == -1)
                return -errno;
        else
                return 0;
}

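/**
 * Blocks until the hardware has finished the job identified by @seqno, or
 * until @timeout_ns expires.  Returns false on timeout.  With VC4_DEBUG_PERF
 * set, a message is printed when the wait would actually block.
 */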
bool
vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
               const char *reason)
{
        if (screen->finished_seqno >= seqno)
                return true;

        if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
                if (vc4_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
                        fprintf(stderr, "Blocking on seqno %lld for %s\n",
                                (long long)seqno, reason);
                }
        }

        int ret = vc4_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
        if (ret) {
                if (ret != -ETIME) {
                        fprintf(stderr, "wait failed: %d\n", ret);
                        abort();
                }

                return false;
        }

        screen->finished_seqno = seqno;
        return true;
}

static int vc4_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
{
        struct drm_vc4_wait_bo wait = {
                .handle = handle,
                .timeout_ns = timeout_ns,
        };
        int ret = vc4_ioctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
        if (ret == -1)
                return -errno;
        else
                return 0;
}

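/**
 * Blocks until any rendering accessing @bo has completed, or until
 * @timeout_ns expires.  Returns false on timeout; a timeout of 0 is a
 * non-blocking idle check (used by the BO cache).
 */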
bool
vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason)
{
        struct vc4_screen *screen = bo->screen;

        if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
                if (vc4_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
                        fprintf(stderr, "Blocking on %s BO for %s\n",
                                bo->name, reason);
                }
        }

        int ret = vc4_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
        if (ret) {
                if (ret != -ETIME) {
                        fprintf(stderr, "wait failed: %d\n", ret);
                        abort();
                }

                return false;
        }

        return true;
}

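/**
 * Returns a CPU mapping of the BO without waiting for the GPU to be done
 * with it; the caller is responsible for any synchronization.  The mapping
 * is cached in bo->map for the lifetime of the BO.
 */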
void *
vc4_bo_map_unsynchronized(struct vc4_bo *bo)
{
        uint64_t offset;
        int ret;

        if (bo->map)
                return bo->map;

        struct drm_vc4_mmap_bo map;
        memset(&map, 0, sizeof(map));
        map.handle = bo->handle;
        ret = vc4_ioctl(bo->screen->fd, DRM_IOCTL_VC4_MMAP_BO, &map);
        offset = map.offset;
        if (ret != 0) {
                fprintf(stderr, "map ioctl failure\n");
                abort();
        }

        bo->map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       bo->screen->fd, offset);
        if (bo->map == MAP_FAILED) {
                fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
                        bo->handle, (long long)offset, bo->size);
                abort();
        }
        VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));

        return bo->map;
}

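/**
 * Returns a CPU mapping of the BO, waiting for any GPU access to finish
 * first.
 */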
void *
vc4_bo_map(struct vc4_bo *bo)
{
        void *map = vc4_bo_map_unsynchronized(bo);

        bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
        if (!ok) {
                fprintf(stderr, "BO wait for map failed\n");
                abort();
        }

        return map;
}

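/**
 * Frees all cached BOs at screen teardown.
 */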
void
vc4_bufmgr_destroy(struct pipe_screen *pscreen)
{
        struct vc4_screen *screen = vc4_screen(pscreen);
        struct vc4_bo_cache *cache = &screen->bo_cache;

        vc4_bo_cache_free_all(cache);

        if (dump_stats) {
                fprintf(stderr, "BO stats after screen destroy:\n");
                vc4_bo_dump_stats(screen);
        }
}