panfrost: Print synced traces to stderr
[mesa.git] / src / gallium / drivers / panfrost / pan_bo.c
1 /*
2 * Copyright 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26 #include <errno.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <xf86drm.h>
30 #include <pthread.h>
31 #include "drm-uapi/panfrost_drm.h"
32
33 #include "pan_bo.h"
34 #include "pan_screen.h"
35 #include "pan_util.h"
36 #include "pandecode/decode.h"
37
38 #include "os/os_mman.h"
39
40 #include "util/u_inlines.h"
41 #include "util/u_math.h"
42
43 /* This file implements a userspace BO cache. Allocating and freeing
44 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
45 * add more work than we would like at this point. So caching BOs in userspace
46 * solves both of these problems and does not require kernel updates.
47 *
48 * Cached BOs are sorted into a bucket based on rounding their size down to the
49 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
50 * objects. Putting a BO into the cache is accomplished by adding it to the
51 * corresponding bucket. Getting a BO from the cache consists of finding the
52 * appropriate bucket and searching it for a suitable entry. A cache eviction
53 * is a kernel-level free of a BO and removing it from the bucket. We special
54 * case evicting all BOs from the cache, since that's what is helpful in
54 * practice and avoids extra logic
55 * around the linked list.
56 */
57
58 static struct panfrost_bo *
59 panfrost_bo_alloc(struct panfrost_screen *screen, size_t size,
60 uint32_t flags)
61 {
62 struct drm_panfrost_create_bo create_bo = { .size = size };
63 struct panfrost_bo *bo;
64 int ret;
65
66 if (screen->kernel_version->version_major > 1 ||
67 screen->kernel_version->version_minor >= 1) {
68 if (flags & PAN_BO_GROWABLE)
69 create_bo.flags |= PANFROST_BO_HEAP;
70 if (!(flags & PAN_BO_EXECUTE))
71 create_bo.flags |= PANFROST_BO_NOEXEC;
72 }
73
74 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
75 if (ret) {
76 DBG("DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
77 return NULL;
78 }
79
80 bo = rzalloc(screen, struct panfrost_bo);
81 assert(bo);
82 bo->size = create_bo.size;
83 bo->gpu = create_bo.offset;
84 bo->gem_handle = create_bo.handle;
85 bo->flags = flags;
86 bo->screen = screen;
87 return bo;
88 }
89
90 static void
91 panfrost_bo_free(struct panfrost_bo *bo)
92 {
93 struct drm_gem_close gem_close = { .handle = bo->gem_handle };
94 int ret;
95
96 ret = drmIoctl(bo->screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
97 if (ret) {
98 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
99 assert(0);
100 }
101
102 ralloc_free(bo);
103 }
104
105 /* Returns true if the BO is ready, false otherwise.
106 * access_type is encoding the type of access one wants to ensure is done.
107 * Say you want to make sure all writers are done writing, you should pass
108 * PAN_BO_ACCESS_WRITE.
109 * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
110 * PAN_BO_ACCESS_READ would work too as waiting for readers implies
111 * waiting for writers as well, but we want to make things explicit and waiting
112 * only for readers is impossible.
113 */
114 bool
115 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
116 uint32_t access_type)
117 {
118 struct drm_panfrost_wait_bo req = {
119 .handle = bo->gem_handle,
120 .timeout_ns = timeout_ns,
121 };
122 int ret;
123
124 assert(access_type == PAN_BO_ACCESS_WRITE ||
125 access_type == PAN_BO_ACCESS_RW);
126
127 /* If the BO has been exported or imported we can't rely on the cached
128 * state, we need to call the WAIT_BO ioctl.
129 */
130 if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
131 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
132 if (!bo->gpu_access)
133 return true;
134
135 /* If the caller only wants to wait for writers and no
136 * writes are pending, we don't have to wait.
137 */
138 if (access_type == PAN_BO_ACCESS_WRITE &&
139 !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
140 return true;
141 }
142
143 /* The ioctl returns >= 0 value when the BO we are waiting for is ready
144 * -1 otherwise.
145 */
146 ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
147 if (ret != -1) {
148 /* Set gpu_access to 0 so that the next call to bo_wait()
149 * doesn't have to call the WAIT_BO ioctl.
150 */
151 bo->gpu_access = 0;
152 return true;
153 }
154
155 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
156 * is invalid, which shouldn't happen here.
157 */
158 assert(errno == ETIMEDOUT || errno == EBUSY);
159 return false;
160 }
161
162 /* Helper to calculate the bucket index of a BO */
163
164 static unsigned
165 pan_bucket_index(unsigned size)
166 {
167 /* Round down to POT to compute a bucket index */
168
169 unsigned bucket_index = util_logbase2(size);
170
171 /* Clamp the bucket index; all huge allocations will be
172 * sorted into the largest bucket */
173
174 bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);
175
176 /* The minimum bucket size must equal the minimum allocation
177 * size; the maximum we clamped */
178
179 assert(bucket_index >= MIN_BO_CACHE_BUCKET);
180 assert(bucket_index <= MAX_BO_CACHE_BUCKET);
181
182 /* Reindex from 0 */
183 return (bucket_index - MIN_BO_CACHE_BUCKET);
184 }
185
186 static struct list_head *
187 pan_bucket(struct panfrost_screen *screen, unsigned size)
188 {
189 return &screen->bo_cache.buckets[pan_bucket_index(size)];
190 }
191
192 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
193 * BO cache. If it succeeds, it returns that BO and removes the BO from the
194 * cache. If it fails, it returns NULL signaling the caller to allocate a new
195 * BO. */
196
197 static struct panfrost_bo *
198 panfrost_bo_cache_fetch(struct panfrost_screen *screen,
199 size_t size, uint32_t flags, bool dontwait)
200 {
201 pthread_mutex_lock(&screen->bo_cache.lock);
202 struct list_head *bucket = pan_bucket(screen, size);
203 struct panfrost_bo *bo = NULL;
204
205 /* Iterate the bucket looking for something suitable */
206 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
207 bucket_link) {
208 if (entry->size < size || entry->flags != flags)
209 continue;
210
211 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
212 PAN_BO_ACCESS_RW))
213 continue;
214
215 struct drm_panfrost_madvise madv = {
216 .handle = entry->gem_handle,
217 .madv = PANFROST_MADV_WILLNEED,
218 };
219 int ret;
220
221 /* This one works, splice it out of the cache */
222 list_del(&entry->bucket_link);
223 list_del(&entry->lru_link);
224
225 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
226 if (!ret && !madv.retained) {
227 panfrost_bo_free(entry);
228 continue;
229 }
230 /* Let's go! */
231 bo = entry;
232 break;
233 }
234 pthread_mutex_unlock(&screen->bo_cache.lock);
235
236 return bo;
237 }
238
239 static void
240 panfrost_bo_cache_evict_stale_bos(struct panfrost_screen *screen)
241 {
242 struct timespec time;
243
244 clock_gettime(CLOCK_MONOTONIC, &time);
245 list_for_each_entry_safe(struct panfrost_bo, entry,
246 &screen->bo_cache.lru, lru_link) {
247 /* We want all entries that have been used more than 1 sec
248 * ago to be dropped, others can be kept.
249 * Note the <= 2 check and not <= 1. It's here to account for
250 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
251 * That means we might keep entries that are between 1 and 2
252 * seconds old, but we don't really care, as long as unused BOs
253 * are dropped at some point.
254 */
255 if (time.tv_sec - entry->last_used <= 2)
256 break;
257
258 list_del(&entry->bucket_link);
259 list_del(&entry->lru_link);
260 panfrost_bo_free(entry);
261 }
262 }
263
264 /* Tries to add a BO to the cache. Returns if it was
265 * successful */
266
267 static bool
268 panfrost_bo_cache_put(struct panfrost_bo *bo)
269 {
270 struct panfrost_screen *screen = bo->screen;
271
272 if (bo->flags & PAN_BO_DONT_REUSE)
273 return false;
274
275 pthread_mutex_lock(&screen->bo_cache.lock);
276 struct list_head *bucket = pan_bucket(screen, bo->size);
277 struct drm_panfrost_madvise madv;
278 struct timespec time;
279
280 madv.handle = bo->gem_handle;
281 madv.madv = PANFROST_MADV_DONTNEED;
282 madv.retained = 0;
283
284 drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
285
286 /* Add us to the bucket */
287 list_addtail(&bo->bucket_link, bucket);
288
289 /* Add us to the LRU list and update the last_used field. */
290 list_addtail(&bo->lru_link, &screen->bo_cache.lru);
291 clock_gettime(CLOCK_MONOTONIC, &time);
292 bo->last_used = time.tv_sec;
293
294 /* Let's do some cleanup in the BO cache while we hold the
295 * lock.
296 */
297 panfrost_bo_cache_evict_stale_bos(screen);
298 pthread_mutex_unlock(&screen->bo_cache.lock);
299
300 return true;
301 }
302
303 /* Evicts all BOs from the cache. Called during context
304 * destroy or during low-memory situations (to free up
305 * memory that may be unused by us just sitting in our
306 * cache, but still reserved from the perspective of the
307 * OS) */
308
309 void
310 panfrost_bo_cache_evict_all(
311 struct panfrost_screen *screen)
312 {
313 pthread_mutex_lock(&screen->bo_cache.lock);
314 for (unsigned i = 0; i < ARRAY_SIZE(screen->bo_cache.buckets); ++i) {
315 struct list_head *bucket = &screen->bo_cache.buckets[i];
316
317 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
318 bucket_link) {
319 list_del(&entry->bucket_link);
320 list_del(&entry->lru_link);
321 panfrost_bo_free(entry);
322 }
323 }
324 pthread_mutex_unlock(&screen->bo_cache.lock);
325 }
326
327 void
328 panfrost_bo_mmap(struct panfrost_bo *bo)
329 {
330 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
331 int ret;
332
333 if (bo->cpu)
334 return;
335
336 ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
337 if (ret) {
338 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
339 assert(0);
340 }
341
342 bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
343 bo->screen->fd, mmap_bo.offset);
344 if (bo->cpu == MAP_FAILED) {
345 fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
346 assert(0);
347 }
348
349 /* Record the mmap if we're tracing */
350 if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
351 pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
352 }
353
354 static void
355 panfrost_bo_munmap(struct panfrost_bo *bo)
356 {
357 if (!bo->cpu)
358 return;
359
360 if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
361 perror("munmap");
362 abort();
363 }
364
365 bo->cpu = NULL;
366 }
367
368 struct panfrost_bo *
369 panfrost_bo_create(struct panfrost_screen *screen, size_t size,
370 uint32_t flags)
371 {
372 struct panfrost_bo *bo;
373
374 /* Kernel will fail (confusingly) with EPERM otherwise */
375 assert(size > 0);
376
377 /* To maximize BO cache usage, don't allocate tiny BOs */
378 size = MAX2(size, 4096);
379
380 /* GROWABLE BOs cannot be mmapped */
381 if (flags & PAN_BO_GROWABLE)
382 assert(flags & PAN_BO_INVISIBLE);
383
384 /* Before creating a BO, we first want to check the cache but without
385 * waiting for BO readiness (BOs in the cache can still be referenced
386 * by jobs that are not finished yet).
387 * If the cached allocation fails we fall back on fresh BO allocation,
388 * and if that fails too, we try one more time to allocate from the
389 * cache, but this time we accept to wait.
390 */
391 bo = panfrost_bo_cache_fetch(screen, size, flags, true);
392 if (!bo)
393 bo = panfrost_bo_alloc(screen, size, flags);
394 if (!bo)
395 bo = panfrost_bo_cache_fetch(screen, size, flags, false);
396
397 if (!bo)
398 fprintf(stderr, "BO creation failed\n");
399
400 assert(bo);
401
402 /* Only mmap now if we know we need to. For CPU-invisible buffers, we
403 * never map since we don't care about their contents; they're purely
404 * for GPU-internal use. But we do trace them anyway. */
405
406 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
407 panfrost_bo_mmap(bo);
408 else if (flags & PAN_BO_INVISIBLE) {
409 if (pan_debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
410 pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
411 }
412
413 pipe_reference_init(&bo->reference, 1);
414
415 pthread_mutex_lock(&screen->active_bos_lock);
416 _mesa_set_add(bo->screen->active_bos, bo);
417 pthread_mutex_unlock(&screen->active_bos_lock);
418
419 return bo;
420 }
421
422 void
423 panfrost_bo_reference(struct panfrost_bo *bo)
424 {
425 if (bo)
426 pipe_reference(NULL, &bo->reference);
427 }
428
429 void
430 panfrost_bo_unreference(struct panfrost_bo *bo)
431 {
432 if (!bo)
433 return;
434
435 if (!pipe_reference(&bo->reference, NULL))
436 return;
437
438 struct panfrost_screen *screen = bo->screen;
439
440 pthread_mutex_lock(&screen->active_bos_lock);
441 /* Someone might have imported this BO while we were waiting for the
442 * lock, let's make sure it's still not referenced before freeing it.
443 */
444 if (!pipe_is_referenced(&bo->reference)) {
445 _mesa_set_remove_key(bo->screen->active_bos, bo);
446
447 /* When the reference count goes to zero, we need to cleanup */
448 panfrost_bo_munmap(bo);
449
450 /* Rather than freeing the BO now, we'll cache the BO for later
451 * allocations if we're allowed to.
452 */
453 if (!panfrost_bo_cache_put(bo))
454 panfrost_bo_free(bo);
455 }
456 pthread_mutex_unlock(&screen->active_bos_lock);
457 }
458
459 struct panfrost_bo *
460 panfrost_bo_import(struct panfrost_screen *screen, int fd)
461 {
462 struct panfrost_bo *bo, *newbo = rzalloc(screen, struct panfrost_bo);
463 struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
464 struct set_entry *entry;
465 ASSERTED int ret;
466 unsigned gem_handle;
467
468 newbo->screen = screen;
469
470 ret = drmPrimeFDToHandle(screen->fd, fd, &gem_handle);
471 assert(!ret);
472
473 newbo->gem_handle = gem_handle;
474
475 pthread_mutex_lock(&screen->active_bos_lock);
476 entry = _mesa_set_search_or_add(screen->active_bos, newbo);
477 assert(entry);
478 bo = (struct panfrost_bo *)entry->key;
479 if (newbo == bo) {
480 get_bo_offset.handle = gem_handle;
481 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
482 assert(!ret);
483
484 newbo->gpu = (mali_ptr) get_bo_offset.offset;
485 newbo->size = lseek(fd, 0, SEEK_END);
486 newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
487 assert(newbo->size > 0);
488 pipe_reference_init(&newbo->reference, 1);
489 // TODO map and unmap on demand?
490 panfrost_bo_mmap(newbo);
491 } else {
492 ralloc_free(newbo);
493 /* !pipe_is_referenced(&bo->reference) can happen if the BO
494 * was being released but panfrost_bo_import() acquired the
495 * lock before panfrost_bo_unreference(). In that case, refcnt
496 * is 0 and we can't use panfrost_bo_reference() directly, we
497 * have to re-initialize it with pipe_reference_init().
498 * Note that panfrost_bo_unreference() checks
499 * pipe_is_referenced() value just after acquiring the lock to
500 * make sure the object is not freed if panfrost_bo_import()
501 * acquired it in the meantime.
502 */
503 if (!pipe_is_referenced(&bo->reference))
504 pipe_reference_init(&newbo->reference, 1);
505 else
506 panfrost_bo_reference(bo);
507 assert(bo->cpu);
508 }
509 pthread_mutex_unlock(&screen->active_bos_lock);
510
511 return bo;
512 }
513
514 int
515 panfrost_bo_export(struct panfrost_bo *bo)
516 {
517 struct drm_prime_handle args = {
518 .handle = bo->gem_handle,
519 .flags = DRM_CLOEXEC,
520 };
521
522 int ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
523 if (ret == -1)
524 return -1;
525
526 bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;
527 return args.fd;
528 }
529