2 * Copyright 2019 Collabora, Ltd.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
31 #include "drm-uapi/panfrost_drm.h"
35 #include "../pandecode/public.h"
37 #include "os/os_mman.h"
39 #include "util/u_inlines.h"
40 #include "util/u_math.h"
/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and searching it for a suitable BO. A cache eviction is a
 * kernel-level free of a BO and removing it from the bucket. We special case
 * evicting all BOs from the cache, since that's what is helpful in practice
 * and avoids extra logic around the linked list.
 */
57 static struct panfrost_bo
*
58 panfrost_bo_alloc(struct panfrost_device
*dev
, size_t size
,
61 struct drm_panfrost_create_bo create_bo
= { .size
= size
};
62 struct panfrost_bo
*bo
;
65 if (dev
->kernel_version
->version_major
> 1 ||
66 dev
->kernel_version
->version_minor
>= 1) {
67 if (flags
& PAN_BO_GROWABLE
)
68 create_bo
.flags
|= PANFROST_BO_HEAP
;
69 if (!(flags
& PAN_BO_EXECUTE
))
70 create_bo
.flags
|= PANFROST_BO_NOEXEC
;
73 ret
= drmIoctl(dev
->fd
, DRM_IOCTL_PANFROST_CREATE_BO
, &create_bo
);
75 fprintf(stderr
, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
79 bo
= rzalloc(dev
->memctx
, struct panfrost_bo
);
81 bo
->size
= create_bo
.size
;
82 bo
->gpu
= create_bo
.offset
;
83 bo
->gem_handle
= create_bo
.handle
;
90 panfrost_bo_free(struct panfrost_bo
*bo
)
92 struct drm_gem_close gem_close
= { .handle
= bo
->gem_handle
};
95 ret
= drmIoctl(bo
->dev
->fd
, DRM_IOCTL_GEM_CLOSE
, &gem_close
);
97 fprintf(stderr
, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
104 /* Returns true if the BO is ready, false otherwise.
105 * access_type is encoding the type of access one wants to ensure is done.
106 * Say you want to make sure all writers are done writing, you should pass
107 * PAN_BO_ACCESS_WRITE.
108 * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
109 * PAN_BO_ACCESS_READ would work too as waiting for readers implies
110 * waiting for writers as well, but we want to make things explicit and waiting
111 * only for readers is impossible.
114 panfrost_bo_wait(struct panfrost_bo
*bo
, int64_t timeout_ns
,
115 uint32_t access_type
)
117 struct drm_panfrost_wait_bo req
= {
118 .handle
= bo
->gem_handle
,
119 .timeout_ns
= timeout_ns
,
123 assert(access_type
== PAN_BO_ACCESS_WRITE
||
124 access_type
== PAN_BO_ACCESS_RW
);
126 /* If the BO has been exported or imported we can't rely on the cached
127 * state, we need to call the WAIT_BO ioctl.
129 if (!(bo
->flags
& (PAN_BO_IMPORTED
| PAN_BO_EXPORTED
))) {
130 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
134 /* If the caller only wants to wait for writers and no
135 * writes are pending, we don't have to wait.
137 if (access_type
== PAN_BO_ACCESS_WRITE
&&
138 !(bo
->gpu_access
& PAN_BO_ACCESS_WRITE
))
142 /* The ioctl returns >= 0 value when the BO we are waiting for is ready
145 ret
= drmIoctl(bo
->dev
->fd
, DRM_IOCTL_PANFROST_WAIT_BO
, &req
);
147 /* Set gpu_access to 0 so that the next call to bo_wait()
148 * doesn't have to call the WAIT_BO ioctl.
154 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
155 * is invalid, which shouldn't happen here.
157 assert(errno
== ETIMEDOUT
|| errno
== EBUSY
);
161 /* Helper to calculate the bucket index of a BO */
164 pan_bucket_index(unsigned size
)
166 /* Round down to POT to compute a bucket index */
168 unsigned bucket_index
= util_logbase2(size
);
170 /* Clamp the bucket index; all huge allocations will be
171 * sorted into the largest bucket */
173 bucket_index
= MIN2(bucket_index
, MAX_BO_CACHE_BUCKET
);
175 /* The minimum bucket size must equal the minimum allocation
176 * size; the maximum we clamped */
178 assert(bucket_index
>= MIN_BO_CACHE_BUCKET
);
179 assert(bucket_index
<= MAX_BO_CACHE_BUCKET
);
182 return (bucket_index
- MIN_BO_CACHE_BUCKET
);
185 static struct list_head
*
186 pan_bucket(struct panfrost_device
*dev
, unsigned size
)
188 return &dev
->bo_cache
.buckets
[pan_bucket_index(size
)];
191 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
192 * BO cache. If it succeeds, it returns that BO and removes the BO from the
193 * cache. If it fails, it returns NULL signaling the caller to allocate a new
196 static struct panfrost_bo
*
197 panfrost_bo_cache_fetch(struct panfrost_device
*dev
,
198 size_t size
, uint32_t flags
, bool dontwait
)
200 pthread_mutex_lock(&dev
->bo_cache
.lock
);
201 struct list_head
*bucket
= pan_bucket(dev
, size
);
202 struct panfrost_bo
*bo
= NULL
;
204 /* Iterate the bucket looking for something suitable */
205 list_for_each_entry_safe(struct panfrost_bo
, entry
, bucket
,
207 if (entry
->size
< size
|| entry
->flags
!= flags
)
210 if (!panfrost_bo_wait(entry
, dontwait
? 0 : INT64_MAX
,
214 struct drm_panfrost_madvise madv
= {
215 .handle
= entry
->gem_handle
,
216 .madv
= PANFROST_MADV_WILLNEED
,
220 /* This one works, splice it out of the cache */
221 list_del(&entry
->bucket_link
);
222 list_del(&entry
->lru_link
);
224 ret
= drmIoctl(dev
->fd
, DRM_IOCTL_PANFROST_MADVISE
, &madv
);
225 if (!ret
&& !madv
.retained
) {
226 panfrost_bo_free(entry
);
233 pthread_mutex_unlock(&dev
->bo_cache
.lock
);
239 panfrost_bo_cache_evict_stale_bos(struct panfrost_device
*dev
)
241 struct timespec time
;
243 clock_gettime(CLOCK_MONOTONIC
, &time
);
244 list_for_each_entry_safe(struct panfrost_bo
, entry
,
245 &dev
->bo_cache
.lru
, lru_link
) {
246 /* We want all entries that have been used more than 1 sec
247 * ago to be dropped, others can be kept.
248 * Note the <= 2 check and not <= 1. It's here to account for
249 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
250 * That means we might keep entries that are between 1 and 2
251 * seconds old, but we don't really care, as long as unused BOs
252 * are dropped at some point.
254 if (time
.tv_sec
- entry
->last_used
<= 2)
257 list_del(&entry
->bucket_link
);
258 list_del(&entry
->lru_link
);
259 panfrost_bo_free(entry
);
263 /* Tries to add a BO to the cache. Returns if it was
267 panfrost_bo_cache_put(struct panfrost_bo
*bo
)
269 struct panfrost_device
*dev
= bo
->dev
;
271 if (bo
->flags
& PAN_BO_DONT_REUSE
)
274 pthread_mutex_lock(&dev
->bo_cache
.lock
);
275 struct list_head
*bucket
= pan_bucket(dev
, MAX2(bo
->size
, 4096));
276 struct drm_panfrost_madvise madv
;
277 struct timespec time
;
279 madv
.handle
= bo
->gem_handle
;
280 madv
.madv
= PANFROST_MADV_DONTNEED
;
283 drmIoctl(dev
->fd
, DRM_IOCTL_PANFROST_MADVISE
, &madv
);
285 /* Add us to the bucket */
286 list_addtail(&bo
->bucket_link
, bucket
);
288 /* Add us to the LRU list and update the last_used field. */
289 list_addtail(&bo
->lru_link
, &dev
->bo_cache
.lru
);
290 clock_gettime(CLOCK_MONOTONIC
, &time
);
291 bo
->last_used
= time
.tv_sec
;
293 /* Let's do some cleanup in the BO cache while we hold the
296 panfrost_bo_cache_evict_stale_bos(dev
);
297 pthread_mutex_unlock(&dev
->bo_cache
.lock
);
302 /* Evicts all BOs from the cache. Called during context
303 * destroy or during low-memory situations (to free up
304 * memory that may be unused by us just sitting in our
305 * cache, but still reserved from the perspective of the
309 panfrost_bo_cache_evict_all(
310 struct panfrost_device
*dev
)
312 pthread_mutex_lock(&dev
->bo_cache
.lock
);
313 for (unsigned i
= 0; i
< ARRAY_SIZE(dev
->bo_cache
.buckets
); ++i
) {
314 struct list_head
*bucket
= &dev
->bo_cache
.buckets
[i
];
316 list_for_each_entry_safe(struct panfrost_bo
, entry
, bucket
,
318 list_del(&entry
->bucket_link
);
319 list_del(&entry
->lru_link
);
320 panfrost_bo_free(entry
);
323 pthread_mutex_unlock(&dev
->bo_cache
.lock
);
327 panfrost_bo_mmap(struct panfrost_bo
*bo
)
329 struct drm_panfrost_mmap_bo mmap_bo
= { .handle
= bo
->gem_handle
};
335 ret
= drmIoctl(bo
->dev
->fd
, DRM_IOCTL_PANFROST_MMAP_BO
, &mmap_bo
);
337 fprintf(stderr
, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
341 bo
->cpu
= os_mmap(NULL
, bo
->size
, PROT_READ
| PROT_WRITE
, MAP_SHARED
,
342 bo
->dev
->fd
, mmap_bo
.offset
);
343 if (bo
->cpu
== MAP_FAILED
) {
344 fprintf(stderr
, "mmap failed: %p %m\n", bo
->cpu
);
350 panfrost_bo_munmap(struct panfrost_bo
*bo
)
355 if (os_munmap((void *) (uintptr_t)bo
->cpu
, bo
->size
)) {
364 panfrost_bo_create(struct panfrost_device
*dev
, size_t size
,
367 struct panfrost_bo
*bo
;
369 /* Kernel will fail (confusingly) with EPERM otherwise */
372 /* To maximize BO cache usage, don't allocate tiny BOs */
373 size
= MAX2(size
, 4096);
375 /* GROWABLE BOs cannot be mmapped */
376 if (flags
& PAN_BO_GROWABLE
)
377 assert(flags
& PAN_BO_INVISIBLE
);
379 /* Before creating a BO, we first want to check the cache but without
380 * waiting for BO readiness (BOs in the cache can still be referenced
381 * by jobs that are not finished yet).
382 * If the cached allocation fails we fall back on fresh BO allocation,
383 * and if that fails too, we try one more time to allocate from the
384 * cache, but this time we accept to wait.
386 bo
= panfrost_bo_cache_fetch(dev
, size
, flags
, true);
388 bo
= panfrost_bo_alloc(dev
, size
, flags
);
390 bo
= panfrost_bo_cache_fetch(dev
, size
, flags
, false);
393 fprintf(stderr
, "BO creation failed\n");
397 /* Only mmap now if we know we need to. For CPU-invisible buffers, we
398 * never map since we don't care about their contents; they're purely
399 * for GPU-internal use. But we do trace them anyway. */
401 if (!(flags
& (PAN_BO_INVISIBLE
| PAN_BO_DELAY_MMAP
)))
402 panfrost_bo_mmap(bo
);
404 p_atomic_set(&bo
->refcnt
, 1);
406 pthread_mutex_lock(&dev
->active_bos_lock
);
407 _mesa_set_add(bo
->dev
->active_bos
, bo
);
408 pthread_mutex_unlock(&dev
->active_bos_lock
);
410 if (dev
->debug
& (PAN_DBG_TRACE
| PAN_DBG_SYNC
)) {
411 if (flags
& PAN_BO_INVISIBLE
)
412 pandecode_inject_mmap(bo
->gpu
, NULL
, bo
->size
, NULL
);
413 else if (!(flags
& PAN_BO_DELAY_MMAP
))
414 pandecode_inject_mmap(bo
->gpu
, bo
->cpu
, bo
->size
, NULL
);
421 panfrost_bo_reference(struct panfrost_bo
*bo
)
424 ASSERTED
int count
= p_atomic_inc_return(&bo
->refcnt
);
430 panfrost_bo_unreference(struct panfrost_bo
*bo
)
435 /* Don't return to cache if there are still references */
436 if (p_atomic_dec_return(&bo
->refcnt
))
439 struct panfrost_device
*dev
= bo
->dev
;
441 pthread_mutex_lock(&dev
->active_bos_lock
);
442 /* Someone might have imported this BO while we were waiting for the
443 * lock, let's make sure it's still not referenced before freeing it.
445 if (p_atomic_read(&bo
->refcnt
) == 0) {
446 _mesa_set_remove_key(bo
->dev
->active_bos
, bo
);
448 /* When the reference count goes to zero, we need to cleanup */
449 panfrost_bo_munmap(bo
);
451 /* Rather than freeing the BO now, we'll cache the BO for later
452 * allocations if we're allowed to.
454 if (!panfrost_bo_cache_put(bo
))
455 panfrost_bo_free(bo
);
457 pthread_mutex_unlock(&dev
->active_bos_lock
);
461 panfrost_bo_import(struct panfrost_device
*dev
, int fd
)
463 struct panfrost_bo
*bo
, *newbo
= rzalloc(dev
->memctx
, struct panfrost_bo
);
464 struct drm_panfrost_get_bo_offset get_bo_offset
= {0,};
465 struct set_entry
*entry
;
471 ret
= drmPrimeFDToHandle(dev
->fd
, fd
, &gem_handle
);
474 newbo
->gem_handle
= gem_handle
;
476 pthread_mutex_lock(&dev
->active_bos_lock
);
477 entry
= _mesa_set_search_or_add(dev
->active_bos
, newbo
);
479 bo
= (struct panfrost_bo
*)entry
->key
;
481 get_bo_offset
.handle
= gem_handle
;
482 ret
= drmIoctl(dev
->fd
, DRM_IOCTL_PANFROST_GET_BO_OFFSET
, &get_bo_offset
);
485 newbo
->gpu
= (mali_ptr
) get_bo_offset
.offset
;
486 newbo
->size
= lseek(fd
, 0, SEEK_END
);
487 newbo
->flags
|= PAN_BO_DONT_REUSE
| PAN_BO_IMPORTED
;
488 assert(newbo
->size
> 0);
489 p_atomic_set(&newbo
->refcnt
, 1);
490 // TODO map and unmap on demand?
491 panfrost_bo_mmap(newbo
);
494 /* bo->refcnt == 0 can happen if the BO
495 * was being released but panfrost_bo_import() acquired the
496 * lock before panfrost_bo_unreference(). In that case, refcnt
497 * is 0 and we can't use panfrost_bo_reference() directly, we
498 * have to re-initialize the refcnt().
499 * Note that panfrost_bo_unreference() checks
500 * refcnt value just after acquiring the lock to
501 * make sure the object is not freed if panfrost_bo_import()
502 * acquired it in the meantime.
504 if (p_atomic_read(&bo
->refcnt
) == 0)
505 p_atomic_set(&newbo
->refcnt
, 1);
507 panfrost_bo_reference(bo
);
510 pthread_mutex_unlock(&dev
->active_bos_lock
);
516 panfrost_bo_export(struct panfrost_bo
*bo
)
518 struct drm_prime_handle args
= {
519 .handle
= bo
->gem_handle
,
520 .flags
= DRM_CLOEXEC
,
523 int ret
= drmIoctl(bo
->dev
->fd
, DRM_IOCTL_PRIME_HANDLE_TO_FD
, &args
);
527 bo
->flags
|= PAN_BO_DONT_REUSE
| PAN_BO_EXPORTED
;