/*
 * Copyright 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <xf86drm.h>
#include <pthread.h>
#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_screen.h"
#include "pan_util.h"
#include "pandecode/decode.h"

#include "os/os_mman.h"

#include "util/u_inlines.h"
#include "util/u_math.h"

/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and searching it. A cache eviction is a kernel-level free
 * of a BO and its removal from the bucket. We special case evicting all BOs
 * from the cache, since that's what's helpful in practice and avoids extra
 * logic around the linked list.
 */
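
/* For example, a request for 9000 bytes maps to bucket floor(log2(9000)) =
 * 13, the 8192-byte bucket. Since sizes are rounded *down*, a bucket may
 * contain BOs smaller than the request, which is why fetching also checks
 * entry->size >= size before reusing an entry. */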

static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_screen *screen, size_t size,
                  uint32_t flags)
{
        struct drm_panfrost_create_bo create_bo = { .size = size };
        struct panfrost_bo *bo;
        int ret;
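
        /* The growable-heap and no-execute BO flags are only recognized by
         * newer kernel drivers, hence the version check below; on older
         * kernels we simply leave them unset. */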
        if (screen->kernel_version->version_major > 1 ||
            screen->kernel_version->version_minor >= 1) {
                if (flags & PAN_BO_GROWABLE)
                        create_bo.flags |= PANFROST_BO_HEAP;
                if (!(flags & PAN_BO_EXECUTE))
                        create_bo.flags |= PANFROST_BO_NOEXEC;
        }

        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
                return NULL;
        }

        bo = rzalloc(screen, struct panfrost_bo);
        bo->size = create_bo.size;
        bo->gpu = create_bo.offset;
        bo->gem_handle = create_bo.handle;
        bo->flags = flags;
        bo->screen = screen;

        return bo;
}

static void
panfrost_bo_free(struct panfrost_bo *bo)
{
        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
        int ret;

        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
                assert(0);
        }

        ralloc_free(bo);
}

/* Helper to calculate the bucket index of a BO */

static unsigned
pan_bucket_index(unsigned size)
{
        /* Round down to POT to compute a bucket index */

        unsigned bucket_index = util_logbase2(size);
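
        /* For instance, a 12288-byte (3 page) BO yields
         * util_logbase2(12288) == 13, since 2^13 = 8192 <= 12288 < 16384 */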

        /* Clamp the bucket index; all huge allocations will be
         * sorted into the largest bucket */

        bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);

        /* The minimum bucket size must equal the minimum allocation
         * size; the maximum we clamped */

        assert(bucket_index >= MIN_BO_CACHE_BUCKET);
        assert(bucket_index <= MAX_BO_CACHE_BUCKET);
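
        /* Reindex from zero, since bucket 0 of the array corresponds to the
         * minimum bucket size, not to a 1-byte bucket */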
        return (bucket_index - MIN_BO_CACHE_BUCKET);
}

static struct list_head *
pan_bucket(struct panfrost_screen *screen, unsigned size)
{
        return &screen->bo_cache[pan_bucket_index(size)];
}

/* Tries to fetch a BO of sufficient size with the appropriate flags from the
 * BO cache. If it succeeds, it returns that BO and removes the BO from the
 * cache. If it fails, it returns NULL signaling the caller to allocate a new
 * BO. */

static struct panfrost_bo *
panfrost_bo_cache_fetch(
                struct panfrost_screen *screen,
                size_t size, uint32_t flags)
{
        pthread_mutex_lock(&screen->bo_cache_lock);
        struct list_head *bucket = pan_bucket(screen, size);
        struct panfrost_bo *bo = NULL;

        /* Iterate the bucket looking for something suitable */
        list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
                if (entry->size >= size &&
                    entry->flags == flags) {
                        int ret;
                        struct drm_panfrost_madvise madv;

                        /* This one works, splice it out of the cache */
                        list_del(&entry->link);
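
                        /* While it sat in the cache, the BO was marked
                         * DONTNEED (see panfrost_bo_cache_put), so the kernel
                         * may have reclaimed its pages. Ask for it back; if
                         * it was not retained, the handle no longer has
                         * backing memory, so free it and keep searching. */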
                        madv.handle = entry->gem_handle;
                        madv.madv = PANFROST_MADV_WILLNEED;
                        madv.retained = 0;

                        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
                        if (!ret && !madv.retained) {
                                panfrost_bo_free(entry);
                                continue;
                        }

                        bo = entry;
                        break;
                }
        }
        pthread_mutex_unlock(&screen->bo_cache_lock);

        return bo;
}

/* Tries to add a BO to the cache. Returns whether it was
 * successful */

static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
        struct panfrost_screen *screen = bo->screen;

        if (bo->flags & PAN_BO_DONT_REUSE)
                return false;

        pthread_mutex_lock(&screen->bo_cache_lock);
        struct list_head *bucket = pan_bucket(screen, bo->size);
        struct drm_panfrost_madvise madv;

        madv.handle = bo->gem_handle;
        madv.madv = PANFROST_MADV_DONTNEED;
        madv.retained = 0;
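
        /* Mark the BO as reclaimable: under memory pressure, the kernel is
         * free to purge its backing pages. The fetch path detects such a
         * purge via the retained field and frees the handle rather than
         * reusing it. */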
        drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

        /* Add us to the bucket */
        list_addtail(&bo->link, bucket);
        pthread_mutex_unlock(&screen->bo_cache_lock);

        return true;
}

/* Evicts all BOs from the cache. Called during context
 * destroy or during low-memory situations (to free up
 * memory that may be unused by us just sitting in our
 * cache, but still reserved from the perspective of the
 * OS) */

void
panfrost_bo_cache_evict_all(
                struct panfrost_screen *screen)
{
        pthread_mutex_lock(&screen->bo_cache_lock);
        for (unsigned i = 0; i < ARRAY_SIZE(screen->bo_cache); ++i) {
                struct list_head *bucket = &screen->bo_cache[i];

                list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
                        list_del(&entry->link);
                        panfrost_bo_free(entry);
                }
        }
        pthread_mutex_unlock(&screen->bo_cache_lock);
}

void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
        struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
        int ret;

        if (bo->cpu)
                return;

        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
                assert(0);
        }
        bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                          bo->screen->fd, mmap_bo.offset);
        if (bo->cpu == MAP_FAILED) {
                fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
                assert(0);
        }

        /* Record the mmap if we're tracing */
        if (pan_debug & PAN_DBG_TRACE)
                pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
}

static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
        if (!bo->cpu)
                return;

        if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
                perror("munmap");
                abort();
        }

        bo->cpu = NULL;
}

struct panfrost_bo *
panfrost_bo_create(struct panfrost_screen *screen, size_t size,
                   uint32_t flags)
{
        struct panfrost_bo *bo;

        /* Kernel will fail (confusingly) with EPERM otherwise */
        assert(size > 0);

        /* To maximize BO cache usage, don't allocate tiny BOs */
        size = MAX2(size, 4096);
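        /* (4096 matches the page size, which is the granularity the kernel
         * allocates at anyway, so nothing is wasted by rounding up here) */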

        /* GROWABLE BOs cannot be mmapped */
        if (flags & PAN_BO_GROWABLE)
                assert(flags & PAN_BO_INVISIBLE);

        /* Before creating a BO, first check the cache; only on a cache miss
         * do we need to allocate a BO fresh from the kernel */
        bo = panfrost_bo_cache_fetch(screen, size, flags);
        if (!bo)
                bo = panfrost_bo_alloc(screen, size, flags);

        if (!bo)
                fprintf(stderr, "BO creation failed\n");

        assert(bo);

        /* Only mmap now if we know we need to. For CPU-invisible buffers, we
         * never map since we don't care about their contents; they're purely
         * for GPU-internal use. But we do trace them anyway. */

        if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
                panfrost_bo_mmap(bo);
        else if (flags & PAN_BO_INVISIBLE) {
                if (pan_debug & PAN_DBG_TRACE)
                        pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
        }

        pipe_reference_init(&bo->reference, 1);
        return bo;
}

void
panfrost_bo_reference(struct panfrost_bo *bo)
{
        if (bo)
                pipe_reference(NULL, &bo->reference);
}

void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
        if (!bo)
                return;
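
        /* pipe_reference() returns true precisely when the count drops to
         * zero, i.e. when we held the last reference */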
        if (!pipe_reference(&bo->reference, NULL))
                return;

        /* When the reference count goes to zero, we need to cleanup */
        panfrost_bo_munmap(bo);

        /* Rather than freeing the BO now, we'll cache the BO for later
         * allocations if we're allowed to */

        if (panfrost_bo_cache_put(bo))
                return;

        panfrost_bo_free(bo);
}

struct panfrost_bo *
panfrost_bo_import(struct panfrost_screen *screen, int fd)
{
        struct panfrost_bo *bo = rzalloc(screen, struct panfrost_bo);
        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
        unsigned gem_handle;
        int ret;

        ret = drmPrimeFDToHandle(screen->fd, fd, &gem_handle);
        assert(!ret);

        get_bo_offset.handle = gem_handle;
        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
        assert(!ret);

        bo->screen = screen;
        bo->gem_handle = gem_handle;
        bo->gpu = (mali_ptr) get_bo_offset.offset;
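
        /* A dma-buf fd reports its size through lseek: seeking to the end
         * returns the buffer size in bytes */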
        bo->size = lseek(fd, 0, SEEK_END);
        bo->flags |= PAN_BO_DONT_REUSE;
        assert(bo->size > 0);
        pipe_reference_init(&bo->reference, 1);

        // TODO map and unmap on demand?
        panfrost_bo_mmap(bo);

        return bo;
}

int
panfrost_bo_export(struct panfrost_bo *bo)
{
        struct drm_prime_handle args = {
                .handle = bo->gem_handle,
                .flags = DRM_CLOEXEC,
        };

        int ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
        if (ret == -1)
                return -1;
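
        /* Once exported, another process may be holding this buffer, so it
         * must never be recycled through the userspace cache (see the
         * PAN_BO_DONT_REUSE check in panfrost_bo_cache_put) */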
        bo->flags |= PAN_BO_DONT_REUSE;
        return args.fd;
}