aca4f8f38483d6843eb86cc0446fee7a6e0f2a12
[mesa.git] / src / panfrost / encoder / pan_bo.c
1 /*
2 * Copyright 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26 #include <errno.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <xf86drm.h>
30 #include <pthread.h>
31 #include "drm-uapi/panfrost_drm.h"
32
33 #include "pan_bo.h"
34
35 #include "os/os_mman.h"
36
37 #include "util/u_inlines.h"
38 #include "util/u_math.h"
39
40 /* This file implements a userspace BO cache. Allocating and freeing
41 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
42 * add more work than we would like at this point. So caching BOs in userspace
43 * solves both of these problems and does not require kernel updates.
44 *
45 * Cached BOs are sorted into a bucket based on rounding their size down to the
46 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
47 * objects. Putting a BO into the cache is accomplished by adding it to the
48 * corresponding bucket. Getting a BO from the cache consists of finding the
49 * appropriate bucket and searching it. A cache eviction is a kernel-level free
50 * of a BO and removing it from the bucket. We special-case evicting all BOs
51 * from the cache, since that's what is helpful in practice and avoids extra
52 * logic around the linked list.
53 */
54
55 static struct panfrost_bo *
56 panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
57 uint32_t flags)
58 {
59 struct drm_panfrost_create_bo create_bo = { .size = size };
60 struct panfrost_bo *bo;
61 int ret;
62
63 if (dev->kernel_version->version_major > 1 ||
64 dev->kernel_version->version_minor >= 1) {
65 if (flags & PAN_BO_GROWABLE)
66 create_bo.flags |= PANFROST_BO_HEAP;
67 if (!(flags & PAN_BO_EXECUTE))
68 create_bo.flags |= PANFROST_BO_NOEXEC;
69 }
70
71 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
72 if (ret) {
73 fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
74 return NULL;
75 }
76
77 bo = rzalloc(dev->memctx, struct panfrost_bo);
78 assert(bo);
79 bo->size = create_bo.size;
80 bo->gpu = create_bo.offset;
81 bo->gem_handle = create_bo.handle;
82 bo->flags = flags;
83 bo->dev = dev;
84 return bo;
85 }
86
87 static void
88 panfrost_bo_free(struct panfrost_bo *bo)
89 {
90 struct drm_gem_close gem_close = { .handle = bo->gem_handle };
91 int ret;
92
93 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
94 if (ret) {
95 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
96 assert(0);
97 }
98
99 ralloc_free(bo);
100 }
101
102 /* Returns true if the BO is ready, false otherwise.
103 * access_type is encoding the type of access one wants to ensure is done.
104 * Say you want to make sure all writers are done writing, you should pass
105 * PAN_BO_ACCESS_WRITE.
106 * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
107 * PAN_BO_ACCESS_READ would work too as waiting for readers implies
108 * waiting for writers as well, but we want to make things explicit and waiting
109 * only for readers is impossible.
110 */
111 bool
112 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
113 uint32_t access_type)
114 {
115 struct drm_panfrost_wait_bo req = {
116 .handle = bo->gem_handle,
117 .timeout_ns = timeout_ns,
118 };
119 int ret;
120
121 assert(access_type == PAN_BO_ACCESS_WRITE ||
122 access_type == PAN_BO_ACCESS_RW);
123
124 /* If the BO has been exported or imported we can't rely on the cached
125 * state, we need to call the WAIT_BO ioctl.
126 */
127 if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
128 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
129 if (!bo->gpu_access)
130 return true;
131
132 /* If the caller only wants to wait for writers and no
133 * writes are pending, we don't have to wait.
134 */
135 if (access_type == PAN_BO_ACCESS_WRITE &&
136 !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
137 return true;
138 }
139
140 /* The ioctl returns >= 0 value when the BO we are waiting for is ready
141 * -1 otherwise.
142 */
143 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
144 if (ret != -1) {
145 /* Set gpu_access to 0 so that the next call to bo_wait()
146 * doesn't have to call the WAIT_BO ioctl.
147 */
148 bo->gpu_access = 0;
149 return true;
150 }
151
152 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
153 * is invalid, which shouldn't happen here.
154 */
155 assert(errno == ETIMEDOUT || errno == EBUSY);
156 return false;
157 }
158
159 /* Helper to calculate the bucket index of a BO */
160
161 static unsigned
162 pan_bucket_index(unsigned size)
163 {
164 /* Round down to POT to compute a bucket index */
165
166 unsigned bucket_index = util_logbase2(size);
167
168 /* Clamp the bucket index; all huge allocations will be
169 * sorted into the largest bucket */
170
171 bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);
172
173 /* The minimum bucket size must equal the minimum allocation
174 * size; the maximum we clamped */
175
176 assert(bucket_index >= MIN_BO_CACHE_BUCKET);
177 assert(bucket_index <= MAX_BO_CACHE_BUCKET);
178
179 /* Reindex from 0 */
180 return (bucket_index - MIN_BO_CACHE_BUCKET);
181 }
182
183 static struct list_head *
184 pan_bucket(struct panfrost_device *dev, unsigned size)
185 {
186 return &dev->bo_cache.buckets[pan_bucket_index(size)];
187 }
188
189 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
190 * BO cache. If it succeeds, it returns that BO and removes the BO from the
191 * cache. If it fails, it returns NULL signaling the caller to allocate a new
192 * BO. */
193
194 static struct panfrost_bo *
195 panfrost_bo_cache_fetch(struct panfrost_device *dev,
196 size_t size, uint32_t flags, bool dontwait)
197 {
198 pthread_mutex_lock(&dev->bo_cache.lock);
199 struct list_head *bucket = pan_bucket(dev, size);
200 struct panfrost_bo *bo = NULL;
201
202 /* Iterate the bucket looking for something suitable */
203 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
204 bucket_link) {
205 if (entry->size < size || entry->flags != flags)
206 continue;
207
208 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
209 PAN_BO_ACCESS_RW))
210 continue;
211
212 struct drm_panfrost_madvise madv = {
213 .handle = entry->gem_handle,
214 .madv = PANFROST_MADV_WILLNEED,
215 };
216 int ret;
217
218 /* This one works, splice it out of the cache */
219 list_del(&entry->bucket_link);
220 list_del(&entry->lru_link);
221
222 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
223 if (!ret && !madv.retained) {
224 panfrost_bo_free(entry);
225 continue;
226 }
227 /* Let's go! */
228 bo = entry;
229 break;
230 }
231 pthread_mutex_unlock(&dev->bo_cache.lock);
232
233 return bo;
234 }
235
236 static void
237 panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
238 {
239 struct timespec time;
240
241 clock_gettime(CLOCK_MONOTONIC, &time);
242 list_for_each_entry_safe(struct panfrost_bo, entry,
243 &dev->bo_cache.lru, lru_link) {
244 /* We want all entries that have been used more than 1 sec
245 * ago to be dropped, others can be kept.
246 * Note the <= 2 check and not <= 1. It's here to account for
247 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
248 * That means we might keep entries that are between 1 and 2
249 * seconds old, but we don't really care, as long as unused BOs
250 * are dropped at some point.
251 */
252 if (time.tv_sec - entry->last_used <= 2)
253 break;
254
255 list_del(&entry->bucket_link);
256 list_del(&entry->lru_link);
257 panfrost_bo_free(entry);
258 }
259 }
260
261 /* Tries to add a BO to the cache. Returns if it was
262 * successful */
263
264 static bool
265 panfrost_bo_cache_put(struct panfrost_bo *bo)
266 {
267 struct panfrost_device *dev = bo->dev;
268
269 if (bo->flags & PAN_BO_DONT_REUSE)
270 return false;
271
272 pthread_mutex_lock(&dev->bo_cache.lock);
273 struct list_head *bucket = pan_bucket(dev, bo->size);
274 struct drm_panfrost_madvise madv;
275 struct timespec time;
276
277 madv.handle = bo->gem_handle;
278 madv.madv = PANFROST_MADV_DONTNEED;
279 madv.retained = 0;
280
281 drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
282
283 /* Add us to the bucket */
284 list_addtail(&bo->bucket_link, bucket);
285
286 /* Add us to the LRU list and update the last_used field. */
287 list_addtail(&bo->lru_link, &dev->bo_cache.lru);
288 clock_gettime(CLOCK_MONOTONIC, &time);
289 bo->last_used = time.tv_sec;
290
291 /* Let's do some cleanup in the BO cache while we hold the
292 * lock.
293 */
294 panfrost_bo_cache_evict_stale_bos(dev);
295 pthread_mutex_unlock(&dev->bo_cache.lock);
296
297 return true;
298 }
299
300 /* Evicts all BOs from the cache. Called during context
301 * destroy or during low-memory situations (to free up
302 * memory that may be unused by us just sitting in our
303 * cache, but still reserved from the perspective of the
304 * OS) */
305
306 void
307 panfrost_bo_cache_evict_all(
308 struct panfrost_device *dev)
309 {
310 pthread_mutex_lock(&dev->bo_cache.lock);
311 for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
312 struct list_head *bucket = &dev->bo_cache.buckets[i];
313
314 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
315 bucket_link) {
316 list_del(&entry->bucket_link);
317 list_del(&entry->lru_link);
318 panfrost_bo_free(entry);
319 }
320 }
321 pthread_mutex_unlock(&dev->bo_cache.lock);
322 }
323
324 void
325 panfrost_bo_mmap(struct panfrost_bo *bo)
326 {
327 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
328 int ret;
329
330 if (bo->cpu)
331 return;
332
333 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
334 if (ret) {
335 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
336 assert(0);
337 }
338
339 bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
340 bo->dev->fd, mmap_bo.offset);
341 if (bo->cpu == MAP_FAILED) {
342 fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
343 assert(0);
344 }
345 }
346
347 static void
348 panfrost_bo_munmap(struct panfrost_bo *bo)
349 {
350 if (!bo->cpu)
351 return;
352
353 if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
354 perror("munmap");
355 abort();
356 }
357
358 bo->cpu = NULL;
359 }
360
361 struct panfrost_bo *
362 panfrost_bo_create(struct panfrost_device *dev, size_t size,
363 uint32_t flags)
364 {
365 struct panfrost_bo *bo;
366
367 /* Kernel will fail (confusingly) with EPERM otherwise */
368 assert(size > 0);
369
370 /* To maximize BO cache usage, don't allocate tiny BOs */
371 size = MAX2(size, 4096);
372
373 /* GROWABLE BOs cannot be mmapped */
374 if (flags & PAN_BO_GROWABLE)
375 assert(flags & PAN_BO_INVISIBLE);
376
377 /* Before creating a BO, we first want to check the cache but without
378 * waiting for BO readiness (BOs in the cache can still be referenced
379 * by jobs that are not finished yet).
380 * If the cached allocation fails we fall back on fresh BO allocation,
381 * and if that fails too, we try one more time to allocate from the
382 * cache, but this time we accept to wait.
383 */
384 bo = panfrost_bo_cache_fetch(dev, size, flags, true);
385 if (!bo)
386 bo = panfrost_bo_alloc(dev, size, flags);
387 if (!bo)
388 bo = panfrost_bo_cache_fetch(dev, size, flags, false);
389
390 if (!bo)
391 fprintf(stderr, "BO creation failed\n");
392
393 assert(bo);
394
395 /* Only mmap now if we know we need to. For CPU-invisible buffers, we
396 * never map since we don't care about their contents; they're purely
397 * for GPU-internal use. But we do trace them anyway. */
398
399 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
400 panfrost_bo_mmap(bo);
401
402 p_atomic_set(&bo->refcnt, 1);
403
404 pthread_mutex_lock(&dev->active_bos_lock);
405 _mesa_set_add(bo->dev->active_bos, bo);
406 pthread_mutex_unlock(&dev->active_bos_lock);
407
408 return bo;
409 }
410
411 void
412 panfrost_bo_reference(struct panfrost_bo *bo)
413 {
414 if (bo) {
415 ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
416 assert(count != 1);
417 }
418 }
419
420 void
421 panfrost_bo_unreference(struct panfrost_bo *bo)
422 {
423 if (!bo)
424 return;
425
426 /* Don't return to cache if there are still references */
427 if (p_atomic_dec_return(&bo->refcnt))
428 return;
429
430 struct panfrost_device *dev = bo->dev;
431
432 pthread_mutex_lock(&dev->active_bos_lock);
433 /* Someone might have imported this BO while we were waiting for the
434 * lock, let's make sure it's still not referenced before freeing it.
435 */
436 if (p_atomic_read(&bo->refcnt) == 0) {
437 _mesa_set_remove_key(bo->dev->active_bos, bo);
438
439 /* When the reference count goes to zero, we need to cleanup */
440 panfrost_bo_munmap(bo);
441
442 /* Rather than freeing the BO now, we'll cache the BO for later
443 * allocations if we're allowed to.
444 */
445 if (!panfrost_bo_cache_put(bo))
446 panfrost_bo_free(bo);
447 }
448 pthread_mutex_unlock(&dev->active_bos_lock);
449 }
450
451 struct panfrost_bo *
452 panfrost_bo_import(struct panfrost_device *dev, int fd)
453 {
454 struct panfrost_bo *bo, *newbo = rzalloc(dev->memctx, struct panfrost_bo);
455 struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
456 struct set_entry *entry;
457 ASSERTED int ret;
458 unsigned gem_handle;
459
460 newbo->dev = dev;
461
462 ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
463 assert(!ret);
464
465 newbo->gem_handle = gem_handle;
466
467 pthread_mutex_lock(&dev->active_bos_lock);
468 entry = _mesa_set_search_or_add(dev->active_bos, newbo);
469 assert(entry);
470 bo = (struct panfrost_bo *)entry->key;
471 if (newbo == bo) {
472 get_bo_offset.handle = gem_handle;
473 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
474 assert(!ret);
475
476 newbo->gpu = (mali_ptr) get_bo_offset.offset;
477 newbo->size = lseek(fd, 0, SEEK_END);
478 newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
479 assert(newbo->size > 0);
480 p_atomic_set(&newbo->refcnt, 1);
481 // TODO map and unmap on demand?
482 panfrost_bo_mmap(newbo);
483 } else {
484 ralloc_free(newbo);
485 /* bo->refcnt != 0 can happen if the BO
486 * was being released but panfrost_bo_import() acquired the
487 * lock before panfrost_bo_unreference(). In that case, refcnt
488 * is 0 and we can't use panfrost_bo_reference() directly, we
489 * have to re-initialize the refcnt().
490 * Note that panfrost_bo_unreference() checks
491 * refcnt value just after acquiring the lock to
492 * make sure the object is not freed if panfrost_bo_import()
493 * acquired it in the meantime.
494 */
495 if (p_atomic_read(&bo->refcnt))
496 p_atomic_set(&newbo->refcnt, 1);
497 else
498 panfrost_bo_reference(bo);
499 assert(bo->cpu);
500 }
501 pthread_mutex_unlock(&dev->active_bos_lock);
502
503 return bo;
504 }
505
506 int
507 panfrost_bo_export(struct panfrost_bo *bo)
508 {
509 struct drm_prime_handle args = {
510 .handle = bo->gem_handle,
511 .flags = DRM_CLOEXEC,
512 };
513
514 int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
515 if (ret == -1)
516 return -1;
517
518 bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;
519 return args.fd;
520 }
521