panfrost: Fix write to free'd memory
[mesa.git] / src / panfrost / encoder / pan_bo.c
1 /*
2 * Copyright 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25 */
26 #include <errno.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <xf86drm.h>
30 #include <pthread.h>
31 #include "drm-uapi/panfrost_drm.h"
32
33 #include "pan_bo.h"
34 #include "pan_util.h"
35 #include "../pandecode/public.h"
36
37 #include "os/os_mman.h"
38
39 #include "util/u_inlines.h"
40 #include "util/u_math.h"
41
42 /* This file implements a userspace BO cache. Allocating and freeing
43 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
44 * add more work than we would like at this point. So caching BOs in userspace
45 * solves both of these problems and does not require kernel updates.
46 *
47 * Cached BOs are sorted into a bucket based on rounding their size down to the
48 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
49 * objects. Putting a BO into the cache is accomplished by adding it to the
50 * corresponding bucket. Getting a BO from the cache consists of finding the
51 * appropriate bucket and searching it. A cache eviction is a kernel-level free of a
52 * BO and removing it from the bucket. We special case evicting all BOs from
53 * the cache, since that's what is helpful in practice and avoids extra logic
54 * around the linked list.
55 */
56
57 static struct panfrost_bo *
58 panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
59 uint32_t flags)
60 {
61 struct drm_panfrost_create_bo create_bo = { .size = size };
62 struct panfrost_bo *bo;
63 int ret;
64
65 if (dev->kernel_version->version_major > 1 ||
66 dev->kernel_version->version_minor >= 1) {
67 if (flags & PAN_BO_GROWABLE)
68 create_bo.flags |= PANFROST_BO_HEAP;
69 if (!(flags & PAN_BO_EXECUTE))
70 create_bo.flags |= PANFROST_BO_NOEXEC;
71 }
72
73 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
74 if (ret) {
75 fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
76 return NULL;
77 }
78
79 bo = rzalloc(dev->memctx, struct panfrost_bo);
80 assert(bo);
81 bo->size = create_bo.size;
82 bo->gpu = create_bo.offset;
83 bo->gem_handle = create_bo.handle;
84 bo->flags = flags;
85 bo->dev = dev;
86 return bo;
87 }
88
89 static void
90 panfrost_bo_free(struct panfrost_bo *bo)
91 {
92 struct drm_gem_close gem_close = { .handle = bo->gem_handle };
93 int ret;
94
95 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
96 if (ret) {
97 fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
98 assert(0);
99 }
100
101 ralloc_free(bo);
102 }
103
104 /* Returns true if the BO is ready, false otherwise.
105 * access_type is encoding the type of access one wants to ensure is done.
106 * Say you want to make sure all writers are done writing, you should pass
107 * PAN_BO_ACCESS_WRITE.
108 * If you want to wait for all users, you should pass PAN_BO_ACCESS_RW.
109 * PAN_BO_ACCESS_READ would work too as waiting for readers implies
110 * waiting for writers as well, but we want to make things explicit and waiting
111 * only for readers is impossible.
112 */
113 bool
114 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
115 uint32_t access_type)
116 {
117 struct drm_panfrost_wait_bo req = {
118 .handle = bo->gem_handle,
119 .timeout_ns = timeout_ns,
120 };
121 int ret;
122
123 assert(access_type == PAN_BO_ACCESS_WRITE ||
124 access_type == PAN_BO_ACCESS_RW);
125
126 /* If the BO has been exported or imported we can't rely on the cached
127 * state, we need to call the WAIT_BO ioctl.
128 */
129 if (!(bo->flags & (PAN_BO_IMPORTED | PAN_BO_EXPORTED))) {
130 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
131 if (!bo->gpu_access)
132 return true;
133
134 /* If the caller only wants to wait for writers and no
135 * writes are pending, we don't have to wait.
136 */
137 if (access_type == PAN_BO_ACCESS_WRITE &&
138 !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
139 return true;
140 }
141
142 /* The ioctl returns >= 0 value when the BO we are waiting for is ready
143 * -1 otherwise.
144 */
145 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
146 if (ret != -1) {
147 /* Set gpu_access to 0 so that the next call to bo_wait()
148 * doesn't have to call the WAIT_BO ioctl.
149 */
150 bo->gpu_access = 0;
151 return true;
152 }
153
154 /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
155 * is invalid, which shouldn't happen here.
156 */
157 assert(errno == ETIMEDOUT || errno == EBUSY);
158 return false;
159 }
160
161 /* Helper to calculate the bucket index of a BO */
162
163 static unsigned
164 pan_bucket_index(unsigned size)
165 {
166 /* Round down to POT to compute a bucket index */
167
168 unsigned bucket_index = util_logbase2(size);
169
170 /* Clamp the bucket index; all huge allocations will be
171 * sorted into the largest bucket */
172
173 bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);
174
175 /* The minimum bucket size must equal the minimum allocation
176 * size; the maximum we clamped */
177
178 assert(bucket_index >= MIN_BO_CACHE_BUCKET);
179 assert(bucket_index <= MAX_BO_CACHE_BUCKET);
180
181 /* Reindex from 0 */
182 return (bucket_index - MIN_BO_CACHE_BUCKET);
183 }
184
185 static struct list_head *
186 pan_bucket(struct panfrost_device *dev, unsigned size)
187 {
188 return &dev->bo_cache.buckets[pan_bucket_index(size)];
189 }
190
191 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
192 * BO cache. If it succeeds, it returns that BO and removes the BO from the
193 * cache. If it fails, it returns NULL signaling the caller to allocate a new
194 * BO. */
195
196 static struct panfrost_bo *
197 panfrost_bo_cache_fetch(struct panfrost_device *dev,
198 size_t size, uint32_t flags, bool dontwait)
199 {
200 pthread_mutex_lock(&dev->bo_cache.lock);
201 struct list_head *bucket = pan_bucket(dev, size);
202 struct panfrost_bo *bo = NULL;
203
204 /* Iterate the bucket looking for something suitable */
205 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
206 bucket_link) {
207 if (entry->size < size || entry->flags != flags)
208 continue;
209
210 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
211 PAN_BO_ACCESS_RW))
212 continue;
213
214 struct drm_panfrost_madvise madv = {
215 .handle = entry->gem_handle,
216 .madv = PANFROST_MADV_WILLNEED,
217 };
218 int ret;
219
220 /* This one works, splice it out of the cache */
221 list_del(&entry->bucket_link);
222 list_del(&entry->lru_link);
223
224 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
225 if (!ret && !madv.retained) {
226 panfrost_bo_free(entry);
227 continue;
228 }
229 /* Let's go! */
230 bo = entry;
231 break;
232 }
233 pthread_mutex_unlock(&dev->bo_cache.lock);
234
235 return bo;
236 }
237
238 static void
239 panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
240 {
241 struct timespec time;
242
243 clock_gettime(CLOCK_MONOTONIC, &time);
244 list_for_each_entry_safe(struct panfrost_bo, entry,
245 &dev->bo_cache.lru, lru_link) {
246 /* We want all entries that have been used more than 1 sec
247 * ago to be dropped, others can be kept.
248 * Note the <= 2 check and not <= 1. It's here to account for
249 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
250 * That means we might keep entries that are between 1 and 2
251 * seconds old, but we don't really care, as long as unused BOs
252 * are dropped at some point.
253 */
254 if (time.tv_sec - entry->last_used <= 2)
255 break;
256
257 list_del(&entry->bucket_link);
258 list_del(&entry->lru_link);
259 panfrost_bo_free(entry);
260 }
261 }
262
263 /* Tries to add a BO to the cache. Returns if it was
264 * successful */
265
266 static bool
267 panfrost_bo_cache_put(struct panfrost_bo *bo)
268 {
269 struct panfrost_device *dev = bo->dev;
270
271 if (bo->flags & PAN_BO_DONT_REUSE)
272 return false;
273
274 pthread_mutex_lock(&dev->bo_cache.lock);
275 struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
276 struct drm_panfrost_madvise madv;
277 struct timespec time;
278
279 madv.handle = bo->gem_handle;
280 madv.madv = PANFROST_MADV_DONTNEED;
281 madv.retained = 0;
282
283 drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
284
285 /* Add us to the bucket */
286 list_addtail(&bo->bucket_link, bucket);
287
288 /* Add us to the LRU list and update the last_used field. */
289 list_addtail(&bo->lru_link, &dev->bo_cache.lru);
290 clock_gettime(CLOCK_MONOTONIC, &time);
291 bo->last_used = time.tv_sec;
292
293 /* Let's do some cleanup in the BO cache while we hold the
294 * lock.
295 */
296 panfrost_bo_cache_evict_stale_bos(dev);
297 pthread_mutex_unlock(&dev->bo_cache.lock);
298
299 return true;
300 }
301
302 /* Evicts all BOs from the cache. Called during context
303 * destroy or during low-memory situations (to free up
304 * memory that may be unused by us just sitting in our
305 * cache, but still reserved from the perspective of the
306 * OS) */
307
308 void
309 panfrost_bo_cache_evict_all(
310 struct panfrost_device *dev)
311 {
312 pthread_mutex_lock(&dev->bo_cache.lock);
313 for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
314 struct list_head *bucket = &dev->bo_cache.buckets[i];
315
316 list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
317 bucket_link) {
318 list_del(&entry->bucket_link);
319 list_del(&entry->lru_link);
320 panfrost_bo_free(entry);
321 }
322 }
323 pthread_mutex_unlock(&dev->bo_cache.lock);
324 }
325
326 void
327 panfrost_bo_mmap(struct panfrost_bo *bo)
328 {
329 struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
330 int ret;
331
332 if (bo->cpu)
333 return;
334
335 ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
336 if (ret) {
337 fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
338 assert(0);
339 }
340
341 bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
342 bo->dev->fd, mmap_bo.offset);
343 if (bo->cpu == MAP_FAILED) {
344 fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
345 assert(0);
346 }
347 }
348
349 static void
350 panfrost_bo_munmap(struct panfrost_bo *bo)
351 {
352 if (!bo->cpu)
353 return;
354
355 if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
356 perror("munmap");
357 abort();
358 }
359
360 bo->cpu = NULL;
361 }
362
363 struct panfrost_bo *
364 panfrost_bo_create(struct panfrost_device *dev, size_t size,
365 uint32_t flags)
366 {
367 struct panfrost_bo *bo;
368
369 /* Kernel will fail (confusingly) with EPERM otherwise */
370 assert(size > 0);
371
372 /* To maximize BO cache usage, don't allocate tiny BOs */
373 size = MAX2(size, 4096);
374
375 /* GROWABLE BOs cannot be mmapped */
376 if (flags & PAN_BO_GROWABLE)
377 assert(flags & PAN_BO_INVISIBLE);
378
379 /* Before creating a BO, we first want to check the cache but without
380 * waiting for BO readiness (BOs in the cache can still be referenced
381 * by jobs that are not finished yet).
382 * If the cached allocation fails we fall back on fresh BO allocation,
383 * and if that fails too, we try one more time to allocate from the
384 * cache, but this time we accept to wait.
385 */
386 bo = panfrost_bo_cache_fetch(dev, size, flags, true);
387 if (!bo)
388 bo = panfrost_bo_alloc(dev, size, flags);
389 if (!bo)
390 bo = panfrost_bo_cache_fetch(dev, size, flags, false);
391
392 if (!bo)
393 fprintf(stderr, "BO creation failed\n");
394
395 assert(bo);
396
397 /* Only mmap now if we know we need to. For CPU-invisible buffers, we
398 * never map since we don't care about their contents; they're purely
399 * for GPU-internal use. But we do trace them anyway. */
400
401 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
402 panfrost_bo_mmap(bo);
403
404 p_atomic_set(&bo->refcnt, 1);
405
406 pthread_mutex_lock(&dev->active_bos_lock);
407 _mesa_set_add(bo->dev->active_bos, bo);
408 pthread_mutex_unlock(&dev->active_bos_lock);
409
410 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
411 if (flags & PAN_BO_INVISIBLE)
412 pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
413 else if (!(flags & PAN_BO_DELAY_MMAP))
414 pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
415 }
416
417 return bo;
418 }
419
420 void
421 panfrost_bo_reference(struct panfrost_bo *bo)
422 {
423 if (bo) {
424 ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
425 assert(count != 1);
426 }
427 }
428
429 void
430 panfrost_bo_unreference(struct panfrost_bo *bo)
431 {
432 if (!bo)
433 return;
434
435 /* Don't return to cache if there are still references */
436 if (p_atomic_dec_return(&bo->refcnt))
437 return;
438
439 struct panfrost_device *dev = bo->dev;
440
441 pthread_mutex_lock(&dev->active_bos_lock);
442 /* Someone might have imported this BO while we were waiting for the
443 * lock, let's make sure it's still not referenced before freeing it.
444 */
445 if (p_atomic_read(&bo->refcnt) == 0) {
446 _mesa_set_remove_key(bo->dev->active_bos, bo);
447
448 /* When the reference count goes to zero, we need to cleanup */
449 panfrost_bo_munmap(bo);
450
451 /* Rather than freeing the BO now, we'll cache the BO for later
452 * allocations if we're allowed to.
453 */
454 if (!panfrost_bo_cache_put(bo))
455 panfrost_bo_free(bo);
456 }
457 pthread_mutex_unlock(&dev->active_bos_lock);
458 }
459
460 struct panfrost_bo *
461 panfrost_bo_import(struct panfrost_device *dev, int fd)
462 {
463 struct panfrost_bo *bo, *newbo = rzalloc(dev->memctx, struct panfrost_bo);
464 struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
465 struct set_entry *entry;
466 ASSERTED int ret;
467 unsigned gem_handle;
468
469 newbo->dev = dev;
470
471 ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
472 assert(!ret);
473
474 newbo->gem_handle = gem_handle;
475
476 pthread_mutex_lock(&dev->active_bos_lock);
477 entry = _mesa_set_search_or_add(dev->active_bos, newbo);
478 assert(entry);
479 bo = (struct panfrost_bo *)entry->key;
480 if (newbo == bo) {
481 get_bo_offset.handle = gem_handle;
482 ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
483 assert(!ret);
484
485 newbo->gpu = (mali_ptr) get_bo_offset.offset;
486 newbo->size = lseek(fd, 0, SEEK_END);
487 newbo->flags |= PAN_BO_DONT_REUSE | PAN_BO_IMPORTED;
488 assert(newbo->size > 0);
489 p_atomic_set(&newbo->refcnt, 1);
490 // TODO map and unmap on demand?
491 panfrost_bo_mmap(newbo);
492 } else {
493 ralloc_free(newbo);
494 /* bo->refcnt == 0 can happen if the BO
495 * was being released but panfrost_bo_import() acquired the
496 * lock before panfrost_bo_unreference(). In that case, refcnt
497 * is 0 and we can't use panfrost_bo_reference() directly, we
498 * have to re-initialize the refcnt().
499 * Note that panfrost_bo_unreference() checks
500 * refcnt value just after acquiring the lock to
501 * make sure the object is not freed if panfrost_bo_import()
502 * acquired it in the meantime.
503 */
504 if (p_atomic_read(&bo->refcnt) == 0)
505 p_atomic_set(&bo->refcnt, 1);
506 else
507 panfrost_bo_reference(bo);
508 assert(bo->cpu);
509 }
510 pthread_mutex_unlock(&dev->active_bos_lock);
511
512 return bo;
513 }
514
515 int
516 panfrost_bo_export(struct panfrost_bo *bo)
517 {
518 struct drm_prime_handle args = {
519 .handle = bo->gem_handle,
520 .flags = DRM_CLOEXEC,
521 };
522
523 int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
524 if (ret == -1)
525 return -1;
526
527 bo->flags |= PAN_BO_DONT_REUSE | PAN_BO_EXPORTED;
528 return args.fd;
529 }
530