panfrost: Stop passing screen around for BO operations
[mesa.git] src/gallium/drivers/panfrost/pan_bo.c
/*
 * Copyright 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
#include <stdio.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <pthread.h>
#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_screen.h"
#include "pan_util.h"
#include "pandecode/decode.h"

#include "os/os_mman.h"

#include "util/u_inlines.h"
#include "util/u_math.h"

/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and scanning it for a suitable BO. A cache eviction is a
 * kernel-level free of a BO and removing it from the bucket. We special case
 * evicting all BOs from the cache, since that's what's helpful in practice and
 * avoids extra logic around the linked list.
 */

/* Helper to calculate the bucket index of a BO */

static unsigned
pan_bucket_index(unsigned size)
{
        /* Round down to POT to compute a bucket index */

        unsigned bucket_index = util_logbase2(size);

        /* Clamp the bucket index; all huge allocations will be
         * sorted into the largest bucket */

        bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);

        /* The minimum bucket size must equal the minimum allocation
         * size; the maximum we clamped above */

        assert(bucket_index >= MIN_BO_CACHE_BUCKET);
        assert(bucket_index <= MAX_BO_CACHE_BUCKET);

        /* Reindex from 0 */
        return (bucket_index - MIN_BO_CACHE_BUCKET);
}
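
/* As a worked example: panfrost_bo_create below never allocates less than
 * 4096 bytes, so the smallest bucket holds 4k BOs. Assuming
 * MIN_BO_CACHE_BUCKET therefore corresponds to log2(4096) = 12, a 9000-byte
 * BO has util_logbase2(9000) = 13 and so lands in bucket index 1, alongside
 * every other free BO in the [8 KiB, 16 KiB) range. pan_bucket (below) simply
 * maps that index to the corresponding list head on the screen. */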

static struct list_head *
pan_bucket(struct panfrost_screen *screen, unsigned size)
{
        return &screen->bo_cache[pan_bucket_index(size)];
}

/* Tries to fetch a BO of sufficient size with the appropriate flags from the
 * BO cache. If it succeeds, it returns that BO and removes the BO from the
 * cache. If it fails, it returns NULL signaling the caller to allocate a new
 * BO. */

static struct panfrost_bo *
panfrost_bo_cache_fetch(
        struct panfrost_screen *screen,
        size_t size, uint32_t flags)
{
        pthread_mutex_lock(&screen->bo_cache_lock);
        struct list_head *bucket = pan_bucket(screen, size);
        struct panfrost_bo *bo = NULL;

        /* Iterate the bucket looking for something suitable */
        list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
                if (entry->size >= size &&
                    entry->flags == flags) {
                        int ret;
                        struct drm_panfrost_madvise madv;

                        /* This one works, splice it out of the cache */
                        list_del(&entry->link);

                        madv.handle = entry->gem_handle;
                        madv.madv = PANFROST_MADV_WILLNEED;
                        madv.retained = 0;

                        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
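                        /* The kernel may have reclaimed this BO's pages while
                         * it idled in the cache (we marked it DONTNEED on the
                         * way in). If WILLNEED reports the pages were not
                         * retained, the BO is unusable: free it for real and
                         * keep scanning. */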
                        if (!ret && !madv.retained) {
                                panfrost_bo_release(entry, false);
                                continue;
                        }
                        /* Let's go! */
                        bo = entry;
                        break;
                }
        }
        pthread_mutex_unlock(&screen->bo_cache_lock);

        return bo;
}

/* Tries to add a BO to the cache. Returns whether it was
 * successful */

static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
        struct panfrost_screen *screen = bo->screen;

        pthread_mutex_lock(&screen->bo_cache_lock);
        struct list_head *bucket = pan_bucket(screen, bo->size);
        struct drm_panfrost_madvise madv;

        madv.handle = bo->gem_handle;
        madv.madv = PANFROST_MADV_DONTNEED;
        madv.retained = 0;

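        /* Tell the kernel it may reclaim this BO's backing pages under memory
         * pressure while it sits unused in the cache; panfrost_bo_cache_fetch
         * checks whether that happened before handing the BO back out. */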
        drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

        /* Add us to the bucket */
        list_addtail(&bo->link, bucket);
        pthread_mutex_unlock(&screen->bo_cache_lock);

        return true;
}

/* Evicts all BOs from the cache. Called during context
 * destroy or during low-memory situations (to free up
 * memory that may be unused by us just sitting in our
 * cache, but still reserved from the perspective of the
 * OS) */

void
panfrost_bo_cache_evict_all(
        struct panfrost_screen *screen)
{
        pthread_mutex_lock(&screen->bo_cache_lock);
        for (unsigned i = 0; i < ARRAY_SIZE(screen->bo_cache); ++i) {
                struct list_head *bucket = &screen->bo_cache[i];

                list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
                        list_del(&entry->link);
                        panfrost_bo_release(entry, false);
                }
        }
        pthread_mutex_unlock(&screen->bo_cache_lock);
}

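/* Maps the BO into the CPU's address space so bo->cpu can be read and written
 * directly. A no-op if the BO is already mapped. */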
void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
        struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
        int ret;

        if (bo->cpu)
                return;

        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
                assert(0);
        }

        bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                          bo->screen->fd, mmap_bo.offset);
        if (bo->cpu == MAP_FAILED) {
                fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
                assert(0);
        }

        /* Record the mmap if we're tracing */
        if (pan_debug & PAN_DBG_TRACE)
                pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
}

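/* Undoes panfrost_bo_mmap, releasing the CPU mapping (if any) and clearing
 * bo->cpu. */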
static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
        if (!bo->cpu)
                return;

        if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
                perror("munmap");
                abort();
        }

        bo->cpu = NULL;
}

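/* Allocates a BO of at least the given size, preferring a suitably-sized BO
 * from the cache and falling back to a fresh kernel allocation on a miss.
 * Unless PAN_BO_INVISIBLE or PAN_BO_DELAY_MMAP is set, the BO comes back
 * already mapped. A hypothetical caller wanting a small CPU-visible scratch
 * buffer might do:
 *
 *    struct panfrost_bo *bo = panfrost_bo_create(screen, 4096, 0);
 *    memset(bo->cpu, 0, 4096);
 *    ...
 *    panfrost_bo_unreference(bo);
 */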
struct panfrost_bo *
panfrost_bo_create(struct panfrost_screen *screen, size_t size,
                   uint32_t flags)
{
        struct panfrost_bo *bo;

        /* Kernel will fail (confusingly) with EPERM otherwise */
        assert(size > 0);

        /* To maximize BO cache usage, don't allocate tiny BOs */
        size = MAX2(size, 4096);

        /* GROWABLE BOs cannot be mmapped */
        if (flags & PAN_BO_GROWABLE)
                assert(flags & PAN_BO_INVISIBLE);

        unsigned translated_flags = 0;

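        /* The HEAP and NOEXEC flags are only understood by kernel drivers
         * reporting version 1.1 or later; on older kernels we simply don't
         * request them. */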
        if (screen->kernel_version->version_major > 1 ||
            screen->kernel_version->version_minor >= 1) {
                if (flags & PAN_BO_GROWABLE)
                        translated_flags |= PANFROST_BO_HEAP;
                if (!(flags & PAN_BO_EXECUTE))
                        translated_flags |= PANFROST_BO_NOEXEC;
        }

        struct drm_panfrost_create_bo create_bo = {
                .size = size,
                .flags = translated_flags,
        };

        /* Before creating a BO, we first want to check the cache */

        bo = panfrost_bo_cache_fetch(screen, size, flags);

        if (bo == NULL) {
                /* Otherwise, the cache missed and we need to allocate a BO fresh from
                 * the kernel */

                int ret;

                ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
                if (ret) {
                        fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
                        assert(0);
                }

                /* We have a BO allocated from the kernel; fill in the userspace
                 * version */

                bo = rzalloc(screen, struct panfrost_bo);
                bo->size = create_bo.size;
                bo->gpu = create_bo.offset;
                bo->gem_handle = create_bo.handle;
                bo->flags = flags;
                bo->screen = screen;
        }

        /* Only mmap now if we know we need to. For CPU-invisible buffers, we
         * never map since we don't care about their contents; they're purely
         * for GPU-internal use. But we do trace them anyway. */

        if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
                panfrost_bo_mmap(bo);
        else if (flags & PAN_BO_INVISIBLE) {
                if (pan_debug & PAN_DBG_TRACE)
                        pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
        }

        pipe_reference_init(&bo->reference, 1);
        return bo;
}

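/* Releases a BO: it is unmapped and then either stashed in the BO cache for
 * later reuse (when cacheable) or, failing that, its GEM handle is closed and
 * the memory returned to the kernel. */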
void
panfrost_bo_release(struct panfrost_bo *bo, bool cacheable)
{
        if (!bo)
                return;

        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
        int ret;

        /* Rather than freeing the BO now, we'll cache the BO for later
         * allocations if we're allowed to */

        panfrost_bo_munmap(bo);

        if (cacheable) {
                bool cached = panfrost_bo_cache_put(bo);

                if (cached)
                        return;
        }

        /* Otherwise, if the BO wasn't cached, we'll legitimately free the BO */

        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
                assert(0);
        }

        ralloc_free(bo);
}

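/* Standard reference counting: panfrost_bo_reference takes a reference and
 * panfrost_bo_unreference drops one, releasing the BO (back to the cache)
 * when the last reference goes away. Both tolerate a NULL BO. */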
void
panfrost_bo_reference(struct panfrost_bo *bo)
{
        if (bo)
                pipe_reference(NULL, &bo->reference);
}

void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
        if (!bo)
                return;

        /* When the reference count goes to zero, we need to clean up */

        if (pipe_reference(&bo->reference, NULL))
                panfrost_bo_release(bo, true);
}

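/* Imports a BO shared by another process/driver via a dma-buf (PRIME) file
 * descriptor: resolves the fd to a GEM handle, asks the kernel for the BO's
 * GPU address, derives its size from lseek, and maps it for CPU access. */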
struct panfrost_bo *
panfrost_bo_import(struct panfrost_screen *screen, int fd)
{
        struct panfrost_bo *bo = rzalloc(screen, struct panfrost_bo);
        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
        ASSERTED int ret;
        unsigned gem_handle;

        ret = drmPrimeFDToHandle(screen->fd, fd, &gem_handle);
        assert(!ret);

        get_bo_offset.handle = gem_handle;
        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
        assert(!ret);

        bo->screen = screen;
        bo->gem_handle = gem_handle;
        bo->gpu = (mali_ptr) get_bo_offset.offset;
        bo->size = lseek(fd, 0, SEEK_END);
        assert(bo->size > 0);
        pipe_reference_init(&bo->reference, 1);

        // TODO map and unmap on demand?
        panfrost_bo_mmap(bo);
        return bo;
}

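/* Exports the BO as a dma-buf (PRIME) file descriptor that another
 * process/driver can import. Returns the fd, or -1 on failure. */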
int
panfrost_bo_export(struct panfrost_bo *bo)
{
        struct drm_prime_handle args = {
                .handle = bo->gem_handle,
                .flags = DRM_CLOEXEC,
        };

        int ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
        if (ret == -1)
                return -1;

        return args.fd;
}