panfrost: Add the shader BO to the batch in patch_shader_state()
[mesa.git] / src/gallium/drivers/panfrost/pan_bo.c
/*
 * Copyright 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
#include <stdio.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <pthread.h>
#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_screen.h"
#include "pan_util.h"
#include "pandecode/decode.h"

#include "os/os_mman.h"

#include "util/u_inlines.h"
#include "util/u_math.h"

/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel round-trips
 * add more overhead than we would like at this point. Caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and scanning it for a suitable entry. A cache eviction is
 * a kernel-level free of a BO and its removal from the bucket. We special-case
 * evicting all BOs from the cache, since that's what's helpful in practice and
 * avoids extra logic around the linked list.
 */
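
/* Rough lifecycle of a BO through this file (an overview of the functions
 * defined below, not additional machinery):
 *
 *    panfrost_bo_create()       -> panfrost_bo_cache_fetch(), or on a cache
 *                                  miss, panfrost_bo_alloc()
 *    panfrost_bo_unreference()  -> once the last reference is dropped,
 *                                  panfrost_bo_cache_put() (the BO is marked
 *                                  PANFROST_MADV_DONTNEED so the kernel may
 *                                  reclaim it under memory pressure), or
 *                                  panfrost_bo_free() if it cannot be reused
 */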

static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_screen *screen, size_t size,
                  uint32_t flags)
{
        struct drm_panfrost_create_bo create_bo = { .size = size };
        struct panfrost_bo *bo;
        int ret;

        if (screen->kernel_version->version_major > 1 ||
            screen->kernel_version->version_minor >= 1) {
                if (flags & PAN_BO_GROWABLE)
                        create_bo.flags |= PANFROST_BO_HEAP;
                if (!(flags & PAN_BO_EXECUTE))
                        create_bo.flags |= PANFROST_BO_NOEXEC;
        }

        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
                return NULL;
        }

        bo = rzalloc(screen, struct panfrost_bo);
        assert(bo);
        bo->size = create_bo.size;
        bo->gpu = create_bo.offset;
        bo->gem_handle = create_bo.handle;
        bo->flags = flags;
        bo->screen = screen;
        return bo;
}

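/* Kernel-level free: closes the GEM handle, letting the kernel release the
 * backing memory, and frees the CPU-side bookkeeping. Any CPU mapping is
 * expected to have been torn down already via panfrost_bo_munmap(). */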
static void
panfrost_bo_free(struct panfrost_bo *bo)
{
        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
        int ret;

        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
                assert(0);
        }

        ralloc_free(bo);
}

/* Helper to calculate the bucket index of a BO */
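/* Worked example: util_logbase2(12288) == 13, so a 12 KiB BO files into the
 * bucket covering sizes in [8 KiB, 16 KiB), at index (13 - MIN_BO_CACHE_BUCKET)
 * once reindexed from zero. */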

static unsigned
pan_bucket_index(unsigned size)
{
        /* Round down to POT to compute a bucket index */

        unsigned bucket_index = util_logbase2(size);

        /* Clamp the bucket index; all huge allocations will be
         * sorted into the largest bucket */

        bucket_index = MIN2(bucket_index, MAX_BO_CACHE_BUCKET);

        /* The minimum bucket size must equal the minimum allocation
         * size; the maximum was clamped just above */

        assert(bucket_index >= MIN_BO_CACHE_BUCKET);
        assert(bucket_index <= MAX_BO_CACHE_BUCKET);

        /* Reindex from 0 */
        return (bucket_index - MIN_BO_CACHE_BUCKET);
}

static struct list_head *
pan_bucket(struct panfrost_screen *screen, unsigned size)
{
        return &screen->bo_cache[pan_bucket_index(size)];
}

/* Tries to fetch a BO of sufficient size with the appropriate flags from the
 * BO cache. If it succeeds, it returns that BO and removes the BO from the
 * cache. If it fails, it returns NULL signaling the caller to allocate a new
 * BO. */

static struct panfrost_bo *
panfrost_bo_cache_fetch(
                struct panfrost_screen *screen,
                size_t size, uint32_t flags)
{
        pthread_mutex_lock(&screen->bo_cache_lock);
        struct list_head *bucket = pan_bucket(screen, size);
        struct panfrost_bo *bo = NULL;

        /* Iterate the bucket looking for something suitable */
        list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
                if (entry->size >= size &&
                    entry->flags == flags) {
                        int ret;
                        struct drm_panfrost_madvise madv;

                        /* This one works, splice it out of the cache */
                        list_del(&entry->link);

                        madv.handle = entry->gem_handle;
                        madv.madv = PANFROST_MADV_WILLNEED;
                        madv.retained = 0;

                        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
                        if (!ret && !madv.retained) {
                                panfrost_bo_free(entry);
                                continue;
                        }
                        /* Let's go! */
                        bo = entry;
                        break;
                }
        }
        pthread_mutex_unlock(&screen->bo_cache_lock);

        return bo;
}

/* Tries to add a BO to the cache. Returns true if the BO was added to the
 * cache and false otherwise (BOs flagged PAN_BO_DONT_REUSE are never cached). */

static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
        struct panfrost_screen *screen = bo->screen;

        if (bo->flags & PAN_BO_DONT_REUSE)
                return false;

        pthread_mutex_lock(&screen->bo_cache_lock);
        struct list_head *bucket = pan_bucket(screen, bo->size);
        struct drm_panfrost_madvise madv;

        madv.handle = bo->gem_handle;
        madv.madv = PANFROST_MADV_DONTNEED;
        madv.retained = 0;

        drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

        /* Add us to the bucket */
        list_addtail(&bo->link, bucket);
        pthread_mutex_unlock(&screen->bo_cache_lock);

        return true;
}

/* Evicts all BOs from the cache. Called during context destroy or during
 * low-memory situations: memory sitting in our cache is unused by us but,
 * from the perspective of the OS, still reserved. */

void
panfrost_bo_cache_evict_all(
                struct panfrost_screen *screen)
{
        pthread_mutex_lock(&screen->bo_cache_lock);
        for (unsigned i = 0; i < ARRAY_SIZE(screen->bo_cache); ++i) {
                struct list_head *bucket = &screen->bo_cache[i];

                list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
                        list_del(&entry->link);
                        panfrost_bo_free(entry);
                }
        }
        pthread_mutex_unlock(&screen->bo_cache_lock);
}

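/* Maps the BO into the CPU's address space. A no-op if the BO is already
 * mapped (bo->cpu set). When PAN_DBG_TRACE is enabled, the new mapping is
 * also reported to pandecode. */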
void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
        struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
        int ret;

        if (bo->cpu)
                return;

        ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
        if (ret) {
                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
                assert(0);
        }

        bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                          bo->screen->fd, mmap_bo.offset);
        if (bo->cpu == MAP_FAILED) {
                fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
                assert(0);
        }

        /* Record the mmap if we're tracing */
        if (pan_debug & PAN_DBG_TRACE)
                pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
}

static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
        if (!bo->cpu)
                return;

        if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
                perror("munmap");
                abort();
        }

        bo->cpu = NULL;
}

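/* Allocates (or recycles) a BO of at least the given size. The cache is
 * consulted first; only on a miss do we go to the kernel. The caller owns the
 * returned reference. Illustrative usage (names are hypothetical):
 *
 *    struct panfrost_bo *bo = panfrost_bo_create(screen, bytes, 0);
 *    memcpy(bo->cpu, data, bytes);   // mapped, since PAN_BO_INVISIBLE unset
 *    ... emit bo->gpu into GPU descriptors ...
 *    panfrost_bo_unreference(bo);    // recycled via the cache if possible
 */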
struct panfrost_bo *
panfrost_bo_create(struct panfrost_screen *screen, size_t size,
                   uint32_t flags)
{
        struct panfrost_bo *bo;

        /* Kernel will fail (confusingly) with EPERM otherwise */
        assert(size > 0);

        /* To maximize BO cache usage, don't allocate tiny BOs */
        size = MAX2(size, 4096);

        /* GROWABLE BOs cannot be mmapped */
        if (flags & PAN_BO_GROWABLE)
                assert(flags & PAN_BO_INVISIBLE);

        /* Check the cache first; only on a cache miss do we need to allocate
         * a BO fresh from the kernel */
        bo = panfrost_bo_cache_fetch(screen, size, flags);
        if (!bo)
                bo = panfrost_bo_alloc(screen, size, flags);

        if (!bo)
                fprintf(stderr, "BO creation failed\n");

        assert(bo);

        /* Only mmap now if we know we need to. For CPU-invisible buffers, we
         * never map since we don't care about their contents; they're purely
         * for GPU-internal use. But we do trace them anyway. */

        if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
                panfrost_bo_mmap(bo);
        else if (flags & PAN_BO_INVISIBLE) {
                if (pan_debug & PAN_DBG_TRACE)
                        pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
        }

        pipe_reference_init(&bo->reference, 1);
        return bo;
}

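/* Reference counting: panfrost_bo_create() and panfrost_bo_import() hand back
 * an initial reference; further users take one with panfrost_bo_reference()
 * and release it with panfrost_bo_unreference(). */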
void
panfrost_bo_reference(struct panfrost_bo *bo)
{
        if (bo)
                pipe_reference(NULL, &bo->reference);
}

void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
        if (!bo)
                return;

        if (!pipe_reference(&bo->reference, NULL))
                return;

        /* When the reference count goes to zero, we need to clean up */
        panfrost_bo_munmap(bo);

        /* Rather than freeing the BO now, we'll cache the BO for later
         * allocations if we're allowed to.
         */
        if (panfrost_bo_cache_put(bo))
                return;

        panfrost_bo_free(bo);
}

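/* Imports a BO from a dma-buf file descriptor. Imported BOs are flagged
 * PAN_BO_DONT_REUSE so they never pass through the BO cache: the backing
 * memory is shared with other processes or devices rather than owned
 * exclusively by us. */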
struct panfrost_bo *
panfrost_bo_import(struct panfrost_screen *screen, int fd)
{
        struct panfrost_bo *bo = rzalloc(screen, struct panfrost_bo);
        struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
        ASSERTED int ret;
        unsigned gem_handle;

        ret = drmPrimeFDToHandle(screen->fd, fd, &gem_handle);
        assert(!ret);

        get_bo_offset.handle = gem_handle;
        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
        assert(!ret);

        bo->screen = screen;
        bo->gem_handle = gem_handle;
        bo->gpu = (mali_ptr) get_bo_offset.offset;
        bo->size = lseek(fd, 0, SEEK_END);
        bo->flags |= PAN_BO_DONT_REUSE;
        assert(bo->size > 0);
        pipe_reference_init(&bo->reference, 1);

        // TODO map and unmap on demand?
        panfrost_bo_mmap(bo);
        return bo;
}

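/* Exports a BO as a dma-buf file descriptor. Like imports, an exported BO is
 * flagged PAN_BO_DONT_REUSE so it bypasses the cache once shared. Returns the
 * fd, or -1 on failure. */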
int
panfrost_bo_export(struct panfrost_bo *bo)
{
        struct drm_prime_handle args = {
                .handle = bo->gem_handle,
                .flags = DRM_CLOEXEC,
        };

        int ret = drmIoctl(bo->screen->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
        if (ret == -1)
                return -1;

        bo->flags |= PAN_BO_DONT_REUSE;
        return args.fd;
}
