1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "common/gen_debug.h"
60 #include "common/gen_device_info.h"
61 #include "libdrm_macros.h"
62 #include "main/macros.h"
63 #include "util/macros.h"
64 #include "util/list.h"
65 #include "brw_bufmgr.h"
66 #include "string.h"
67
68 #include "i915_drm.h"
69 #include "uthash.h"
70
71 #ifdef HAVE_VALGRIND
72 #include <valgrind.h>
73 #include <memcheck.h>
74 #define VG(x) x
75 #else
76 #define VG(x)
77 #endif
78
79 #define memclear(s) memset(&s, 0, sizeof(s))
80
81 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
82
83 static inline int
84 atomic_add_unless(int *v, int add, int unless)
85 {
86 int c, old;
87 c = p_atomic_read(v);
88 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
89 c = old;
90 return c == unless;
91 }
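/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * atomic_add_unless() adds `add` unless the counter already equals
 * `unless`, and returns nonzero only in that case, leaving the value
 * untouched.  drm_bacon_bo_unreference() below relies on this to take the
 * bufmgr lock only when the final reference is about to be dropped.  The
 * helper name is hypothetical.
 */
#if 0
static void
example_drop_reference(drm_bacon_bufmgr *bufmgr, int *refcount)
{
	/* Fast path: the count was > 1, so it has already been decremented. */
	if (!atomic_add_unless(refcount, -1, 1))
		return;

	/* Slow path: the count was exactly 1; take the lock and do the final
	 * decrement there so teardown is serialized against the BO cache.
	 */
	pthread_mutex_lock(&bufmgr->lock);
	if (p_atomic_dec_zero(refcount)) {
		/* ... free or cache the object ... */
	}
	pthread_mutex_unlock(&bufmgr->lock);
}
#endif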
92
93 /**
94 * upper_32_bits - return bits 32-63 of a number
95 * @n: the number we're accessing
96 *
97 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
98 * the "right shift count >= width of type" warning when that quantity is
99 * 32-bits.
100 */
101 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
102
103 /**
104 * lower_32_bits - return bits 0-31 of a number
105 * @n: the number we're accessing
106 */
107 #define lower_32_bits(n) ((__u32)(n))
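/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * the two macros split a 64-bit GPU offset into halves for the debug
 * output used throughout this file, without tripping the shift-width
 * warning on 32-bit builds.  The helper name is hypothetical.
 */
#if 0
static void
example_print_offset(uint64_t offset64)
{
	printf("offset 0x%08x %08x\n",
	       upper_32_bits(offset64), lower_32_bits(offset64));
}
#endif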
108
109 struct _drm_bacon_context {
110 unsigned int ctx_id;
111 struct _drm_bacon_bufmgr *bufmgr;
112 };
113
114 typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
115
116 struct drm_bacon_gem_bo_bucket {
117 struct list_head head;
118 unsigned long size;
119 };
120
121 typedef struct _drm_bacon_bufmgr {
122 int refcount;
123
124 int fd;
125
126 int max_relocs;
127
128 pthread_mutex_t lock;
129
130 struct drm_i915_gem_exec_object2 *exec2_objects;
131 drm_bacon_bo **exec_bos;
132 int exec_size;
133 int exec_count;
134
135 /** Array of lists of cached gem objects of power-of-two sizes */
136 struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
137 int num_buckets;
138 time_t time;
139
140 struct list_head managers;
141
142 drm_bacon_bo_gem *name_table;
143 drm_bacon_bo_gem *handle_table;
144
145 struct list_head vma_cache;
146 int vma_count, vma_open, vma_max;
147
148 uint64_t gtt_size;
149 unsigned int has_llc : 1;
150 unsigned int bo_reuse : 1;
151 unsigned int no_exec : 1;
152 unsigned int has_exec_async : 1;
153
154 struct {
155 void *ptr;
156 uint32_t handle;
157 } userptr_active;
158
159 } drm_bacon_bufmgr;
160
161 typedef struct _drm_bacon_reloc_target_info {
162 drm_bacon_bo *bo;
163 } drm_bacon_reloc_target;
164
165 struct _drm_bacon_bo_gem {
166 drm_bacon_bo bo;
167
168 int refcount;
169 uint32_t gem_handle;
170 const char *name;
171
172 /**
173 * Kernel-assigned global name for this object
174 *
175 * List contains both flink-named and prime-fd'd objects
176 */
177 unsigned int global_name;
178
179 UT_hash_handle handle_hh;
180 UT_hash_handle name_hh;
181
182 /**
183 * Index of the buffer within the validation list while preparing a
184 * batchbuffer execution.
185 */
186 int validate_index;
187
188 /**
189 * Current tiling mode
190 */
191 uint32_t tiling_mode;
192 uint32_t swizzle_mode;
193 unsigned long stride;
194
195 unsigned long kflags;
196
197 time_t free_time;
198
199 /** Array passed to the DRM containing relocation information. */
200 struct drm_i915_gem_relocation_entry *relocs;
201 /**
202 * Array of info structs corresponding to relocs[i].target_handle etc
203 */
204 drm_bacon_reloc_target *reloc_target_info;
205 /** Number of entries in relocs */
206 int reloc_count;
207 /** Array of BOs that are referenced by this buffer and will be softpinned */
208 drm_bacon_bo **softpin_target;
209 /** Number of softpinned BOs that are referenced by this buffer */
210 int softpin_target_count;
211 /** Allocated capacity of the softpin_target array */
212 int softpin_target_size;
213
214 /** Mapped address for the buffer, saved across map/unmap cycles */
215 void *mem_virtual;
216 /** GTT virtual address for the buffer, saved across map/unmap cycles */
217 void *gtt_virtual;
218 /** WC CPU address for the buffer, saved across map/unmap cycles */
219 void *wc_virtual;
220 /**
221 * Virtual address of the buffer allocated by user, used for userptr
222 * objects only.
223 */
224 void *user_virtual;
225 int map_count;
226 struct list_head vma_list;
227
228 /** BO cache list */
229 struct list_head head;
230
231 /**
232 * Boolean of whether this BO and its children have been included in
233 * the current drm_bacon_bufmgr_check_aperture_space() total.
234 */
235 bool included_in_check_aperture;
236
237 /**
238 * Boolean of whether this buffer has been used as a relocation
239 * target and had its size accounted for, and thus can't have any
240 * further relocations added to it.
241 */
242 bool used_as_reloc_target;
243
244 /**
245 * Boolean of whether we have encountered an error whilst building the relocation tree.
246 */
247 bool has_error;
248
249 /**
250 * Boolean of whether this buffer can be re-used
251 */
252 bool reusable;
253
254 /**
255 * Boolean of whether the GPU is definitely not accessing the buffer.
256 *
257 * This is only valid when reusable, since non-reusable
258 * buffers are those that have been shared with other
259 * processes, so we don't know their state.
260 */
261 bool idle;
262
263 /**
264 * Boolean of whether this buffer was allocated with userptr
265 */
266 bool is_userptr;
267
268 /**
269 * Size in bytes of this buffer and its relocation descendants.
270 *
271 * Used to avoid costly tree walking in
272 * drm_bacon_bufmgr_check_aperture in the common case.
273 */
274 int reloc_tree_size;
275
276 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */
277 bool mapped_cpu_write;
278 };
279
280 static unsigned int
281 drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);
282
283 static unsigned int
284 drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);
285
286 static int
287 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
288 uint32_t tiling_mode,
289 uint32_t stride);
290
291 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
292 time_t time);
293
294 static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
295
296 static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
297 {
298 return (drm_bacon_bo_gem *)bo;
299 }
300
301 static unsigned long
302 drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr *bufmgr, unsigned long size,
303 uint32_t *tiling_mode)
304 {
305 if (*tiling_mode == I915_TILING_NONE)
306 return size;
307
308 /* 965+ just needs multiples of the page size for tiling */
309 return ALIGN(size, 4096);
310 }
311
312 /*
313 * Round a given pitch up to the minimum required for X tiling on a
314 * given chip. We use 512 as the minimum to allow for a later tiling
315 * change.
316 */
317 static unsigned long
318 drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr *bufmgr,
319 unsigned long pitch, uint32_t *tiling_mode)
320 {
321 unsigned long tile_width;
322
323 /* If untiled, then just align it so that we can do rendering
324 * to it with the 3D engine.
325 */
326 if (*tiling_mode == I915_TILING_NONE)
327 return ALIGN(pitch, 64);
328
329 if (*tiling_mode == I915_TILING_X)
330 tile_width = 512;
331 else
332 tile_width = 128;
333
334 /* 965 is flexible */
335 return ALIGN(pitch, tile_width);
336 }
337
338 static struct drm_bacon_gem_bo_bucket *
339 drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr *bufmgr,
340 unsigned long size)
341 {
342 int i;
343
344 for (i = 0; i < bufmgr->num_buckets; i++) {
345 struct drm_bacon_gem_bo_bucket *bucket =
346 &bufmgr->cache_bucket[i];
347 if (bucket->size >= size) {
348 return bucket;
349 }
350 }
351
352 return NULL;
353 }
354
355 static void
356 drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr *bufmgr)
357 {
358 int i, j;
359
360 for (i = 0; i < bufmgr->exec_count; i++) {
361 drm_bacon_bo *bo = bufmgr->exec_bos[i];
362 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
363
364 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
365 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
366 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
367 bo_gem->name);
368 continue;
369 }
370
371 for (j = 0; j < bo_gem->reloc_count; j++) {
372 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[j].bo;
373 drm_bacon_bo_gem *target_gem =
374 (drm_bacon_bo_gem *) target_bo;
375
376 DBG("%2d: %d %s(%s)@0x%08x %08x -> "
377 "%d (%s)@0x%08x %08x + 0x%08x\n",
378 i,
379 bo_gem->gem_handle,
380 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
381 bo_gem->name,
382 upper_32_bits(bo_gem->relocs[j].offset),
383 lower_32_bits(bo_gem->relocs[j].offset),
384 target_gem->gem_handle,
385 target_gem->name,
386 upper_32_bits(target_bo->offset64),
387 lower_32_bits(target_bo->offset64),
388 bo_gem->relocs[j].delta);
389 }
390
391 for (j = 0; j < bo_gem->softpin_target_count; j++) {
392 drm_bacon_bo *target_bo = bo_gem->softpin_target[j];
393 drm_bacon_bo_gem *target_gem =
394 (drm_bacon_bo_gem *) target_bo;
395 DBG("%2d: %d %s(%s) -> "
396 "%d *(%s)@0x%08x %08x\n",
397 i,
398 bo_gem->gem_handle,
399 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
400 bo_gem->name,
401 target_gem->gem_handle,
402 target_gem->name,
403 upper_32_bits(target_bo->offset64),
404 lower_32_bits(target_bo->offset64));
405 }
406 }
407 }
408
409 inline void
410 drm_bacon_bo_reference(drm_bacon_bo *bo)
411 {
412 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
413
414 p_atomic_inc(&bo_gem->refcount);
415 }
416
417 static void
418 drm_bacon_add_validate_buffer2(drm_bacon_bo *bo)
419 {
420 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
421 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
422 int index;
423
424 if (bo_gem->validate_index != -1)
425 return;
426
427 /* Extend the array of validation entries as necessary. */
428 if (bufmgr->exec_count == bufmgr->exec_size) {
429 int new_size = bufmgr->exec_size * 2;
430
431 if (new_size == 0)
432 new_size = 5;
433
434 bufmgr->exec2_objects =
435 realloc(bufmgr->exec2_objects,
436 sizeof(*bufmgr->exec2_objects) * new_size);
437 bufmgr->exec_bos =
438 realloc(bufmgr->exec_bos,
439 sizeof(*bufmgr->exec_bos) * new_size);
440 bufmgr->exec_size = new_size;
441 }
442
443 index = bufmgr->exec_count;
444 bo_gem->validate_index = index;
445 /* Fill in array entry */
446 bufmgr->exec2_objects[index].handle = bo_gem->gem_handle;
447 bufmgr->exec2_objects[index].relocation_count = bo_gem->reloc_count;
448 bufmgr->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
449 bufmgr->exec2_objects[index].alignment = bo->align;
450 bufmgr->exec2_objects[index].offset = bo->offset64;
451 bufmgr->exec2_objects[index].flags = bo_gem->kflags;
452 bufmgr->exec2_objects[index].rsvd1 = 0;
453 bufmgr->exec2_objects[index].rsvd2 = 0;
454 bufmgr->exec_bos[index] = bo;
455 bufmgr->exec_count++;
456 }
457
458 static void
459 drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr *bufmgr,
460 drm_bacon_bo_gem *bo_gem,
461 unsigned int alignment)
462 {
463 unsigned int size;
464
465 assert(!bo_gem->used_as_reloc_target);
466
467 /* The older chipsets are far less flexible in terms of tiling,
468 * and require tiled buffers to be size-aligned in the aperture.
469 * This means that in the worst possible case we will need a hole
470 * twice as large as the object in order for it to fit into the
471 * aperture. Optimal packing is for wimps.
472 */
473 size = bo_gem->bo.size;
474
475 bo_gem->reloc_tree_size = size + alignment;
476 }
477
478 static int
479 drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
480 {
481 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
482 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
483 unsigned int max_relocs = bufmgr->max_relocs;
484
485 if (bo->size / 4 < max_relocs)
486 max_relocs = bo->size / 4;
487
488 bo_gem->relocs = malloc(max_relocs *
489 sizeof(struct drm_i915_gem_relocation_entry));
490 bo_gem->reloc_target_info = malloc(max_relocs *
491 sizeof(drm_bacon_reloc_target));
492 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
493 bo_gem->has_error = true;
494
495 free (bo_gem->relocs);
496 bo_gem->relocs = NULL;
497
498 free (bo_gem->reloc_target_info);
499 bo_gem->reloc_target_info = NULL;
500
501 return 1;
502 }
503
504 return 0;
505 }
506
507 int
508 drm_bacon_bo_busy(drm_bacon_bo *bo)
509 {
510 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
511 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
512 struct drm_i915_gem_busy busy;
513 int ret;
514
515 if (bo_gem->reusable && bo_gem->idle)
516 return false;
517
518 memclear(busy);
519 busy.handle = bo_gem->gem_handle;
520
521 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
522 if (ret == 0)
523 bo_gem->idle = !busy.busy;
524
525 /* Note that an error from the ioctl is treated as "not busy",
526 * matching the original behaviour.
527 */
528 return (ret == 0 && busy.busy);
529 }
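/*
 * Illustrative usage sketch (not part of the original file, kept under
 * #if 0): a caller can poll drm_bacon_bo_busy() to decide whether touching
 * a buffer would stall.  The helper name and the 1 ms back-off interval
 * are arbitrary example values.
 */
#if 0
static void
example_wait_until_idle(drm_bacon_bo *bo)
{
	while (drm_bacon_bo_busy(bo))
		usleep(1000);	/* back off for 1 ms between checks */
}
#endif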
530
531 static int
532 drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr *bufmgr,
533 drm_bacon_bo_gem *bo_gem, int state)
534 {
535 struct drm_i915_gem_madvise madv;
536
537 memclear(madv);
538 madv.handle = bo_gem->gem_handle;
539 madv.madv = state;
540 madv.retained = 1;
541 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
542
543 return madv.retained;
544 }
545
546 int
547 drm_bacon_bo_madvise(drm_bacon_bo *bo, int madv)
548 {
549 return drm_bacon_gem_bo_madvise_internal(bo->bufmgr,
550 (drm_bacon_bo_gem *) bo,
551 madv);
552 }
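/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * the madvise interface is how the BO cache below cooperates with the
 * kernel shrinker.  A cached, idle buffer is marked DONTNEED so its pages
 * may be reclaimed; before reuse it is marked WILLNEED, and a zero return
 * means the pages were purged and the buffer must be thrown away.  The
 * helper name is hypothetical.
 */
#if 0
static drm_bacon_bo *
example_reuse_cached_bo(drm_bacon_bo *cached_bo)
{
	if (!drm_bacon_bo_madvise(cached_bo, I915_MADV_WILLNEED)) {
		/* Backing pages were purged; the contents are gone. */
		drm_bacon_bo_unreference(cached_bo);
		return NULL;
	}
	return cached_bo;
}
#endif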
553
554 /* drop the oldest entries that have been purged by the kernel */
555 static void
556 drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr *bufmgr,
557 struct drm_bacon_gem_bo_bucket *bucket)
558 {
559 while (!list_empty(&bucket->head)) {
560 drm_bacon_bo_gem *bo_gem;
561
562 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
563 bucket->head.next, head);
564 if (drm_bacon_gem_bo_madvise_internal
565 (bufmgr, bo_gem, I915_MADV_DONTNEED))
566 break;
567
568 list_del(&bo_gem->head);
569 drm_bacon_gem_bo_free(&bo_gem->bo);
570 }
571 }
572
573 static drm_bacon_bo *
574 drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
575 const char *name,
576 unsigned long size,
577 unsigned long flags,
578 uint32_t tiling_mode,
579 unsigned long stride,
580 unsigned int alignment)
581 {
582 drm_bacon_bo_gem *bo_gem;
583 unsigned int page_size = getpagesize();
584 int ret;
585 struct drm_bacon_gem_bo_bucket *bucket;
586 bool alloc_from_cache;
587 unsigned long bo_size;
588 bool for_render = false;
589
590 if (flags & BO_ALLOC_FOR_RENDER)
591 for_render = true;
592
593 /* Round the allocated size up to a power of two number of pages. */
594 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, size);
595
596 /* If we don't have caching at this size, don't actually round the
597 * allocation up.
598 */
599 if (bucket == NULL) {
600 bo_size = size;
601 if (bo_size < page_size)
602 bo_size = page_size;
603 } else {
604 bo_size = bucket->size;
605 }
606
607 pthread_mutex_lock(&bufmgr->lock);
608 /* Get a buffer out of the cache if available */
609 retry:
610 alloc_from_cache = false;
611 if (bucket != NULL && !list_empty(&bucket->head)) {
612 if (for_render) {
613 /* Allocate new render-target BOs from the tail (MRU)
614 * of the list, as it will likely be hot in the GPU
615 * cache and in the aperture for us.
616 */
617 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
618 bucket->head.prev, head);
619 list_del(&bo_gem->head);
620 alloc_from_cache = true;
621 bo_gem->bo.align = alignment;
622 } else {
623 assert(alignment == 0);
624 /* For non-render-target BOs (where we're probably
625 * going to map it first thing in order to fill it
626 * with data), check if the last BO in the cache is
627 * unbusy, and only reuse in that case. Otherwise,
628 * allocating a new buffer is probably faster than
629 * waiting for the GPU to finish.
630 */
631 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
632 bucket->head.next, head);
633 if (!drm_bacon_bo_busy(&bo_gem->bo)) {
634 alloc_from_cache = true;
635 list_del(&bo_gem->head);
636 }
637 }
638
639 if (alloc_from_cache) {
640 if (!drm_bacon_gem_bo_madvise_internal
641 (bufmgr, bo_gem, I915_MADV_WILLNEED)) {
642 drm_bacon_gem_bo_free(&bo_gem->bo);
643 drm_bacon_gem_bo_cache_purge_bucket(bufmgr,
644 bucket);
645 goto retry;
646 }
647
648 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
649 tiling_mode,
650 stride)) {
651 drm_bacon_gem_bo_free(&bo_gem->bo);
652 goto retry;
653 }
654 }
655 }
656
657 if (!alloc_from_cache) {
658 struct drm_i915_gem_create create;
659
660 bo_gem = calloc(1, sizeof(*bo_gem));
661 if (!bo_gem)
662 goto err;
663
664 /* drm_bacon_gem_bo_free() calls list_del() on vma_list, so
665 * initialize the list head before any error path frees the BO. */
666 list_inithead(&bo_gem->vma_list);
667
668 bo_gem->bo.size = bo_size;
669
670 memclear(create);
671 create.size = bo_size;
672
673 ret = drmIoctl(bufmgr->fd,
674 DRM_IOCTL_I915_GEM_CREATE,
675 &create);
676 if (ret != 0) {
677 free(bo_gem);
678 goto err;
679 }
680
681 bo_gem->gem_handle = create.handle;
682 HASH_ADD(handle_hh, bufmgr->handle_table,
683 gem_handle, sizeof(bo_gem->gem_handle),
684 bo_gem);
685
686 bo_gem->bo.handle = bo_gem->gem_handle;
687 bo_gem->bo.bufmgr = bufmgr;
688 bo_gem->bo.align = alignment;
689
690 bo_gem->tiling_mode = I915_TILING_NONE;
691 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
692 bo_gem->stride = 0;
693
694 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
695 tiling_mode,
696 stride))
697 goto err_free;
698 }
699
700 bo_gem->name = name;
701 p_atomic_set(&bo_gem->refcount, 1);
702 bo_gem->validate_index = -1;
703 bo_gem->used_as_reloc_target = false;
704 bo_gem->has_error = false;
705 bo_gem->reusable = true;
706
707 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, alignment);
708 pthread_mutex_unlock(&bufmgr->lock);
709
710 DBG("bo_create: buf %d (%s) %ldb\n",
711 bo_gem->gem_handle, bo_gem->name, size);
712
713 return &bo_gem->bo;
714
715 err_free:
716 drm_bacon_gem_bo_free(&bo_gem->bo);
717 err:
718 pthread_mutex_unlock(&bufmgr->lock);
719 return NULL;
720 }
721
722 drm_bacon_bo *
723 drm_bacon_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
724 const char *name,
725 unsigned long size,
726 unsigned int alignment)
727 {
728 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
729 BO_ALLOC_FOR_RENDER,
730 I915_TILING_NONE, 0,
731 alignment);
732 }
733
734 drm_bacon_bo *
735 drm_bacon_bo_alloc(drm_bacon_bufmgr *bufmgr,
736 const char *name,
737 unsigned long size,
738 unsigned int alignment)
739 {
740 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
741 I915_TILING_NONE, 0, 0);
742 }
743
744 drm_bacon_bo *
745 drm_bacon_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
746 int x, int y, int cpp, uint32_t *tiling_mode,
747 unsigned long *pitch, unsigned long flags)
748 {
749 unsigned long size, stride;
750 uint32_t tiling;
751
752 do {
753 unsigned long aligned_y, height_alignment;
754
755 tiling = *tiling_mode;
756
757 /* If we're tiled, our allocations are in 8 or 32-row blocks,
758 * so failure to align our height means that we won't allocate
759 * enough pages.
760 *
761 * If we're untiled, we still have to align to 2 rows high
762 * because the data port accesses 2x2 blocks even if the
763 * bottom row isn't to be rendered, so failure to align means
764 * we could walk off the end of the GTT and fault. This is
765 * documented on 965, and may be the case on older chipsets
766 * too so we try to be careful.
767 */
768 aligned_y = y;
769 height_alignment = 2;
770
771 if (tiling == I915_TILING_X)
772 height_alignment = 8;
773 else if (tiling == I915_TILING_Y)
774 height_alignment = 32;
775 aligned_y = ALIGN(y, height_alignment);
776
777 stride = x * cpp;
778 stride = drm_bacon_gem_bo_tile_pitch(bufmgr, stride, tiling_mode);
779 size = stride * aligned_y;
780 size = drm_bacon_gem_bo_tile_size(bufmgr, size, tiling_mode);
781 } while (*tiling_mode != tiling);
782 *pitch = stride;
783
784 if (tiling == I915_TILING_NONE)
785 stride = 0;
786
787 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
788 tiling, stride, 0);
789 }
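/*
 * Illustrative usage sketch (not part of the original file, kept under
 * #if 0): allocating an X-tiled render target.  The width, height and cpp
 * values are arbitrary; note that the requested tiling mode may be
 * downgraded and the chosen pitch is returned through *pitch.  The helper
 * name is hypothetical.
 */
#if 0
static drm_bacon_bo *
example_alloc_xtiled(drm_bacon_bufmgr *bufmgr, unsigned long *pitch)
{
	uint32_t tiling = I915_TILING_X;

	return drm_bacon_bo_alloc_tiled(bufmgr, "example surface",
					1024, 768, 4 /* cpp */,
					&tiling, pitch,
					BO_ALLOC_FOR_RENDER);
}
#endif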
790
791 drm_bacon_bo *
792 drm_bacon_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
793 const char *name,
794 void *addr,
795 uint32_t tiling_mode,
796 uint32_t stride,
797 unsigned long size,
798 unsigned long flags)
799 {
800 drm_bacon_bo_gem *bo_gem;
801 int ret;
802 struct drm_i915_gem_userptr userptr;
803
804 /* Tiling with userptr surfaces is not supported
805 * on all hardware, so refuse it for the time being.
806 */
807 if (tiling_mode != I915_TILING_NONE)
808 return NULL;
809
810 bo_gem = calloc(1, sizeof(*bo_gem));
811 if (!bo_gem)
812 return NULL;
813
814 p_atomic_set(&bo_gem->refcount, 1);
815 list_inithead(&bo_gem->vma_list);
816
817 bo_gem->bo.size = size;
818
819 memclear(userptr);
820 userptr.user_ptr = (__u64)((unsigned long)addr);
821 userptr.user_size = size;
822 userptr.flags = flags;
823
824 ret = drmIoctl(bufmgr->fd,
825 DRM_IOCTL_I915_GEM_USERPTR,
826 &userptr);
827 if (ret != 0) {
828 DBG("bo_create_userptr: "
829 "ioctl failed with user ptr %p size 0x%lx, "
830 "user flags 0x%lx\n", addr, size, flags);
831 free(bo_gem);
832 return NULL;
833 }
834
835 pthread_mutex_lock(&bufmgr->lock);
836
837 bo_gem->gem_handle = userptr.handle;
838 bo_gem->bo.handle = bo_gem->gem_handle;
839 bo_gem->bo.bufmgr = bufmgr;
840 bo_gem->is_userptr = true;
841 bo_gem->bo.virtual = addr;
842 /* Save the address provided by user */
843 bo_gem->user_virtual = addr;
844 bo_gem->tiling_mode = I915_TILING_NONE;
845 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
846 bo_gem->stride = 0;
847
848 HASH_ADD(handle_hh, bufmgr->handle_table,
849 gem_handle, sizeof(bo_gem->gem_handle),
850 bo_gem);
851
852 bo_gem->name = name;
853 bo_gem->validate_index = -1;
854 bo_gem->used_as_reloc_target = false;
855 bo_gem->has_error = false;
856 bo_gem->reusable = false;
857
858 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
859 pthread_mutex_unlock(&bufmgr->lock);
860
861 DBG("bo_create_userptr: "
862 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
863 addr, bo_gem->gem_handle, bo_gem->name,
864 size, stride, tiling_mode);
865
866 return &bo_gem->bo;
867 }
868
869 bool
870 drm_bacon_has_userptr(drm_bacon_bufmgr *bufmgr)
871 {
872 int ret;
873 void *ptr;
874 long pgsz;
875 struct drm_i915_gem_userptr userptr;
876
877 pgsz = sysconf(_SC_PAGESIZE);
878 assert(pgsz > 0);
879
880 ret = posix_memalign(&ptr, pgsz, pgsz);
881 if (ret) {
882 DBG("Failed to get a page (%ld) for userptr detection!\n",
883 pgsz);
884 return false;
885 }
886
887 memclear(userptr);
888 userptr.user_ptr = (__u64)(unsigned long)ptr;
889 userptr.user_size = pgsz;
890
891 retry:
892 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
893 if (ret) {
894 if (errno == ENODEV && userptr.flags == 0) {
895 userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
896 goto retry;
897 }
898 free(ptr);
899 return false;
900 }
901
902 /* We don't release the userptr bo here as we want to keep the
903 * kernel mm tracking alive for our lifetime. The first time we
904 * create a userptr object the kernel has to install an mmu_notifier
905 * which is a heavyweight operation (e.g. it requires taking all
906 * mm_locks and stop_machine()).
907 */
908
909 bufmgr->userptr_active.ptr = ptr;
910 bufmgr->userptr_active.handle = userptr.handle;
911
912 return true;
913 }
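/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * probing for userptr support before wrapping page-aligned client memory
 * in a BO.  The allocation size is an arbitrary example and must be a
 * multiple of the page size; the helper name is hypothetical.
 */
#if 0
static drm_bacon_bo *
example_wrap_user_memory(drm_bacon_bufmgr *bufmgr)
{
	size_t size = 16 * 4096;
	drm_bacon_bo *bo;
	void *ptr;

	if (!drm_bacon_has_userptr(bufmgr))
		return NULL;

	if (posix_memalign(&ptr, 4096, size))
		return NULL;

	bo = drm_bacon_bo_alloc_userptr(bufmgr, "example userptr", ptr,
					I915_TILING_NONE, 0, size, 0);
	if (!bo)
		free(ptr);

	return bo;
}
#endif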
914
915 /**
916 * Returns a drm_bacon_bo wrapping the given global (flink) name.
917 *
918 * This can be used when one application needs to pass a buffer object
919 * to another.
920 */
921 drm_bacon_bo *
922 drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
923 const char *name,
924 unsigned int handle)
925 {
926 drm_bacon_bo_gem *bo_gem;
927 int ret;
928 struct drm_gem_open open_arg;
929 struct drm_i915_gem_get_tiling get_tiling;
930
931 /* At the moment most applications only have a few named BOs.
932 * For instance, in a DRI client only the render buffers passed
933 * between X and the client are named, and since X returns the
934 * alternating names for the front/back buffers, the lookup in
935 * the (small) name table is cheap.
936 */
937 pthread_mutex_lock(&bufmgr->lock);
938 HASH_FIND(name_hh, bufmgr->name_table,
939 &handle, sizeof(handle), bo_gem);
940 if (bo_gem) {
941 drm_bacon_bo_reference(&bo_gem->bo);
942 goto out;
943 }
944
945 memclear(open_arg);
946 open_arg.name = handle;
947 ret = drmIoctl(bufmgr->fd,
948 DRM_IOCTL_GEM_OPEN,
949 &open_arg);
950 if (ret != 0) {
951 DBG("Couldn't reference %s handle 0x%08x: %s\n",
952 name, handle, strerror(errno));
953 bo_gem = NULL;
954 goto out;
955 }
956 /* Now see if someone has used a prime handle to get this
957 * object from the kernel before by looking through the list
958 * again for a matching gem_handle
959 */
960 HASH_FIND(handle_hh, bufmgr->handle_table,
961 &open_arg.handle, sizeof(open_arg.handle), bo_gem);
962 if (bo_gem) {
963 drm_bacon_bo_reference(&bo_gem->bo);
964 goto out;
965 }
966
967 bo_gem = calloc(1, sizeof(*bo_gem));
968 if (!bo_gem)
969 goto out;
970
971 p_atomic_set(&bo_gem->refcount, 1);
972 list_inithead(&bo_gem->vma_list);
973
974 bo_gem->bo.size = open_arg.size;
975 bo_gem->bo.offset64 = 0;
976 bo_gem->bo.virtual = NULL;
977 bo_gem->bo.bufmgr = bufmgr;
978 bo_gem->name = name;
979 bo_gem->validate_index = -1;
980 bo_gem->gem_handle = open_arg.handle;
981 bo_gem->bo.handle = open_arg.handle;
982 bo_gem->global_name = handle;
983 bo_gem->reusable = false;
984
985 HASH_ADD(handle_hh, bufmgr->handle_table,
986 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
987 HASH_ADD(name_hh, bufmgr->name_table,
988 global_name, sizeof(bo_gem->global_name), bo_gem);
989
990 memclear(get_tiling);
991 get_tiling.handle = bo_gem->gem_handle;
992 ret = drmIoctl(bufmgr->fd,
993 DRM_IOCTL_I915_GEM_GET_TILING,
994 &get_tiling);
995 if (ret != 0)
996 goto err_unref;
997
998 bo_gem->tiling_mode = get_tiling.tiling_mode;
999 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1000 /* XXX stride is unknown */
1001 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
1002 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1003
1004 out:
1005 pthread_mutex_unlock(&bufmgr->lock);
1006 return &bo_gem->bo;
1007
1008 err_unref:
1009 drm_bacon_gem_bo_free(&bo_gem->bo);
1010 pthread_mutex_unlock(&bufmgr->lock);
1011 return NULL;
1012 }
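/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * importing a buffer shared by another process.  The global (flink) name
 * is assumed to have been communicated out of band, e.g. by the X server
 * in a DRI2 reply; the helper name is hypothetical.
 */
#if 0
static drm_bacon_bo *
example_import_shared(drm_bacon_bufmgr *bufmgr, unsigned int global_name)
{
	return drm_bacon_bo_gem_create_from_name(bufmgr, "shared buffer",
						 global_name);
}
#endif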
1013
1014 static void
1015 drm_bacon_gem_bo_free(drm_bacon_bo *bo)
1016 {
1017 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1018 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1019 struct drm_gem_close close;
1020 int ret;
1021
1022 list_del(&bo_gem->vma_list);
1023 if (bo_gem->mem_virtual) {
1024 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1025 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1026 bufmgr->vma_count--;
1027 }
1028 if (bo_gem->wc_virtual) {
1029 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1030 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1031 bufmgr->vma_count--;
1032 }
1033 if (bo_gem->gtt_virtual) {
1034 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1035 bufmgr->vma_count--;
1036 }
1037
1038 if (bo_gem->global_name)
1039 HASH_DELETE(name_hh, bufmgr->name_table, bo_gem);
1040 HASH_DELETE(handle_hh, bufmgr->handle_table, bo_gem);
1041
1042 /* Close this object */
1043 memclear(close);
1044 close.handle = bo_gem->gem_handle;
1045 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
1046 if (ret != 0) {
1047 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1048 bo_gem->gem_handle, bo_gem->name, strerror(errno));
1049 }
1050 free(bo);
1051 }
1052
1053 static void
1054 drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
1055 {
1056 #if HAVE_VALGRIND
1057 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1058
1059 if (bo_gem->mem_virtual)
1060 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1061
1062 if (bo_gem->wc_virtual)
1063 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1064
1065 if (bo_gem->gtt_virtual)
1066 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1067 #endif
1068 }
1069
1070 /** Frees all cached buffers significantly older than @time. */
1071 static void
1072 drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr *bufmgr, time_t time)
1073 {
1074 int i;
1075
1076 if (bufmgr->time == time)
1077 return;
1078
1079 for (i = 0; i < bufmgr->num_buckets; i++) {
1080 struct drm_bacon_gem_bo_bucket *bucket =
1081 &bufmgr->cache_bucket[i];
1082
1083 while (!list_empty(&bucket->head)) {
1084 drm_bacon_bo_gem *bo_gem;
1085
1086 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1087 bucket->head.next, head);
1088 if (time - bo_gem->free_time <= 1)
1089 break;
1090
1091 list_del(&bo_gem->head);
1092
1093 drm_bacon_gem_bo_free(&bo_gem->bo);
1094 }
1095 }
1096
1097 bufmgr->time = time;
1098 }
1099
1100 static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr *bufmgr)
1101 {
1102 int limit;
1103
1104 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1105 bufmgr->vma_count, bufmgr->vma_open, bufmgr->vma_max);
1106
1107 if (bufmgr->vma_max < 0)
1108 return;
1109
1110 /* We may need to evict a few entries in order to create new mmaps */
1111 limit = bufmgr->vma_max - 2*bufmgr->vma_open;
1112 if (limit < 0)
1113 limit = 0;
1114
1115 while (bufmgr->vma_count > limit) {
1116 drm_bacon_bo_gem *bo_gem;
1117
1118 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1119 bufmgr->vma_cache.next,
1120 vma_list);
1121 assert(bo_gem->map_count == 0);
1122 list_delinit(&bo_gem->vma_list);
1123
1124 if (bo_gem->mem_virtual) {
1125 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1126 bo_gem->mem_virtual = NULL;
1127 bufmgr->vma_count--;
1128 }
1129 if (bo_gem->wc_virtual) {
1130 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1131 bo_gem->wc_virtual = NULL;
1132 bufmgr->vma_count--;
1133 }
1134 if (bo_gem->gtt_virtual) {
1135 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1136 bo_gem->gtt_virtual = NULL;
1137 bufmgr->vma_count--;
1138 }
1139 }
1140 }
1141
1142 static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr *bufmgr,
1143 drm_bacon_bo_gem *bo_gem)
1144 {
1145 bufmgr->vma_open--;
1146 list_addtail(&bo_gem->vma_list, &bufmgr->vma_cache);
1147 if (bo_gem->mem_virtual)
1148 bufmgr->vma_count++;
1149 if (bo_gem->wc_virtual)
1150 bufmgr->vma_count++;
1151 if (bo_gem->gtt_virtual)
1152 bufmgr->vma_count++;
1153 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
1154 }
1155
1156 static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr *bufmgr,
1157 drm_bacon_bo_gem *bo_gem)
1158 {
1159 bufmgr->vma_open++;
1160 list_del(&bo_gem->vma_list);
1161 if (bo_gem->mem_virtual)
1162 bufmgr->vma_count--;
1163 if (bo_gem->wc_virtual)
1164 bufmgr->vma_count--;
1165 if (bo_gem->gtt_virtual)
1166 bufmgr->vma_count--;
1167 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
1168 }
1169
1170 static void
1171 drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
1172 {
1173 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1174 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1175 struct drm_bacon_gem_bo_bucket *bucket;
1176 int i;
1177
1178 /* Unreference all the target buffers */
1179 for (i = 0; i < bo_gem->reloc_count; i++) {
1180 if (bo_gem->reloc_target_info[i].bo != bo) {
1181 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
1182 reloc_target_info[i].bo,
1183 time);
1184 }
1185 }
1186 for (i = 0; i < bo_gem->softpin_target_count; i++)
1187 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1188 time);
1189 bo_gem->kflags = 0;
1190 bo_gem->reloc_count = 0;
1191 bo_gem->used_as_reloc_target = false;
1192 bo_gem->softpin_target_count = 0;
1193
1194 DBG("bo_unreference final: %d (%s)\n",
1195 bo_gem->gem_handle, bo_gem->name);
1196
1197 /* release memory associated with this object */
1198 if (bo_gem->reloc_target_info) {
1199 free(bo_gem->reloc_target_info);
1200 bo_gem->reloc_target_info = NULL;
1201 }
1202 if (bo_gem->relocs) {
1203 free(bo_gem->relocs);
1204 bo_gem->relocs = NULL;
1205 }
1206 if (bo_gem->softpin_target) {
1207 free(bo_gem->softpin_target);
1208 bo_gem->softpin_target = NULL;
1209 bo_gem->softpin_target_size = 0;
1210 }
1211
1212 /* Clear any left-over mappings */
1213 if (bo_gem->map_count) {
1214 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1215 bo_gem->map_count = 0;
1216 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1217 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1218 }
1219
1220 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, bo->size);
1221 /* Put the buffer into our internal cache for reuse if we can. */
1222 if (bufmgr->bo_reuse && bo_gem->reusable && bucket != NULL &&
1223 drm_bacon_gem_bo_madvise_internal(bufmgr, bo_gem,
1224 I915_MADV_DONTNEED)) {
1225 bo_gem->free_time = time;
1226
1227 bo_gem->name = NULL;
1228 bo_gem->validate_index = -1;
1229
1230 list_addtail(&bo_gem->head, &bucket->head);
1231 } else {
1232 drm_bacon_gem_bo_free(bo);
1233 }
1234 }
1235
1236 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
1237 time_t time)
1238 {
1239 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1240
1241 assert(p_atomic_read(&bo_gem->refcount) > 0);
1242 if (p_atomic_dec_zero(&bo_gem->refcount))
1243 drm_bacon_gem_bo_unreference_final(bo, time);
1244 }
1245
1246 void
1247 drm_bacon_bo_unreference(drm_bacon_bo *bo)
1248 {
1249 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1250
1251 if (bo == NULL)
1252 return;
1253
1254 assert(p_atomic_read(&bo_gem->refcount) > 0);
1255
1256 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1257 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1258 struct timespec time;
1259
1260 clock_gettime(CLOCK_MONOTONIC, &time);
1261
1262 pthread_mutex_lock(&bufmgr->lock);
1263
1264 if (p_atomic_dec_zero(&bo_gem->refcount)) {
1265 drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
1266 drm_bacon_gem_cleanup_bo_cache(bufmgr, time.tv_sec);
1267 }
1268
1269 pthread_mutex_unlock(&bufmgr->lock);
1270 }
1271 }
1272
1273 int
1274 drm_bacon_bo_map(drm_bacon_bo *bo, int write_enable)
1275 {
1276 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1277 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1278 struct drm_i915_gem_set_domain set_domain;
1279 int ret;
1280
1281 if (bo_gem->is_userptr) {
1282 /* Return the same user ptr */
1283 bo->virtual = bo_gem->user_virtual;
1284 return 0;
1285 }
1286
1287 pthread_mutex_lock(&bufmgr->lock);
1288
1289 if (bo_gem->map_count++ == 0)
1290 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1291
1292 if (!bo_gem->mem_virtual) {
1293 struct drm_i915_gem_mmap mmap_arg;
1294
1295 DBG("bo_map: %d (%s), map_count=%d\n",
1296 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1297
1298 memclear(mmap_arg);
1299 mmap_arg.handle = bo_gem->gem_handle;
1300 mmap_arg.size = bo->size;
1301 ret = drmIoctl(bufmgr->fd,
1302 DRM_IOCTL_I915_GEM_MMAP,
1303 &mmap_arg);
1304 if (ret != 0) {
1305 ret = -errno;
1306 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1307 __FILE__, __LINE__, bo_gem->gem_handle,
1308 bo_gem->name, strerror(errno));
1309 if (--bo_gem->map_count == 0)
1310 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1311 pthread_mutex_unlock(&bufmgr->lock);
1312 return ret;
1313 }
1314 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1315 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1316 }
1317 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1318 bo_gem->mem_virtual);
1319 bo->virtual = bo_gem->mem_virtual;
1320
1321 memclear(set_domain);
1322 set_domain.handle = bo_gem->gem_handle;
1323 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1324 if (write_enable)
1325 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1326 else
1327 set_domain.write_domain = 0;
1328 ret = drmIoctl(bufmgr->fd,
1329 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1330 &set_domain);
1331 if (ret != 0) {
1332 DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1333 __FILE__, __LINE__, bo_gem->gem_handle,
1334 strerror(errno));
1335 }
1336
1337 if (write_enable)
1338 bo_gem->mapped_cpu_write = true;
1339
1340 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1341 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1342 pthread_mutex_unlock(&bufmgr->lock);
1343
1344 return 0;
1345 }
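/*
 * Illustrative usage sketch (not part of the original file, kept under
 * #if 0): a typical CPU upload through the cached mmap path.  Mapping
 * with write_enable set moves the buffer to the CPU write domain; the
 * matching unmap triggers the SW_FINISH flush when needed.  The helper
 * name is hypothetical.
 */
#if 0
static int
example_cpu_upload(drm_bacon_bo *bo, const void *data, size_t size)
{
	int ret = drm_bacon_bo_map(bo, 1 /* write_enable */);
	if (ret)
		return ret;

	memcpy(bo->virtual, data, size);

	return drm_bacon_bo_unmap(bo);
}
#endif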
1346
1347 static int
1348 map_gtt(drm_bacon_bo *bo)
1349 {
1350 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1351 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1352 int ret;
1353
1354 if (bo_gem->is_userptr)
1355 return -EINVAL;
1356
1357 if (bo_gem->map_count++ == 0)
1358 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1359
1360 /* Get a mapping of the buffer if we haven't before. */
1361 if (bo_gem->gtt_virtual == NULL) {
1362 struct drm_i915_gem_mmap_gtt mmap_arg;
1363
1364 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1365 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1366
1367 memclear(mmap_arg);
1368 mmap_arg.handle = bo_gem->gem_handle;
1369
1370 /* Get the fake offset back... */
1371 ret = drmIoctl(bufmgr->fd,
1372 DRM_IOCTL_I915_GEM_MMAP_GTT,
1373 &mmap_arg);
1374 if (ret != 0) {
1375 ret = -errno;
1376 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1377 __FILE__, __LINE__,
1378 bo_gem->gem_handle, bo_gem->name,
1379 strerror(errno));
1380 if (--bo_gem->map_count == 0)
1381 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1382 return ret;
1383 }
1384
1385 /* and mmap it */
1386 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1387 MAP_SHARED, bufmgr->fd,
1388 mmap_arg.offset);
1389 if (bo_gem->gtt_virtual == MAP_FAILED) {
1390 bo_gem->gtt_virtual = NULL;
1391 ret = -errno;
1392 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1393 __FILE__, __LINE__,
1394 bo_gem->gem_handle, bo_gem->name,
1395 strerror(errno));
1396 if (--bo_gem->map_count == 0)
1397 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1398 return ret;
1399 }
1400 }
1401
1402 bo->virtual = bo_gem->gtt_virtual;
1403
1404 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1405 bo_gem->gtt_virtual);
1406
1407 return 0;
1408 }
1409
1410 int
1411 drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
1412 {
1413 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1414 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1415 struct drm_i915_gem_set_domain set_domain;
1416 int ret;
1417
1418 pthread_mutex_lock(&bufmgr->lock);
1419
1420 ret = map_gtt(bo);
1421 if (ret) {
1422 pthread_mutex_unlock(&bufmgr->lock);
1423 return ret;
1424 }
1425
1426 /* Now move it to the GTT domain so that the GPU and CPU
1427 * caches are flushed and the GPU isn't actively using the
1428 * buffer.
1429 *
1430 * The pagefault handler does this domain change for us when
1431 * it has unbound the BO from the GTT, but it's up to us to
1432 * tell it when we're about to use things if we had done
1433 * rendering and it still happens to be bound to the GTT.
1434 */
1435 memclear(set_domain);
1436 set_domain.handle = bo_gem->gem_handle;
1437 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1438 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1439 ret = drmIoctl(bufmgr->fd,
1440 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1441 &set_domain);
1442 if (ret != 0) {
1443 DBG("%s:%d: Error setting domain %d: %s\n",
1444 __FILE__, __LINE__, bo_gem->gem_handle,
1445 strerror(errno));
1446 }
1447
1448 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1449 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1450 pthread_mutex_unlock(&bufmgr->lock);
1451
1452 return 0;
1453 }
1454
1455 /**
1456 * Performs a mapping of the buffer object like the normal GTT
1457 * mapping, but avoids waiting for the GPU to be done reading from or
1458 * rendering to the buffer.
1459 *
1460 * This is used in the implementation of GL_ARB_map_buffer_range: The
1461 * user asks to create a buffer, then does a mapping, fills some
1462 * space, runs a drawing command, then asks to map it again without
1463 * synchronizing because it guarantees that it won't write over the
1464 * data that the GPU is busy using (or, more specifically, that if it
1465 * does write over the data, it acknowledges that rendering is
1466 * undefined).
1467 */
1468
1469 int
1470 drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
1471 {
1472 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1473 #ifdef HAVE_VALGRIND
1474 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1475 #endif
1476 int ret;
1477
1478 /* If the CPU cache isn't coherent with the GTT, then use a
1479 * regular synchronized mapping. The problem is that we don't
1480 * track where the buffer was last used on the CPU side in
1481 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
1482 * we would potentially corrupt the buffer even when the user
1483 * does reasonable things.
1484 */
1485 if (!bufmgr->has_llc)
1486 return drm_bacon_gem_bo_map_gtt(bo);
1487
1488 pthread_mutex_lock(&bufmgr->lock);
1489
1490 ret = map_gtt(bo);
1491 if (ret == 0) {
1492 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1493 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1494 }
1495
1496 pthread_mutex_unlock(&bufmgr->lock);
1497
1498 return ret;
1499 }
1500
1501 int
1502 drm_bacon_bo_unmap(drm_bacon_bo *bo)
1503 {
1504 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1505 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1506 int ret = 0;
1507
1508 if (bo == NULL)
1509 return 0;
1510
1511 if (bo_gem->is_userptr)
1512 return 0;
1513
1514 pthread_mutex_lock(&bufmgr->lock);
1515
1516 if (bo_gem->map_count <= 0) {
1517 DBG("attempted to unmap an unmapped bo\n");
1518 pthread_mutex_unlock(&bufmgr->lock);
1519 /* Preserve the old behaviour of just treating this as a
1520 * no-op rather than reporting the error.
1521 */
1522 return 0;
1523 }
1524
1525 if (bo_gem->mapped_cpu_write) {
1526 struct drm_i915_gem_sw_finish sw_finish;
1527
1528 /* Cause a flush to happen if the buffer's pinned for
1529 * scanout, so the results show up in a timely manner.
1530 * Unlike GTT set domains, this only does work if the
1531 * buffer is scanout-related.
1532 */
1533 memclear(sw_finish);
1534 sw_finish.handle = bo_gem->gem_handle;
1535 ret = drmIoctl(bufmgr->fd,
1536 DRM_IOCTL_I915_GEM_SW_FINISH,
1537 &sw_finish);
1538 ret = ret == -1 ? -errno : 0;
1539
1540 bo_gem->mapped_cpu_write = false;
1541 }
1542
1543 /* We need to unmap after every invocation; we cannot keep
1544 * an open vma for every bo, as that would exhaust the system
1545 * limits and cause later failures.
1546 */
1547 if (--bo_gem->map_count == 0) {
1548 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1549 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1550 bo->virtual = NULL;
1551 }
1552 pthread_mutex_unlock(&bufmgr->lock);
1553
1554 return ret;
1555 }
1556
1557 int
1558 drm_bacon_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
1559 unsigned long size, const void *data)
1560 {
1561 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1562 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1563 struct drm_i915_gem_pwrite pwrite;
1564 int ret;
1565
1566 if (bo_gem->is_userptr)
1567 return -EINVAL;
1568
1569 memclear(pwrite);
1570 pwrite.handle = bo_gem->gem_handle;
1571 pwrite.offset = offset;
1572 pwrite.size = size;
1573 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1574 ret = drmIoctl(bufmgr->fd,
1575 DRM_IOCTL_I915_GEM_PWRITE,
1576 &pwrite);
1577 if (ret != 0) {
1578 ret = -errno;
1579 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1580 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1581 (int)size, strerror(errno));
1582 }
1583
1584 return ret;
1585 }
1586
1587 int
1588 drm_bacon_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
1589 unsigned long size, void *data)
1590 {
1591 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1592 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1593 struct drm_i915_gem_pread pread;
1594 int ret;
1595
1596 if (bo_gem->is_userptr)
1597 return -EINVAL;
1598
1599 memclear(pread);
1600 pread.handle = bo_gem->gem_handle;
1601 pread.offset = offset;
1602 pread.size = size;
1603 pread.data_ptr = (uint64_t) (uintptr_t) data;
1604 ret = drmIoctl(bufmgr->fd,
1605 DRM_IOCTL_I915_GEM_PREAD,
1606 &pread);
1607 if (ret != 0) {
1608 ret = -errno;
1609 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1610 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1611 (int)size, strerror(errno));
1612 }
1613
1614 return ret;
1615 }
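/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * pwrite/pread style access through subdata/get_subdata avoids keeping a
 * CPU mapping around, which is handy for small, one-off transfers.  The
 * helper name is hypothetical.
 */
#if 0
static int
example_copy_without_mapping(drm_bacon_bo *bo,
			     const void *src, void *dst, size_t size)
{
	int ret = drm_bacon_bo_subdata(bo, 0, size, src);
	if (ret)
		return ret;

	return drm_bacon_bo_get_subdata(bo, 0, size, dst);
}
#endif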
1616
1617 /** Waits for all GPU rendering with the object to have completed. */
1618 void
1619 drm_bacon_bo_wait_rendering(drm_bacon_bo *bo)
1620 {
1621 drm_bacon_gem_bo_start_gtt_access(bo, 1);
1622 }
1623
1624 /**
1625 * Waits on a BO for the given amount of time.
1626 *
1627 * @bo: buffer object to wait for
1628 * @timeout_ns: amount of time to wait in nanoseconds.
1629 * If value is less than 0, an infinite wait will occur.
1630 *
1631 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1632 * object has completed within the allotted time. Otherwise a negative return
1633 * value describes the error; of particular interest is -ETIME, returned when
1634 * the wait expired before the object became idle.
1635 *
1636 * Similar to drm_bacon_bo_wait_rendering except that a timeout parameter allows
1637 * the operation to give up after a certain amount of time. Another subtle
1638 * difference is that the internal locking semantics differ (this variant does
1639 * not hold the lock for the duration of the wait), which makes the wait subject
1640 * to a larger userspace race window.
1641 *
1642 * The implementation waits until the object is no longer actively referenced
1643 * by any batch buffer submitted at the time of the call. It does not guarantee
1644 * that the buffer is not re-issued afterwards via another thread or a flinked
1645 * handle. Userspace must make sure this race does not occur if such precision
1646 * is important.
1647 *
1648 * Note that some kernels have broken the promise of an infinite wait for
1649 * negative values; upgrade to the latest stable kernel if this is the case.
1650 */
1651 int
1652 drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
1653 {
1654 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1655 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1656 struct drm_i915_gem_wait wait;
1657 int ret;
1658
1659 memclear(wait);
1660 wait.bo_handle = bo_gem->gem_handle;
1661 wait.timeout_ns = timeout_ns;
1662 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1663 if (ret == -1)
1664 return -errno;
1665
1666 return ret;
1667 }
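/*
 * Illustrative usage sketch (not part of the original file, kept under
 * #if 0): waiting on a buffer with a one-second timeout and treating
 * -ETIME as "still busy".  A negative timeout would wait forever; the
 * helper name is hypothetical.
 */
#if 0
static bool
example_wait_one_second(drm_bacon_bo *bo)
{
	int ret = drm_bacon_gem_bo_wait(bo, 1000000000ll);

	if (ret == -ETIME)
		return false;	/* still busy after 1 second */

	return ret == 0;
}
#endif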
1668
1669 /**
1670 * Sets the object to the GTT read and possibly write domain, used by the X
1671 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
1672 *
1673 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
1674 * can do tiled pixmaps this way.
1675 */
1676 void
1677 drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
1678 {
1679 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1680 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1681 struct drm_i915_gem_set_domain set_domain;
1682 int ret;
1683
1684 memclear(set_domain);
1685 set_domain.handle = bo_gem->gem_handle;
1686 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1687 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1688 ret = drmIoctl(bufmgr->fd,
1689 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1690 &set_domain);
1691 if (ret != 0) {
1692 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1693 __FILE__, __LINE__, bo_gem->gem_handle,
1694 set_domain.read_domains, set_domain.write_domain,
1695 strerror(errno));
1696 }
1697 }
1698
1699 static void
1700 drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
1701 {
1702 struct drm_gem_close close_bo;
1703 int i, ret;
1704
1705 free(bufmgr->exec2_objects);
1706 free(bufmgr->exec_bos);
1707
1708 pthread_mutex_destroy(&bufmgr->lock);
1709
1710 /* Free any cached buffer objects we were going to reuse */
1711 for (i = 0; i < bufmgr->num_buckets; i++) {
1712 struct drm_bacon_gem_bo_bucket *bucket =
1713 &bufmgr->cache_bucket[i];
1714 drm_bacon_bo_gem *bo_gem;
1715
1716 while (!list_empty(&bucket->head)) {
1717 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1718 bucket->head.next, head);
1719 list_del(&bo_gem->head);
1720
1721 drm_bacon_gem_bo_free(&bo_gem->bo);
1722 }
1723 }
1724
1725 /* Release userptr bo kept hanging around for optimisation. */
1726 if (bufmgr->userptr_active.ptr) {
1727 memclear(close_bo);
1728 close_bo.handle = bufmgr->userptr_active.handle;
1729 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1730 free(bufmgr->userptr_active.ptr);
1731 if (ret)
1732 fprintf(stderr,
1733 "Failed to release test userptr object! (%d) "
1734 "i915 kernel driver may not be sane!\n", errno);
1735 }
1736
1737 free(bufmgr);
1738 }
1739
1740 /**
1741 * Adds the target buffer to the validation list and adds the relocation
1742 * to the reloc_buffer's relocation list.
1743 *
1744 * The relocation entry at the given offset must already contain the
1745 * precomputed relocation value, because the kernel will optimize out
1746 * the relocation entry write when the buffer hasn't moved from the
1747 * last known offset in target_bo.
1748 */
1749 static int
1750 do_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1751 drm_bacon_bo *target_bo, uint32_t target_offset,
1752 uint32_t read_domains, uint32_t write_domain)
1753 {
1754 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1755 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1756 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1757
1758 if (bo_gem->has_error)
1759 return -ENOMEM;
1760
1761 if (target_bo_gem->has_error) {
1762 bo_gem->has_error = true;
1763 return -ENOMEM;
1764 }
1765
1766 /* Create a new relocation list if needed */
1767 if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
1768 return -ENOMEM;
1769
1770 /* Check overflow */
1771 assert(bo_gem->reloc_count < bufmgr->max_relocs);
1772
1773 /* Check args */
1774 assert(offset <= bo->size - 4);
1775 assert((write_domain & (write_domain - 1)) == 0);
1776
1777 /* Make sure that we're not adding a reloc to something whose size has
1778 * already been accounted for.
1779 */
1780 assert(!bo_gem->used_as_reloc_target);
1781 if (target_bo_gem != bo_gem) {
1782 target_bo_gem->used_as_reloc_target = true;
1783 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1784 }
1785
1786 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1787 if (target_bo != bo)
1788 drm_bacon_bo_reference(target_bo);
1789
1790 bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1791 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1792 bo_gem->relocs[bo_gem->reloc_count].target_handle =
1793 target_bo_gem->gem_handle;
1794 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1795 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1796 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
1797 bo_gem->reloc_count++;
1798
1799 return 0;
1800 }
1801
1802 static int
1803 drm_bacon_gem_bo_add_softpin_target(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
1804 {
1805 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1806 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1807 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1808
1809 if (bo_gem->has_error)
1810 return -ENOMEM;
1811
1812 if (target_bo_gem->has_error) {
1813 bo_gem->has_error = true;
1814 return -ENOMEM;
1815 }
1816
1817 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
1818 return -EINVAL;
1819 if (target_bo_gem == bo_gem)
1820 return -EINVAL;
1821
1822 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
1823 int new_size = bo_gem->softpin_target_size * 2;
1824 if (new_size == 0)
1825 new_size = bufmgr->max_relocs;
1826
1827 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
1828 sizeof(drm_bacon_bo *));
1829 if (!bo_gem->softpin_target)
1830 return -ENOMEM;
1831
1832 bo_gem->softpin_target_size = new_size;
1833 }
1834 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
1835 drm_bacon_bo_reference(target_bo);
1836 bo_gem->softpin_target_count++;
1837
1838 return 0;
1839 }
1840
1841 int
1842 drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1843 drm_bacon_bo *target_bo, uint32_t target_offset,
1844 uint32_t read_domains, uint32_t write_domain)
1845 {
1846 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *)target_bo;
1847
1848 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
1849 return drm_bacon_gem_bo_add_softpin_target(bo, target_bo);
1850 else
1851 return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1852 read_domains, write_domain);
1853 }
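/*
 * Illustrative sketch (not part of the original file, kept under #if 0):
 * emitting a relocation while building a batch.  The caller writes the
 * presumed address into the batch itself and records the reloc so the
 * kernel can patch it if the target moves.  The helper name, offsets and
 * domains are arbitrary example values.
 */
#if 0
static int
example_emit_pointer(drm_bacon_bo *batch_bo, uint32_t *batch_map,
		     uint32_t batch_offset,
		     drm_bacon_bo *target_bo, uint32_t target_offset)
{
	/* Write the presumed (last known) address into the batch.  Only the
	 * low 32 bits are shown here; gen8+ would also write the upper half.
	 */
	batch_map[batch_offset / 4] =
		lower_32_bits(target_bo->offset64 + target_offset);

	/* Record the relocation so the kernel can fix it up if needed. */
	return drm_bacon_bo_emit_reloc(batch_bo, batch_offset,
				       target_bo, target_offset,
				       I915_GEM_DOMAIN_RENDER,
				       I915_GEM_DOMAIN_RENDER);
}
#endif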
1854
1855 int
1856 drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
1857 {
1858 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1859
1860 return bo_gem->reloc_count;
1861 }
1862
1863 /**
1864 * Removes existing relocation entries in the BO after "start".
1865 *
1866 * This allows a user to avoid a two-step process for state setup with
1867 * counting up all the buffer objects and doing a
1868 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
1869 * relocations for the state setup. Instead, save the state of the
1870 * batchbuffer including drm_bacon_gem_bo_get_reloc_count(), emit all the
1871 * state, and then check if it still fits in the aperture.
1872 *
1873 * Any further drm_bacon_bufmgr_check_aperture_space() queries
1874 * involving this buffer in the tree are undefined after this call.
1875 *
1876 * This also removes all softpinned targets being referenced by the BO.
1877 */
1878 void
1879 drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
1880 {
1881 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1882 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1883 int i;
1884 struct timespec time;
1885
1886 clock_gettime(CLOCK_MONOTONIC, &time);
1887
1888 assert(bo_gem->reloc_count >= start);
1889
1890 /* Unreference the cleared target buffers */
1891 pthread_mutex_lock(&bufmgr->lock);
1892
1893 for (i = start; i < bo_gem->reloc_count; i++) {
1894 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_target_info[i].bo;
1895 if (&target_bo_gem->bo != bo) {
1896 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1897 time.tv_sec);
1898 }
1899 }
1900 bo_gem->reloc_count = start;
1901
1902 for (i = 0; i < bo_gem->softpin_target_count; i++) {
1903 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->softpin_target[i];
1904 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
1905 }
1906 bo_gem->softpin_target_count = 0;
1907
1908 pthread_mutex_unlock(&bufmgr->lock);
1909
1910 }
1911
1912 static void
1913 drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
1914 {
1915 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1916 int i;
1917
1918 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
1919 return;
1920
1921 for (i = 0; i < bo_gem->reloc_count; i++) {
1922 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1923
1924 if (target_bo == bo)
1925 continue;
1926
1927 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1928
1929 /* Continue walking the tree depth-first. */
1930 drm_bacon_gem_bo_process_reloc2(target_bo);
1931
1932 /* Add the target to the validate list */
1933 drm_bacon_add_validate_buffer2(target_bo);
1934 }
1935
1936 for (i = 0; i < bo_gem->softpin_target_count; i++) {
1937 drm_bacon_bo *target_bo = bo_gem->softpin_target[i];
1938
1939 if (target_bo == bo)
1940 continue;
1941
1942 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1943 drm_bacon_gem_bo_process_reloc2(target_bo);
1944 drm_bacon_add_validate_buffer2(target_bo);
1945 }
1946 }
1947
1948 static void
1949 drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr *bufmgr)
1950 {
1951 int i;
1952
1953 for (i = 0; i < bufmgr->exec_count; i++) {
1954 drm_bacon_bo *bo = bufmgr->exec_bos[i];
1955 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1956
1957 /* Update the buffer offset */
1958 if (bufmgr->exec2_objects[i].offset != bo->offset64) {
1959 			/* If we see a softpinned object here, it means the kernel
1960 			 * has relocated our object, which indicates a programming error.
1961 			 */
1962 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
1963 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
1964 bo_gem->gem_handle, bo_gem->name,
1965 upper_32_bits(bo->offset64),
1966 lower_32_bits(bo->offset64),
1967 upper_32_bits(bufmgr->exec2_objects[i].offset),
1968 lower_32_bits(bufmgr->exec2_objects[i].offset));
1969 bo->offset64 = bufmgr->exec2_objects[i].offset;
1970 }
1971 }
1972 }
1973
1974 static int
1975 do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
1976 int in_fence, int *out_fence,
1977 unsigned int flags)
1978 {
1979 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1980 struct drm_i915_gem_execbuffer2 execbuf;
1981 int ret = 0;
1982 int i;
1983
1984 if (to_bo_gem(bo)->has_error)
1985 return -ENOMEM;
1986
1987 pthread_mutex_lock(&bufmgr->lock);
1988 /* Update indices and set up the validate list. */
1989 drm_bacon_gem_bo_process_reloc2(bo);
1990
1991 /* Add the batch buffer to the validation list. There are no relocations
1992 * pointing to it.
1993 */
1994 drm_bacon_add_validate_buffer2(bo);
1995
1996 memclear(execbuf);
1997 execbuf.buffers_ptr = (uintptr_t)bufmgr->exec2_objects;
1998 execbuf.buffer_count = bufmgr->exec_count;
1999 execbuf.batch_start_offset = 0;
2000 execbuf.batch_len = used;
2001 execbuf.cliprects_ptr = 0;
2002 execbuf.num_cliprects = 0;
2003 execbuf.DR1 = 0;
2004 execbuf.DR4 = 0;
2005 execbuf.flags = flags;
2006 if (ctx == NULL)
2007 i915_execbuffer2_set_context_id(execbuf, 0);
2008 else
2009 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2010 execbuf.rsvd2 = 0;
2011 if (in_fence != -1) {
2012 execbuf.rsvd2 = in_fence;
2013 execbuf.flags |= I915_EXEC_FENCE_IN;
2014 }
2015 if (out_fence != NULL) {
2016 *out_fence = -1;
2017 execbuf.flags |= I915_EXEC_FENCE_OUT;
2018 }
2019
2020 if (bufmgr->no_exec)
2021 goto skip_execution;
2022
2023 ret = drmIoctl(bufmgr->fd,
2024 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2025 &execbuf);
2026 if (ret != 0) {
2027 ret = -errno;
2028 if (ret == -ENOSPC) {
2029 DBG("Execbuffer fails to pin. "
2030 "Estimate: %u. Actual: %u. Available: %u\n",
2031 drm_bacon_gem_estimate_batch_space(bufmgr->exec_bos,
2032 bufmgr->exec_count),
2033 drm_bacon_gem_compute_batch_space(bufmgr->exec_bos,
2034 bufmgr->exec_count),
2035 (unsigned int) bufmgr->gtt_size);
2036 }
2037 }
2038 drm_bacon_update_buffer_offsets2(bufmgr);
2039
2040 if (ret == 0 && out_fence != NULL)
2041 *out_fence = execbuf.rsvd2 >> 32;
2042
2043 skip_execution:
2044 if (INTEL_DEBUG & DEBUG_BUFMGR)
2045 drm_bacon_gem_dump_validation_list(bufmgr);
2046
2047 for (i = 0; i < bufmgr->exec_count; i++) {
2048 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr->exec_bos[i]);
2049
2050 bo_gem->idle = false;
2051
2052 /* Disconnect the buffer from the validate list */
2053 bo_gem->validate_index = -1;
2054 bufmgr->exec_bos[i] = NULL;
2055 }
2056 bufmgr->exec_count = 0;
2057 pthread_mutex_unlock(&bufmgr->lock);
2058
2059 return ret;
2060 }
2061
2062 int
2063 drm_bacon_bo_exec(drm_bacon_bo *bo, int used)
2064 {
2065 return do_exec2(bo, used, NULL, -1, NULL, I915_EXEC_RENDER);
2066 }
2067
2068 int
2069 drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags)
2070 {
2071 return do_exec2(bo, used, NULL, -1, NULL, flags);
2072 }
2073
2074 int
2075 drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
2076 int used, unsigned int flags)
2077 {
2078 return do_exec2(bo, used, ctx, -1, NULL, flags);
2079 }
2080
2081 int
2082 drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
2083 drm_bacon_context *ctx,
2084 int used,
2085 int in_fence,
2086 int *out_fence,
2087 unsigned int flags)
2088 {
2089 return do_exec2(bo, used, ctx, in_fence, out_fence, flags);
2090 }
2091
2092 static int
2093 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
2094 uint32_t tiling_mode,
2095 uint32_t stride)
2096 {
2097 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2098 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2099 struct drm_i915_gem_set_tiling set_tiling;
2100 int ret;
2101
2102 if (bo_gem->global_name == 0 &&
2103 tiling_mode == bo_gem->tiling_mode &&
2104 stride == bo_gem->stride)
2105 return 0;
2106
2107 memset(&set_tiling, 0, sizeof(set_tiling));
2108 do {
2109 /* set_tiling is slightly broken and overwrites the
2110 * input on the error path, so we have to open code
2111 		 * drmIoctl.
2112 */
2113 set_tiling.handle = bo_gem->gem_handle;
2114 set_tiling.tiling_mode = tiling_mode;
2115 set_tiling.stride = stride;
2116
2117 ret = ioctl(bufmgr->fd,
2118 DRM_IOCTL_I915_GEM_SET_TILING,
2119 &set_tiling);
2120 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2121 if (ret == -1)
2122 return -errno;
2123
2124 bo_gem->tiling_mode = set_tiling.tiling_mode;
2125 bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2126 bo_gem->stride = set_tiling.stride;
2127 return 0;
2128 }
2129
2130 int
2131 drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2132 uint32_t stride)
2133 {
2134 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2135 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2136 int ret;
2137
2138 /* Tiling with userptr surfaces is not supported
2139 	 * on all hardware, so refuse it for the time being.
2140 */
2141 if (bo_gem->is_userptr)
2142 return -EINVAL;
2143
2144 /* Linear buffers have no stride. By ensuring that we only ever use
2145 * stride 0 with linear buffers, we simplify our code.
2146 */
2147 if (*tiling_mode == I915_TILING_NONE)
2148 stride = 0;
2149
2150 ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2151 if (ret == 0)
2152 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
2153
2154 *tiling_mode = bo_gem->tiling_mode;
2155 return ret;
2156 }
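/*
 * Illustrative sketch (not part of the original file): requesting X tiling
 * and re-checking what the kernel actually applied.  pitch is a placeholder
 * computed by the caller.
 *
 *     uint32_t tiling = I915_TILING_X;
 *     int ret = drm_bacon_bo_set_tiling(bo, &tiling, pitch);
 *
 *     // On return, 'tiling' holds the mode the kernel actually applied,
 *     // which may be I915_TILING_NONE if the request was not honoured
 *     // (ret is -EINVAL for userptr BOs, which are never tiled here).
 */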
2157
2158 int
2159 drm_bacon_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2160 uint32_t *swizzle_mode)
2161 {
2162 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2163
2164 *tiling_mode = bo_gem->tiling_mode;
2165 *swizzle_mode = bo_gem->swizzle_mode;
2166 return 0;
2167 }
2168
2169 int
2170 drm_bacon_bo_set_softpin_offset(drm_bacon_bo *bo, uint64_t offset)
2171 {
2172 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2173
2174 bo->offset64 = offset;
2175 bo_gem->kflags |= EXEC_OBJECT_PINNED;
2176
2177 return 0;
2178 }
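/*
 * Illustrative sketch (not part of the original file): pinning a BO at a
 * caller-chosen GPU address, so that subsequent drm_bacon_bo_emit_reloc()
 * calls naming it are recorded as softpin targets rather than relocations.
 * The address and state_offset are placeholders the caller must manage.
 *
 *     drm_bacon_bo_set_softpin_offset(target_bo, 64ull * 1024 * 1024);
 *     drm_bacon_bo_emit_reloc(batch_bo, state_offset, target_bo, 0,
 *                             I915_GEM_DOMAIN_RENDER, 0);
 */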
2179
2180 drm_bacon_bo *
2181 drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
2182 {
2183 int ret;
2184 uint32_t handle;
2185 drm_bacon_bo_gem *bo_gem;
2186 struct drm_i915_gem_get_tiling get_tiling;
2187
2188 pthread_mutex_lock(&bufmgr->lock);
2189 ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
2190 if (ret) {
2191 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2192 pthread_mutex_unlock(&bufmgr->lock);
2193 return NULL;
2194 }
2195
2196 /*
2197 * See if the kernel has already returned this buffer to us. Just as
2198 	 * for named buffers, we must not create two BOs pointing at the same
2199 	 * kernel object.
2200 */
2201 HASH_FIND(handle_hh, bufmgr->handle_table,
2202 &handle, sizeof(handle), bo_gem);
2203 if (bo_gem) {
2204 drm_bacon_bo_reference(&bo_gem->bo);
2205 goto out;
2206 }
2207
2208 bo_gem = calloc(1, sizeof(*bo_gem));
2209 if (!bo_gem)
2210 goto out;
2211
2212 p_atomic_set(&bo_gem->refcount, 1);
2213 list_inithead(&bo_gem->vma_list);
2214
2215 /* Determine size of bo. The fd-to-handle ioctl really should
2216 * return the size, but it doesn't. If we have kernel 3.12 or
2217 * later, we can lseek on the prime fd to get the size. Older
2218 * kernels will just fail, in which case we fall back to the
2219 	 * provided (estimated or guessed) size. */
2220 ret = lseek(prime_fd, 0, SEEK_END);
2221 if (ret != -1)
2222 bo_gem->bo.size = ret;
2223 else
2224 bo_gem->bo.size = size;
2225
2226 bo_gem->bo.handle = handle;
2227 bo_gem->bo.bufmgr = bufmgr;
2228
2229 bo_gem->gem_handle = handle;
2230 HASH_ADD(handle_hh, bufmgr->handle_table,
2231 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2232
2233 bo_gem->name = "prime";
2234 bo_gem->validate_index = -1;
2235 bo_gem->used_as_reloc_target = false;
2236 bo_gem->has_error = false;
2237 bo_gem->reusable = false;
2238
2239 memclear(get_tiling);
2240 get_tiling.handle = bo_gem->gem_handle;
2241 if (drmIoctl(bufmgr->fd,
2242 DRM_IOCTL_I915_GEM_GET_TILING,
2243 &get_tiling))
2244 goto err;
2245
2246 bo_gem->tiling_mode = get_tiling.tiling_mode;
2247 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2248 /* XXX stride is unknown */
2249 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
2250
2251 out:
2252 pthread_mutex_unlock(&bufmgr->lock);
2253 return &bo_gem->bo;
2254
2255 err:
2256 drm_bacon_gem_bo_free(&bo_gem->bo);
2257 pthread_mutex_unlock(&bufmgr->lock);
2258 return NULL;
2259 }
2260
2261 int
2262 drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
2263 {
2264 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2265 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2266
2267 if (drmPrimeHandleToFD(bufmgr->fd, bo_gem->gem_handle,
2268 DRM_CLOEXEC, prime_fd) != 0)
2269 return -errno;
2270
2271 bo_gem->reusable = false;
2272
2273 return 0;
2274 }
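/*
 * Illustrative sketch (not part of the original file): sharing a BO across
 * processes via PRIME.  other_bufmgr stands for the importing side's buffer
 * manager; the fd transport (e.g. a Unix socket) is up to the caller.
 *
 *     int fd = -1;
 *     drm_bacon_bo_gem_export_to_prime(bo, &fd);
 *     // ... pass fd to the other process ...
 *     drm_bacon_bo *imported =
 *             drm_bacon_bo_gem_create_from_prime(other_bufmgr, fd, bo->size);
 *
 * Importing the same fd twice within one process hits the handle hash above
 * and returns the existing drm_bacon_bo instead of creating a duplicate.
 */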
2275
2276 int
2277 drm_bacon_bo_flink(drm_bacon_bo *bo, uint32_t *name)
2278 {
2279 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2280 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2281
2282 if (!bo_gem->global_name) {
2283 struct drm_gem_flink flink;
2284
2285 memclear(flink);
2286 flink.handle = bo_gem->gem_handle;
2287 if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2288 return -errno;
2289
2290 pthread_mutex_lock(&bufmgr->lock);
2291 if (!bo_gem->global_name) {
2292 bo_gem->global_name = flink.name;
2293 bo_gem->reusable = false;
2294
2295 HASH_ADD(name_hh, bufmgr->name_table,
2296 global_name, sizeof(bo_gem->global_name),
2297 bo_gem);
2298 }
2299 pthread_mutex_unlock(&bufmgr->lock);
2300 }
2301
2302 *name = bo_gem->global_name;
2303 return 0;
2304 }
2305
2306 /**
2307 * Enables unlimited caching of buffer objects for reuse.
2308 *
2309 * This is potentially very memory expensive, as the cache at each bucket
2310 * size is only bounded by how many buffers of that size we've managed to have
2311 * in flight at once.
2312 */
2313 void
2314 drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
2315 {
2316 bufmgr->bo_reuse = true;
2317 }
2318
2319 /**
2320 * Disables implicit synchronisation before executing the bo
2321 *
2322 * This will cause rendering corruption unless you correctly manage explicit
2323 * fences for all rendering involving this buffer - including use by others.
2324 * Disabling the implicit serialisation is only required if that serialisation
2325 * is too coarse (for example, you have split the buffer into many
2326 * non-overlapping regions and are sharing the whole buffer between concurrent
2327 * independent command streams).
2328 *
2329 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2330  * which can be checked using drm_bacon_bufmgr_gem_can_disable_implicit_sync,
2331 * or subsequent execbufs involving the bo will generate EINVAL.
2332 */
2333 void
2334 drm_bacon_gem_bo_disable_implicit_sync(drm_bacon_bo *bo)
2335 {
2336 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2337
2338 bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2339 }
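/*
 * Illustrative sketch (not part of the original file): opting a shared buffer
 * out of implicit sync and relying on an explicit fence instead.  ctx, used
 * and the flag choice are placeholders.
 *
 *     if (drm_bacon_bufmgr_gem_can_disable_implicit_sync(bufmgr)) {
 *             int fence = -1;
 *
 *             drm_bacon_gem_bo_disable_implicit_sync(shared_bo);
 *             drm_bacon_gem_bo_fence_exec(batch_bo, ctx, used,
 *                                         -1, &fence, I915_EXEC_RENDER);
 *             // hand 'fence' to the other command stream; close() it when done
 *     }
 */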
2340
2341 /**
2342 * Enables implicit synchronisation before executing the bo
2343 *
2344 * This is the default behaviour of the kernel, to wait upon prior writes
2345 * completing on the object before rendering with it, or to wait for prior
2346 * reads to complete before writing into the object.
2347 * drm_bacon_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2348 * the kernel never to insert a stall before using the object. Then this
2349 * function can be used to restore the implicit sync before subsequent
2350 * rendering.
2351 */
2352 void
2353 drm_bacon_gem_bo_enable_implicit_sync(drm_bacon_bo *bo)
2354 {
2355 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2356
2357 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2358 }
2359
2360 /**
2361 * Query whether the kernel supports disabling of its implicit synchronisation
2362 * before execbuf. See drm_bacon_gem_bo_disable_implicit_sync()
2363 */
2364 int
2365 drm_bacon_bufmgr_gem_can_disable_implicit_sync(drm_bacon_bufmgr *bufmgr)
2366 {
2367 return bufmgr->has_exec_async;
2368 }
2369
2370 /**
2371 * Return the additional aperture space required by the tree of buffer objects
2372 * rooted at bo.
2373 */
2374 static int
2375 drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
2376 {
2377 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2378 int i;
2379 int total = 0;
2380
2381 if (bo == NULL || bo_gem->included_in_check_aperture)
2382 return 0;
2383
2384 total += bo->size;
2385 bo_gem->included_in_check_aperture = true;
2386
2387 for (i = 0; i < bo_gem->reloc_count; i++)
2388 total +=
2389 drm_bacon_gem_bo_get_aperture_space(bo_gem->
2390 reloc_target_info[i].bo);
2391
2392 return total;
2393 }
2394
2395 /**
2396 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
2397 * for the next drm_bacon_bufmgr_check_aperture_space() call.
2398 */
2399 static void
2400 drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
2401 {
2402 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2403 int i;
2404
2405 if (bo == NULL || !bo_gem->included_in_check_aperture)
2406 return;
2407
2408 bo_gem->included_in_check_aperture = false;
2409
2410 for (i = 0; i < bo_gem->reloc_count; i++)
2411 drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->
2412 reloc_target_info[i].bo);
2413 }
2414
2415 /**
2416 * Return a conservative estimate for the amount of aperture required
2417 * for a collection of buffers. This may double-count some buffers.
2418 */
2419 static unsigned int
2420 drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
2421 {
2422 int i;
2423 unsigned int total = 0;
2424
2425 for (i = 0; i < count; i++) {
2426 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2427 if (bo_gem != NULL)
2428 total += bo_gem->reloc_tree_size;
2429 }
2430 return total;
2431 }
2432
2433 /**
2434 * Return the amount of aperture needed for a collection of buffers.
2435 * This avoids double counting any buffers, at the cost of looking
2436 * at every buffer in the set.
2437 */
2438 static unsigned int
2439 drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
2440 {
2441 int i;
2442 unsigned int total = 0;
2443
2444 for (i = 0; i < count; i++) {
2445 total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
2446 /* For the first buffer object in the array, we get an
2447 * accurate count back for its reloc_tree size (since nothing
2448 * had been flagged as being counted yet). We can save that
2449 * value out as a more conservative reloc_tree_size that
2450 * avoids double-counting target buffers. Since the first
2451 * buffer happens to usually be the batch buffer in our
2452 * callers, this can pull us back from doing the tree
2453 * walk on every new batch emit.
2454 */
2455 if (i == 0) {
2456 drm_bacon_bo_gem *bo_gem =
2457 (drm_bacon_bo_gem *) bo_array[i];
2458 bo_gem->reloc_tree_size = total;
2459 }
2460 }
2461
2462 for (i = 0; i < count; i++)
2463 drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
2464 return total;
2465 }
2466
2467 /**
2468  * Return -ENOSPC if the batchbuffer should be flushed before attempting to
2469 * emit rendering referencing the buffers pointed to by bo_array.
2470 *
2471 * This is required because if we try to emit a batchbuffer with relocations
2472 * to a tree of buffers that won't simultaneously fit in the aperture,
2473 * the rendering will return an error at a point where the software is not
2474 * prepared to recover from it.
2475 *
2476 * However, we also want to emit the batchbuffer significantly before we reach
2477 * the limit, as a series of batchbuffers each of which references buffers
2478 * covering almost all of the aperture means that at each emit we end up
2479 * waiting to evict a buffer from the last rendering, and we get synchronous
2480 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to
2481 * get better parallelism.
2482 */
2483 int
2484 drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo **bo_array, int count)
2485 {
2486 drm_bacon_bufmgr *bufmgr = bo_array[0]->bufmgr;
2487 unsigned int total = 0;
2488 unsigned int threshold = bufmgr->gtt_size * 3 / 4;
2489
2490 total = drm_bacon_gem_estimate_batch_space(bo_array, count);
2491
2492 if (total > threshold)
2493 total = drm_bacon_gem_compute_batch_space(bo_array, count);
2494
2495 if (total > threshold) {
2496 DBG("check_space: overflowed available aperture, "
2497 "%dkb vs %dkb\n",
2498 total / 1024, (int)bufmgr->gtt_size / 1024);
2499 return -ENOSPC;
2500 } else {
2501 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2502 (int)bufmgr->gtt_size / 1024);
2503 return 0;
2504 }
2505 }
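/*
 * Illustrative sketch (not part of the original file): how a caller would
 * typically gate a draw on this check.  batch_bo, vertex_bo, tex_bo,
 * flush_batch() and emit_draw() are hypothetical.
 *
 *     drm_bacon_bo *bos[] = { batch_bo, vertex_bo, tex_bo };
 *
 *     if (drm_bacon_bufmgr_check_aperture_space(bos, ARRAY_SIZE(bos)) == -ENOSPC)
 *             flush_batch();   // submit what we have and start a fresh batch
 *
 *     emit_draw();
 */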
2506
2507 /*
2508 * Disable buffer reuse for objects which are shared with the kernel
2509 * as scanout buffers
2510 */
2511 int
2512 drm_bacon_bo_disable_reuse(drm_bacon_bo *bo)
2513 {
2514 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2515
2516 bo_gem->reusable = false;
2517 return 0;
2518 }
2519
2520 int
2521 drm_bacon_bo_is_reusable(drm_bacon_bo *bo)
2522 {
2523 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2524
2525 return bo_gem->reusable;
2526 }
2527
2528 static int
2529 _drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2530 {
2531 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2532 int i;
2533
2534 for (i = 0; i < bo_gem->reloc_count; i++) {
2535 if (bo_gem->reloc_target_info[i].bo == target_bo)
2536 return 1;
2537 if (bo == bo_gem->reloc_target_info[i].bo)
2538 continue;
2539 if (_drm_bacon_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2540 target_bo))
2541 return 1;
2542 }
2543
2544 	for (i = 0; i < bo_gem->softpin_target_count; i++) {
2545 if (bo_gem->softpin_target[i] == target_bo)
2546 return 1;
2547 if (_drm_bacon_gem_bo_references(bo_gem->softpin_target[i], target_bo))
2548 return 1;
2549 }
2550
2551 return 0;
2552 }
2553
2554 /** Return true if target_bo is referenced by bo's relocation tree. */
2555 int
2556 drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2557 {
2558 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
2559
2560 if (bo == NULL || target_bo == NULL)
2561 return 0;
2562 if (target_bo_gem->used_as_reloc_target)
2563 return _drm_bacon_gem_bo_references(bo, target_bo);
2564 return 0;
2565 }
2566
2567 static void
2568 add_bucket(drm_bacon_bufmgr *bufmgr, int size)
2569 {
2570 unsigned int i = bufmgr->num_buckets;
2571
2572 assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
2573
2574 list_inithead(&bufmgr->cache_bucket[i].head);
2575 bufmgr->cache_bucket[i].size = size;
2576 bufmgr->num_buckets++;
2577 }
2578
2579 static void
2580 init_cache_buckets(drm_bacon_bufmgr *bufmgr)
2581 {
2582 unsigned long size, cache_max_size = 64 * 1024 * 1024;
2583
2584 	/* OK, so power-of-two buckets were too wasteful of memory.
2585 * Give 3 other sizes between each power of two, to hopefully
2586 * cover things accurately enough. (The alternative is
2587 * probably to just go for exact matching of sizes, and assume
2588 * that for things like composited window resize the tiled
2589 * width/height alignment and rounding of sizes to pages will
2590 * get us useful cache hit rates anyway)
2591 */
2592 add_bucket(bufmgr, 4096);
2593 add_bucket(bufmgr, 4096 * 2);
2594 add_bucket(bufmgr, 4096 * 3);
2595
2596 /* Initialize the linked lists for BO reuse cache. */
2597 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2598 add_bucket(bufmgr, size);
2599
2600 add_bucket(bufmgr, size + size * 1 / 4);
2601 add_bucket(bufmgr, size + size * 2 / 4);
2602 add_bucket(bufmgr, size + size * 3 / 4);
2603 }
2604 }
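/*
 * For illustration (not part of the original file), the scheme above yields
 * buckets of 4, 8 and 12 pages, then for each power of two up to 64 MiB the
 * power itself plus 1/4, 2/4 and 3/4 steps towards the next one:
 *
 *     4096, 8192, 12288,
 *     16384, 20480, 24576, 28672,
 *     32768, 40960, 49152, 57344,
 *     65536, 81920, 98304, 114688, ...
 *
 * so from 16 KiB upwards an allocation is rounded up by at most about 25%
 * when it is placed in a bucket.
 */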
2605
2606 void
2607 drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
2608 {
2609 bufmgr->vma_max = limit;
2610
2611 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
2612 }
2613
2614 drm_bacon_context *
2615 drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
2616 {
2617 struct drm_i915_gem_context_create create;
2618 drm_bacon_context *context = NULL;
2619 int ret;
2620
2621 context = calloc(1, sizeof(*context));
2622 if (!context)
2623 return NULL;
2624
2625 memclear(create);
2626 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2627 if (ret != 0) {
2628 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
2629 strerror(errno));
2630 free(context);
2631 return NULL;
2632 }
2633
2634 context->ctx_id = create.ctx_id;
2635 context->bufmgr = bufmgr;
2636
2637 return context;
2638 }
2639
2640 int
2641 drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
2642 {
2643 if (ctx == NULL)
2644 return -EINVAL;
2645
2646 *ctx_id = ctx->ctx_id;
2647
2648 return 0;
2649 }
2650
2651 void
2652 drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
2653 {
2654 struct drm_i915_gem_context_destroy destroy;
2655 int ret;
2656
2657 if (ctx == NULL)
2658 return;
2659
2660 memclear(destroy);
2661
2662 destroy.ctx_id = ctx->ctx_id;
2663 ret = drmIoctl(ctx->bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
2664 &destroy);
2665 if (ret != 0)
2666 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2667 strerror(errno));
2668
2669 free(ctx);
2670 }
2671
2672 int
2673 drm_bacon_get_reset_stats(drm_bacon_context *ctx,
2674 uint32_t *reset_count,
2675 uint32_t *active,
2676 uint32_t *pending)
2677 {
2678 struct drm_i915_reset_stats stats;
2679 int ret;
2680
2681 if (ctx == NULL)
2682 return -EINVAL;
2683
2684 memclear(stats);
2685
2686 stats.ctx_id = ctx->ctx_id;
2687 ret = drmIoctl(ctx->bufmgr->fd,
2688 DRM_IOCTL_I915_GET_RESET_STATS,
2689 &stats);
2690 if (ret == 0) {
2691 if (reset_count != NULL)
2692 *reset_count = stats.reset_count;
2693
2694 if (active != NULL)
2695 *active = stats.batch_active;
2696
2697 if (pending != NULL)
2698 *pending = stats.batch_pending;
2699 }
2700
2701 return ret;
2702 }
2703
2704 int
2705 drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
2706 uint32_t offset,
2707 uint64_t *result)
2708 {
2709 struct drm_i915_reg_read reg_read;
2710 int ret;
2711
2712 memclear(reg_read);
2713 reg_read.offset = offset;
2714
2715 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
2716
2717 *result = reg_read.val;
2718 return ret;
2719 }
2720
2721 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
2722 static struct list_head bufmgr_list = { &bufmgr_list, &bufmgr_list };
2723
2724 static drm_bacon_bufmgr *
2725 drm_bacon_bufmgr_gem_find(int fd)
2726 {
2727 list_for_each_entry(drm_bacon_bufmgr,
2728 bufmgr, &bufmgr_list, managers) {
2729 if (bufmgr->fd == fd) {
2730 p_atomic_inc(&bufmgr->refcount);
2731 return bufmgr;
2732 }
2733 }
2734
2735 return NULL;
2736 }
2737
2738 void
2739 drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr)
2740 {
2741 if (atomic_add_unless(&bufmgr->refcount, -1, 1)) {
2742 pthread_mutex_lock(&bufmgr_list_mutex);
2743
2744 if (p_atomic_dec_zero(&bufmgr->refcount)) {
2745 list_del(&bufmgr->managers);
2746 drm_bacon_bufmgr_gem_destroy(bufmgr);
2747 }
2748
2749 pthread_mutex_unlock(&bufmgr_list_mutex);
2750 }
2751 }
2752
2753 void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
2754 {
2755 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2756 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2757
2758 if (bo_gem->gtt_virtual)
2759 return bo_gem->gtt_virtual;
2760
2761 if (bo_gem->is_userptr)
2762 return NULL;
2763
2764 pthread_mutex_lock(&bufmgr->lock);
2765 if (bo_gem->gtt_virtual == NULL) {
2766 struct drm_i915_gem_mmap_gtt mmap_arg;
2767 void *ptr;
2768
2769 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
2770 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2771
2772 if (bo_gem->map_count++ == 0)
2773 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2774
2775 memclear(mmap_arg);
2776 mmap_arg.handle = bo_gem->gem_handle;
2777
2778 /* Get the fake offset back... */
2779 ptr = MAP_FAILED;
2780 if (drmIoctl(bufmgr->fd,
2781 DRM_IOCTL_I915_GEM_MMAP_GTT,
2782 &mmap_arg) == 0) {
2783 /* and mmap it */
2784 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
2785 MAP_SHARED, bufmgr->fd,
2786 mmap_arg.offset);
2787 }
2788 if (ptr == MAP_FAILED) {
2789 if (--bo_gem->map_count == 0)
2790 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2791 ptr = NULL;
2792 }
2793
2794 bo_gem->gtt_virtual = ptr;
2795 }
2796 pthread_mutex_unlock(&bufmgr->lock);
2797
2798 return bo_gem->gtt_virtual;
2799 }
2800
2801 void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
2802 {
2803 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2804 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2805
2806 if (bo_gem->mem_virtual)
2807 return bo_gem->mem_virtual;
2808
2809 if (bo_gem->is_userptr) {
2810 /* Return the same user ptr */
2811 return bo_gem->user_virtual;
2812 }
2813
2814 pthread_mutex_lock(&bufmgr->lock);
2815 if (!bo_gem->mem_virtual) {
2816 struct drm_i915_gem_mmap mmap_arg;
2817
2818 if (bo_gem->map_count++ == 0)
2819 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2820
2821 DBG("bo_map: %d (%s), map_count=%d\n",
2822 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2823
2824 memclear(mmap_arg);
2825 mmap_arg.handle = bo_gem->gem_handle;
2826 mmap_arg.size = bo->size;
2827 if (drmIoctl(bufmgr->fd,
2828 DRM_IOCTL_I915_GEM_MMAP,
2829 &mmap_arg)) {
2830 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2831 __FILE__, __LINE__, bo_gem->gem_handle,
2832 bo_gem->name, strerror(errno));
2833 if (--bo_gem->map_count == 0)
2834 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2835 } else {
2836 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2837 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2838 }
2839 }
2840 pthread_mutex_unlock(&bufmgr->lock);
2841
2842 return bo_gem->mem_virtual;
2843 }
2844
2845 void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
2846 {
2847 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2848 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2849
2850 if (bo_gem->wc_virtual)
2851 return bo_gem->wc_virtual;
2852
2853 if (bo_gem->is_userptr)
2854 return NULL;
2855
2856 pthread_mutex_lock(&bufmgr->lock);
2857 if (!bo_gem->wc_virtual) {
2858 struct drm_i915_gem_mmap mmap_arg;
2859
2860 if (bo_gem->map_count++ == 0)
2861 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2862
2863 DBG("bo_map: %d (%s), map_count=%d\n",
2864 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2865
2866 memclear(mmap_arg);
2867 mmap_arg.handle = bo_gem->gem_handle;
2868 mmap_arg.size = bo->size;
2869 mmap_arg.flags = I915_MMAP_WC;
2870 if (drmIoctl(bufmgr->fd,
2871 DRM_IOCTL_I915_GEM_MMAP,
2872 &mmap_arg)) {
2873 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2874 __FILE__, __LINE__, bo_gem->gem_handle,
2875 bo_gem->name, strerror(errno));
2876 if (--bo_gem->map_count == 0)
2877 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2878 } else {
2879 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2880 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2881 }
2882 }
2883 pthread_mutex_unlock(&bufmgr->lock);
2884
2885 return bo_gem->wc_virtual;
2886 }
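/*
 * Illustrative sketch (not part of the original file): choosing one of the
 * three mappings above for a streaming upload.  devinfo, upload_data and
 * upload_size are placeholders supplied by the caller; cache flushing and
 * domain management are deliberately ignored here.
 *
 *     void *map = devinfo->has_llc ? drm_bacon_gem_bo_map__cpu(bo)
 *                                  : drm_bacon_gem_bo_map__wc(bo);
 *     if (map == NULL)
 *             map = drm_bacon_gem_bo_map__gtt(bo);   // detiling fallback
 *     if (map != NULL)
 *             memcpy(map, upload_data, upload_size);
 */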
2887
2888 /**
2889 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2890  * and manage buffer objects.
2891 *
2892 * \param fd File descriptor of the opened DRM device.
2893 */
2894 drm_bacon_bufmgr *
2895 drm_bacon_bufmgr_gem_init(struct gen_device_info *devinfo,
2896 int fd, int batch_size)
2897 {
2898 drm_bacon_bufmgr *bufmgr;
2899 struct drm_i915_gem_get_aperture aperture;
2900 drm_i915_getparam_t gp;
2901 int ret, tmp;
2902
2903 pthread_mutex_lock(&bufmgr_list_mutex);
2904
2905 bufmgr = drm_bacon_bufmgr_gem_find(fd);
2906 if (bufmgr)
2907 goto exit;
2908
2909 bufmgr = calloc(1, sizeof(*bufmgr));
2910 if (bufmgr == NULL)
2911 goto exit;
2912
2913 bufmgr->fd = fd;
2914 p_atomic_set(&bufmgr->refcount, 1);
2915
2916 if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
2917 free(bufmgr);
2918 bufmgr = NULL;
2919 goto exit;
2920 }
2921
2922 memclear(aperture);
2923 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
2924 bufmgr->gtt_size = aperture.aper_available_size;
2925
2926 memclear(gp);
2927 gp.value = &tmp;
2928
2929 gp.param = I915_PARAM_HAS_EXEC_ASYNC;
2930 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2931 bufmgr->has_exec_async = ret == 0;
2932
2933 bufmgr->has_llc = devinfo->has_llc;
2934
2935 	/* Let's go with one relocation for every 2 dwords (but round down a bit
2936 * since a power of two will mean an extra page allocation for the reloc
2937 * buffer).
2938 *
2939 * Every 4 was too few for the blender benchmark.
2940 */
2941 bufmgr->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
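	/* For example (purely illustrative): a hypothetical 16 KiB batch gives
	 * 16384 / sizeof(uint32_t) / 2 - 2 = 2046 relocations, just under the
	 * 2048 that the full power of two would allow.
	 */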
2942
2943 init_cache_buckets(bufmgr);
2944
2945 list_inithead(&bufmgr->vma_cache);
2946 bufmgr->vma_max = -1; /* unlimited by default */
2947
2948 list_add(&bufmgr->managers, &bufmgr_list);
2949
2950 exit:
2951 pthread_mutex_unlock(&bufmgr_list_mutex);
2952
2953 return bufmgr;
2954 }
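/*
 * Illustrative sketch (not part of the original file): typical bring-up and
 * teardown around this file's entry points.  The device path, devinfo pointer
 * and batch size are placeholders supplied by the caller.
 *
 *     int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
 *     drm_bacon_bufmgr *bufmgr =
 *             drm_bacon_bufmgr_gem_init(devinfo, fd, 16 * 1024);
 *
 *     drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 *     drm_bacon_context *ctx = drm_bacon_gem_context_create(bufmgr);
 *
 *     // ... allocate BOs, emit relocations, drm_bacon_gem_bo_context_exec() ...
 *
 *     drm_bacon_gem_context_destroy(ctx);
 *     drm_bacon_bufmgr_destroy(bufmgr);
 *     close(fd);
 */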