1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "libdrm_macros.h"
60 #include "util/list.h"
61 #include "brw_bufmgr.h"
62 #include "intel_bufmgr_priv.h"
63 #include "intel_chipset.h"
64 #include "string.h"
65
66 #include "i915_drm.h"
67 #include "uthash.h"
68
69 #ifdef HAVE_VALGRIND
70 #include <valgrind.h>
71 #include <memcheck.h>
72 #define VG(x) x
73 #else
74 #define VG(x)
75 #endif
76
77 #define memclear(s) memset(&s, 0, sizeof(s))
78
79 #define DBG(...) do { \
80 if (bufmgr_gem->bufmgr.debug) \
81 fprintf(stderr, __VA_ARGS__); \
82 } while (0)
83
84 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
85 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
86
87 static inline int
88 atomic_add_unless(int *v, int add, int unless)
89 {
90 int c, old;
91 c = p_atomic_read(v);
92 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
93 c = old;
94 return c == unless;
95 }
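/*
 * Editor's illustrative sketch, not part of the original file: the helper
 * above is used to drop a reference only when it is not the last one, so
 * the zero transition can be handled under the bufmgr lock.  The
 * "example_" names are hypothetical.
 */
static inline bool
example_unref_needs_lock(int *example_refcount)
{
	/* True when the count was 1: no decrement happened and the caller
	 * must take the lock before the final release -- the same pattern
	 * drm_bacon_gem_bo_unreference() uses further below. */
	return atomic_add_unless(example_refcount, -1, 1) != 0;
}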
96
97 /**
98 * upper_32_bits - return bits 32-63 of a number
99 * @n: the number we're accessing
100 *
101 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
102 * the "right shift count >= width of type" warning when that quantity is
103 * 32-bits.
104 */
105 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
106
107 /**
108 * lower_32_bits - return bits 0-31 of a number
109 * @n: the number we're accessing
110 */
111 #define lower_32_bits(n) ((__u32)(n))
112
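/*
 * Editor's illustrative sketch, not part of the original file: how the two
 * macros above are combined below to print a 64-bit GTT offset as two
 * 32-bit halves.  "example_offset" is hypothetical.
 */
static inline void
example_print_offset64(uint64_t example_offset)
{
	fprintf(stderr, "0x%08x %08x\n",
		upper_32_bits(example_offset),
		lower_32_bits(example_offset));
}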
113 typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
114
115 struct drm_bacon_gem_bo_bucket {
116 struct list_head head;
117 unsigned long size;
118 };
119
120 typedef struct _drm_bacon_bufmgr_gem {
121 drm_bacon_bufmgr bufmgr;
122
123 int refcount;
124
125 int fd;
126
127 int max_relocs;
128
129 pthread_mutex_t lock;
130
131 struct drm_i915_gem_exec_object *exec_objects;
132 struct drm_i915_gem_exec_object2 *exec2_objects;
133 drm_bacon_bo **exec_bos;
134 int exec_size;
135 int exec_count;
136
137 /** Array of lists of cached gem objects of power-of-two sizes */
138 struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
139 int num_buckets;
140 time_t time;
141
142 struct list_head managers;
143
144 drm_bacon_bo_gem *name_table;
145 drm_bacon_bo_gem *handle_table;
146
147 struct list_head vma_cache;
148 int vma_count, vma_open, vma_max;
149
150 uint64_t gtt_size;
151 int pci_device;
152 int gen;
153 unsigned int has_bsd : 1;
154 unsigned int has_blt : 1;
155 unsigned int has_llc : 1;
156 unsigned int has_wait_timeout : 1;
157 unsigned int bo_reuse : 1;
158 unsigned int no_exec : 1;
159 unsigned int has_vebox : 1;
160 unsigned int has_exec_async : 1;
161
162 struct {
163 void *ptr;
164 uint32_t handle;
165 } userptr_active;
166
167 } drm_bacon_bufmgr_gem;
168
169 typedef struct _drm_bacon_reloc_target_info {
170 drm_bacon_bo *bo;
171 } drm_bacon_reloc_target;
172
173 struct _drm_bacon_bo_gem {
174 drm_bacon_bo bo;
175
176 int refcount;
177 uint32_t gem_handle;
178 const char *name;
179
180 /**
181	 * Kernel-assigned global name for this object
182 *
183 * List contains both flink named and prime fd'd objects
184 */
185 unsigned int global_name;
186
187 UT_hash_handle handle_hh;
188 UT_hash_handle name_hh;
189
190 /**
191 * Index of the buffer within the validation list while preparing a
192 * batchbuffer execution.
193 */
194 int validate_index;
195
196 /**
197 * Current tiling mode
198 */
199 uint32_t tiling_mode;
200 uint32_t swizzle_mode;
201 unsigned long stride;
202
203 unsigned long kflags;
204
205 time_t free_time;
206
207 /** Array passed to the DRM containing relocation information. */
208 struct drm_i915_gem_relocation_entry *relocs;
209 /**
210 * Array of info structs corresponding to relocs[i].target_handle etc
211 */
212 drm_bacon_reloc_target *reloc_target_info;
213 /** Number of entries in relocs */
214 int reloc_count;
215 /** Array of BOs that are referenced by this buffer and will be softpinned */
216 drm_bacon_bo **softpin_target;
217 /** Number softpinned BOs that are referenced by this buffer */
218 int softpin_target_count;
219 /** Maximum amount of softpinned BOs that are referenced by this buffer */
220 int softpin_target_size;
221
222 /** Mapped address for the buffer, saved across map/unmap cycles */
223 void *mem_virtual;
224 /** GTT virtual address for the buffer, saved across map/unmap cycles */
225 void *gtt_virtual;
226 /** WC CPU address for the buffer, saved across map/unmap cycles */
227 void *wc_virtual;
228 /**
229 * Virtual address of the buffer allocated by user, used for userptr
230 * objects only.
231 */
232 void *user_virtual;
233 int map_count;
234 struct list_head vma_list;
235
236 /** BO cache list */
237 struct list_head head;
238
239 /**
240 * Boolean of whether this BO and its children have been included in
241 * the current drm_bacon_bufmgr_check_aperture_space() total.
242 */
243 bool included_in_check_aperture;
244
245 /**
246 * Boolean of whether this buffer has been used as a relocation
247 * target and had its size accounted for, and thus can't have any
248 * further relocations added to it.
249 */
250 bool used_as_reloc_target;
251
252 /**
253 * Boolean of whether we have encountered an error whilst building the relocation tree.
254 */
255 bool has_error;
256
257 /**
258 * Boolean of whether this buffer can be re-used
259 */
260 bool reusable;
261
262 /**
263 * Boolean of whether the GPU is definitely not accessing the buffer.
264 *
265 * This is only valid when reusable, since non-reusable
266 * buffers are those that have been shared with other
267 * processes, so we don't know their state.
268 */
269 bool idle;
270
271 /**
272 * Boolean of whether this buffer was allocated with userptr
273 */
274 bool is_userptr;
275
276 /**
277 * Size in bytes of this buffer and its relocation descendents.
278 *
279 * Used to avoid costly tree walking in
280 * drm_bacon_bufmgr_check_aperture in the common case.
281 */
282 int reloc_tree_size;
283
284	/** Whether we may need to do the SW_FINISH ioctl on unmap. */
285 bool mapped_cpu_write;
286 };
287
288 static unsigned int
289 drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);
290
291 static unsigned int
292 drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);
293
294 static int
295 drm_bacon_gem_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
296 uint32_t * swizzle_mode);
297
298 static int
299 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
300 uint32_t tiling_mode,
301 uint32_t stride);
302
303 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
304 time_t time);
305
306 static void drm_bacon_gem_bo_unreference(drm_bacon_bo *bo);
307
308 static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
309
310 static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
311 {
312 return (drm_bacon_bo_gem *)bo;
313 }
314
315 static unsigned long
316 drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr_gem *bufmgr_gem, unsigned long size,
317 uint32_t *tiling_mode)
318 {
319 if (*tiling_mode == I915_TILING_NONE)
320 return size;
321
322 /* 965+ just need multiples of page size for tiling */
323 return ROUND_UP_TO(size, 4096);
324 }
325
326 /*
327 * Round a given pitch up to the minimum required for X tiling on a
328 * given chip. We use 512 as the minimum to allow for a later tiling
329 * change.
330 */
331 static unsigned long
332 drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr_gem *bufmgr_gem,
333 unsigned long pitch, uint32_t *tiling_mode)
334 {
335 unsigned long tile_width;
336
337 /* If untiled, then just align it so that we can do rendering
338 * to it with the 3D engine.
339 */
340 if (*tiling_mode == I915_TILING_NONE)
341 return ALIGN(pitch, 64);
342
343 if (*tiling_mode == I915_TILING_X)
344 tile_width = 512;
345 else
346 tile_width = 128;
347
348 /* 965 is flexible */
349 return ROUND_UP_TO(pitch, tile_width);
350 }
351
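/*
 * Editor's illustrative sketch, not part of the original file: for a
 * hypothetical 1024x768, 4 bytes-per-pixel X-tiled surface, the two helpers
 * above round the pitch up to a 512-byte multiple and the resulting size up
 * to a whole page.
 */
static inline unsigned long
example_xtiled_surface_size(drm_bacon_bufmgr_gem *bufmgr_gem)
{
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch;

	pitch = drm_bacon_gem_bo_tile_pitch(bufmgr_gem, 1024 * 4, &tiling);
	return drm_bacon_gem_bo_tile_size(bufmgr_gem, pitch * 768, &tiling);
}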
352 static struct drm_bacon_gem_bo_bucket *
353 drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr_gem *bufmgr_gem,
354 unsigned long size)
355 {
356 int i;
357
358 for (i = 0; i < bufmgr_gem->num_buckets; i++) {
359 struct drm_bacon_gem_bo_bucket *bucket =
360 &bufmgr_gem->cache_bucket[i];
361 if (bucket->size >= size) {
362 return bucket;
363 }
364 }
365
366 return NULL;
367 }
368
369 static void
370 drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr_gem *bufmgr_gem)
371 {
372 int i, j;
373
374 for (i = 0; i < bufmgr_gem->exec_count; i++) {
375 drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
376 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
377
378 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
379 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
380 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
381 bo_gem->name);
382 continue;
383 }
384
385 for (j = 0; j < bo_gem->reloc_count; j++) {
386 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[j].bo;
387 drm_bacon_bo_gem *target_gem =
388 (drm_bacon_bo_gem *) target_bo;
389
390 DBG("%2d: %d %s(%s)@0x%08x %08x -> "
391 "%d (%s)@0x%08x %08x + 0x%08x\n",
392 i,
393 bo_gem->gem_handle,
394 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
395 bo_gem->name,
396 upper_32_bits(bo_gem->relocs[j].offset),
397 lower_32_bits(bo_gem->relocs[j].offset),
398 target_gem->gem_handle,
399 target_gem->name,
400 upper_32_bits(target_bo->offset64),
401 lower_32_bits(target_bo->offset64),
402 bo_gem->relocs[j].delta);
403 }
404
405 for (j = 0; j < bo_gem->softpin_target_count; j++) {
406 drm_bacon_bo *target_bo = bo_gem->softpin_target[j];
407 drm_bacon_bo_gem *target_gem =
408 (drm_bacon_bo_gem *) target_bo;
409 DBG("%2d: %d %s(%s) -> "
410 "%d *(%s)@0x%08x %08x\n",
411 i,
412 bo_gem->gem_handle,
413 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
414 bo_gem->name,
415 target_gem->gem_handle,
416 target_gem->name,
417 upper_32_bits(target_bo->offset64),
418 lower_32_bits(target_bo->offset64));
419 }
420 }
421 }
422
423 static inline void
424 drm_bacon_gem_bo_reference(drm_bacon_bo *bo)
425 {
426 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
427
428 p_atomic_inc(&bo_gem->refcount);
429 }
430
431 /**
432 * Adds the given buffer to the list of buffers to be validated (moved into the
433 * appropriate memory type) with the next batch submission.
434 *
435 * If a buffer is validated multiple times in a batch submission, it ends up
436 * with the intersection of the memory type flags and the union of the
437 * access flags.
438 */
439 static void
440 drm_bacon_add_validate_buffer(drm_bacon_bo *bo)
441 {
442 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
443 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
444 int index;
445
446 if (bo_gem->validate_index != -1)
447 return;
448
449 /* Extend the array of validation entries as necessary. */
450 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
451 int new_size = bufmgr_gem->exec_size * 2;
452
453 if (new_size == 0)
454 new_size = 5;
455
456 bufmgr_gem->exec_objects =
457 realloc(bufmgr_gem->exec_objects,
458 sizeof(*bufmgr_gem->exec_objects) * new_size);
459 bufmgr_gem->exec_bos =
460 realloc(bufmgr_gem->exec_bos,
461 sizeof(*bufmgr_gem->exec_bos) * new_size);
462 bufmgr_gem->exec_size = new_size;
463 }
464
465 index = bufmgr_gem->exec_count;
466 bo_gem->validate_index = index;
467 /* Fill in array entry */
468 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
469 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
470 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
471 bufmgr_gem->exec_objects[index].alignment = bo->align;
472 bufmgr_gem->exec_objects[index].offset = 0;
473 bufmgr_gem->exec_bos[index] = bo;
474 bufmgr_gem->exec_count++;
475 }
476
477 static void
478 drm_bacon_add_validate_buffer2(drm_bacon_bo *bo)
479 {
480 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
481 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
482 int index;
483
484 if (bo_gem->validate_index != -1)
485 return;
486
487 /* Extend the array of validation entries as necessary. */
488 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
489 int new_size = bufmgr_gem->exec_size * 2;
490
491 if (new_size == 0)
492 new_size = 5;
493
494 bufmgr_gem->exec2_objects =
495 realloc(bufmgr_gem->exec2_objects,
496 sizeof(*bufmgr_gem->exec2_objects) * new_size);
497 bufmgr_gem->exec_bos =
498 realloc(bufmgr_gem->exec_bos,
499 sizeof(*bufmgr_gem->exec_bos) * new_size);
500 bufmgr_gem->exec_size = new_size;
501 }
502
503 index = bufmgr_gem->exec_count;
504 bo_gem->validate_index = index;
505 /* Fill in array entry */
506 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
507 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
508 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
509 bufmgr_gem->exec2_objects[index].alignment = bo->align;
510 bufmgr_gem->exec2_objects[index].offset = bo->offset64;
511 bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags;
512 bufmgr_gem->exec2_objects[index].rsvd1 = 0;
513 bufmgr_gem->exec2_objects[index].rsvd2 = 0;
514 bufmgr_gem->exec_bos[index] = bo;
515 bufmgr_gem->exec_count++;
516 }
517
518 static void
519 drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr_gem *bufmgr_gem,
520 drm_bacon_bo_gem *bo_gem,
521 unsigned int alignment)
522 {
523 unsigned int size;
524
525 assert(!bo_gem->used_as_reloc_target);
526
527	/* The older chipsets are far less flexible in terms of tiling,
528	 * and require tiled buffers to be size-aligned in the aperture.
529 * This means that in the worst possible case we will need a hole
530 * twice as large as the object in order for it to fit into the
531 * aperture. Optimal packing is for wimps.
532 */
533 size = bo_gem->bo.size;
534
535 bo_gem->reloc_tree_size = size + alignment;
536 }
537
538 static int
539 drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
540 {
541 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
542 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
543 unsigned int max_relocs = bufmgr_gem->max_relocs;
544
545 if (bo->size / 4 < max_relocs)
546 max_relocs = bo->size / 4;
547
548 bo_gem->relocs = malloc(max_relocs *
549 sizeof(struct drm_i915_gem_relocation_entry));
550 bo_gem->reloc_target_info = malloc(max_relocs *
551 sizeof(drm_bacon_reloc_target));
552 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
553 bo_gem->has_error = true;
554
555 free (bo_gem->relocs);
556 bo_gem->relocs = NULL;
557
558 free (bo_gem->reloc_target_info);
559 bo_gem->reloc_target_info = NULL;
560
561 return 1;
562 }
563
564 return 0;
565 }
566
567 static int
568 drm_bacon_gem_bo_busy(drm_bacon_bo *bo)
569 {
570 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
571 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
572 struct drm_i915_gem_busy busy;
573 int ret;
574
575 if (bo_gem->reusable && bo_gem->idle)
576 return false;
577
578 memclear(busy);
579 busy.handle = bo_gem->gem_handle;
580
581 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
582 if (ret == 0) {
583 bo_gem->idle = !busy.busy;
584 return busy.busy;
585 } else {
586 return false;
587 }
589 }
590
591 static int
592 drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr_gem *bufmgr_gem,
593 drm_bacon_bo_gem *bo_gem, int state)
594 {
595 struct drm_i915_gem_madvise madv;
596
597 memclear(madv);
598 madv.handle = bo_gem->gem_handle;
599 madv.madv = state;
600 madv.retained = 1;
601 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
602
603 return madv.retained;
604 }
605
606 static int
607 drm_bacon_gem_bo_madvise(drm_bacon_bo *bo, int madv)
608 {
609 return drm_bacon_gem_bo_madvise_internal
610 ((drm_bacon_bufmgr_gem *) bo->bufmgr,
611 (drm_bacon_bo_gem *) bo,
612 madv);
613 }
614
615 /* drop the oldest entries that have been purged by the kernel */
616 static void
617 drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr_gem *bufmgr_gem,
618 struct drm_bacon_gem_bo_bucket *bucket)
619 {
620 while (!list_empty(&bucket->head)) {
621 drm_bacon_bo_gem *bo_gem;
622
623 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
624 bucket->head.next, head);
625 if (drm_bacon_gem_bo_madvise_internal
626 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
627 break;
628
629 list_del(&bo_gem->head);
630 drm_bacon_gem_bo_free(&bo_gem->bo);
631 }
632 }
633
634 static drm_bacon_bo *
635 drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
636 const char *name,
637 unsigned long size,
638 unsigned long flags,
639 uint32_t tiling_mode,
640 unsigned long stride,
641 unsigned int alignment)
642 {
643 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
644 drm_bacon_bo_gem *bo_gem;
645 unsigned int page_size = getpagesize();
646 int ret;
647 struct drm_bacon_gem_bo_bucket *bucket;
648 bool alloc_from_cache;
649 unsigned long bo_size;
650 bool for_render = false;
651
652 if (flags & BO_ALLOC_FOR_RENDER)
653 for_render = true;
654
655 /* Round the allocated size up to a power of two number of pages. */
656 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr_gem, size);
657
658 /* If we don't have caching at this size, don't actually round the
659 * allocation up.
660 */
661 if (bucket == NULL) {
662 bo_size = size;
663 if (bo_size < page_size)
664 bo_size = page_size;
665 } else {
666 bo_size = bucket->size;
667 }
668
669 pthread_mutex_lock(&bufmgr_gem->lock);
670 /* Get a buffer out of the cache if available */
671 retry:
672 alloc_from_cache = false;
673 if (bucket != NULL && !list_empty(&bucket->head)) {
674 if (for_render) {
675 /* Allocate new render-target BOs from the tail (MRU)
676 * of the list, as it will likely be hot in the GPU
677 * cache and in the aperture for us.
678 */
679 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
680 bucket->head.prev, head);
681 list_del(&bo_gem->head);
682 alloc_from_cache = true;
683 bo_gem->bo.align = alignment;
684 } else {
685 assert(alignment == 0);
686 /* For non-render-target BOs (where we're probably
687 * going to map it first thing in order to fill it
688 * with data), check if the last BO in the cache is
689 * unbusy, and only reuse in that case. Otherwise,
690 * allocating a new buffer is probably faster than
691 * waiting for the GPU to finish.
692 */
693 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
694 bucket->head.next, head);
695 if (!drm_bacon_gem_bo_busy(&bo_gem->bo)) {
696 alloc_from_cache = true;
697 list_del(&bo_gem->head);
698 }
699 }
700
701 if (alloc_from_cache) {
702 if (!drm_bacon_gem_bo_madvise_internal
703 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
704 drm_bacon_gem_bo_free(&bo_gem->bo);
705 drm_bacon_gem_bo_cache_purge_bucket(bufmgr_gem,
706 bucket);
707 goto retry;
708 }
709
710 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
711 tiling_mode,
712 stride)) {
713 drm_bacon_gem_bo_free(&bo_gem->bo);
714 goto retry;
715 }
716 }
717 }
718
719 if (!alloc_from_cache) {
720 struct drm_i915_gem_create create;
721
722 bo_gem = calloc(1, sizeof(*bo_gem));
723 if (!bo_gem)
724 goto err;
725
726		/* drm_bacon_gem_bo_free() calls list_del() on vma_list, which
727		   would be uninitialized here, so set the list head first. */
728 list_inithead(&bo_gem->vma_list);
729
730 bo_gem->bo.size = bo_size;
731
732 memclear(create);
733 create.size = bo_size;
734
735 ret = drmIoctl(bufmgr_gem->fd,
736 DRM_IOCTL_I915_GEM_CREATE,
737 &create);
738 if (ret != 0) {
739 free(bo_gem);
740 goto err;
741 }
742
743 bo_gem->gem_handle = create.handle;
744 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
745 gem_handle, sizeof(bo_gem->gem_handle),
746 bo_gem);
747
748 bo_gem->bo.handle = bo_gem->gem_handle;
749 bo_gem->bo.bufmgr = bufmgr;
750 bo_gem->bo.align = alignment;
751
752 bo_gem->tiling_mode = I915_TILING_NONE;
753 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
754 bo_gem->stride = 0;
755
756 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
757 tiling_mode,
758 stride))
759 goto err_free;
760 }
761
762 bo_gem->name = name;
763 p_atomic_set(&bo_gem->refcount, 1);
764 bo_gem->validate_index = -1;
765 bo_gem->used_as_reloc_target = false;
766 bo_gem->has_error = false;
767 bo_gem->reusable = true;
768
769 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
770 pthread_mutex_unlock(&bufmgr_gem->lock);
771
772 DBG("bo_create: buf %d (%s) %ldb\n",
773 bo_gem->gem_handle, bo_gem->name, size);
774
775 return &bo_gem->bo;
776
777 err_free:
778 drm_bacon_gem_bo_free(&bo_gem->bo);
779 err:
780 pthread_mutex_unlock(&bufmgr_gem->lock);
781 return NULL;
782 }
783
784 static drm_bacon_bo *
785 drm_bacon_gem_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
786 const char *name,
787 unsigned long size,
788 unsigned int alignment)
789 {
790 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
791 BO_ALLOC_FOR_RENDER,
792 I915_TILING_NONE, 0,
793 alignment);
794 }
795
796 static drm_bacon_bo *
797 drm_bacon_gem_bo_alloc(drm_bacon_bufmgr *bufmgr,
798 const char *name,
799 unsigned long size,
800 unsigned int alignment)
801 {
802 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
803 I915_TILING_NONE, 0, 0);
804 }
805
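/*
 * Editor's illustrative sketch, not part of the original file: a typical
 * allocation of a 64 KiB scratch buffer through the allocator above.  The
 * size lands in a power-of-two cache bucket, so the BO can be reused after
 * it is freed.  "example_bufmgr" is assumed to come from bufmgr init.
 */
static inline drm_bacon_bo *
example_alloc_scratch(drm_bacon_bufmgr *example_bufmgr)
{
	return drm_bacon_gem_bo_alloc(example_bufmgr, "scratch",
				      64 * 1024, 0);
}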
806 static drm_bacon_bo *
807 drm_bacon_gem_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
808 int x, int y, int cpp, uint32_t *tiling_mode,
809 unsigned long *pitch, unsigned long flags)
810 {
811 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
812 unsigned long size, stride;
813 uint32_t tiling;
814
815 do {
816 unsigned long aligned_y, height_alignment;
817
818 tiling = *tiling_mode;
819
820 /* If we're tiled, our allocations are in 8 or 32-row blocks,
821 * so failure to align our height means that we won't allocate
822 * enough pages.
823 *
824 * If we're untiled, we still have to align to 2 rows high
825 * because the data port accesses 2x2 blocks even if the
826 * bottom row isn't to be rendered, so failure to align means
827 * we could walk off the end of the GTT and fault. This is
828 * documented on 965, and may be the case on older chipsets
829 * too so we try to be careful.
830 */
831 aligned_y = y;
832 height_alignment = 2;
833
834 if (tiling == I915_TILING_X)
835 height_alignment = 8;
836 else if (tiling == I915_TILING_Y)
837 height_alignment = 32;
838 aligned_y = ALIGN(y, height_alignment);
839
840 stride = x * cpp;
841 stride = drm_bacon_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
842 size = stride * aligned_y;
843 size = drm_bacon_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
844 } while (*tiling_mode != tiling);
845 *pitch = stride;
846
847 if (tiling == I915_TILING_NONE)
848 stride = 0;
849
850 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
851 tiling, stride, 0);
852 }
853
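/*
 * Editor's illustrative sketch, not part of the original file: requesting
 * an X-tiled 2D surface.  The allocator may demote the tiling mode and
 * adjusts the pitch, so callers use the values it hands back rather than
 * the ones they asked for.
 */
static inline drm_bacon_bo *
example_alloc_xtiled(drm_bacon_bufmgr *example_bufmgr, unsigned long *pitch)
{
	uint32_t tiling = I915_TILING_X;

	return drm_bacon_gem_bo_alloc_tiled(example_bufmgr, "example tiled",
					    1024, 768, 4, &tiling, pitch, 0);
}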
854 static drm_bacon_bo *
855 drm_bacon_gem_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
856 const char *name,
857 void *addr,
858 uint32_t tiling_mode,
859 uint32_t stride,
860 unsigned long size,
861 unsigned long flags)
862 {
863 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
864 drm_bacon_bo_gem *bo_gem;
865 int ret;
866 struct drm_i915_gem_userptr userptr;
867
868 /* Tiling with userptr surfaces is not supported
869	 * on all hardware, so refuse it for the time being.
870 */
871 if (tiling_mode != I915_TILING_NONE)
872 return NULL;
873
874 bo_gem = calloc(1, sizeof(*bo_gem));
875 if (!bo_gem)
876 return NULL;
877
878 p_atomic_set(&bo_gem->refcount, 1);
879 list_inithead(&bo_gem->vma_list);
880
881 bo_gem->bo.size = size;
882
883 memclear(userptr);
884 userptr.user_ptr = (__u64)((unsigned long)addr);
885 userptr.user_size = size;
886 userptr.flags = flags;
887
888 ret = drmIoctl(bufmgr_gem->fd,
889 DRM_IOCTL_I915_GEM_USERPTR,
890 &userptr);
891 if (ret != 0) {
892 DBG("bo_create_userptr: "
893 "ioctl failed with user ptr %p size 0x%lx, "
894 "user flags 0x%lx\n", addr, size, flags);
895 free(bo_gem);
896 return NULL;
897 }
898
899 pthread_mutex_lock(&bufmgr_gem->lock);
900
901 bo_gem->gem_handle = userptr.handle;
902 bo_gem->bo.handle = bo_gem->gem_handle;
903 bo_gem->bo.bufmgr = bufmgr;
904 bo_gem->is_userptr = true;
905 bo_gem->bo.virtual = addr;
906 /* Save the address provided by user */
907 bo_gem->user_virtual = addr;
908 bo_gem->tiling_mode = I915_TILING_NONE;
909 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
910 bo_gem->stride = 0;
911
912 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
913 gem_handle, sizeof(bo_gem->gem_handle),
914 bo_gem);
915
916 bo_gem->name = name;
917 bo_gem->validate_index = -1;
918 bo_gem->used_as_reloc_target = false;
919 bo_gem->has_error = false;
920 bo_gem->reusable = false;
921
922 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
923 pthread_mutex_unlock(&bufmgr_gem->lock);
924
925 DBG("bo_create_userptr: "
926 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
927 addr, bo_gem->gem_handle, bo_gem->name,
928 size, stride, tiling_mode);
929
930 return &bo_gem->bo;
931 }
932
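/*
 * Editor's illustrative sketch, not part of the original file: wrapping an
 * existing page-aligned allocation as a GEM object.  Tiling must be NONE
 * (see the check above) and the memory must stay valid for the BO's
 * lifetime.  The "example_" names are hypothetical.
 */
static inline drm_bacon_bo *
example_wrap_user_memory(drm_bacon_bufmgr *example_bufmgr,
			 void *example_ptr, unsigned long example_size)
{
	return drm_bacon_gem_bo_alloc_userptr(example_bufmgr, "wrapped",
					      example_ptr, I915_TILING_NONE,
					      0, example_size, 0);
}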
933 static bool
934 has_userptr(drm_bacon_bufmgr_gem *bufmgr_gem)
935 {
936 int ret;
937 void *ptr;
938 long pgsz;
939 struct drm_i915_gem_userptr userptr;
940
941 pgsz = sysconf(_SC_PAGESIZE);
942 assert(pgsz > 0);
943
944 ret = posix_memalign(&ptr, pgsz, pgsz);
945 if (ret) {
946 DBG("Failed to get a page (%ld) for userptr detection!\n",
947 pgsz);
948 return false;
949 }
950
951 memclear(userptr);
952 userptr.user_ptr = (__u64)(unsigned long)ptr;
953 userptr.user_size = pgsz;
954
955 retry:
956 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
957 if (ret) {
958 if (errno == ENODEV && userptr.flags == 0) {
959 userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
960 goto retry;
961 }
962 free(ptr);
963 return false;
964 }
965
966 /* We don't release the userptr bo here as we want to keep the
967 * kernel mm tracking alive for our lifetime. The first time we
968	 * create a userptr object the kernel has to install an mmu_notifier
969 * which is a heavyweight operation (e.g. it requires taking all
970 * mm_locks and stop_machine()).
971 */
972
973 bufmgr_gem->userptr_active.ptr = ptr;
974 bufmgr_gem->userptr_active.handle = userptr.handle;
975
976 return true;
977 }
978
979 static drm_bacon_bo *
980 check_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
981 const char *name,
982 void *addr,
983 uint32_t tiling_mode,
984 uint32_t stride,
985 unsigned long size,
986 unsigned long flags)
987 {
988 if (has_userptr((drm_bacon_bufmgr_gem *)bufmgr))
989 bufmgr->bo_alloc_userptr = drm_bacon_gem_bo_alloc_userptr;
990 else
991 bufmgr->bo_alloc_userptr = NULL;
992
993 return drm_bacon_bo_alloc_userptr(bufmgr, name, addr,
994 tiling_mode, stride, size, flags);
995 }
996
997 /**
998 * Returns a drm_bacon_bo wrapping the given buffer object handle.
999 *
1000 * This can be used when one application needs to pass a buffer object
1001 * to another.
1002 */
1003 drm_bacon_bo *
1004 drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
1005 const char *name,
1006 unsigned int handle)
1007 {
1008 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
1009 drm_bacon_bo_gem *bo_gem;
1010 int ret;
1011 struct drm_gem_open open_arg;
1012 struct drm_i915_gem_get_tiling get_tiling;
1013
1014	/* At the moment most applications only have a few named BOs.
1015	 * For instance, in a DRI client only the render buffers passed
1016	 * between X and the client are named.  And since X returns the
1017	 * alternating names for the front/back buffer, the name-table
1018	 * lookup below finds a match quickly.
1019 */
1020 pthread_mutex_lock(&bufmgr_gem->lock);
1021 HASH_FIND(name_hh, bufmgr_gem->name_table,
1022 &handle, sizeof(handle), bo_gem);
1023 if (bo_gem) {
1024 drm_bacon_gem_bo_reference(&bo_gem->bo);
1025 goto out;
1026 }
1027
1028 memclear(open_arg);
1029 open_arg.name = handle;
1030 ret = drmIoctl(bufmgr_gem->fd,
1031 DRM_IOCTL_GEM_OPEN,
1032 &open_arg);
1033 if (ret != 0) {
1034 DBG("Couldn't reference %s handle 0x%08x: %s\n",
1035 name, handle, strerror(errno));
1036 bo_gem = NULL;
1037 goto out;
1038 }
1039 /* Now see if someone has used a prime handle to get this
1040 * object from the kernel before by looking through the list
1041 * again for a matching gem_handle
1042 */
1043 HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1044 &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1045 if (bo_gem) {
1046 drm_bacon_gem_bo_reference(&bo_gem->bo);
1047 goto out;
1048 }
1049
1050 bo_gem = calloc(1, sizeof(*bo_gem));
1051 if (!bo_gem)
1052 goto out;
1053
1054 p_atomic_set(&bo_gem->refcount, 1);
1055 list_inithead(&bo_gem->vma_list);
1056
1057 bo_gem->bo.size = open_arg.size;
1058 bo_gem->bo.offset = 0;
1059 bo_gem->bo.offset64 = 0;
1060 bo_gem->bo.virtual = NULL;
1061 bo_gem->bo.bufmgr = bufmgr;
1062 bo_gem->name = name;
1063 bo_gem->validate_index = -1;
1064 bo_gem->gem_handle = open_arg.handle;
1065 bo_gem->bo.handle = open_arg.handle;
1066 bo_gem->global_name = handle;
1067 bo_gem->reusable = false;
1068
1069 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1070 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1071 HASH_ADD(name_hh, bufmgr_gem->name_table,
1072 global_name, sizeof(bo_gem->global_name), bo_gem);
1073
1074 memclear(get_tiling);
1075 get_tiling.handle = bo_gem->gem_handle;
1076 ret = drmIoctl(bufmgr_gem->fd,
1077 DRM_IOCTL_I915_GEM_GET_TILING,
1078 &get_tiling);
1079 if (ret != 0)
1080 goto err_unref;
1081
1082 bo_gem->tiling_mode = get_tiling.tiling_mode;
1083 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1084 /* XXX stride is unknown */
1085 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1086 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1087
1088 out:
1089 pthread_mutex_unlock(&bufmgr_gem->lock);
1090 return &bo_gem->bo;
1091
1092 err_unref:
1093 drm_bacon_gem_bo_free(&bo_gem->bo);
1094 pthread_mutex_unlock(&bufmgr_gem->lock);
1095 return NULL;
1096 }
1097
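/*
 * Editor's illustrative sketch, not part of the original file: importing a
 * buffer that another process exported via flink.  "example_name" stands
 * for the global name received over IPC (e.g. from the X server).
 */
static inline drm_bacon_bo *
example_import_flink(drm_bacon_bufmgr *example_bufmgr, uint32_t example_name)
{
	return drm_bacon_bo_gem_create_from_name(example_bufmgr,
						 "shared scanout",
						 example_name);
}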
1098 static void
1099 drm_bacon_gem_bo_free(drm_bacon_bo *bo)
1100 {
1101 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1102 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1103 struct drm_gem_close close;
1104 int ret;
1105
1106 list_del(&bo_gem->vma_list);
1107 if (bo_gem->mem_virtual) {
1108 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1109 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1110 bufmgr_gem->vma_count--;
1111 }
1112 if (bo_gem->wc_virtual) {
1113 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1114 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1115 bufmgr_gem->vma_count--;
1116 }
1117 if (bo_gem->gtt_virtual) {
1118 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1119 bufmgr_gem->vma_count--;
1120 }
1121
1122 if (bo_gem->global_name)
1123 HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1124 HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1125
1126 /* Close this object */
1127 memclear(close);
1128 close.handle = bo_gem->gem_handle;
1129 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
1130 if (ret != 0) {
1131 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1132 bo_gem->gem_handle, bo_gem->name, strerror(errno));
1133 }
1134 free(bo);
1135 }
1136
1137 static void
1138 drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
1139 {
1140 #ifdef HAVE_VALGRIND
1141 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1142
1143 if (bo_gem->mem_virtual)
1144 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1145
1146 if (bo_gem->wc_virtual)
1147 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1148
1149 if (bo_gem->gtt_virtual)
1150 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1151 #endif
1152 }
1153
1154 /** Frees all cached buffers significantly older than @time. */
1155 static void
1156 drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr_gem *bufmgr_gem, time_t time)
1157 {
1158 int i;
1159
1160 if (bufmgr_gem->time == time)
1161 return;
1162
1163 for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1164 struct drm_bacon_gem_bo_bucket *bucket =
1165 &bufmgr_gem->cache_bucket[i];
1166
1167 while (!list_empty(&bucket->head)) {
1168 drm_bacon_bo_gem *bo_gem;
1169
1170 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1171 bucket->head.next, head);
1172 if (time - bo_gem->free_time <= 1)
1173 break;
1174
1175 list_del(&bo_gem->head);
1176
1177 drm_bacon_gem_bo_free(&bo_gem->bo);
1178 }
1179 }
1180
1181 bufmgr_gem->time = time;
1182 }
1183
1184 static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr_gem *bufmgr_gem)
1185 {
1186 int limit;
1187
1188 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1189 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1190
1191 if (bufmgr_gem->vma_max < 0)
1192 return;
1193
1194 /* We may need to evict a few entries in order to create new mmaps */
1195 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1196 if (limit < 0)
1197 limit = 0;
1198
1199 while (bufmgr_gem->vma_count > limit) {
1200 drm_bacon_bo_gem *bo_gem;
1201
1202 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1203 bufmgr_gem->vma_cache.next,
1204 vma_list);
1205 assert(bo_gem->map_count == 0);
1206 list_delinit(&bo_gem->vma_list);
1207
1208 if (bo_gem->mem_virtual) {
1209 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1210 bo_gem->mem_virtual = NULL;
1211 bufmgr_gem->vma_count--;
1212 }
1213 if (bo_gem->wc_virtual) {
1214 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1215 bo_gem->wc_virtual = NULL;
1216 bufmgr_gem->vma_count--;
1217 }
1218 if (bo_gem->gtt_virtual) {
1219 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1220 bo_gem->gtt_virtual = NULL;
1221 bufmgr_gem->vma_count--;
1222 }
1223 }
1224 }
1225
1226 static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr_gem *bufmgr_gem,
1227 drm_bacon_bo_gem *bo_gem)
1228 {
1229 bufmgr_gem->vma_open--;
1230 list_addtail(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1231 if (bo_gem->mem_virtual)
1232 bufmgr_gem->vma_count++;
1233 if (bo_gem->wc_virtual)
1234 bufmgr_gem->vma_count++;
1235 if (bo_gem->gtt_virtual)
1236 bufmgr_gem->vma_count++;
1237 drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
1238 }
1239
1240 static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr_gem *bufmgr_gem,
1241 drm_bacon_bo_gem *bo_gem)
1242 {
1243 bufmgr_gem->vma_open++;
1244 list_del(&bo_gem->vma_list);
1245 if (bo_gem->mem_virtual)
1246 bufmgr_gem->vma_count--;
1247 if (bo_gem->wc_virtual)
1248 bufmgr_gem->vma_count--;
1249 if (bo_gem->gtt_virtual)
1250 bufmgr_gem->vma_count--;
1251 drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
1252 }
1253
1254 static void
1255 drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
1256 {
1257 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1258 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1259 struct drm_bacon_gem_bo_bucket *bucket;
1260 int i;
1261
1262 /* Unreference all the target buffers */
1263 for (i = 0; i < bo_gem->reloc_count; i++) {
1264 if (bo_gem->reloc_target_info[i].bo != bo) {
1265 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
1266 reloc_target_info[i].bo,
1267 time);
1268 }
1269 }
1270 for (i = 0; i < bo_gem->softpin_target_count; i++)
1271 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1272 time);
1273 bo_gem->kflags = 0;
1274 bo_gem->reloc_count = 0;
1275 bo_gem->used_as_reloc_target = false;
1276 bo_gem->softpin_target_count = 0;
1277
1278 DBG("bo_unreference final: %d (%s)\n",
1279 bo_gem->gem_handle, bo_gem->name);
1280
1281 /* release memory associated with this object */
1282 if (bo_gem->reloc_target_info) {
1283 free(bo_gem->reloc_target_info);
1284 bo_gem->reloc_target_info = NULL;
1285 }
1286 if (bo_gem->relocs) {
1287 free(bo_gem->relocs);
1288 bo_gem->relocs = NULL;
1289 }
1290 if (bo_gem->softpin_target) {
1291 free(bo_gem->softpin_target);
1292 bo_gem->softpin_target = NULL;
1293 bo_gem->softpin_target_size = 0;
1294 }
1295
1296 /* Clear any left-over mappings */
1297 if (bo_gem->map_count) {
1298 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1299 bo_gem->map_count = 0;
1300 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1301 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1302 }
1303
1304 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1305 /* Put the buffer into our internal cache for reuse if we can. */
1306 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1307 drm_bacon_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1308 I915_MADV_DONTNEED)) {
1309 bo_gem->free_time = time;
1310
1311 bo_gem->name = NULL;
1312 bo_gem->validate_index = -1;
1313
1314 list_addtail(&bo_gem->head, &bucket->head);
1315 } else {
1316 drm_bacon_gem_bo_free(bo);
1317 }
1318 }
1319
1320 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
1321 time_t time)
1322 {
1323 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1324
1325 assert(p_atomic_read(&bo_gem->refcount) > 0);
1326 if (p_atomic_dec_zero(&bo_gem->refcount))
1327 drm_bacon_gem_bo_unreference_final(bo, time);
1328 }
1329
1330 static void drm_bacon_gem_bo_unreference(drm_bacon_bo *bo)
1331 {
1332 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1333
1334 assert(p_atomic_read(&bo_gem->refcount) > 0);
1335
1336 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1337 drm_bacon_bufmgr_gem *bufmgr_gem =
1338 (drm_bacon_bufmgr_gem *) bo->bufmgr;
1339 struct timespec time;
1340
1341 clock_gettime(CLOCK_MONOTONIC, &time);
1342
1343 pthread_mutex_lock(&bufmgr_gem->lock);
1344
1345 if (p_atomic_dec_zero(&bo_gem->refcount)) {
1346 drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
1347 drm_bacon_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1348 }
1349
1350 pthread_mutex_unlock(&bufmgr_gem->lock);
1351 }
1352 }
1353
1354 static int drm_bacon_gem_bo_map(drm_bacon_bo *bo, int write_enable)
1355 {
1356 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1357 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1358 struct drm_i915_gem_set_domain set_domain;
1359 int ret;
1360
1361 if (bo_gem->is_userptr) {
1362 /* Return the same user ptr */
1363 bo->virtual = bo_gem->user_virtual;
1364 return 0;
1365 }
1366
1367 pthread_mutex_lock(&bufmgr_gem->lock);
1368
1369 if (bo_gem->map_count++ == 0)
1370 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
1371
1372 if (!bo_gem->mem_virtual) {
1373 struct drm_i915_gem_mmap mmap_arg;
1374
1375 DBG("bo_map: %d (%s), map_count=%d\n",
1376 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1377
1378 memclear(mmap_arg);
1379 mmap_arg.handle = bo_gem->gem_handle;
1380 mmap_arg.size = bo->size;
1381 ret = drmIoctl(bufmgr_gem->fd,
1382 DRM_IOCTL_I915_GEM_MMAP,
1383 &mmap_arg);
1384 if (ret != 0) {
1385 ret = -errno;
1386 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1387 __FILE__, __LINE__, bo_gem->gem_handle,
1388 bo_gem->name, strerror(errno));
1389 if (--bo_gem->map_count == 0)
1390 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1391 pthread_mutex_unlock(&bufmgr_gem->lock);
1392 return ret;
1393 }
1394 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1395 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1396 }
1397 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1398 bo_gem->mem_virtual);
1399 bo->virtual = bo_gem->mem_virtual;
1400
1401 memclear(set_domain);
1402 set_domain.handle = bo_gem->gem_handle;
1403 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1404 if (write_enable)
1405 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1406 else
1407 set_domain.write_domain = 0;
1408 ret = drmIoctl(bufmgr_gem->fd,
1409 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1410 &set_domain);
1411 if (ret != 0) {
1412 DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1413 __FILE__, __LINE__, bo_gem->gem_handle,
1414 strerror(errno));
1415 }
1416
1417 if (write_enable)
1418 bo_gem->mapped_cpu_write = true;
1419
1420 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1421 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1422 pthread_mutex_unlock(&bufmgr_gem->lock);
1423
1424 return 0;
1425 }
1426
1427 static int
1428 map_gtt(drm_bacon_bo *bo)
1429 {
1430 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1431 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1432 int ret;
1433
1434 if (bo_gem->is_userptr)
1435 return -EINVAL;
1436
1437 if (bo_gem->map_count++ == 0)
1438 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
1439
1440 /* Get a mapping of the buffer if we haven't before. */
1441 if (bo_gem->gtt_virtual == NULL) {
1442 struct drm_i915_gem_mmap_gtt mmap_arg;
1443
1444 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1445 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1446
1447 memclear(mmap_arg);
1448 mmap_arg.handle = bo_gem->gem_handle;
1449
1450 /* Get the fake offset back... */
1451 ret = drmIoctl(bufmgr_gem->fd,
1452 DRM_IOCTL_I915_GEM_MMAP_GTT,
1453 &mmap_arg);
1454 if (ret != 0) {
1455 ret = -errno;
1456 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1457 __FILE__, __LINE__,
1458 bo_gem->gem_handle, bo_gem->name,
1459 strerror(errno));
1460 if (--bo_gem->map_count == 0)
1461 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1462 return ret;
1463 }
1464
1465 /* and mmap it */
1466 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1467 MAP_SHARED, bufmgr_gem->fd,
1468 mmap_arg.offset);
1469 if (bo_gem->gtt_virtual == MAP_FAILED) {
1470 bo_gem->gtt_virtual = NULL;
1471 ret = -errno;
1472 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1473 __FILE__, __LINE__,
1474 bo_gem->gem_handle, bo_gem->name,
1475 strerror(errno));
1476 if (--bo_gem->map_count == 0)
1477 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1478 return ret;
1479 }
1480 }
1481
1482 bo->virtual = bo_gem->gtt_virtual;
1483
1484 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1485 bo_gem->gtt_virtual);
1486
1487 return 0;
1488 }
1489
1490 int
1491 drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
1492 {
1493 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1494 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1495 struct drm_i915_gem_set_domain set_domain;
1496 int ret;
1497
1498 pthread_mutex_lock(&bufmgr_gem->lock);
1499
1500 ret = map_gtt(bo);
1501 if (ret) {
1502 pthread_mutex_unlock(&bufmgr_gem->lock);
1503 return ret;
1504 }
1505
1506 /* Now move it to the GTT domain so that the GPU and CPU
1507 * caches are flushed and the GPU isn't actively using the
1508 * buffer.
1509 *
1510 * The pagefault handler does this domain change for us when
1511 * it has unbound the BO from the GTT, but it's up to us to
1512 * tell it when we're about to use things if we had done
1513 * rendering and it still happens to be bound to the GTT.
1514 */
1515 memclear(set_domain);
1516 set_domain.handle = bo_gem->gem_handle;
1517 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1518 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1519 ret = drmIoctl(bufmgr_gem->fd,
1520 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1521 &set_domain);
1522 if (ret != 0) {
1523 DBG("%s:%d: Error setting domain %d: %s\n",
1524 __FILE__, __LINE__, bo_gem->gem_handle,
1525 strerror(errno));
1526 }
1527
1528 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1529 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1530 pthread_mutex_unlock(&bufmgr_gem->lock);
1531
1532 return 0;
1533 }
1534
1535 /**
1536 * Performs a mapping of the buffer object like the normal GTT
1537 * mapping, but avoids waiting for the GPU to be done reading from or
1538 * rendering to the buffer.
1539 *
1540 * This is used in the implementation of GL_ARB_map_buffer_range: The
1541 * user asks to create a buffer, then does a mapping, fills some
1542 * space, runs a drawing command, then asks to map it again without
1543 * synchronizing because it guarantees that it won't write over the
1544 * data that the GPU is busy using (or, more specifically, that if it
1545 * does write over the data, it acknowledges that rendering is
1546 * undefined).
1547 */
1548
1549 int
1550 drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
1551 {
1552 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1553 #ifdef HAVE_VALGRIND
1554 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1555 #endif
1556 int ret;
1557
1558 /* If the CPU cache isn't coherent with the GTT, then use a
1559 * regular synchronized mapping. The problem is that we don't
1560 * track where the buffer was last used on the CPU side in
1561 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
1562 * we would potentially corrupt the buffer even when the user
1563 * does reasonable things.
1564 */
1565 if (!bufmgr_gem->has_llc)
1566 return drm_bacon_gem_bo_map_gtt(bo);
1567
1568 pthread_mutex_lock(&bufmgr_gem->lock);
1569
1570 ret = map_gtt(bo);
1571 if (ret == 0) {
1572 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1573 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1574 }
1575
1576 pthread_mutex_unlock(&bufmgr_gem->lock);
1577
1578 return ret;
1579 }
1580
1581 static int drm_bacon_gem_bo_unmap(drm_bacon_bo *bo)
1582 {
1583 drm_bacon_bufmgr_gem *bufmgr_gem;
1584 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1585 int ret = 0;
1586
1587 if (bo == NULL)
1588 return 0;
1589
1590 if (bo_gem->is_userptr)
1591 return 0;
1592
1593 bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1594
1595 pthread_mutex_lock(&bufmgr_gem->lock);
1596
1597 if (bo_gem->map_count <= 0) {
1598 DBG("attempted to unmap an unmapped bo\n");
1599 pthread_mutex_unlock(&bufmgr_gem->lock);
1600 /* Preserve the old behaviour of just treating this as a
1601 * no-op rather than reporting the error.
1602 */
1603 return 0;
1604 }
1605
1606 if (bo_gem->mapped_cpu_write) {
1607 struct drm_i915_gem_sw_finish sw_finish;
1608
1609 /* Cause a flush to happen if the buffer's pinned for
1610 * scanout, so the results show up in a timely manner.
1611 * Unlike GTT set domains, this only does work if the
1612 * buffer should be scanout-related.
1613 */
1614 memclear(sw_finish);
1615 sw_finish.handle = bo_gem->gem_handle;
1616 ret = drmIoctl(bufmgr_gem->fd,
1617 DRM_IOCTL_I915_GEM_SW_FINISH,
1618 &sw_finish);
1619 ret = ret == -1 ? -errno : 0;
1620
1621 bo_gem->mapped_cpu_write = false;
1622 }
1623
1624	/* We need to unmap after every invocation as we cannot track
1625 * an open vma for every bo as that will exhaust the system
1626 * limits and cause later failures.
1627 */
1628 if (--bo_gem->map_count == 0) {
1629 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1630 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1631 bo->virtual = NULL;
1632 }
1633 pthread_mutex_unlock(&bufmgr_gem->lock);
1634
1635 return ret;
1636 }
1637
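/*
 * Editor's illustrative sketch, not part of the original file: the
 * GL_ARB_map_buffer_range-style pattern described above
 * drm_bacon_gem_bo_map_unsynchronized() -- write fresh data without
 * stalling on the GPU, then drop the mapping again.
 */
static inline int
example_fill_without_stall(drm_bacon_bo *example_bo,
			   const void *example_data, size_t example_len)
{
	int ret = drm_bacon_gem_bo_map_unsynchronized(example_bo);

	if (ret == 0) {
		memcpy(example_bo->virtual, example_data, example_len);
		ret = drm_bacon_gem_bo_unmap(example_bo);
	}
	return ret;
}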
1638 static int
1639 drm_bacon_gem_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
1640 unsigned long size, const void *data)
1641 {
1642 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1643 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1644 struct drm_i915_gem_pwrite pwrite;
1645 int ret;
1646
1647 if (bo_gem->is_userptr)
1648 return -EINVAL;
1649
1650 memclear(pwrite);
1651 pwrite.handle = bo_gem->gem_handle;
1652 pwrite.offset = offset;
1653 pwrite.size = size;
1654 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1655 ret = drmIoctl(bufmgr_gem->fd,
1656 DRM_IOCTL_I915_GEM_PWRITE,
1657 &pwrite);
1658 if (ret != 0) {
1659 ret = -errno;
1660 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1661 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1662 (int)size, strerror(errno));
1663 }
1664
1665 return ret;
1666 }
1667
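/*
 * Editor's illustrative sketch, not part of the original file: uploading a
 * small block of constants with pwrite via the helper above, avoiding a
 * CPU mapping for a one-shot write.
 */
static inline int
example_upload_constants(drm_bacon_bo *example_bo,
			 const float example_consts[4])
{
	return drm_bacon_gem_bo_subdata(example_bo, 0,
					4 * sizeof(float), example_consts);
}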
1668 static int
1669 drm_bacon_gem_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
1670 unsigned long size, void *data)
1671 {
1672 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1673 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1674 struct drm_i915_gem_pread pread;
1675 int ret;
1676
1677 if (bo_gem->is_userptr)
1678 return -EINVAL;
1679
1680 memclear(pread);
1681 pread.handle = bo_gem->gem_handle;
1682 pread.offset = offset;
1683 pread.size = size;
1684 pread.data_ptr = (uint64_t) (uintptr_t) data;
1685 ret = drmIoctl(bufmgr_gem->fd,
1686 DRM_IOCTL_I915_GEM_PREAD,
1687 &pread);
1688 if (ret != 0) {
1689 ret = -errno;
1690 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1691 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1692 (int)size, strerror(errno));
1693 }
1694
1695 return ret;
1696 }
1697
1698 /** Waits for all GPU rendering with the object to have completed. */
1699 static void
1700 drm_bacon_gem_bo_wait_rendering(drm_bacon_bo *bo)
1701 {
1702 drm_bacon_gem_bo_start_gtt_access(bo, 1);
1703 }
1704
1705 /**
1706 * Waits on a BO for the given amount of time.
1707 *
1708 * @bo: buffer object to wait for
1709 * @timeout_ns: amount of time to wait in nanoseconds.
1710 * If value is less than 0, an infinite wait will occur.
1711 *
1712  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1713 * object has completed within the allotted time. Otherwise some negative return
1714 * value describes the error. Of particular interest is -ETIME when the wait has
1715 * failed to yield the desired result.
1716 *
1717 * Similar to drm_bacon_gem_bo_wait_rendering except a timeout parameter allows
1718 * the operation to give up after a certain amount of time. Another subtle
1719  * difference is in the internal locking semantics (this variant does
1720 * not hold the lock for the duration of the wait). This makes the wait subject
1721 * to a larger userspace race window.
1722 *
1723 * The implementation shall wait until the object is no longer actively
1724 * referenced within a batch buffer at the time of the call. The wait will
1725  * referenced within a batch buffer at the time of the call. The wait does
1726  * not guard against the buffer being re-issued by another thread or through
1727  * a flinked handle. Userspace must make sure this race does not occur if
1728  * such precision is important.
1729  * Note that some kernels have broken the promise of an infinite wait for
1730  * negative values; upgrade to the latest stable kernel if this is the case.
1731 */
1732 int
1733 drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
1734 {
1735 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1736 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1737 struct drm_i915_gem_wait wait;
1738 int ret;
1739
1740 if (!bufmgr_gem->has_wait_timeout) {
1741 DBG("%s:%d: Timed wait is not supported. Falling back to "
1742 "infinite wait\n", __FILE__, __LINE__);
1743 if (timeout_ns) {
1744 drm_bacon_gem_bo_wait_rendering(bo);
1745 return 0;
1746 } else {
1747 return drm_bacon_gem_bo_busy(bo) ? -ETIME : 0;
1748 }
1749 }
1750
1751 memclear(wait);
1752 wait.bo_handle = bo_gem->gem_handle;
1753 wait.timeout_ns = timeout_ns;
1754 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1755 if (ret == -1)
1756 return -errno;
1757
1758 return ret;
1759 }
1760
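/*
 * Editor's illustrative sketch, not part of the original file: bounding the
 * wait on the last batch that touched a BO to one millisecond, treating
 * -ETIME as "still busy".
 */
static inline bool
example_bo_idle_within_1ms(drm_bacon_bo *example_bo)
{
	return drm_bacon_gem_bo_wait(example_bo, 1000000) == 0;
}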
1761 /**
1762 * Sets the object to the GTT read and possibly write domain, used by the X
1763 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
1764 *
1765 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
1766 * can do tiled pixmaps this way.
1767 */
1768 void
1769 drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
1770 {
1771 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1772 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1773 struct drm_i915_gem_set_domain set_domain;
1774 int ret;
1775
1776 memclear(set_domain);
1777 set_domain.handle = bo_gem->gem_handle;
1778 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1779 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1780 ret = drmIoctl(bufmgr_gem->fd,
1781 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1782 &set_domain);
1783 if (ret != 0) {
1784 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1785 __FILE__, __LINE__, bo_gem->gem_handle,
1786 set_domain.read_domains, set_domain.write_domain,
1787 strerror(errno));
1788 }
1789 }
1790
1791 static void
1792 drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
1793 {
1794 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
1795 struct drm_gem_close close_bo;
1796 int i, ret;
1797
1798 free(bufmgr_gem->exec2_objects);
1799 free(bufmgr_gem->exec_objects);
1800 free(bufmgr_gem->exec_bos);
1801
1802 pthread_mutex_destroy(&bufmgr_gem->lock);
1803
1804 /* Free any cached buffer objects we were going to reuse */
1805 for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1806 struct drm_bacon_gem_bo_bucket *bucket =
1807 &bufmgr_gem->cache_bucket[i];
1808 drm_bacon_bo_gem *bo_gem;
1809
1810 while (!list_empty(&bucket->head)) {
1811 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1812 bucket->head.next, head);
1813 list_del(&bo_gem->head);
1814
1815 drm_bacon_gem_bo_free(&bo_gem->bo);
1816 }
1817 }
1818
1819 /* Release userptr bo kept hanging around for optimisation. */
1820 if (bufmgr_gem->userptr_active.ptr) {
1821 memclear(close_bo);
1822 close_bo.handle = bufmgr_gem->userptr_active.handle;
1823 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1824 free(bufmgr_gem->userptr_active.ptr);
1825 if (ret)
1826 fprintf(stderr,
1827 "Failed to release test userptr object! (%d) "
1828 "i915 kernel driver may not be sane!\n", errno);
1829 }
1830
1831 free(bufmgr);
1832 }
1833
1834 /**
1835 * Adds the target buffer to the validation list and adds the relocation
1836 * to the reloc_buffer's relocation list.
1837 *
1838 * The relocation entry at the given offset must already contain the
1839 * precomputed relocation value, because the kernel will optimize out
1840 * the relocation entry write when the buffer hasn't moved from the
1841 * last known offset in target_bo.
1842 */
1843 static int
1844 do_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1845 drm_bacon_bo *target_bo, uint32_t target_offset,
1846 uint32_t read_domains, uint32_t write_domain)
1847 {
1848 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1849 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1850 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1851
1852 if (bo_gem->has_error)
1853 return -ENOMEM;
1854
1855 if (target_bo_gem->has_error) {
1856 bo_gem->has_error = true;
1857 return -ENOMEM;
1858 }
1859
1860 /* Create a new relocation list if needed */
1861 if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
1862 return -ENOMEM;
1863
1864 /* Check overflow */
1865 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1866
1867 /* Check args */
1868 assert(offset <= bo->size - 4);
1869 assert((write_domain & (write_domain - 1)) == 0);
1870
1871 /* Make sure that we're not adding a reloc to something whose size has
1872 * already been accounted for.
1873 */
1874 assert(!bo_gem->used_as_reloc_target);
1875 if (target_bo_gem != bo_gem) {
1876 target_bo_gem->used_as_reloc_target = true;
1877 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1878 }
1879
1880 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1881 if (target_bo != bo)
1882 drm_bacon_gem_bo_reference(target_bo);
1883
1884 bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1885 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1886 bo_gem->relocs[bo_gem->reloc_count].target_handle =
1887 target_bo_gem->gem_handle;
1888 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1889 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1890 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
1891 bo_gem->reloc_count++;
1892
1893 return 0;
1894 }
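/*
 * Illustrative sketch (kept out of the build): how a caller typically uses
 * the relocation path above.  The drm_bacon_bo_emit_reloc() wrapper and the
 * mapped "map" pointer are assumptions for the example only; the
 * write-the-presumed-value-first rule is the one documented above.
 */
#if 0
static int
example_emit_pointer(drm_bacon_bo *bo, uint32_t *map, uint32_t offset,
		     drm_bacon_bo *target, uint32_t delta)
{
	/* Precomputed value the kernel may leave untouched if the target
	 * has not moved since the last execbuffer.
	 */
	map[offset / 4] = target->offset64 + delta;

	return drm_bacon_bo_emit_reloc(bo, offset, target, delta,
				       I915_GEM_DOMAIN_RENDER, 0);
}
#endif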
1895
1896 static int
1897 drm_bacon_gem_bo_add_softpin_target(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
1898 {
1899 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1900 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1901 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1902 if (bo_gem->has_error)
1903 return -ENOMEM;
1904
1905 if (target_bo_gem->has_error) {
1906 bo_gem->has_error = true;
1907 return -ENOMEM;
1908 }
1909
1910 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
1911 return -EINVAL;
1912 if (target_bo_gem == bo_gem)
1913 return -EINVAL;
1914
1915 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
1916 int new_size = bo_gem->softpin_target_size * 2;
1917 if (new_size == 0)
1918 new_size = bufmgr_gem->max_relocs;
1919
1920 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
1921 sizeof(drm_bacon_bo *));
1922 if (!bo_gem->softpin_target)
1923 return -ENOMEM;
1924
1925 bo_gem->softpin_target_size = new_size;
1926 }
1927 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
1928 drm_bacon_gem_bo_reference(target_bo);
1929 bo_gem->softpin_target_count++;
1930
1931 return 0;
1932 }
1933
1934 static int
1935 drm_bacon_gem_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1936 drm_bacon_bo *target_bo, uint32_t target_offset,
1937 uint32_t read_domains, uint32_t write_domain)
1938 {
1939 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *)target_bo;
1940
1941 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
1942 return drm_bacon_gem_bo_add_softpin_target(bo, target_bo);
1943 else
1944 return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1945 read_domains, write_domain);
1946 }
1947
1948 int
1949 drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
1950 {
1951 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1952
1953 return bo_gem->reloc_count;
1954 }
1955
1956 /**
1957 * Removes existing relocation entries in the BO after "start".
1958 *
1959 * This allows a user to avoid a two-step process for state setup with
1960 * counting up all the buffer objects and doing a
1961 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
1962 * relocations for the state setup. Instead, save the state of the
1963  * batchbuffer including drm_bacon_gem_bo_get_reloc_count(), emit all the
1964 * state, and then check if it still fits in the aperture.
1965 *
1966 * Any further drm_bacon_bufmgr_check_aperture_space() queries
1967 * involving this buffer in the tree are undefined after this call.
1968 *
1969 * This also removes all softpinned targets being referenced by the BO.
1970 */
1971 void
1972 drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
1973 {
1974 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1975 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1976 int i;
1977 struct timespec time;
1978
1979 clock_gettime(CLOCK_MONOTONIC, &time);
1980
1981 assert(bo_gem->reloc_count >= start);
1982
1983 /* Unreference the cleared target buffers */
1984 pthread_mutex_lock(&bufmgr_gem->lock);
1985
1986 for (i = start; i < bo_gem->reloc_count; i++) {
1987 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_target_info[i].bo;
1988 if (&target_bo_gem->bo != bo) {
1989 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1990 time.tv_sec);
1991 }
1992 }
1993 bo_gem->reloc_count = start;
1994
1995 for (i = 0; i < bo_gem->softpin_target_count; i++) {
1996 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->softpin_target[i];
1997 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
1998 }
1999 bo_gem->softpin_target_count = 0;
2000
2001 pthread_mutex_unlock(&bufmgr_gem->lock);
2002
2003 }
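/*
 * Illustrative sketch (kept out of the build) of the single-pass pattern
 * described above.  emit_state() and flush_batch() are hypothetical caller
 * helpers; drm_bacon_bufmgr_check_aperture_space() is the wrapper referenced
 * in the comment above.
 */
#if 0
static void
example_emit_state_with_rollback(drm_bacon_bo *batch_bo)
{
	int saved = drm_bacon_gem_bo_get_reloc_count(batch_bo);

	emit_state(batch_bo);		/* hypothetical state emission */

	if (drm_bacon_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
		/* Doesn't fit: drop the new relocations, flush, retry. */
		drm_bacon_gem_bo_clear_relocs(batch_bo, saved);
		flush_batch();		/* hypothetical */
		emit_state(batch_bo);
	}
}
#endif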
2004
2005 /**
2006  * Walk the tree of relocations rooted at BO, accumulate the list of
2007  * validations to be performed, and update the relocation buffers with
2008  * index values into the validation list.
2009 */
2010 static void
2011 drm_bacon_gem_bo_process_reloc(drm_bacon_bo *bo)
2012 {
2013 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2014 int i;
2015
2016 if (bo_gem->relocs == NULL)
2017 return;
2018
2019 for (i = 0; i < bo_gem->reloc_count; i++) {
2020 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2021
2022 if (target_bo == bo)
2023 continue;
2024
2025 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
2026
2027 /* Continue walking the tree depth-first. */
2028 drm_bacon_gem_bo_process_reloc(target_bo);
2029
2030 /* Add the target to the validate list */
2031 drm_bacon_add_validate_buffer(target_bo);
2032 }
2033 }
2034
2035 static void
2036 drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
2037 {
2038 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
2039 int i;
2040
2041 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2042 return;
2043
2044 for (i = 0; i < bo_gem->reloc_count; i++) {
2045 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2046
2047 if (target_bo == bo)
2048 continue;
2049
2050 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
2051
2052 /* Continue walking the tree depth-first. */
2053 drm_bacon_gem_bo_process_reloc2(target_bo);
2054
2055 /* Add the target to the validate list */
2056 drm_bacon_add_validate_buffer2(target_bo);
2057 }
2058
2059 for (i = 0; i < bo_gem->softpin_target_count; i++) {
2060 drm_bacon_bo *target_bo = bo_gem->softpin_target[i];
2061
2062 if (target_bo == bo)
2063 continue;
2064
2065 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
2066 drm_bacon_gem_bo_process_reloc2(target_bo);
2067 drm_bacon_add_validate_buffer2(target_bo);
2068 }
2069 }
2070
2071
2072 static void
2073 drm_bacon_update_buffer_offsets(drm_bacon_bufmgr_gem *bufmgr_gem)
2074 {
2075 int i;
2076
2077 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2078 drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
2079 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2080
2081 /* Update the buffer offset */
2082 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
2083 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2084 bo_gem->gem_handle, bo_gem->name,
2085 upper_32_bits(bo->offset64),
2086 lower_32_bits(bo->offset64),
2087 upper_32_bits(bufmgr_gem->exec_objects[i].offset),
2088 lower_32_bits(bufmgr_gem->exec_objects[i].offset));
2089 bo->offset64 = bufmgr_gem->exec_objects[i].offset;
2090 bo->offset = bufmgr_gem->exec_objects[i].offset;
2091 }
2092 }
2093 }
2094
2095 static void
2096 drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr_gem *bufmgr_gem)
2097 {
2098 int i;
2099
2100 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2101 drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
2102 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
2103
2104 /* Update the buffer offset */
2105 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2106 			/* If we're seeing a softpinned object here it means that the
2107 			 * kernel has relocated our object, indicating a programming error.
2108 			 */
2109 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2110 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2111 bo_gem->gem_handle, bo_gem->name,
2112 upper_32_bits(bo->offset64),
2113 lower_32_bits(bo->offset64),
2114 upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2115 lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2116 bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2117 bo->offset = bufmgr_gem->exec2_objects[i].offset;
2118 }
2119 }
2120 }
2121
2122 static int
2123 drm_bacon_gem_bo_exec(drm_bacon_bo *bo, int used,
2124 drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
2125 {
2126 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2127 struct drm_i915_gem_execbuffer execbuf;
2128 int ret, i;
2129
2130 if (to_bo_gem(bo)->has_error)
2131 return -ENOMEM;
2132
2133 pthread_mutex_lock(&bufmgr_gem->lock);
2134 /* Update indices and set up the validate list. */
2135 drm_bacon_gem_bo_process_reloc(bo);
2136
2137 /* Add the batch buffer to the validation list. There are no
2138 * relocations pointing to it.
2139 */
2140 drm_bacon_add_validate_buffer(bo);
2141
2142 memclear(execbuf);
2143 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
2144 execbuf.buffer_count = bufmgr_gem->exec_count;
2145 execbuf.batch_start_offset = 0;
2146 execbuf.batch_len = used;
2147 execbuf.cliprects_ptr = (uintptr_t) cliprects;
2148 execbuf.num_cliprects = num_cliprects;
2149 execbuf.DR1 = 0;
2150 execbuf.DR4 = DR4;
2151
2152 ret = drmIoctl(bufmgr_gem->fd,
2153 DRM_IOCTL_I915_GEM_EXECBUFFER,
2154 &execbuf);
2155 if (ret != 0) {
2156 ret = -errno;
2157 if (errno == ENOSPC) {
2158 DBG("Execbuffer fails to pin. "
2159 "Estimate: %u. Actual: %u. Available: %u\n",
2160 drm_bacon_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2161 bufmgr_gem->
2162 exec_count),
2163 drm_bacon_gem_compute_batch_space(bufmgr_gem->exec_bos,
2164 bufmgr_gem->
2165 exec_count),
2166 (unsigned int)bufmgr_gem->gtt_size);
2167 }
2168 }
2169 drm_bacon_update_buffer_offsets(bufmgr_gem);
2170
2171 if (bufmgr_gem->bufmgr.debug)
2172 drm_bacon_gem_dump_validation_list(bufmgr_gem);
2173
2174 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2175 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2176
2177 bo_gem->idle = false;
2178
2179 /* Disconnect the buffer from the validate list */
2180 bo_gem->validate_index = -1;
2181 bufmgr_gem->exec_bos[i] = NULL;
2182 }
2183 bufmgr_gem->exec_count = 0;
2184 pthread_mutex_unlock(&bufmgr_gem->lock);
2185
2186 return ret;
2187 }
2188
2189 static int
2190 do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
2191 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2192 int in_fence, int *out_fence,
2193 unsigned int flags)
2194 {
2195 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
2196 struct drm_i915_gem_execbuffer2 execbuf;
2197 int ret = 0;
2198 int i;
2199
2200 if (to_bo_gem(bo)->has_error)
2201 return -ENOMEM;
2202
2203 switch (flags & 0x7) {
2204 default:
2205 return -EINVAL;
2206 case I915_EXEC_BLT:
2207 if (!bufmgr_gem->has_blt)
2208 return -EINVAL;
2209 break;
2210 case I915_EXEC_BSD:
2211 if (!bufmgr_gem->has_bsd)
2212 return -EINVAL;
2213 break;
2214 case I915_EXEC_VEBOX:
2215 if (!bufmgr_gem->has_vebox)
2216 return -EINVAL;
2217 break;
2218 case I915_EXEC_RENDER:
2219 case I915_EXEC_DEFAULT:
2220 break;
2221 }
2222
2223 pthread_mutex_lock(&bufmgr_gem->lock);
2224 /* Update indices and set up the validate list. */
2225 drm_bacon_gem_bo_process_reloc2(bo);
2226
2227 /* Add the batch buffer to the validation list. There are no relocations
2228 * pointing to it.
2229 */
2230 drm_bacon_add_validate_buffer2(bo);
2231
2232 memclear(execbuf);
2233 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2234 execbuf.buffer_count = bufmgr_gem->exec_count;
2235 execbuf.batch_start_offset = 0;
2236 execbuf.batch_len = used;
2237 execbuf.cliprects_ptr = (uintptr_t)cliprects;
2238 execbuf.num_cliprects = num_cliprects;
2239 execbuf.DR1 = 0;
2240 execbuf.DR4 = DR4;
2241 execbuf.flags = flags;
2242 if (ctx == NULL)
2243 i915_execbuffer2_set_context_id(execbuf, 0);
2244 else
2245 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2246 execbuf.rsvd2 = 0;
2247 if (in_fence != -1) {
2248 execbuf.rsvd2 = in_fence;
2249 execbuf.flags |= I915_EXEC_FENCE_IN;
2250 }
2251 if (out_fence != NULL) {
2252 *out_fence = -1;
2253 execbuf.flags |= I915_EXEC_FENCE_OUT;
2254 }
2255
2256 if (bufmgr_gem->no_exec)
2257 goto skip_execution;
2258
2259 ret = drmIoctl(bufmgr_gem->fd,
2260 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2261 &execbuf);
2262 if (ret != 0) {
2263 ret = -errno;
2264 if (ret == -ENOSPC) {
2265 DBG("Execbuffer fails to pin. "
2266 "Estimate: %u. Actual: %u. Available: %u\n",
2267 drm_bacon_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2268 bufmgr_gem->exec_count),
2269 drm_bacon_gem_compute_batch_space(bufmgr_gem->exec_bos,
2270 bufmgr_gem->exec_count),
2271 (unsigned int) bufmgr_gem->gtt_size);
2272 }
2273 }
2274 drm_bacon_update_buffer_offsets2(bufmgr_gem);
2275
2276 if (ret == 0 && out_fence != NULL)
2277 *out_fence = execbuf.rsvd2 >> 32;
2278
2279 skip_execution:
2280 if (bufmgr_gem->bufmgr.debug)
2281 drm_bacon_gem_dump_validation_list(bufmgr_gem);
2282
2283 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2284 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2285
2286 bo_gem->idle = false;
2287
2288 /* Disconnect the buffer from the validate list */
2289 bo_gem->validate_index = -1;
2290 bufmgr_gem->exec_bos[i] = NULL;
2291 }
2292 bufmgr_gem->exec_count = 0;
2293 pthread_mutex_unlock(&bufmgr_gem->lock);
2294
2295 return ret;
2296 }
2297
2298 static int
2299 drm_bacon_gem_bo_exec2(drm_bacon_bo *bo, int used,
2300 drm_clip_rect_t *cliprects, int num_cliprects,
2301 int DR4)
2302 {
2303 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2304 -1, NULL, I915_EXEC_RENDER);
2305 }
2306
2307 static int
2308 drm_bacon_gem_bo_mrb_exec2(drm_bacon_bo *bo, int used,
2309 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2310 unsigned int flags)
2311 {
2312 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2313 -1, NULL, flags);
2314 }
2315
2316 int
2317 drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
2318 int used, unsigned int flags)
2319 {
2320 return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2321 }
2322
2323 int
2324 drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
2325 drm_bacon_context *ctx,
2326 int used,
2327 int in_fence,
2328 int *out_fence,
2329 unsigned int flags)
2330 {
2331 return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2332 }
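/*
 * Illustrative sketch (kept out of the build): submitting with explicit
 * fences through drm_bacon_gem_bo_fence_exec() above.  Pass -1 for "no input
 * fence"; the output fence is a sync_file fd that the caller must close.
 */
#if 0
static int
example_fenced_submit(drm_bacon_bo *batch_bo, drm_bacon_context *ctx,
		      int used, int in_fence)
{
	int out_fence = -1;
	int ret;

	ret = drm_bacon_gem_bo_fence_exec(batch_bo, ctx, used,
					  in_fence, &out_fence,
					  I915_EXEC_RENDER);
	if (ret == 0 && out_fence != -1) {
		/* ... hand out_fence to a consumer, then ... */
		close(out_fence);
	}
	return ret;
}
#endif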
2333
2334 static int
2335 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
2336 uint32_t tiling_mode,
2337 uint32_t stride)
2338 {
2339 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2340 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2341 struct drm_i915_gem_set_tiling set_tiling;
2342 int ret;
2343
2344 if (bo_gem->global_name == 0 &&
2345 tiling_mode == bo_gem->tiling_mode &&
2346 stride == bo_gem->stride)
2347 return 0;
2348
2349 memset(&set_tiling, 0, sizeof(set_tiling));
2350 do {
2351 /* set_tiling is slightly broken and overwrites the
2352 * input on the error path, so we have to open code
2353 		 * drmIoctl.
2354 */
2355 set_tiling.handle = bo_gem->gem_handle;
2356 set_tiling.tiling_mode = tiling_mode;
2357 set_tiling.stride = stride;
2358
2359 ret = ioctl(bufmgr_gem->fd,
2360 DRM_IOCTL_I915_GEM_SET_TILING,
2361 &set_tiling);
2362 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2363 if (ret == -1)
2364 return -errno;
2365
2366 bo_gem->tiling_mode = set_tiling.tiling_mode;
2367 bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2368 bo_gem->stride = set_tiling.stride;
2369 return 0;
2370 }
2371
2372 static int
2373 drm_bacon_gem_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2374 uint32_t stride)
2375 {
2376 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2377 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2378 int ret;
2379
2380 /* Tiling with userptr surfaces is not supported
2381 	 * on all hardware, so refuse it for the time being.
2382 */
2383 if (bo_gem->is_userptr)
2384 return -EINVAL;
2385
2386 /* Linear buffers have no stride. By ensuring that we only ever use
2387 * stride 0 with linear buffers, we simplify our code.
2388 */
2389 if (*tiling_mode == I915_TILING_NONE)
2390 stride = 0;
2391
2392 ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2393 if (ret == 0)
2394 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2395
2396 *tiling_mode = bo_gem->tiling_mode;
2397 return ret;
2398 }
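/*
 * Illustrative sketch (kept out of the build): tiling_mode is in/out, so the
 * caller must honour whatever the kernel actually granted.  The
 * drm_bacon_bo_set_tiling() wrapper name is an assumption for the example.
 */
#if 0
static void
example_request_x_tiling(drm_bacon_bo *bo, uint32_t pitch)
{
	uint32_t tiling = I915_TILING_X;

	if (drm_bacon_bo_set_tiling(bo, &tiling, pitch) == 0 &&
	    tiling != I915_TILING_X) {
		/* The kernel declined; fall back to a linear layout. */
	}
}
#endif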
2399
2400 static int
2401 drm_bacon_gem_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2402 uint32_t * swizzle_mode)
2403 {
2404 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2405
2406 *tiling_mode = bo_gem->tiling_mode;
2407 *swizzle_mode = bo_gem->swizzle_mode;
2408 return 0;
2409 }
2410
2411 static int
2412 drm_bacon_gem_bo_set_softpin_offset(drm_bacon_bo *bo, uint64_t offset)
2413 {
2414 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2415
2416 bo->offset64 = offset;
2417 bo->offset = offset;
2418 bo_gem->kflags |= EXEC_OBJECT_PINNED;
2419
2420 return 0;
2421 }
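/*
 * Illustrative sketch (kept out of the build) of the softpin flow: pin a BO
 * at a caller-chosen GPU address, after which the emit_reloc path earlier in
 * this file only records it as a softpin target and the kernel never patches
 * the batch.  The drm_bacon_bo_set_softpin_offset()/drm_bacon_bo_emit_reloc()
 * wrapper names are assumptions for the example.
 */
#if 0
static void
example_softpin(drm_bacon_bo *batch_bo, drm_bacon_bo *bo, uint64_t gpu_addr)
{
	drm_bacon_bo_set_softpin_offset(bo, gpu_addr);

	/* The caller writes gpu_addr into the batch itself; this call only
	 * adds "bo" to the softpin target list for the next execbuffer.
	 */
	drm_bacon_bo_emit_reloc(batch_bo, 0, bo, 0,
				I915_GEM_DOMAIN_RENDER, 0);
}
#endif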
2422
2423 drm_bacon_bo *
2424 drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
2425 {
2426 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
2427 int ret;
2428 uint32_t handle;
2429 drm_bacon_bo_gem *bo_gem;
2430 struct drm_i915_gem_get_tiling get_tiling;
2431
2432 pthread_mutex_lock(&bufmgr_gem->lock);
2433 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2434 if (ret) {
2435 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2436 pthread_mutex_unlock(&bufmgr_gem->lock);
2437 return NULL;
2438 }
2439
2440 /*
2441 * See if the kernel has already returned this buffer to us. Just as
2442 * for named buffers, we must not create two bo's pointing at the same
2443 * kernel object
2444 	 * kernel object.
2445 HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2446 &handle, sizeof(handle), bo_gem);
2447 if (bo_gem) {
2448 drm_bacon_gem_bo_reference(&bo_gem->bo);
2449 goto out;
2450 }
2451
2452 bo_gem = calloc(1, sizeof(*bo_gem));
2453 if (!bo_gem)
2454 goto out;
2455
2456 p_atomic_set(&bo_gem->refcount, 1);
2457 list_inithead(&bo_gem->vma_list);
2458
2459 /* Determine size of bo. The fd-to-handle ioctl really should
2460 * return the size, but it doesn't. If we have kernel 3.12 or
2461 * later, we can lseek on the prime fd to get the size. Older
2462 * kernels will just fail, in which case we fall back to the
2463 	 * provided (estimated or guessed) size. */
2464 ret = lseek(prime_fd, 0, SEEK_END);
2465 if (ret != -1)
2466 bo_gem->bo.size = ret;
2467 else
2468 bo_gem->bo.size = size;
2469
2470 bo_gem->bo.handle = handle;
2471 bo_gem->bo.bufmgr = bufmgr;
2472
2473 bo_gem->gem_handle = handle;
2474 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2475 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2476
2477 bo_gem->name = "prime";
2478 bo_gem->validate_index = -1;
2479 bo_gem->used_as_reloc_target = false;
2480 bo_gem->has_error = false;
2481 bo_gem->reusable = false;
2482
2483 memclear(get_tiling);
2484 get_tiling.handle = bo_gem->gem_handle;
2485 if (drmIoctl(bufmgr_gem->fd,
2486 DRM_IOCTL_I915_GEM_GET_TILING,
2487 &get_tiling))
2488 goto err;
2489
2490 bo_gem->tiling_mode = get_tiling.tiling_mode;
2491 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2492 /* XXX stride is unknown */
2493 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2494
2495 out:
2496 pthread_mutex_unlock(&bufmgr_gem->lock);
2497 return &bo_gem->bo;
2498
2499 err:
2500 drm_bacon_gem_bo_free(&bo_gem->bo);
2501 pthread_mutex_unlock(&bufmgr_gem->lock);
2502 return NULL;
2503 }
2504
2505 int
2506 drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
2507 {
2508 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2509 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2510
2511 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2512 DRM_CLOEXEC, prime_fd) != 0)
2513 return -errno;
2514
2515 bo_gem->reusable = false;
2516
2517 return 0;
2518 }
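/*
 * Illustrative sketch (kept out of the build): round-tripping a BO through a
 * dma-buf fd with the two functions above, e.g. to share it with another
 * bufmgr or process.  The fd belongs to the caller and can be closed once
 * the import holds its own handle.
 */
#if 0
static drm_bacon_bo *
example_prime_roundtrip(drm_bacon_bo *bo, drm_bacon_bufmgr *other_bufmgr)
{
	drm_bacon_bo *imported = NULL;
	int fd;

	if (drm_bacon_bo_gem_export_to_prime(bo, &fd) == 0) {
		imported = drm_bacon_bo_gem_create_from_prime(other_bufmgr,
							      fd, bo->size);
		close(fd);
	}
	return imported;
}
#endif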
2519
2520 static int
2521 drm_bacon_gem_bo_flink(drm_bacon_bo *bo, uint32_t * name)
2522 {
2523 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2524 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2525
2526 if (!bo_gem->global_name) {
2527 struct drm_gem_flink flink;
2528
2529 memclear(flink);
2530 flink.handle = bo_gem->gem_handle;
2531 if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2532 return -errno;
2533
2534 pthread_mutex_lock(&bufmgr_gem->lock);
2535 if (!bo_gem->global_name) {
2536 bo_gem->global_name = flink.name;
2537 bo_gem->reusable = false;
2538
2539 HASH_ADD(name_hh, bufmgr_gem->name_table,
2540 global_name, sizeof(bo_gem->global_name),
2541 bo_gem);
2542 }
2543 pthread_mutex_unlock(&bufmgr_gem->lock);
2544 }
2545
2546 *name = bo_gem->global_name;
2547 return 0;
2548 }
2549
2550 /**
2551 * Enables unlimited caching of buffer objects for reuse.
2552 *
2553 * This is potentially very memory expensive, as the cache at each bucket
2554 * size is only bounded by how many buffers of that size we've managed to have
2555 * in flight at once.
2556 */
2557 void
2558 drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
2559 {
2560 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
2561
2562 bufmgr_gem->bo_reuse = true;
2563 }
2564
2565 /**
2566 * Disables implicit synchronisation before executing the bo
2567 *
2568 * This will cause rendering corruption unless you correctly manage explicit
2569 * fences for all rendering involving this buffer - including use by others.
2570 * Disabling the implicit serialisation is only required if that serialisation
2571 * is too coarse (for example, you have split the buffer into many
2572 * non-overlapping regions and are sharing the whole buffer between concurrent
2573 * independent command streams).
2574 *
2575 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2576  * which can be checked using drm_bacon_bufmgr_gem_can_disable_implicit_sync,
2577 * or subsequent execbufs involving the bo will generate EINVAL.
2578 */
2579 void
2580 drm_bacon_gem_bo_disable_implicit_sync(drm_bacon_bo *bo)
2581 {
2582 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2583
2584 bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2585 }
2586
2587 /**
2588 * Enables implicit synchronisation before executing the bo
2589 *
2590 * This is the default behaviour of the kernel, to wait upon prior writes
2591 * completing on the object before rendering with it, or to wait for prior
2592 * reads to complete before writing into the object.
2593 * drm_bacon_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2594 * the kernel never to insert a stall before using the object. Then this
2595 * function can be used to restore the implicit sync before subsequent
2596 * rendering.
2597 */
2598 void
2599 drm_bacon_gem_bo_enable_implicit_sync(drm_bacon_bo *bo)
2600 {
2601 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2602
2603 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2604 }
2605
2606 /**
2607 * Query whether the kernel supports disabling of its implicit synchronisation
2608 * before execbuf. See drm_bacon_gem_bo_disable_implicit_sync()
2609 */
2610 int
2611 drm_bacon_bufmgr_gem_can_disable_implicit_sync(drm_bacon_bufmgr *bufmgr)
2612 {
2613 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
2614
2615 return bufmgr_gem->has_exec_async;
2616 }
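/*
 * Illustrative sketch (kept out of the build) of the pattern the comments
 * above describe: only opt a shared BO out of implicit synchronisation when
 * the kernel advertises support, and take over fencing explicitly.
 */
#if 0
static void
example_disable_implicit_sync(drm_bacon_bufmgr *bufmgr, drm_bacon_bo *bo)
{
	if (drm_bacon_bufmgr_gem_can_disable_implicit_sync(bufmgr))
		drm_bacon_gem_bo_disable_implicit_sync(bo);
	/* From here on the caller must serialise access with explicit
	 * fences, e.g. via drm_bacon_gem_bo_fence_exec().
	 */
}
#endif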
2617
2618 /**
2619 * Return the additional aperture space required by the tree of buffer objects
2620 * rooted at bo.
2621 */
2622 static int
2623 drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
2624 {
2625 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2626 int i;
2627 int total = 0;
2628
2629 if (bo == NULL || bo_gem->included_in_check_aperture)
2630 return 0;
2631
2632 total += bo->size;
2633 bo_gem->included_in_check_aperture = true;
2634
2635 for (i = 0; i < bo_gem->reloc_count; i++)
2636 total +=
2637 drm_bacon_gem_bo_get_aperture_space(bo_gem->
2638 reloc_target_info[i].bo);
2639
2640 return total;
2641 }
2642
2643 /**
2644 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
2645 * for the next drm_bacon_bufmgr_check_aperture_space() call.
2646 */
2647 static void
2648 drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
2649 {
2650 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2651 int i;
2652
2653 if (bo == NULL || !bo_gem->included_in_check_aperture)
2654 return;
2655
2656 bo_gem->included_in_check_aperture = false;
2657
2658 for (i = 0; i < bo_gem->reloc_count; i++)
2659 drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->
2660 reloc_target_info[i].bo);
2661 }
2662
2663 /**
2664 * Return a conservative estimate for the amount of aperture required
2665 * for a collection of buffers. This may double-count some buffers.
2666 */
2667 static unsigned int
2668 drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
2669 {
2670 int i;
2671 unsigned int total = 0;
2672
2673 for (i = 0; i < count; i++) {
2674 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2675 if (bo_gem != NULL)
2676 total += bo_gem->reloc_tree_size;
2677 }
2678 return total;
2679 }
2680
2681 /**
2682 * Return the amount of aperture needed for a collection of buffers.
2683 * This avoids double counting any buffers, at the cost of looking
2684 * at every buffer in the set.
2685 */
2686 static unsigned int
2687 drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
2688 {
2689 int i;
2690 unsigned int total = 0;
2691
2692 for (i = 0; i < count; i++) {
2693 total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
2694 /* For the first buffer object in the array, we get an
2695 * accurate count back for its reloc_tree size (since nothing
2696 * had been flagged as being counted yet). We can save that
2697 * value out as a more conservative reloc_tree_size that
2698 * avoids double-counting target buffers. Since the first
2699 * buffer happens to usually be the batch buffer in our
2700 * callers, this can pull us back from doing the tree
2701 * walk on every new batch emit.
2702 */
2703 if (i == 0) {
2704 drm_bacon_bo_gem *bo_gem =
2705 (drm_bacon_bo_gem *) bo_array[i];
2706 bo_gem->reloc_tree_size = total;
2707 }
2708 }
2709
2710 for (i = 0; i < count; i++)
2711 drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
2712 return total;
2713 }
2714
2715 /**
2716 * Return -1 if the batchbuffer should be flushed before attempting to
2717 * emit rendering referencing the buffers pointed to by bo_array.
2718 *
2719 * This is required because if we try to emit a batchbuffer with relocations
2720 * to a tree of buffers that won't simultaneously fit in the aperture,
2721 * the rendering will return an error at a point where the software is not
2722 * prepared to recover from it.
2723 *
2724 * However, we also want to emit the batchbuffer significantly before we reach
2725 * the limit, as a series of batchbuffers each of which references buffers
2726 * covering almost all of the aperture means that at each emit we end up
2727  * waiting to evict a buffer from the last rendering, so performance becomes
2728  * effectively synchronous. By emitting smaller batchbuffers, we eat some
2729  * CPU overhead to get better parallelism.
2730 */
2731 static int
2732 drm_bacon_gem_check_aperture_space(drm_bacon_bo **bo_array, int count)
2733 {
2734 drm_bacon_bufmgr_gem *bufmgr_gem =
2735 (drm_bacon_bufmgr_gem *) bo_array[0]->bufmgr;
2736 unsigned int total = 0;
2737 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2738
2739 total = drm_bacon_gem_estimate_batch_space(bo_array, count);
2740
2741 if (total > threshold)
2742 total = drm_bacon_gem_compute_batch_space(bo_array, count);
2743
2744 if (total > threshold) {
2745 DBG("check_space: overflowed available aperture, "
2746 "%dkb vs %dkb\n",
2747 total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2748 return -ENOSPC;
2749 } else {
2750 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2751 (int)bufmgr_gem->gtt_size / 1024);
2752 return 0;
2753 }
2754 }
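/*
 * Illustrative sketch (kept out of the build) of the flush-early policy
 * above: check the batch plus the buffers a draw is about to reference and
 * flush first on -ENOSPC.  flush_batch() is a hypothetical caller helper;
 * drm_bacon_bufmgr_check_aperture_space() is the assumed public wrapper for
 * the function above.
 */
#if 0
static void
example_check_before_draw(drm_bacon_bo *batch_bo, drm_bacon_bo *texture_bo)
{
	drm_bacon_bo *bos[] = { batch_bo, texture_bo };

	if (drm_bacon_bufmgr_check_aperture_space(bos, 2) == -ENOSPC)
		flush_batch();	/* submit now, start a new batch */
}
#endif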
2755
2756 /*
2757 * Disable buffer reuse for objects which are shared with the kernel
2758 * as scanout buffers
2759 */
2760 static int
2761 drm_bacon_gem_bo_disable_reuse(drm_bacon_bo *bo)
2762 {
2763 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2764
2765 bo_gem->reusable = false;
2766 return 0;
2767 }
2768
2769 static int
2770 drm_bacon_gem_bo_is_reusable(drm_bacon_bo *bo)
2771 {
2772 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2773
2774 return bo_gem->reusable;
2775 }
2776
2777 static int
2778 _drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2779 {
2780 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2781 int i;
2782
2783 for (i = 0; i < bo_gem->reloc_count; i++) {
2784 if (bo_gem->reloc_target_info[i].bo == target_bo)
2785 return 1;
2786 if (bo == bo_gem->reloc_target_info[i].bo)
2787 continue;
2788 if (_drm_bacon_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2789 target_bo))
2790 return 1;
2791 }
2792
2793 for (i = 0; i< bo_gem->softpin_target_count; i++) {
2794 if (bo_gem->softpin_target[i] == target_bo)
2795 return 1;
2796 if (_drm_bacon_gem_bo_references(bo_gem->softpin_target[i], target_bo))
2797 return 1;
2798 }
2799
2800 return 0;
2801 }
2802
2803 /** Return true if target_bo is referenced by bo's relocation tree. */
2804 static int
2805 drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2806 {
2807 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
2808
2809 if (bo == NULL || target_bo == NULL)
2810 return 0;
2811 if (target_bo_gem->used_as_reloc_target)
2812 return _drm_bacon_gem_bo_references(bo, target_bo);
2813 return 0;
2814 }
2815
2816 static void
2817 add_bucket(drm_bacon_bufmgr_gem *bufmgr_gem, int size)
2818 {
2819 unsigned int i = bufmgr_gem->num_buckets;
2820
2821 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2822
2823 list_inithead(&bufmgr_gem->cache_bucket[i].head);
2824 bufmgr_gem->cache_bucket[i].size = size;
2825 bufmgr_gem->num_buckets++;
2826 }
2827
2828 static void
2829 init_cache_buckets(drm_bacon_bufmgr_gem *bufmgr_gem)
2830 {
2831 unsigned long size, cache_max_size = 64 * 1024 * 1024;
2832
2833 /* OK, so power of two buckets was too wasteful of memory.
2834 * Give 3 other sizes between each power of two, to hopefully
2835 * cover things accurately enough. (The alternative is
2836 * probably to just go for exact matching of sizes, and assume
2837 * that for things like composited window resize the tiled
2838 * width/height alignment and rounding of sizes to pages will
2839 * get us useful cache hit rates anyway)
2840 */
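	/* Concretely, the calls below yield buckets at 4, 8 and 12 KiB, then
	 * four per power-of-two octave (16/20/24/28 KiB, 32/40/48/56 KiB,
	 * ...), up to and including the octave that starts at the 64 MiB
	 * cache_max_size.
	 */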
2841 add_bucket(bufmgr_gem, 4096);
2842 add_bucket(bufmgr_gem, 4096 * 2);
2843 add_bucket(bufmgr_gem, 4096 * 3);
2844
2845 /* Initialize the linked lists for BO reuse cache. */
2846 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2847 add_bucket(bufmgr_gem, size);
2848
2849 add_bucket(bufmgr_gem, size + size * 1 / 4);
2850 add_bucket(bufmgr_gem, size + size * 2 / 4);
2851 add_bucket(bufmgr_gem, size + size * 3 / 4);
2852 }
2853 }
2854
2855 void
2856 drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
2857 {
2858 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
2859
2860 bufmgr_gem->vma_max = limit;
2861
2862 drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
2863 }
2864
2865 static int
2866 parse_devid_override(const char *devid_override)
2867 {
2868 static const struct {
2869 const char *name;
2870 int pci_id;
2871 } name_map[] = {
2872 { "brw", PCI_CHIP_I965_GM },
2873 { "g4x", PCI_CHIP_GM45_GM },
2874 { "ilk", PCI_CHIP_ILD_G },
2875 { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
2876 { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
2877 { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
2878 { "byt", PCI_CHIP_VALLEYVIEW_3 },
2879 { "bdw", 0x1620 | BDW_ULX },
2880 { "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
2881 { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
2882 };
2883 unsigned int i;
2884
2885 for (i = 0; i < ARRAY_SIZE(name_map); i++) {
2886 if (!strcmp(name_map[i].name, devid_override))
2887 return name_map[i].pci_id;
2888 }
2889
2890 return strtod(devid_override, NULL);
2891 }
2892
2893 /**
2894 * Get the PCI ID for the device. This can be overridden by setting the
2895 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
2896 */
2897 static int
2898 get_pci_device_id(drm_bacon_bufmgr_gem *bufmgr_gem)
2899 {
2900 char *devid_override;
2901 int devid = 0;
2902 int ret;
2903 drm_i915_getparam_t gp;
2904
2905 if (geteuid() == getuid()) {
2906 devid_override = getenv("INTEL_DEVID_OVERRIDE");
2907 if (devid_override) {
2908 bufmgr_gem->no_exec = true;
2909 return parse_devid_override(devid_override);
2910 }
2911 }
2912
2913 memclear(gp);
2914 gp.param = I915_PARAM_CHIPSET_ID;
2915 gp.value = &devid;
2916 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2917 if (ret) {
2918 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
2919 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
2920 }
2921 return devid;
2922 }
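/*
 * For reference (derived from parse_devid_override() above): the override
 * may be either a code name from the table ("skl", "bdw", ...) or a numeric
 * PCI ID.  Because no_exec is set in that case, batches are built but never
 * actually submitted to the kernel.
 */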
2923
2924 int
2925 drm_bacon_bufmgr_gem_get_devid(drm_bacon_bufmgr *bufmgr)
2926 {
2927 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
2928
2929 return bufmgr_gem->pci_device;
2930 }
2931
2932 drm_bacon_context *
2933 drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
2934 {
2935 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
2936 struct drm_i915_gem_context_create create;
2937 drm_bacon_context *context = NULL;
2938 int ret;
2939
2940 context = calloc(1, sizeof(*context));
2941 if (!context)
2942 return NULL;
2943
2944 memclear(create);
2945 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2946 if (ret != 0) {
2947 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
2948 strerror(errno));
2949 free(context);
2950 return NULL;
2951 }
2952
2953 context->ctx_id = create.ctx_id;
2954 context->bufmgr = bufmgr;
2955
2956 return context;
2957 }
2958
2959 int
2960 drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
2961 {
2962 if (ctx == NULL)
2963 return -EINVAL;
2964
2965 *ctx_id = ctx->ctx_id;
2966
2967 return 0;
2968 }
2969
2970 void
2971 drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
2972 {
2973 drm_bacon_bufmgr_gem *bufmgr_gem;
2974 struct drm_i915_gem_context_destroy destroy;
2975 int ret;
2976
2977 if (ctx == NULL)
2978 return;
2979
2980 memclear(destroy);
2981
2982 bufmgr_gem = (drm_bacon_bufmgr_gem *)ctx->bufmgr;
2983 destroy.ctx_id = ctx->ctx_id;
2984 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
2985 &destroy);
2986 if (ret != 0)
2987 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2988 strerror(errno));
2989
2990 free(ctx);
2991 }
2992
2993 int
2994 drm_bacon_get_reset_stats(drm_bacon_context *ctx,
2995 uint32_t *reset_count,
2996 uint32_t *active,
2997 uint32_t *pending)
2998 {
2999 drm_bacon_bufmgr_gem *bufmgr_gem;
3000 struct drm_i915_reset_stats stats;
3001 int ret;
3002
3003 if (ctx == NULL)
3004 return -EINVAL;
3005
3006 memclear(stats);
3007
3008 bufmgr_gem = (drm_bacon_bufmgr_gem *)ctx->bufmgr;
3009 stats.ctx_id = ctx->ctx_id;
3010 ret = drmIoctl(bufmgr_gem->fd,
3011 DRM_IOCTL_I915_GET_RESET_STATS,
3012 &stats);
3013 if (ret == 0) {
3014 if (reset_count != NULL)
3015 *reset_count = stats.reset_count;
3016
3017 if (active != NULL)
3018 *active = stats.batch_active;
3019
3020 if (pending != NULL)
3021 *pending = stats.batch_pending;
3022 }
3023
3024 return ret;
3025 }
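/*
 * Illustrative sketch (kept out of the build): a typical robustness flow
 * using the context and reset-stats entry points above.  Rendering within a
 * private context lets the caller poll per-context reset statistics
 * afterwards.
 */
#if 0
static void
example_context_with_reset_stats(drm_bacon_bufmgr *bufmgr,
				 drm_bacon_bo *batch_bo, int used)
{
	drm_bacon_context *ctx = drm_bacon_gem_context_create(bufmgr);
	uint32_t reset_count, active, pending;

	if (!ctx)
		return;

	drm_bacon_gem_bo_context_exec(batch_bo, ctx, used, I915_EXEC_RENDER);

	if (drm_bacon_get_reset_stats(ctx, &reset_count, &active,
				      &pending) == 0 &&
	    (active || pending)) {
		/* This context lost work to a GPU reset. */
	}

	drm_bacon_gem_context_destroy(ctx);
}
#endif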
3026
3027 int
3028 drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
3029 uint32_t offset,
3030 uint64_t *result)
3031 {
3032 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3033 struct drm_i915_reg_read reg_read;
3034 int ret;
3035
3036 memclear(reg_read);
3037 reg_read.offset = offset;
3038
3039 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3040
3041 *result = reg_read.val;
3042 return ret;
3043 }
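/*
 * Illustrative sketch (kept out of the build): reading a register the kernel
 * whitelists for unprivileged access.  The render ring TIMESTAMP register at
 * 0x2358 is the commonly used example; that offset is an assumption here,
 * not something taken from this file.
 */
#if 0
static uint64_t
example_read_gpu_timestamp(drm_bacon_bufmgr *bufmgr)
{
	uint64_t ts = 0;

	if (drm_bacon_reg_read(bufmgr, 0x2358, &ts) != 0)
		return 0;
	return ts;
}
#endif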
3044
3045 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3046 static struct list_head bufmgr_list = { &bufmgr_list, &bufmgr_list };
3047
3048 static drm_bacon_bufmgr_gem *
3049 drm_bacon_bufmgr_gem_find(int fd)
3050 {
3051 list_for_each_entry(drm_bacon_bufmgr_gem,
3052 bufmgr_gem, &bufmgr_list, managers) {
3053 if (bufmgr_gem->fd == fd) {
3054 p_atomic_inc(&bufmgr_gem->refcount);
3055 return bufmgr_gem;
3056 }
3057 }
3058
3059 return NULL;
3060 }
3061
3062 static void
3063 drm_bacon_bufmgr_gem_unref(drm_bacon_bufmgr *bufmgr)
3064 {
3065 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3066
3067 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3068 pthread_mutex_lock(&bufmgr_list_mutex);
3069
3070 if (p_atomic_dec_zero(&bufmgr_gem->refcount)) {
3071 list_del(&bufmgr_gem->managers);
3072 drm_bacon_bufmgr_gem_destroy(bufmgr);
3073 }
3074
3075 pthread_mutex_unlock(&bufmgr_list_mutex);
3076 }
3077 }
3078
3079 void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
3080 {
3081 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
3082 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3083
3084 if (bo_gem->gtt_virtual)
3085 return bo_gem->gtt_virtual;
3086
3087 if (bo_gem->is_userptr)
3088 return NULL;
3089
3090 pthread_mutex_lock(&bufmgr_gem->lock);
3091 if (bo_gem->gtt_virtual == NULL) {
3092 struct drm_i915_gem_mmap_gtt mmap_arg;
3093 void *ptr;
3094
3095 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3096 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3097
3098 if (bo_gem->map_count++ == 0)
3099 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
3100
3101 memclear(mmap_arg);
3102 mmap_arg.handle = bo_gem->gem_handle;
3103
3104 /* Get the fake offset back... */
3105 ptr = MAP_FAILED;
3106 if (drmIoctl(bufmgr_gem->fd,
3107 DRM_IOCTL_I915_GEM_MMAP_GTT,
3108 &mmap_arg) == 0) {
3109 /* and mmap it */
3110 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3111 MAP_SHARED, bufmgr_gem->fd,
3112 mmap_arg.offset);
3113 }
3114 if (ptr == MAP_FAILED) {
3115 if (--bo_gem->map_count == 0)
3116 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
3117 ptr = NULL;
3118 }
3119
3120 bo_gem->gtt_virtual = ptr;
3121 }
3122 pthread_mutex_unlock(&bufmgr_gem->lock);
3123
3124 return bo_gem->gtt_virtual;
3125 }
3126
3127 void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
3128 {
3129 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
3130 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3131
3132 if (bo_gem->mem_virtual)
3133 return bo_gem->mem_virtual;
3134
3135 if (bo_gem->is_userptr) {
3136 /* Return the same user ptr */
3137 return bo_gem->user_virtual;
3138 }
3139
3140 pthread_mutex_lock(&bufmgr_gem->lock);
3141 if (!bo_gem->mem_virtual) {
3142 struct drm_i915_gem_mmap mmap_arg;
3143
3144 if (bo_gem->map_count++ == 0)
3145 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
3146
3147 DBG("bo_map: %d (%s), map_count=%d\n",
3148 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3149
3150 memclear(mmap_arg);
3151 mmap_arg.handle = bo_gem->gem_handle;
3152 mmap_arg.size = bo->size;
3153 if (drmIoctl(bufmgr_gem->fd,
3154 DRM_IOCTL_I915_GEM_MMAP,
3155 &mmap_arg)) {
3156 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3157 __FILE__, __LINE__, bo_gem->gem_handle,
3158 bo_gem->name, strerror(errno));
3159 if (--bo_gem->map_count == 0)
3160 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
3161 } else {
3162 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3163 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3164 }
3165 }
3166 pthread_mutex_unlock(&bufmgr_gem->lock);
3167
3168 return bo_gem->mem_virtual;
3169 }
3170
3171 void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
3172 {
3173 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
3174 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3175
3176 if (bo_gem->wc_virtual)
3177 return bo_gem->wc_virtual;
3178
3179 if (bo_gem->is_userptr)
3180 return NULL;
3181
3182 pthread_mutex_lock(&bufmgr_gem->lock);
3183 if (!bo_gem->wc_virtual) {
3184 struct drm_i915_gem_mmap mmap_arg;
3185
3186 if (bo_gem->map_count++ == 0)
3187 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
3188
3189 DBG("bo_map: %d (%s), map_count=%d\n",
3190 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3191
3192 memclear(mmap_arg);
3193 mmap_arg.handle = bo_gem->gem_handle;
3194 mmap_arg.size = bo->size;
3195 mmap_arg.flags = I915_MMAP_WC;
3196 if (drmIoctl(bufmgr_gem->fd,
3197 DRM_IOCTL_I915_GEM_MMAP,
3198 &mmap_arg)) {
3199 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3200 __FILE__, __LINE__, bo_gem->gem_handle,
3201 bo_gem->name, strerror(errno));
3202 if (--bo_gem->map_count == 0)
3203 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
3204 } else {
3205 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3206 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3207 }
3208 }
3209 pthread_mutex_unlock(&bufmgr_gem->lock);
3210
3211 return bo_gem->wc_virtual;
3212 }
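/*
 * Illustrative sketch (kept out of the build): the three raw map helpers
 * above cache their mapping on the BO and do no set-domain bookkeeping, so
 * the caller owns coherency.  A write-combining map is a reasonable choice
 * for streaming uploads; falling back to the GTT map is an example policy,
 * not a rule from this file.
 */
#if 0
static void
example_stream_upload(drm_bacon_bo *bo, const void *data, size_t len)
{
	void *map = drm_bacon_gem_bo_map__wc(bo);

	if (!map)
		map = drm_bacon_gem_bo_map__gtt(bo);	/* fallback */
	if (map)
		memcpy(map, data, len);
}
#endif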
3213
3214 /**
3215 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3216  * and manage buffer objects.
3217 *
3218 * \param fd File descriptor of the opened DRM device.
3219 */
3220 drm_bacon_bufmgr *
3221 drm_bacon_bufmgr_gem_init(int fd, int batch_size)
3222 {
3223 drm_bacon_bufmgr_gem *bufmgr_gem;
3224 struct drm_i915_gem_get_aperture aperture;
3225 drm_i915_getparam_t gp;
3226 int ret, tmp;
3227 bool exec2 = false;
3228
3229 pthread_mutex_lock(&bufmgr_list_mutex);
3230
3231 bufmgr_gem = drm_bacon_bufmgr_gem_find(fd);
3232 if (bufmgr_gem)
3233 goto exit;
3234
3235 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3236 if (bufmgr_gem == NULL)
3237 goto exit;
3238
3239 bufmgr_gem->fd = fd;
3240 p_atomic_set(&bufmgr_gem->refcount, 1);
3241
3242 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3243 free(bufmgr_gem);
3244 bufmgr_gem = NULL;
3245 goto exit;
3246 }
3247
3248 memclear(aperture);
3249 ret = drmIoctl(bufmgr_gem->fd,
3250 DRM_IOCTL_I915_GEM_GET_APERTURE,
3251 &aperture);
3252
3253 if (ret == 0)
3254 bufmgr_gem->gtt_size = aperture.aper_available_size;
3255 else {
3256 		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
3257 strerror(errno));
3258 bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3259 fprintf(stderr, "Assuming %dkB available aperture size.\n"
3260 "May lead to reduced performance or incorrect "
3261 "rendering.\n",
3262 (int)bufmgr_gem->gtt_size / 1024);
3263 }
3264
3265 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3266
3267 if (IS_GEN4(bufmgr_gem->pci_device))
3268 bufmgr_gem->gen = 4;
3269 else if (IS_GEN5(bufmgr_gem->pci_device))
3270 bufmgr_gem->gen = 5;
3271 else if (IS_GEN6(bufmgr_gem->pci_device))
3272 bufmgr_gem->gen = 6;
3273 else if (IS_GEN7(bufmgr_gem->pci_device))
3274 bufmgr_gem->gen = 7;
3275 else if (IS_GEN8(bufmgr_gem->pci_device))
3276 bufmgr_gem->gen = 8;
3277 else if (IS_GEN9(bufmgr_gem->pci_device))
3278 bufmgr_gem->gen = 9;
3279 else {
3280 free(bufmgr_gem);
3281 bufmgr_gem = NULL;
3282 goto exit;
3283 }
3284
3285 memclear(gp);
3286 gp.value = &tmp;
3287
3288 gp.param = I915_PARAM_HAS_EXECBUF2;
3289 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3290 if (!ret)
3291 exec2 = true;
3292
3293 gp.param = I915_PARAM_HAS_BSD;
3294 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3295 bufmgr_gem->has_bsd = ret == 0;
3296
3297 gp.param = I915_PARAM_HAS_BLT;
3298 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3299 bufmgr_gem->has_blt = ret == 0;
3300
3301 gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3302 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3303 bufmgr_gem->has_exec_async = ret == 0;
3304
3305 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3306
3307 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3308 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3309 bufmgr_gem->has_wait_timeout = ret == 0;
3310
3311 gp.param = I915_PARAM_HAS_LLC;
3312 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3313 if (ret != 0) {
3314 		/* Kernel does not support the HAS_LLC query; fall back to GPU
3315 		 * generation detection and assume that we have LLC on GEN6/7.
3316 		 */
3317 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
3318 IS_GEN7(bufmgr_gem->pci_device));
3319 } else
3320 bufmgr_gem->has_llc = *gp.value;
3321
3322 gp.param = I915_PARAM_HAS_VEBOX;
3323 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3324 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
3325
3326 gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3327 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3328 if (ret == 0 && *gp.value > 0)
3329 bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_bacon_gem_bo_set_softpin_offset;
3330
3331 /* Let's go with one relocation per every 2 dwords (but round down a bit
3332 * since a power of two will mean an extra page allocation for the reloc
3333 * buffer).
3334 *
3335 * Every 4 was too few for the blender benchmark.
3336 */
3337 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3338
3339 bufmgr_gem->bufmgr.bo_alloc = drm_bacon_gem_bo_alloc;
3340 bufmgr_gem->bufmgr.bo_alloc_for_render =
3341 drm_bacon_gem_bo_alloc_for_render;
3342 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_bacon_gem_bo_alloc_tiled;
3343 bufmgr_gem->bufmgr.bo_reference = drm_bacon_gem_bo_reference;
3344 bufmgr_gem->bufmgr.bo_unreference = drm_bacon_gem_bo_unreference;
3345 bufmgr_gem->bufmgr.bo_map = drm_bacon_gem_bo_map;
3346 bufmgr_gem->bufmgr.bo_unmap = drm_bacon_gem_bo_unmap;
3347 bufmgr_gem->bufmgr.bo_subdata = drm_bacon_gem_bo_subdata;
3348 bufmgr_gem->bufmgr.bo_get_subdata = drm_bacon_gem_bo_get_subdata;
3349 bufmgr_gem->bufmgr.bo_wait_rendering = drm_bacon_gem_bo_wait_rendering;
3350 bufmgr_gem->bufmgr.bo_emit_reloc = drm_bacon_gem_bo_emit_reloc;
3351 bufmgr_gem->bufmgr.bo_get_tiling = drm_bacon_gem_bo_get_tiling;
3352 bufmgr_gem->bufmgr.bo_set_tiling = drm_bacon_gem_bo_set_tiling;
3353 bufmgr_gem->bufmgr.bo_flink = drm_bacon_gem_bo_flink;
3354 /* Use the new one if available */
3355 if (exec2) {
3356 bufmgr_gem->bufmgr.bo_exec = drm_bacon_gem_bo_exec2;
3357 bufmgr_gem->bufmgr.bo_mrb_exec = drm_bacon_gem_bo_mrb_exec2;
3358 } else
3359 bufmgr_gem->bufmgr.bo_exec = drm_bacon_gem_bo_exec;
3360 bufmgr_gem->bufmgr.bo_busy = drm_bacon_gem_bo_busy;
3361 bufmgr_gem->bufmgr.bo_madvise = drm_bacon_gem_bo_madvise;
3362 bufmgr_gem->bufmgr.destroy = drm_bacon_bufmgr_gem_unref;
3363 bufmgr_gem->bufmgr.debug = 0;
3364 bufmgr_gem->bufmgr.check_aperture_space =
3365 drm_bacon_gem_check_aperture_space;
3366 bufmgr_gem->bufmgr.bo_disable_reuse = drm_bacon_gem_bo_disable_reuse;
3367 bufmgr_gem->bufmgr.bo_is_reusable = drm_bacon_gem_bo_is_reusable;
3368 bufmgr_gem->bufmgr.bo_references = drm_bacon_gem_bo_references;
3369
3370 init_cache_buckets(bufmgr_gem);
3371
3372 list_inithead(&bufmgr_gem->vma_cache);
3373 bufmgr_gem->vma_max = -1; /* unlimited by default */
3374
3375 list_add(&bufmgr_gem->managers, &bufmgr_list);
3376
3377 exit:
3378 pthread_mutex_unlock(&bufmgr_list_mutex);
3379
3380 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3381 }
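/*
 * Illustrative sketch (kept out of the build): minimal bring-up against the
 * entry points wired up above.  drm_bacon_bo_alloc(), drm_bacon_bo_unreference()
 * and drm_bacon_bufmgr_destroy() are assumed to be the usual public wrappers
 * over the vtable populated in drm_bacon_bufmgr_gem_init(); the 16 KiB batch
 * size is only an example value.
 */
#if 0
static void
example_bringup(int fd)
{
	drm_bacon_bufmgr *bufmgr = drm_bacon_bufmgr_gem_init(fd, 4 * 4096);
	drm_bacon_bo *bo;

	if (!bufmgr)
		return;

	drm_bacon_bufmgr_gem_enable_reuse(bufmgr);

	bo = drm_bacon_bo_alloc(bufmgr, "scratch", 4096, 4096);
	if (bo) {
		uint32_t *map = drm_bacon_gem_bo_map__cpu(bo);
		if (map)
			map[0] = drm_bacon_bufmgr_gem_get_devid(bufmgr);
		drm_bacon_bo_unreference(bo);
	}

	drm_bacon_bufmgr_destroy(bufmgr);
}
#endif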