1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "libdrm_macros.h"
60 #include "libdrm_lists.h"
61 #include "brw_bufmgr.h"
62 #include "intel_bufmgr_priv.h"
63 #include "intel_chipset.h"
64 #include "string.h"
65
66 #include "i915_drm.h"
67 #include "uthash.h"
68
69 #ifdef HAVE_VALGRIND
70 #include <valgrind.h>
71 #include <memcheck.h>
72 #define VG(x) x
73 #else
74 #define VG(x)
75 #endif
76
77 #define memclear(s) memset(&s, 0, sizeof(s))
78
79 #define DBG(...) do { \
80 if (bufmgr_gem->bufmgr.debug) \
81 fprintf(stderr, __VA_ARGS__); \
82 } while (0)
83
84 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
85 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
86
87 static inline int
88 atomic_add_unless(int *v, int add, int unless)
89 {
90 int c, old;
91 c = p_atomic_read(v);
92 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
93 c = old;
94 return c == unless;
95 }
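
/*
 * Illustrative sketch (not part of the driver): how the unreference path
 * further down uses this helper.  atomic_add_unless(&refcount, -1, 1)
 * decrements the count only when it is not already 1, and returns non-zero
 * exactly when the count was 1, i.e. when the caller holds the last
 * reference and must take the locked teardown path instead.
 *
 *   int refcount = 3;
 *   atomic_add_unless(&refcount, -1, 1);   // refcount -> 2, returns 0
 *   refcount = 1;
 *   atomic_add_unless(&refcount, -1, 1);   // refcount stays 1, returns 1
 */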
96
97 /**
98 * upper_32_bits - return bits 32-63 of a number
99 * @n: the number we're accessing
100 *
101 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
102 * the "right shift count >= width of type" warning when that quantity is
103 * 32-bits.
104 */
105 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
106
107 /**
108 * lower_32_bits - return bits 0-31 of a number
109 * @n: the number we're accessing
110 */
111 #define lower_32_bits(n) ((__u32)(n))
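
/*
 * Illustrative sketch (values made up): splitting a 64-bit GTT offset into
 * the two 32-bit halves that the validation-list debug output below prints.
 *
 *   uint64_t offset64 = 0x0000000123456000ull;
 *   fprintf(stderr, "0x%08x %08x\n",
 *           upper_32_bits(offset64),    // 0x00000001
 *           lower_32_bits(offset64));   // 0x23456000
 */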
112
113 typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
114
115 struct drm_bacon_gem_bo_bucket {
116 drmMMListHead head;
117 unsigned long size;
118 };
119
120 typedef struct _drm_bacon_bufmgr_gem {
121 drm_bacon_bufmgr bufmgr;
122
123 int refcount;
124
125 int fd;
126
127 int max_relocs;
128
129 pthread_mutex_t lock;
130
131 struct drm_i915_gem_exec_object *exec_objects;
132 struct drm_i915_gem_exec_object2 *exec2_objects;
133 drm_bacon_bo **exec_bos;
134 int exec_size;
135 int exec_count;
136
137 /** Array of lists of cached gem objects of power-of-two sizes */
138 struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
139 int num_buckets;
140 time_t time;
141
142 drmMMListHead managers;
143
144 drm_bacon_bo_gem *name_table;
145 drm_bacon_bo_gem *handle_table;
146
147 drmMMListHead vma_cache;
148 int vma_count, vma_open, vma_max;
149
150 uint64_t gtt_size;
151 int available_fences;
152 int pci_device;
153 int gen;
154 unsigned int has_bsd : 1;
155 unsigned int has_blt : 1;
156 unsigned int has_relaxed_fencing : 1;
157 unsigned int has_llc : 1;
158 unsigned int has_wait_timeout : 1;
159 unsigned int bo_reuse : 1;
160 unsigned int no_exec : 1;
161 unsigned int has_vebox : 1;
162 unsigned int has_exec_async : 1;
163 bool fenced_relocs;
164
165 struct {
166 void *ptr;
167 uint32_t handle;
168 } userptr_active;
169
170 } drm_bacon_bufmgr_gem;
171
172 #define DRM_INTEL_RELOC_FENCE (1<<0)
173
174 typedef struct _drm_bacon_reloc_target_info {
175 drm_bacon_bo *bo;
176 int flags;
177 } drm_bacon_reloc_target;
178
179 struct _drm_bacon_bo_gem {
180 drm_bacon_bo bo;
181
182 int refcount;
183 uint32_t gem_handle;
184 const char *name;
185
186 /**
187 * Kernel-assigned global name for this object
188 *
189 * List contains both flink named and prime fd'd objects
190 */
191 unsigned int global_name;
192
193 UT_hash_handle handle_hh;
194 UT_hash_handle name_hh;
195
196 /**
197 * Index of the buffer within the validation list while preparing a
198 * batchbuffer execution.
199 */
200 int validate_index;
201
202 /**
203 * Current tiling mode
204 */
205 uint32_t tiling_mode;
206 uint32_t swizzle_mode;
207 unsigned long stride;
208
209 unsigned long kflags;
210
211 time_t free_time;
212
213 /** Array passed to the DRM containing relocation information. */
214 struct drm_i915_gem_relocation_entry *relocs;
215 /**
216 * Array of info structs corresponding to relocs[i].target_handle etc
217 */
218 drm_bacon_reloc_target *reloc_target_info;
219 /** Number of entries in relocs */
220 int reloc_count;
221 /** Array of BOs that are referenced by this buffer and will be softpinned */
222 drm_bacon_bo **softpin_target;
223 /** Number of softpinned BOs that are referenced by this buffer */
224 int softpin_target_count;
225 /** Maximum number of softpinned BOs that are referenced by this buffer */
226 int softpin_target_size;
227
228 /** Mapped address for the buffer, saved across map/unmap cycles */
229 void *mem_virtual;
230 /** GTT virtual address for the buffer, saved across map/unmap cycles */
231 void *gtt_virtual;
232 /** WC CPU address for the buffer, saved across map/unmap cycles */
233 void *wc_virtual;
234 /**
235 * Virtual address of the buffer allocated by user, used for userptr
236 * objects only.
237 */
238 void *user_virtual;
239 int map_count;
240 drmMMListHead vma_list;
241
242 /** BO cache list */
243 drmMMListHead head;
244
245 /**
246 * Boolean of whether this BO and its children have been included in
247 * the current drm_bacon_bufmgr_check_aperture_space() total.
248 */
249 bool included_in_check_aperture;
250
251 /**
252 * Boolean of whether this buffer has been used as a relocation
253 * target and had its size accounted for, and thus can't have any
254 * further relocations added to it.
255 */
256 bool used_as_reloc_target;
257
258 /**
259 * Boolean of whether we have encountered an error whilst building the relocation tree.
260 */
261 bool has_error;
262
263 /**
264 * Boolean of whether this buffer can be re-used
265 */
266 bool reusable;
267
268 /**
269 * Boolean of whether the GPU is definitely not accessing the buffer.
270 *
271 * This is only valid when reusable, since non-reusable
272 * buffers are those that have been shared with other
273 * processes, so we don't know their state.
274 */
275 bool idle;
276
277 /**
278 * Boolean of whether this buffer was allocated with userptr
279 */
280 bool is_userptr;
281
282 /**
283 * Size in bytes of this buffer and its relocation descendants.
284 *
285 * Used to avoid costly tree walking in
286 * drm_bacon_bufmgr_check_aperture in the common case.
287 */
288 int reloc_tree_size;
289
290 /**
291 * Number of potential fence registers required by this buffer and its
292 * relocations.
293 */
294 int reloc_tree_fences;
295
296 /** Whether we may need to do the SW_FINISH ioctl on unmap. */
297 bool mapped_cpu_write;
298 };
299
300 static unsigned int
301 drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);
302
303 static unsigned int
304 drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);
305
306 static int
307 drm_bacon_gem_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
308 uint32_t * swizzle_mode);
309
310 static int
311 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
312 uint32_t tiling_mode,
313 uint32_t stride);
314
315 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
316 time_t time);
317
318 static void drm_bacon_gem_bo_unreference(drm_bacon_bo *bo);
319
320 static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
321
322 static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
323 {
324 return (drm_bacon_bo_gem *)bo;
325 }
326
327 static unsigned long
328 drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr_gem *bufmgr_gem, unsigned long size,
329 uint32_t *tiling_mode)
330 {
331 unsigned long min_size, max_size;
332 unsigned long i;
333
334 if (*tiling_mode == I915_TILING_NONE)
335 return size;
336
337 /* 965+ just need multiples of page size for tiling */
338 if (bufmgr_gem->gen >= 4)
339 return ROUND_UP_TO(size, 4096);
340
341 /* Older chips need powers of two, of at least 512k or 1M */
342 if (bufmgr_gem->gen == 3) {
343 min_size = 1024*1024;
344 max_size = 128*1024*1024;
345 } else {
346 min_size = 512*1024;
347 max_size = 64*1024*1024;
348 }
349
350 if (size > max_size) {
351 *tiling_mode = I915_TILING_NONE;
352 return size;
353 }
354
355 /* Do we need to allocate every page for the fence? */
356 if (bufmgr_gem->has_relaxed_fencing)
357 return ROUND_UP_TO(size, 4096);
358
359 for (i = min_size; i < size; i <<= 1)
360 ;
361
362 return i;
363 }
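
/*
 * Worked example (hypothetical request, bufmgr_gem standing in for a gen3
 * buffer manager): a 1.5 MiB Y-tiled allocation without relaxed fencing
 * takes the power-of-two branch above, so the loop walks 1 MiB -> 2 MiB and
 * the object is charged 2 MiB.  The same request on gen4+ (or with relaxed
 * fencing) is only rounded up to the next 4096-byte page.
 *
 *   uint32_t tiling = I915_TILING_Y;
 *   drm_bacon_gem_bo_tile_size(bufmgr_gem, 1536 * 1024, &tiling);
 */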
364
365 /*
366 * Round a given pitch up to the minimum required for X tiling on a
367 * given chip. We use 512 as the minimum to allow for a later tiling
368 * change.
369 */
370 static unsigned long
371 drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr_gem *bufmgr_gem,
372 unsigned long pitch, uint32_t *tiling_mode)
373 {
374 unsigned long tile_width;
375 unsigned long i;
376
377 /* If untiled, then just align it so that we can do rendering
378 * to it with the 3D engine.
379 */
380 if (*tiling_mode == I915_TILING_NONE)
381 return ALIGN(pitch, 64);
382
383 if (*tiling_mode == I915_TILING_X
384 || (IS_915(bufmgr_gem->pci_device)
385 && *tiling_mode == I915_TILING_Y))
386 tile_width = 512;
387 else
388 tile_width = 128;
389
390 /* 965 is flexible */
391 if (bufmgr_gem->gen >= 4)
392 return ROUND_UP_TO(pitch, tile_width);
393
394 /* The older hardware has a maximum pitch of 8192 with tiled
395 * surfaces, so fallback to untiled if it's too large.
396 */
397 if (pitch > 8192) {
398 *tiling_mode = I915_TILING_NONE;
399 return ALIGN(pitch, 64);
400 }
401
402 /* Pre-965 needs power of two tile width */
403 for (i = tile_width; i < pitch; i <<= 1)
404 ;
405
406 return i;
407 }
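
/*
 * Worked example (made-up surface): a 1000-pixel-wide, 4-byte-per-pixel
 * X-tiled surface requests a 4000-byte pitch.  gen4+ rounds it up to the
 * next multiple of the 512-byte tile width (4096); pre-965 parts walk the
 * power-of-two loop and also land on 4096; untiled, it would merely be
 * aligned to 64 bytes (4032).
 */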
408
409 static struct drm_bacon_gem_bo_bucket *
410 drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr_gem *bufmgr_gem,
411 unsigned long size)
412 {
413 int i;
414
415 for (i = 0; i < bufmgr_gem->num_buckets; i++) {
416 struct drm_bacon_gem_bo_bucket *bucket =
417 &bufmgr_gem->cache_bucket[i];
418 if (bucket->size >= size) {
419 return bucket;
420 }
421 }
422
423 return NULL;
424 }
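
/*
 * Note (sketch only): the bucket sizes themselves are filled in by the
 * cache-initialization code elsewhere in this file; this lookup simply
 * returns the first (smallest) bucket whose size can hold the request, or
 * NULL when the request is larger than anything we cache.  For example, a
 * 5000-byte request lands in the 8192-byte bucket and is rounded up to
 * that size by the allocator below.
 */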
425
426 static void
427 drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr_gem *bufmgr_gem)
428 {
429 int i, j;
430
431 for (i = 0; i < bufmgr_gem->exec_count; i++) {
432 drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
433 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
434
435 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
436 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
437 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
438 bo_gem->name);
439 continue;
440 }
441
442 for (j = 0; j < bo_gem->reloc_count; j++) {
443 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[j].bo;
444 drm_bacon_bo_gem *target_gem =
445 (drm_bacon_bo_gem *) target_bo;
446
447 DBG("%2d: %d %s(%s)@0x%08x %08x -> "
448 "%d (%s)@0x%08x %08x + 0x%08x\n",
449 i,
450 bo_gem->gem_handle,
451 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
452 bo_gem->name,
453 upper_32_bits(bo_gem->relocs[j].offset),
454 lower_32_bits(bo_gem->relocs[j].offset),
455 target_gem->gem_handle,
456 target_gem->name,
457 upper_32_bits(target_bo->offset64),
458 lower_32_bits(target_bo->offset64),
459 bo_gem->relocs[j].delta);
460 }
461
462 for (j = 0; j < bo_gem->softpin_target_count; j++) {
463 drm_bacon_bo *target_bo = bo_gem->softpin_target[j];
464 drm_bacon_bo_gem *target_gem =
465 (drm_bacon_bo_gem *) target_bo;
466 DBG("%2d: %d %s(%s) -> "
467 "%d *(%s)@0x%08x %08x\n",
468 i,
469 bo_gem->gem_handle,
470 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
471 bo_gem->name,
472 target_gem->gem_handle,
473 target_gem->name,
474 upper_32_bits(target_bo->offset64),
475 lower_32_bits(target_bo->offset64));
476 }
477 }
478 }
479
480 static inline void
481 drm_bacon_gem_bo_reference(drm_bacon_bo *bo)
482 {
483 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
484
485 p_atomic_inc(&bo_gem->refcount);
486 }
487
488 /**
489 * Adds the given buffer to the list of buffers to be validated (moved into the
490 * appropriate memory type) with the next batch submission.
491 *
492 * If a buffer is validated multiple times in a batch submission, it ends up
493 * with the intersection of the memory type flags and the union of the
494 * access flags.
495 */
496 static void
497 drm_bacon_add_validate_buffer(drm_bacon_bo *bo)
498 {
499 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
500 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
501 int index;
502
503 if (bo_gem->validate_index != -1)
504 return;
505
506 /* Extend the array of validation entries as necessary. */
507 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
508 int new_size = bufmgr_gem->exec_size * 2;
509
510 if (new_size == 0)
511 new_size = 5;
512
513 bufmgr_gem->exec_objects =
514 realloc(bufmgr_gem->exec_objects,
515 sizeof(*bufmgr_gem->exec_objects) * new_size);
516 bufmgr_gem->exec_bos =
517 realloc(bufmgr_gem->exec_bos,
518 sizeof(*bufmgr_gem->exec_bos) * new_size);
519 bufmgr_gem->exec_size = new_size;
520 }
521
522 index = bufmgr_gem->exec_count;
523 bo_gem->validate_index = index;
524 /* Fill in array entry */
525 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
526 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
527 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
528 bufmgr_gem->exec_objects[index].alignment = bo->align;
529 bufmgr_gem->exec_objects[index].offset = 0;
530 bufmgr_gem->exec_bos[index] = bo;
531 bufmgr_gem->exec_count++;
532 }
533
534 static void
535 drm_bacon_add_validate_buffer2(drm_bacon_bo *bo, int need_fence)
536 {
537 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
538 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
539 int index;
540 unsigned long flags;
541
542 flags = 0;
543 if (need_fence)
544 flags |= EXEC_OBJECT_NEEDS_FENCE;
545
546 if (bo_gem->validate_index != -1) {
547 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
548 return;
549 }
550
551 /* Extend the array of validation entries as necessary. */
552 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
553 int new_size = bufmgr_gem->exec_size * 2;
554
555 if (new_size == 0)
556 new_size = 5;
557
558 bufmgr_gem->exec2_objects =
559 realloc(bufmgr_gem->exec2_objects,
560 sizeof(*bufmgr_gem->exec2_objects) * new_size);
561 bufmgr_gem->exec_bos =
562 realloc(bufmgr_gem->exec_bos,
563 sizeof(*bufmgr_gem->exec_bos) * new_size);
564 bufmgr_gem->exec_size = new_size;
565 }
566
567 index = bufmgr_gem->exec_count;
568 bo_gem->validate_index = index;
569 /* Fill in array entry */
570 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
571 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
572 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
573 bufmgr_gem->exec2_objects[index].alignment = bo->align;
574 bufmgr_gem->exec2_objects[index].offset = bo->offset64;
575 bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags;
576 bufmgr_gem->exec2_objects[index].rsvd1 = 0;
577 bufmgr_gem->exec2_objects[index].rsvd2 = 0;
578 bufmgr_gem->exec_bos[index] = bo;
579 bufmgr_gem->exec_count++;
580 }
581
582 #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
583 sizeof(uint32_t))
584
585 static void
586 drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr_gem *bufmgr_gem,
587 drm_bacon_bo_gem *bo_gem,
588 unsigned int alignment)
589 {
590 unsigned int size;
591
592 assert(!bo_gem->used_as_reloc_target);
593
594 /* The older chipsets are far less flexible in terms of tiling,
595 * and require tiled buffers to be size-aligned in the aperture.
596 * This means that in the worst possible case we will need a hole
597 * twice as large as the object in order for it to fit into the
598 * aperture. Optimal packing is for wimps.
599 */
600 size = bo_gem->bo.size;
601 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
602 unsigned int min_size;
603
604 if (bufmgr_gem->has_relaxed_fencing) {
605 if (bufmgr_gem->gen == 3)
606 min_size = 1024*1024;
607 else
608 min_size = 512*1024;
609
610 while (min_size < size)
611 min_size *= 2;
612 } else
613 min_size = size;
614
615 /* Account for worst-case alignment. */
616 alignment = MAX2(alignment, min_size);
617 }
618
619 bo_gem->reloc_tree_size = size + alignment;
620 }
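
/*
 * Worked example (hypothetical buffer): a 600 KiB X-tiled buffer on a gen2
 * part with relaxed fencing has min_size walked 512 KiB -> 1 MiB above, so
 * reloc_tree_size is charged as 600 KiB of payload plus a 1 MiB worst-case
 * alignment hole.  On gen4+ the same buffer is charged just its size plus
 * the requested alignment.
 */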
621
622 static int
623 drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
624 {
625 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
626 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
627 unsigned int max_relocs = bufmgr_gem->max_relocs;
628
629 if (bo->size / 4 < max_relocs)
630 max_relocs = bo->size / 4;
631
632 bo_gem->relocs = malloc(max_relocs *
633 sizeof(struct drm_i915_gem_relocation_entry));
634 bo_gem->reloc_target_info = malloc(max_relocs *
635 sizeof(drm_bacon_reloc_target));
636 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
637 bo_gem->has_error = true;
638
639 free (bo_gem->relocs);
640 bo_gem->relocs = NULL;
641
642 free (bo_gem->reloc_target_info);
643 bo_gem->reloc_target_info = NULL;
644
645 return 1;
646 }
647
648 return 0;
649 }
650
651 static int
652 drm_bacon_gem_bo_busy(drm_bacon_bo *bo)
653 {
654 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
655 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
656 struct drm_i915_gem_busy busy;
657 int ret;
658
659 if (bo_gem->reusable && bo_gem->idle)
660 return false;
661
662 memclear(busy);
663 busy.handle = bo_gem->gem_handle;
664
665 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
666 if (ret == 0) {
667 bo_gem->idle = !busy.busy;
668 return busy.busy;
669 } else {
670 return false;
671 }
673 }
674
675 static int
676 drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr_gem *bufmgr_gem,
677 drm_bacon_bo_gem *bo_gem, int state)
678 {
679 struct drm_i915_gem_madvise madv;
680
681 memclear(madv);
682 madv.handle = bo_gem->gem_handle;
683 madv.madv = state;
684 madv.retained = 1;
685 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
686
687 return madv.retained;
688 }
689
690 static int
691 drm_bacon_gem_bo_madvise(drm_bacon_bo *bo, int madv)
692 {
693 return drm_bacon_gem_bo_madvise_internal
694 ((drm_bacon_bufmgr_gem *) bo->bufmgr,
695 (drm_bacon_bo_gem *) bo,
696 madv);
697 }
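
/*
 * Usage sketch (illustrative; refill_contents() is a hypothetical helper):
 * a cached buffer is marked purgeable with I915_MADV_DONTNEED and reclaimed
 * with I915_MADV_WILLNEED; a zero return from the latter means the kernel
 * already discarded the pages and the contents must be regenerated.
 *
 *   drm_bacon_gem_bo_madvise(bo, I915_MADV_DONTNEED);
 *   ...
 *   if (!drm_bacon_gem_bo_madvise(bo, I915_MADV_WILLNEED))
 *           refill_contents(bo);
 */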
698
699 /* drop the oldest entries that have been purged by the kernel */
700 static void
701 drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr_gem *bufmgr_gem,
702 struct drm_bacon_gem_bo_bucket *bucket)
703 {
704 while (!DRMLISTEMPTY(&bucket->head)) {
705 drm_bacon_bo_gem *bo_gem;
706
707 bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
708 bucket->head.next, head);
709 if (drm_bacon_gem_bo_madvise_internal
710 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
711 break;
712
713 DRMLISTDEL(&bo_gem->head);
714 drm_bacon_gem_bo_free(&bo_gem->bo);
715 }
716 }
717
718 static drm_bacon_bo *
719 drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
720 const char *name,
721 unsigned long size,
722 unsigned long flags,
723 uint32_t tiling_mode,
724 unsigned long stride,
725 unsigned int alignment)
726 {
727 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
728 drm_bacon_bo_gem *bo_gem;
729 unsigned int page_size = getpagesize();
730 int ret;
731 struct drm_bacon_gem_bo_bucket *bucket;
732 bool alloc_from_cache;
733 unsigned long bo_size;
734 bool for_render = false;
735
736 if (flags & BO_ALLOC_FOR_RENDER)
737 for_render = true;
738
739 /* Round the allocated size up to a power of two number of pages. */
740 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr_gem, size);
741
742 /* If we don't have caching at this size, don't actually round the
743 * allocation up.
744 */
745 if (bucket == NULL) {
746 bo_size = size;
747 if (bo_size < page_size)
748 bo_size = page_size;
749 } else {
750 bo_size = bucket->size;
751 }
752
753 pthread_mutex_lock(&bufmgr_gem->lock);
754 /* Get a buffer out of the cache if available */
755 retry:
756 alloc_from_cache = false;
757 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
758 if (for_render) {
759 /* Allocate new render-target BOs from the tail (MRU)
760 * of the list, as it will likely be hot in the GPU
761 * cache and in the aperture for us.
762 */
763 bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
764 bucket->head.prev, head);
765 DRMLISTDEL(&bo_gem->head);
766 alloc_from_cache = true;
767 bo_gem->bo.align = alignment;
768 } else {
769 assert(alignment == 0);
770 /* For non-render-target BOs (where we're probably
771 * going to map it first thing in order to fill it
772 * with data), check if the last BO in the cache is
773 * unbusy, and only reuse in that case. Otherwise,
774 * allocating a new buffer is probably faster than
775 * waiting for the GPU to finish.
776 */
777 bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
778 bucket->head.next, head);
779 if (!drm_bacon_gem_bo_busy(&bo_gem->bo)) {
780 alloc_from_cache = true;
781 DRMLISTDEL(&bo_gem->head);
782 }
783 }
784
785 if (alloc_from_cache) {
786 if (!drm_bacon_gem_bo_madvise_internal
787 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
788 drm_bacon_gem_bo_free(&bo_gem->bo);
789 drm_bacon_gem_bo_cache_purge_bucket(bufmgr_gem,
790 bucket);
791 goto retry;
792 }
793
794 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
795 tiling_mode,
796 stride)) {
797 drm_bacon_gem_bo_free(&bo_gem->bo);
798 goto retry;
799 }
800 }
801 }
802
803 if (!alloc_from_cache) {
804 struct drm_i915_gem_create create;
805
806 bo_gem = calloc(1, sizeof(*bo_gem));
807 if (!bo_gem)
808 goto err;
809
810 /* drm_bacon_gem_bo_free calls DRMLISTDEL() on vma_list, which would
811 be uninitialized at this point, so set the list head up first. */
812 DRMINITLISTHEAD(&bo_gem->vma_list);
813
814 bo_gem->bo.size = bo_size;
815
816 memclear(create);
817 create.size = bo_size;
818
819 ret = drmIoctl(bufmgr_gem->fd,
820 DRM_IOCTL_I915_GEM_CREATE,
821 &create);
822 if (ret != 0) {
823 free(bo_gem);
824 goto err;
825 }
826
827 bo_gem->gem_handle = create.handle;
828 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
829 gem_handle, sizeof(bo_gem->gem_handle),
830 bo_gem);
831
832 bo_gem->bo.handle = bo_gem->gem_handle;
833 bo_gem->bo.bufmgr = bufmgr;
834 bo_gem->bo.align = alignment;
835
836 bo_gem->tiling_mode = I915_TILING_NONE;
837 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
838 bo_gem->stride = 0;
839
840 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
841 tiling_mode,
842 stride))
843 goto err_free;
844 }
845
846 bo_gem->name = name;
847 p_atomic_set(&bo_gem->refcount, 1);
848 bo_gem->validate_index = -1;
849 bo_gem->reloc_tree_fences = 0;
850 bo_gem->used_as_reloc_target = false;
851 bo_gem->has_error = false;
852 bo_gem->reusable = true;
853
854 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment);
855 pthread_mutex_unlock(&bufmgr_gem->lock);
856
857 DBG("bo_create: buf %d (%s) %ldb\n",
858 bo_gem->gem_handle, bo_gem->name, size);
859
860 return &bo_gem->bo;
861
862 err_free:
863 drm_bacon_gem_bo_free(&bo_gem->bo);
864 err:
865 pthread_mutex_unlock(&bufmgr_gem->lock);
866 return NULL;
867 }
868
869 static drm_bacon_bo *
870 drm_bacon_gem_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
871 const char *name,
872 unsigned long size,
873 unsigned int alignment)
874 {
875 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
876 BO_ALLOC_FOR_RENDER,
877 I915_TILING_NONE, 0,
878 alignment);
879 }
880
881 static drm_bacon_bo *
882 drm_bacon_gem_bo_alloc(drm_bacon_bufmgr *bufmgr,
883 const char *name,
884 unsigned long size,
885 unsigned int alignment)
886 {
887 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
888 I915_TILING_NONE, 0, 0);
889 }
890
891 static drm_bacon_bo *
892 drm_bacon_gem_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
893 int x, int y, int cpp, uint32_t *tiling_mode,
894 unsigned long *pitch, unsigned long flags)
895 {
896 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
897 unsigned long size, stride;
898 uint32_t tiling;
899
900 do {
901 unsigned long aligned_y, height_alignment;
902
903 tiling = *tiling_mode;
904
905 /* If we're tiled, our allocations are in 8 or 32-row blocks,
906 * so failure to align our height means that we won't allocate
907 * enough pages.
908 *
909 * If we're untiled, we still have to align to 2 rows high
910 * because the data port accesses 2x2 blocks even if the
911 * bottom row isn't to be rendered, so failure to align means
912 * we could walk off the end of the GTT and fault. This is
913 * documented on 965, and may be the case on older chipsets
914 * too so we try to be careful.
915 */
916 aligned_y = y;
917 height_alignment = 2;
918
919 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
920 height_alignment = 16;
921 else if (tiling == I915_TILING_X
922 || (IS_915(bufmgr_gem->pci_device)
923 && tiling == I915_TILING_Y))
924 height_alignment = 8;
925 else if (tiling == I915_TILING_Y)
926 height_alignment = 32;
927 aligned_y = ALIGN(y, height_alignment);
928
929 stride = x * cpp;
930 stride = drm_bacon_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
931 size = stride * aligned_y;
932 size = drm_bacon_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
933 } while (*tiling_mode != tiling);
934 *pitch = stride;
935
936 if (tiling == I915_TILING_NONE)
937 stride = 0;
938
939 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
940 tiling, stride, 0);
941 }
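
/*
 * Caller sketch (illustrative, error handling omitted): allocating an
 * X-tiled 1024x768, 4-byte-per-pixel surface.  tiling_mode is in/out - the
 * implementation may fall back to I915_TILING_NONE when the size or pitch
 * cannot be tiled on the current hardware.
 *
 *   uint32_t tiling = I915_TILING_X;
 *   unsigned long pitch;
 *   drm_bacon_bo *bo =
 *           drm_bacon_gem_bo_alloc_tiled(bufmgr, "miptree", 1024, 768, 4,
 *                                        &tiling, &pitch, 0);
 */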
942
943 static drm_bacon_bo *
944 drm_bacon_gem_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
945 const char *name,
946 void *addr,
947 uint32_t tiling_mode,
948 uint32_t stride,
949 unsigned long size,
950 unsigned long flags)
951 {
952 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
953 drm_bacon_bo_gem *bo_gem;
954 int ret;
955 struct drm_i915_gem_userptr userptr;
956
957 /* Tiling with userptr surfaces is not supported
958 * on all hardware, so refuse it for the time being.
959 */
960 if (tiling_mode != I915_TILING_NONE)
961 return NULL;
962
963 bo_gem = calloc(1, sizeof(*bo_gem));
964 if (!bo_gem)
965 return NULL;
966
967 p_atomic_set(&bo_gem->refcount, 1);
968 DRMINITLISTHEAD(&bo_gem->vma_list);
969
970 bo_gem->bo.size = size;
971
972 memclear(userptr);
973 userptr.user_ptr = (__u64)((unsigned long)addr);
974 userptr.user_size = size;
975 userptr.flags = flags;
976
977 ret = drmIoctl(bufmgr_gem->fd,
978 DRM_IOCTL_I915_GEM_USERPTR,
979 &userptr);
980 if (ret != 0) {
981 DBG("bo_create_userptr: "
982 "ioctl failed with user ptr %p size 0x%lx, "
983 "user flags 0x%lx\n", addr, size, flags);
984 free(bo_gem);
985 return NULL;
986 }
987
988 pthread_mutex_lock(&bufmgr_gem->lock);
989
990 bo_gem->gem_handle = userptr.handle;
991 bo_gem->bo.handle = bo_gem->gem_handle;
992 bo_gem->bo.bufmgr = bufmgr;
993 bo_gem->is_userptr = true;
994 bo_gem->bo.virtual = addr;
995 /* Save the address provided by user */
996 bo_gem->user_virtual = addr;
997 bo_gem->tiling_mode = I915_TILING_NONE;
998 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
999 bo_gem->stride = 0;
1000
1001 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1002 gem_handle, sizeof(bo_gem->gem_handle),
1003 bo_gem);
1004
1005 bo_gem->name = name;
1006 bo_gem->validate_index = -1;
1007 bo_gem->reloc_tree_fences = 0;
1008 bo_gem->used_as_reloc_target = false;
1009 bo_gem->has_error = false;
1010 bo_gem->reusable = false;
1011
1012 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1013 pthread_mutex_unlock(&bufmgr_gem->lock);
1014
1015 DBG("bo_create_userptr: "
1016 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
1017 addr, bo_gem->gem_handle, bo_gem->name,
1018 size, stride, tiling_mode);
1019
1020 return &bo_gem->bo;
1021 }
1022
1023 static bool
1024 has_userptr(drm_bacon_bufmgr_gem *bufmgr_gem)
1025 {
1026 int ret;
1027 void *ptr;
1028 long pgsz;
1029 struct drm_i915_gem_userptr userptr;
1030
1031 pgsz = sysconf(_SC_PAGESIZE);
1032 assert(pgsz > 0);
1033
1034 ret = posix_memalign(&ptr, pgsz, pgsz);
1035 if (ret) {
1036 DBG("Failed to get a page (%ld) for userptr detection!\n",
1037 pgsz);
1038 return false;
1039 }
1040
1041 memclear(userptr);
1042 userptr.user_ptr = (__u64)(unsigned long)ptr;
1043 userptr.user_size = pgsz;
1044
1045 retry:
1046 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
1047 if (ret) {
1048 if (errno == ENODEV && userptr.flags == 0) {
1049 userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
1050 goto retry;
1051 }
1052 free(ptr);
1053 return false;
1054 }
1055
1056 /* We don't release the userptr bo here as we want to keep the
1057 * kernel mm tracking alive for our lifetime. The first time we
1058 * create a userptr object, the kernel has to install an mmu_notifier
1059 * which is a heavyweight operation (e.g. it requires taking all
1060 * mm_locks and stop_machine()).
1061 */
1062
1063 bufmgr_gem->userptr_active.ptr = ptr;
1064 bufmgr_gem->userptr_active.handle = userptr.handle;
1065
1066 return true;
1067 }
1068
1069 static drm_bacon_bo *
1070 check_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
1071 const char *name,
1072 void *addr,
1073 uint32_t tiling_mode,
1074 uint32_t stride,
1075 unsigned long size,
1076 unsigned long flags)
1077 {
1078 if (has_userptr((drm_bacon_bufmgr_gem *)bufmgr))
1079 bufmgr->bo_alloc_userptr = drm_bacon_gem_bo_alloc_userptr;
1080 else
1081 bufmgr->bo_alloc_userptr = NULL;
1082
1083 return drm_bacon_bo_alloc_userptr(bufmgr, name, addr,
1084 tiling_mode, stride, size, flags);
1085 }
1086
1087 /**
1088 * Returns a drm_bacon_bo wrapping the given buffer object handle.
1089 *
1090 * This can be used when one application needs to pass a buffer object
1091 * to another.
1092 */
1093 drm_bacon_bo *
1094 drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
1095 const char *name,
1096 unsigned int handle)
1097 {
1098 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
1099 drm_bacon_bo_gem *bo_gem;
1100 int ret;
1101 struct drm_gem_open open_arg;
1102 struct drm_i915_gem_get_tiling get_tiling;
1103
1104 /* At the moment most applications only have a few named bos.
1105 * For instance, in a DRI client only the render buffers passed
1106 * between X and the client are named, and since X returns
1107 * alternating names for the front/back buffers, the lookup
1108 * below finds a match quickly.
1109 */
1110 pthread_mutex_lock(&bufmgr_gem->lock);
1111 HASH_FIND(name_hh, bufmgr_gem->name_table,
1112 &handle, sizeof(handle), bo_gem);
1113 if (bo_gem) {
1114 drm_bacon_gem_bo_reference(&bo_gem->bo);
1115 goto out;
1116 }
1117
1118 memclear(open_arg);
1119 open_arg.name = handle;
1120 ret = drmIoctl(bufmgr_gem->fd,
1121 DRM_IOCTL_GEM_OPEN,
1122 &open_arg);
1123 if (ret != 0) {
1124 DBG("Couldn't reference %s handle 0x%08x: %s\n",
1125 name, handle, strerror(errno));
1126 bo_gem = NULL;
1127 goto out;
1128 }
1129 /* Now see if someone has used a prime handle to get this
1130 * object from the kernel before by looking through the list
1131 * again for a matching gem_handle
1132 */
1133 HASH_FIND(handle_hh, bufmgr_gem->handle_table,
1134 &open_arg.handle, sizeof(open_arg.handle), bo_gem);
1135 if (bo_gem) {
1136 drm_bacon_gem_bo_reference(&bo_gem->bo);
1137 goto out;
1138 }
1139
1140 bo_gem = calloc(1, sizeof(*bo_gem));
1141 if (!bo_gem)
1142 goto out;
1143
1144 p_atomic_set(&bo_gem->refcount, 1);
1145 DRMINITLISTHEAD(&bo_gem->vma_list);
1146
1147 bo_gem->bo.size = open_arg.size;
1148 bo_gem->bo.offset = 0;
1149 bo_gem->bo.offset64 = 0;
1150 bo_gem->bo.virtual = NULL;
1151 bo_gem->bo.bufmgr = bufmgr;
1152 bo_gem->name = name;
1153 bo_gem->validate_index = -1;
1154 bo_gem->gem_handle = open_arg.handle;
1155 bo_gem->bo.handle = open_arg.handle;
1156 bo_gem->global_name = handle;
1157 bo_gem->reusable = false;
1158
1159 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
1160 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
1161 HASH_ADD(name_hh, bufmgr_gem->name_table,
1162 global_name, sizeof(bo_gem->global_name), bo_gem);
1163
1164 memclear(get_tiling);
1165 get_tiling.handle = bo_gem->gem_handle;
1166 ret = drmIoctl(bufmgr_gem->fd,
1167 DRM_IOCTL_I915_GEM_GET_TILING,
1168 &get_tiling);
1169 if (ret != 0)
1170 goto err_unref;
1171
1172 bo_gem->tiling_mode = get_tiling.tiling_mode;
1173 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1174 /* XXX stride is unknown */
1175 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
1176 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1177
1178 out:
1179 pthread_mutex_unlock(&bufmgr_gem->lock);
1180 return &bo_gem->bo;
1181
1182 err_unref:
1183 drm_bacon_gem_bo_free(&bo_gem->bo);
1184 pthread_mutex_unlock(&bufmgr_gem->lock);
1185 return NULL;
1186 }
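
/*
 * Sharing sketch (names assumed): one process flinks a buffer to obtain a
 * global name and hands it to another process over some IPC channel; the
 * receiver rehydrates it with this function and gets a new reference that
 * it must drop when done.
 *
 *   drm_bacon_bo *shared =
 *           drm_bacon_bo_gem_create_from_name(bufmgr, "shared", name);
 */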
1187
1188 static void
1189 drm_bacon_gem_bo_free(drm_bacon_bo *bo)
1190 {
1191 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1192 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1193 struct drm_gem_close close;
1194 int ret;
1195
1196 DRMLISTDEL(&bo_gem->vma_list);
1197 if (bo_gem->mem_virtual) {
1198 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1199 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1200 bufmgr_gem->vma_count--;
1201 }
1202 if (bo_gem->wc_virtual) {
1203 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1204 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1205 bufmgr_gem->vma_count--;
1206 }
1207 if (bo_gem->gtt_virtual) {
1208 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1209 bufmgr_gem->vma_count--;
1210 }
1211
1212 if (bo_gem->global_name)
1213 HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem);
1214 HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem);
1215
1216 /* Close this object */
1217 memclear(close);
1218 close.handle = bo_gem->gem_handle;
1219 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
1220 if (ret != 0) {
1221 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1222 bo_gem->gem_handle, bo_gem->name, strerror(errno));
1223 }
1224 free(bo);
1225 }
1226
1227 static void
1228 drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
1229 {
1230 #if HAVE_VALGRIND
1231 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1232
1233 if (bo_gem->mem_virtual)
1234 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1235
1236 if (bo_gem->wc_virtual)
1237 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1238
1239 if (bo_gem->gtt_virtual)
1240 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1241 #endif
1242 }
1243
1244 /** Frees all cached buffers significantly older than @time. */
1245 static void
1246 drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr_gem *bufmgr_gem, time_t time)
1247 {
1248 int i;
1249
1250 if (bufmgr_gem->time == time)
1251 return;
1252
1253 for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1254 struct drm_bacon_gem_bo_bucket *bucket =
1255 &bufmgr_gem->cache_bucket[i];
1256
1257 while (!DRMLISTEMPTY(&bucket->head)) {
1258 drm_bacon_bo_gem *bo_gem;
1259
1260 bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
1261 bucket->head.next, head);
1262 if (time - bo_gem->free_time <= 1)
1263 break;
1264
1265 DRMLISTDEL(&bo_gem->head);
1266
1267 drm_bacon_gem_bo_free(&bo_gem->bo);
1268 }
1269 }
1270
1271 bufmgr_gem->time = time;
1272 }
1273
1274 static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr_gem *bufmgr_gem)
1275 {
1276 int limit;
1277
1278 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1279 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1280
1281 if (bufmgr_gem->vma_max < 0)
1282 return;
1283
1284 /* We may need to evict a few entries in order to create new mmaps */
1285 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1286 if (limit < 0)
1287 limit = 0;
1288
1289 while (bufmgr_gem->vma_count > limit) {
1290 drm_bacon_bo_gem *bo_gem;
1291
1292 bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
1293 bufmgr_gem->vma_cache.next,
1294 vma_list);
1295 assert(bo_gem->map_count == 0);
1296 DRMLISTDELINIT(&bo_gem->vma_list);
1297
1298 if (bo_gem->mem_virtual) {
1299 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1300 bo_gem->mem_virtual = NULL;
1301 bufmgr_gem->vma_count--;
1302 }
1303 if (bo_gem->wc_virtual) {
1304 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1305 bo_gem->wc_virtual = NULL;
1306 bufmgr_gem->vma_count--;
1307 }
1308 if (bo_gem->gtt_virtual) {
1309 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1310 bo_gem->gtt_virtual = NULL;
1311 bufmgr_gem->vma_count--;
1312 }
1313 }
1314 }
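
/*
 * Worked example (hypothetical numbers): with vma_max set to 512 and 100
 * mappings currently open, the cache is trimmed until at most
 * 512 - 2*100 = 312 idle mappings remain, evicting from the head of
 * vma_cache (the least recently closed entries).
 */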
1315
1316 static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr_gem *bufmgr_gem,
1317 drm_bacon_bo_gem *bo_gem)
1318 {
1319 bufmgr_gem->vma_open--;
1320 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1321 if (bo_gem->mem_virtual)
1322 bufmgr_gem->vma_count++;
1323 if (bo_gem->wc_virtual)
1324 bufmgr_gem->vma_count++;
1325 if (bo_gem->gtt_virtual)
1326 bufmgr_gem->vma_count++;
1327 drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
1328 }
1329
1330 static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr_gem *bufmgr_gem,
1331 drm_bacon_bo_gem *bo_gem)
1332 {
1333 bufmgr_gem->vma_open++;
1334 DRMLISTDEL(&bo_gem->vma_list);
1335 if (bo_gem->mem_virtual)
1336 bufmgr_gem->vma_count--;
1337 if (bo_gem->wc_virtual)
1338 bufmgr_gem->vma_count--;
1339 if (bo_gem->gtt_virtual)
1340 bufmgr_gem->vma_count--;
1341 drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
1342 }
1343
1344 static void
1345 drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
1346 {
1347 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1348 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1349 struct drm_bacon_gem_bo_bucket *bucket;
1350 int i;
1351
1352 /* Unreference all the target buffers */
1353 for (i = 0; i < bo_gem->reloc_count; i++) {
1354 if (bo_gem->reloc_target_info[i].bo != bo) {
1355 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
1356 reloc_target_info[i].bo,
1357 time);
1358 }
1359 }
1360 for (i = 0; i < bo_gem->softpin_target_count; i++)
1361 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1362 time);
1363 bo_gem->kflags = 0;
1364 bo_gem->reloc_count = 0;
1365 bo_gem->used_as_reloc_target = false;
1366 bo_gem->softpin_target_count = 0;
1367
1368 DBG("bo_unreference final: %d (%s)\n",
1369 bo_gem->gem_handle, bo_gem->name);
1370
1371 /* release memory associated with this object */
1372 if (bo_gem->reloc_target_info) {
1373 free(bo_gem->reloc_target_info);
1374 bo_gem->reloc_target_info = NULL;
1375 }
1376 if (bo_gem->relocs) {
1377 free(bo_gem->relocs);
1378 bo_gem->relocs = NULL;
1379 }
1380 if (bo_gem->softpin_target) {
1381 free(bo_gem->softpin_target);
1382 bo_gem->softpin_target = NULL;
1383 bo_gem->softpin_target_size = 0;
1384 }
1385
1386 /* Clear any left-over mappings */
1387 if (bo_gem->map_count) {
1388 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1389 bo_gem->map_count = 0;
1390 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1391 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1392 }
1393
1394 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1395 /* Put the buffer into our internal cache for reuse if we can. */
1396 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1397 drm_bacon_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1398 I915_MADV_DONTNEED)) {
1399 bo_gem->free_time = time;
1400
1401 bo_gem->name = NULL;
1402 bo_gem->validate_index = -1;
1403
1404 DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1405 } else {
1406 drm_bacon_gem_bo_free(bo);
1407 }
1408 }
1409
1410 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
1411 time_t time)
1412 {
1413 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1414
1415 assert(p_atomic_read(&bo_gem->refcount) > 0);
1416 if (p_atomic_dec_zero(&bo_gem->refcount))
1417 drm_bacon_gem_bo_unreference_final(bo, time);
1418 }
1419
1420 static void drm_bacon_gem_bo_unreference(drm_bacon_bo *bo)
1421 {
1422 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1423
1424 assert(p_atomic_read(&bo_gem->refcount) > 0);
1425
1426 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1427 drm_bacon_bufmgr_gem *bufmgr_gem =
1428 (drm_bacon_bufmgr_gem *) bo->bufmgr;
1429 struct timespec time;
1430
1431 clock_gettime(CLOCK_MONOTONIC, &time);
1432
1433 pthread_mutex_lock(&bufmgr_gem->lock);
1434
1435 if (p_atomic_dec_zero(&bo_gem->refcount)) {
1436 drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
1437 drm_bacon_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1438 }
1439
1440 pthread_mutex_unlock(&bufmgr_gem->lock);
1441 }
1442 }
1443
1444 static int drm_bacon_gem_bo_map(drm_bacon_bo *bo, int write_enable)
1445 {
1446 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1447 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1448 struct drm_i915_gem_set_domain set_domain;
1449 int ret;
1450
1451 if (bo_gem->is_userptr) {
1452 /* Return the same user ptr */
1453 bo->virtual = bo_gem->user_virtual;
1454 return 0;
1455 }
1456
1457 pthread_mutex_lock(&bufmgr_gem->lock);
1458
1459 if (bo_gem->map_count++ == 0)
1460 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
1461
1462 if (!bo_gem->mem_virtual) {
1463 struct drm_i915_gem_mmap mmap_arg;
1464
1465 DBG("bo_map: %d (%s), map_count=%d\n",
1466 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1467
1468 memclear(mmap_arg);
1469 mmap_arg.handle = bo_gem->gem_handle;
1470 mmap_arg.size = bo->size;
1471 ret = drmIoctl(bufmgr_gem->fd,
1472 DRM_IOCTL_I915_GEM_MMAP,
1473 &mmap_arg);
1474 if (ret != 0) {
1475 ret = -errno;
1476 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1477 __FILE__, __LINE__, bo_gem->gem_handle,
1478 bo_gem->name, strerror(errno));
1479 if (--bo_gem->map_count == 0)
1480 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1481 pthread_mutex_unlock(&bufmgr_gem->lock);
1482 return ret;
1483 }
1484 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1485 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1486 }
1487 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1488 bo_gem->mem_virtual);
1489 bo->virtual = bo_gem->mem_virtual;
1490
1491 memclear(set_domain);
1492 set_domain.handle = bo_gem->gem_handle;
1493 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1494 if (write_enable)
1495 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1496 else
1497 set_domain.write_domain = 0;
1498 ret = drmIoctl(bufmgr_gem->fd,
1499 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1500 &set_domain);
1501 if (ret != 0) {
1502 DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1503 __FILE__, __LINE__, bo_gem->gem_handle,
1504 strerror(errno));
1505 }
1506
1507 if (write_enable)
1508 bo_gem->mapped_cpu_write = true;
1509
1510 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1511 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1512 pthread_mutex_unlock(&bufmgr_gem->lock);
1513
1514 return 0;
1515 }
1516
1517 static int
1518 map_gtt(drm_bacon_bo *bo)
1519 {
1520 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1521 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1522 int ret;
1523
1524 if (bo_gem->is_userptr)
1525 return -EINVAL;
1526
1527 if (bo_gem->map_count++ == 0)
1528 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
1529
1530 /* Get a mapping of the buffer if we haven't before. */
1531 if (bo_gem->gtt_virtual == NULL) {
1532 struct drm_i915_gem_mmap_gtt mmap_arg;
1533
1534 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1535 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1536
1537 memclear(mmap_arg);
1538 mmap_arg.handle = bo_gem->gem_handle;
1539
1540 /* Get the fake offset back... */
1541 ret = drmIoctl(bufmgr_gem->fd,
1542 DRM_IOCTL_I915_GEM_MMAP_GTT,
1543 &mmap_arg);
1544 if (ret != 0) {
1545 ret = -errno;
1546 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1547 __FILE__, __LINE__,
1548 bo_gem->gem_handle, bo_gem->name,
1549 strerror(errno));
1550 if (--bo_gem->map_count == 0)
1551 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1552 return ret;
1553 }
1554
1555 /* and mmap it */
1556 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1557 MAP_SHARED, bufmgr_gem->fd,
1558 mmap_arg.offset);
1559 if (bo_gem->gtt_virtual == MAP_FAILED) {
1560 bo_gem->gtt_virtual = NULL;
1561 ret = -errno;
1562 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1563 __FILE__, __LINE__,
1564 bo_gem->gem_handle, bo_gem->name,
1565 strerror(errno));
1566 if (--bo_gem->map_count == 0)
1567 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1568 return ret;
1569 }
1570 }
1571
1572 bo->virtual = bo_gem->gtt_virtual;
1573
1574 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1575 bo_gem->gtt_virtual);
1576
1577 return 0;
1578 }
1579
1580 int
1581 drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
1582 {
1583 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1584 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1585 struct drm_i915_gem_set_domain set_domain;
1586 int ret;
1587
1588 pthread_mutex_lock(&bufmgr_gem->lock);
1589
1590 ret = map_gtt(bo);
1591 if (ret) {
1592 pthread_mutex_unlock(&bufmgr_gem->lock);
1593 return ret;
1594 }
1595
1596 /* Now move it to the GTT domain so that the GPU and CPU
1597 * caches are flushed and the GPU isn't actively using the
1598 * buffer.
1599 *
1600 * The pagefault handler does this domain change for us when
1601 * it has unbound the BO from the GTT, but it's up to us to
1602 * tell it when we're about to use things if we had done
1603 * rendering and it still happens to be bound to the GTT.
1604 */
1605 memclear(set_domain);
1606 set_domain.handle = bo_gem->gem_handle;
1607 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1608 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1609 ret = drmIoctl(bufmgr_gem->fd,
1610 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1611 &set_domain);
1612 if (ret != 0) {
1613 DBG("%s:%d: Error setting domain %d: %s\n",
1614 __FILE__, __LINE__, bo_gem->gem_handle,
1615 strerror(errno));
1616 }
1617
1618 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1619 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1620 pthread_mutex_unlock(&bufmgr_gem->lock);
1621
1622 return 0;
1623 }
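
/*
 * Caller sketch (illustrative; 'pixels' is assumed to be caller data):
 * writing a tiled buffer through the GTT so the fence hardware performs
 * the detiling/swizzling instead of the CPU.
 *
 *   if (drm_bacon_gem_bo_map_gtt(bo) == 0) {
 *           memcpy(bo->virtual, pixels, bo->size);
 *           drm_bacon_gem_bo_unmap_gtt(bo);
 *   }
 */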
1624
1625 /**
1626 * Performs a mapping of the buffer object like the normal GTT
1627 * mapping, but avoids waiting for the GPU to be done reading from or
1628 * rendering to the buffer.
1629 *
1630 * This is used in the implementation of GL_ARB_map_buffer_range: The
1631 * user asks to create a buffer, then does a mapping, fills some
1632 * space, runs a drawing command, then asks to map it again without
1633 * synchronizing because it guarantees that it won't write over the
1634 * data that the GPU is busy using (or, more specifically, that if it
1635 * does write over the data, it acknowledges that rendering is
1636 * undefined).
1637 */
1638
1639 int
1640 drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
1641 {
1642 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1643 #ifdef HAVE_VALGRIND
1644 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1645 #endif
1646 int ret;
1647
1648 /* If the CPU cache isn't coherent with the GTT, then use a
1649 * regular synchronized mapping. The problem is that we don't
1650 * track where the buffer was last used on the CPU side in
1651 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
1652 * we would potentially corrupt the buffer even when the user
1653 * does reasonable things.
1654 */
1655 if (!bufmgr_gem->has_llc)
1656 return drm_bacon_gem_bo_map_gtt(bo);
1657
1658 pthread_mutex_lock(&bufmgr_gem->lock);
1659
1660 ret = map_gtt(bo);
1661 if (ret == 0) {
1662 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1663 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1664 }
1665
1666 pthread_mutex_unlock(&bufmgr_gem->lock);
1667
1668 return ret;
1669 }
1670
1671 static int drm_bacon_gem_bo_unmap(drm_bacon_bo *bo)
1672 {
1673 drm_bacon_bufmgr_gem *bufmgr_gem;
1674 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1675 int ret = 0;
1676
1677 if (bo == NULL)
1678 return 0;
1679
1680 if (bo_gem->is_userptr)
1681 return 0;
1682
1683 bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1684
1685 pthread_mutex_lock(&bufmgr_gem->lock);
1686
1687 if (bo_gem->map_count <= 0) {
1688 DBG("attempted to unmap an unmapped bo\n");
1689 pthread_mutex_unlock(&bufmgr_gem->lock);
1690 /* Preserve the old behaviour of just treating this as a
1691 * no-op rather than reporting the error.
1692 */
1693 return 0;
1694 }
1695
1696 if (bo_gem->mapped_cpu_write) {
1697 struct drm_i915_gem_sw_finish sw_finish;
1698
1699 /* Cause a flush to happen if the buffer's pinned for
1700 * scanout, so the results show up in a timely manner.
1701 * Unlike GTT set domains, this only does work if the
1702 * buffer should be scanout-related.
1703 */
1704 memclear(sw_finish);
1705 sw_finish.handle = bo_gem->gem_handle;
1706 ret = drmIoctl(bufmgr_gem->fd,
1707 DRM_IOCTL_I915_GEM_SW_FINISH,
1708 &sw_finish);
1709 ret = ret == -1 ? -errno : 0;
1710
1711 bo_gem->mapped_cpu_write = false;
1712 }
1713
1714 /* We need to unmap after every invocation as we cannot track
1715 * an open vma for every bo as that will exhaust the system
1716 * limits and cause later failures.
1717 */
1718 if (--bo_gem->map_count == 0) {
1719 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
1720 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1721 bo->virtual = NULL;
1722 }
1723 pthread_mutex_unlock(&bufmgr_gem->lock);
1724
1725 return ret;
1726 }
1727
1728 int
1729 drm_bacon_gem_bo_unmap_gtt(drm_bacon_bo *bo)
1730 {
1731 return drm_bacon_gem_bo_unmap(bo);
1732 }
1733
1734 static int
1735 drm_bacon_gem_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
1736 unsigned long size, const void *data)
1737 {
1738 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1739 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1740 struct drm_i915_gem_pwrite pwrite;
1741 int ret;
1742
1743 if (bo_gem->is_userptr)
1744 return -EINVAL;
1745
1746 memclear(pwrite);
1747 pwrite.handle = bo_gem->gem_handle;
1748 pwrite.offset = offset;
1749 pwrite.size = size;
1750 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1751 ret = drmIoctl(bufmgr_gem->fd,
1752 DRM_IOCTL_I915_GEM_PWRITE,
1753 &pwrite);
1754 if (ret != 0) {
1755 ret = -errno;
1756 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1757 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1758 (int)size, strerror(errno));
1759 }
1760
1761 return ret;
1762 }
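
/*
 * Caller sketch (illustrative; shown with the implementation name here,
 * real callers go through the bufmgr's subdata hook): uploading a small
 * piece of data without mapping the buffer, via pwrite.  Offset and size
 * are in bytes.
 *
 *   static const float consts[4] = { 0.0f, 0.5f, 1.0f, 2.0f };
 *   drm_bacon_gem_bo_subdata(bo, 0, sizeof(consts), consts);
 */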
1763
1764 static int
1765 drm_bacon_gem_get_pipe_from_crtc_id(drm_bacon_bufmgr *bufmgr, int crtc_id)
1766 {
1767 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
1768 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1769 int ret;
1770
1771 memclear(get_pipe_from_crtc_id);
1772 get_pipe_from_crtc_id.crtc_id = crtc_id;
1773 ret = drmIoctl(bufmgr_gem->fd,
1774 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1775 &get_pipe_from_crtc_id);
1776 if (ret != 0) {
1777 /* We return -1 here to signal that we don't
1778 * know which pipe is associated with this crtc.
1779 * This lets the caller know that this information
1780 * isn't available; using the wrong pipe for
1781 * vblank waiting can cause the chipset to lock up.
1782 */
1783 return -1;
1784 }
1785
1786 return get_pipe_from_crtc_id.pipe;
1787 }
1788
1789 static int
1790 drm_bacon_gem_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
1791 unsigned long size, void *data)
1792 {
1793 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1794 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1795 struct drm_i915_gem_pread pread;
1796 int ret;
1797
1798 if (bo_gem->is_userptr)
1799 return -EINVAL;
1800
1801 memclear(pread);
1802 pread.handle = bo_gem->gem_handle;
1803 pread.offset = offset;
1804 pread.size = size;
1805 pread.data_ptr = (uint64_t) (uintptr_t) data;
1806 ret = drmIoctl(bufmgr_gem->fd,
1807 DRM_IOCTL_I915_GEM_PREAD,
1808 &pread);
1809 if (ret != 0) {
1810 ret = -errno;
1811 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1812 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1813 (int)size, strerror(errno));
1814 }
1815
1816 return ret;
1817 }
1818
1819 /** Waits for all GPU rendering with the object to have completed. */
1820 static void
1821 drm_bacon_gem_bo_wait_rendering(drm_bacon_bo *bo)
1822 {
1823 drm_bacon_gem_bo_start_gtt_access(bo, 1);
1824 }
1825
1826 /**
1827 * Waits on a BO for the given amount of time.
1828 *
1829 * @bo: buffer object to wait for
1830 * @timeout_ns: amount of time to wait in nanoseconds.
1831 * If value is less than 0, an infinite wait will occur.
1832 *
1833 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1834 * object has completed within the allotted time. Otherwise a negative return
1835 * value describes the error; of particular interest is -ETIME, returned when
1836 * the wait expired without the buffer becoming idle.
1837 *
1838 * Similar to drm_bacon_gem_bo_wait_rendering except that a timeout parameter
1839 * allows the operation to give up after a certain amount of time. Another
1840 * subtle difference is in the internal locking semantics: this variant does
1841 * not hold the lock for the duration of the wait, which makes the wait subject
1842 * to a larger userspace race window.
1843 *
1844 * The implementation shall wait until the object is no longer actively
1845 * referenced within a batch buffer at the time of the call. The wait does
1846 * not guard against the buffer being re-issued by another thread or via a
1847 * flinked handle; userspace must make sure this race does not occur if such
1848 * precision is important.
1849 *
1850 * Note that some kernels have broken the promise of an infinite wait for
1851 * negative values; upgrade to the latest stable kernel if this is the case.
1852 */
1853 int
1854 drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
1855 {
1856 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1857 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1858 struct drm_i915_gem_wait wait;
1859 int ret;
1860
1861 if (!bufmgr_gem->has_wait_timeout) {
1862 DBG("%s:%d: Timed wait is not supported. Falling back to "
1863 "infinite wait\n", __FILE__, __LINE__);
1864 if (timeout_ns) {
1865 drm_bacon_gem_bo_wait_rendering(bo);
1866 return 0;
1867 } else {
1868 return drm_bacon_gem_bo_busy(bo) ? -ETIME : 0;
1869 }
1870 }
1871
1872 memclear(wait);
1873 wait.bo_handle = bo_gem->gem_handle;
1874 wait.timeout_ns = timeout_ns;
1875 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1876 if (ret == -1)
1877 return -errno;
1878
1879 return ret;
1880 }
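
/*
 * Caller sketch (hypothetical timeout): waiting up to one second for the
 * GPU to finish with a buffer before reading it back on the CPU.
 *
 *   if (drm_bacon_gem_bo_wait(bo, 1000 * 1000 * 1000) == -ETIME)
 *           fprintf(stderr, "buffer still busy after 1s\n");
 */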
1881
1882 /**
1883 * Sets the object to the GTT read and possibly write domain, used by the X
1884 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
1885 *
1886 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
1887 * can do tiled pixmaps this way.
1888 */
1889 void
1890 drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
1891 {
1892 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1893 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1894 struct drm_i915_gem_set_domain set_domain;
1895 int ret;
1896
1897 memclear(set_domain);
1898 set_domain.handle = bo_gem->gem_handle;
1899 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1900 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1901 ret = drmIoctl(bufmgr_gem->fd,
1902 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1903 &set_domain);
1904 if (ret != 0) {
1905 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1906 __FILE__, __LINE__, bo_gem->gem_handle,
1907 set_domain.read_domains, set_domain.write_domain,
1908 strerror(errno));
1909 }
1910 }
1911
1912 static void
1913 drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
1914 {
1915 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
1916 struct drm_gem_close close_bo;
1917 int i, ret;
1918
1919 free(bufmgr_gem->exec2_objects);
1920 free(bufmgr_gem->exec_objects);
1921 free(bufmgr_gem->exec_bos);
1922
1923 pthread_mutex_destroy(&bufmgr_gem->lock);
1924
1925 /* Free any cached buffer objects we were going to reuse */
1926 for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1927 struct drm_bacon_gem_bo_bucket *bucket =
1928 &bufmgr_gem->cache_bucket[i];
1929 drm_bacon_bo_gem *bo_gem;
1930
1931 while (!DRMLISTEMPTY(&bucket->head)) {
1932 bo_gem = DRMLISTENTRY(drm_bacon_bo_gem,
1933 bucket->head.next, head);
1934 DRMLISTDEL(&bo_gem->head);
1935
1936 drm_bacon_gem_bo_free(&bo_gem->bo);
1937 }
1938 }
1939
1940 /* Release userptr bo kept hanging around for optimisation. */
1941 if (bufmgr_gem->userptr_active.ptr) {
1942 memclear(close_bo);
1943 close_bo.handle = bufmgr_gem->userptr_active.handle;
1944 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1945 free(bufmgr_gem->userptr_active.ptr);
1946 if (ret)
1947 fprintf(stderr,
1948 "Failed to release test userptr object! (%d) "
1949 "i915 kernel driver may not be sane!\n", errno);
1950 }
1951
1952 free(bufmgr);
1953 }
1954
1955 /**
1956 * Adds the target buffer to the validation list and adds the relocation
1957 * to the reloc_buffer's relocation list.
1958 *
1959 * The relocation entry at the given offset must already contain the
1960 * precomputed relocation value, because the kernel will optimize out
1961 * the relocation entry write when the buffer hasn't moved from the
1962 * last known offset in target_bo.
1963 */
1964 static int
1965 do_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1966 drm_bacon_bo *target_bo, uint32_t target_offset,
1967 uint32_t read_domains, uint32_t write_domain,
1968 bool need_fence)
1969 {
1970 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
1971 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1972 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1973 bool fenced_command;
1974
1975 if (bo_gem->has_error)
1976 return -ENOMEM;
1977
1978 if (target_bo_gem->has_error) {
1979 bo_gem->has_error = true;
1980 return -ENOMEM;
1981 }
1982
1983 /* We never use HW fences for rendering on 965+ */
1984 if (bufmgr_gem->gen >= 4)
1985 need_fence = false;
1986
1987 fenced_command = need_fence;
1988 if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1989 need_fence = false;
1990
1991 /* Create a new relocation list if needed */
1992 if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
1993 return -ENOMEM;
1994
1995 /* Check overflow */
1996 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1997
1998 /* Check args */
1999 assert(offset <= bo->size - 4);
2000 assert((write_domain & (write_domain - 1)) == 0);
2001
2002 /* An object needing a fence is a tiled buffer, so it won't have
2003 * relocs to other buffers.
2004 */
2005 if (need_fence) {
2006 assert(target_bo_gem->reloc_count == 0);
2007 target_bo_gem->reloc_tree_fences = 1;
2008 }
2009
2010 /* Make sure that we're not adding a reloc to something whose size has
2011 * already been accounted for.
2012 */
2013 assert(!bo_gem->used_as_reloc_target);
2014 if (target_bo_gem != bo_gem) {
2015 target_bo_gem->used_as_reloc_target = true;
2016 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
2017 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
2018 }
2019
2020 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
2021 if (target_bo != bo)
2022 drm_bacon_gem_bo_reference(target_bo);
2023 if (fenced_command)
2024 bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
2025 DRM_INTEL_RELOC_FENCE;
2026 else
2027 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
2028
2029 bo_gem->relocs[bo_gem->reloc_count].offset = offset;
2030 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
2031 bo_gem->relocs[bo_gem->reloc_count].target_handle =
2032 target_bo_gem->gem_handle;
2033 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
2034 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
2035 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
2036 bo_gem->reloc_count++;
2037
2038 return 0;
2039 }
2040
2041 static void
2042 drm_bacon_gem_bo_use_48b_address_range(drm_bacon_bo *bo, uint32_t enable)
2043 {
2044 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2045
2046 if (enable)
2047 bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2048 else
2049 bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
2050 }
2051
2052 static int
2053 drm_bacon_gem_bo_add_softpin_target(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2054 {
2055 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2056 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2057 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
2058 if (bo_gem->has_error)
2059 return -ENOMEM;
2060
2061 if (target_bo_gem->has_error) {
2062 bo_gem->has_error = true;
2063 return -ENOMEM;
2064 }
2065
2066 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
2067 return -EINVAL;
2068 if (target_bo_gem == bo_gem)
2069 return -EINVAL;
2070
2071 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
2072 int new_size = bo_gem->softpin_target_size * 2;
2073 if (new_size == 0)
2074 new_size = bufmgr_gem->max_relocs;
2075
2076 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
2077 sizeof(drm_bacon_bo *));
2078 if (!bo_gem->softpin_target)
2079 return -ENOMEM;
2080
2081 bo_gem->softpin_target_size = new_size;
2082 }
2083 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
2084 drm_bacon_gem_bo_reference(target_bo);
2085 bo_gem->softpin_target_count++;
2086
2087 return 0;
2088 }
2089
2090 static int
2091 drm_bacon_gem_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
2092 drm_bacon_bo *target_bo, uint32_t target_offset,
2093 uint32_t read_domains, uint32_t write_domain)
2094 {
2095 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
2096 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *)target_bo;
2097
2098 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
2099 return drm_bacon_gem_bo_add_softpin_target(bo, target_bo);
2100 else
2101 return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2102 read_domains, write_domain,
2103 !bufmgr_gem->fenced_relocs);
2104 }
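
/*
 * Illustrative sketch of how a caller (e.g. batchbuffer code) emits a
 * pointer to target_bo through the bo_emit_reloc hook; "batch_map",
 * "batch_offset" and "target_delta" are hypothetical. The presumed address
 * is written into the batch first, so the kernel only rewrites it if
 * target_bo has actually moved.
 *
 *	*(uint32_t *)(batch_map + batch_offset) =
 *		target_bo->offset64 + target_delta;
 *	bo->bufmgr->bo_emit_reloc(bo, batch_offset,
 *				  target_bo, target_delta,
 *				  I915_GEM_DOMAIN_RENDER, 0);
 */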
2105
2106 static int
2107 drm_bacon_gem_bo_emit_reloc_fence(drm_bacon_bo *bo, uint32_t offset,
2108 drm_bacon_bo *target_bo,
2109 uint32_t target_offset,
2110 uint32_t read_domains, uint32_t write_domain)
2111 {
2112 return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
2113 read_domains, write_domain, true);
2114 }
2115
2116 int
2117 drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
2118 {
2119 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2120
2121 return bo_gem->reloc_count;
2122 }
2123
2124 /**
2125 * Removes existing relocation entries in the BO after "start".
2126 *
2127 * This allows a user to avoid a two-step process for state setup with
2128 * counting up all the buffer objects and doing a
2129 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
2130 * relocations for the state setup. Instead, save the state of the
2131 * batchbuffer including drm_bacon_gem_bo_get_reloc_count(), emit all the
2132 * state, and then check if it still fits in the aperture.
2133 *
2134 * Any further drm_bacon_bufmgr_check_aperture_space() queries
2135 * involving this buffer in the tree are undefined after this call.
2136 *
2137 * This also removes all softpinned targets being referenced by the BO.
2138 */
2139 void
2140 drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
2141 {
2142 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2143 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2144 int i;
2145 struct timespec time;
2146
2147 clock_gettime(CLOCK_MONOTONIC, &time);
2148
2149 assert(bo_gem->reloc_count >= start);
2150
2151 /* Unreference the cleared target buffers */
2152 pthread_mutex_lock(&bufmgr_gem->lock);
2153
2154 for (i = start; i < bo_gem->reloc_count; i++) {
2155 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_target_info[i].bo;
2156 if (&target_bo_gem->bo != bo) {
2157 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
2158 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
2159 time.tv_sec);
2160 }
2161 }
2162 bo_gem->reloc_count = start;
2163
2164 for (i = 0; i < bo_gem->softpin_target_count; i++) {
2165 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->softpin_target[i];
2166 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
2167 }
2168 bo_gem->softpin_target_count = 0;
2169
2170 pthread_mutex_unlock(&bufmgr_gem->lock);
2171
2172 }
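
/*
 * Illustrative sketch of the pattern described above (emit_state() and
 * flush_and_retry() are hypothetical, and drm_bacon_bufmgr_check_aperture_space()
 * is the public wrapper named in the comment): snapshot the reloc count,
 * emit state, and roll back if the batch would no longer fit.
 *
 *	int saved = drm_bacon_gem_bo_get_reloc_count(batch_bo);
 *	emit_state(batch_bo);
 *	if (drm_bacon_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
 *		drm_bacon_gem_bo_clear_relocs(batch_bo, saved);
 *		flush_and_retry();
 *	}
 */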
2173
2174 /**
2175 * Walk the tree of relocations rooted at BO and accumulate the list of
2176 * validations to be performed and update the relocation buffers with
2177 * index values into the validation list.
2178 */
2179 static void
2180 drm_bacon_gem_bo_process_reloc(drm_bacon_bo *bo)
2181 {
2182 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2183 int i;
2184
2185 if (bo_gem->relocs == NULL)
2186 return;
2187
2188 for (i = 0; i < bo_gem->reloc_count; i++) {
2189 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2190
2191 if (target_bo == bo)
2192 continue;
2193
2194 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
2195
2196 /* Continue walking the tree depth-first. */
2197 drm_bacon_gem_bo_process_reloc(target_bo);
2198
2199 /* Add the target to the validate list */
2200 drm_bacon_add_validate_buffer(target_bo);
2201 }
2202 }
2203
2204 static void
2205 drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
2206 {
2207 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
2208 int i;
2209
2210 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
2211 return;
2212
2213 for (i = 0; i < bo_gem->reloc_count; i++) {
2214 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
2215 int need_fence;
2216
2217 if (target_bo == bo)
2218 continue;
2219
2220 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
2221
2222 /* Continue walking the tree depth-first. */
2223 drm_bacon_gem_bo_process_reloc2(target_bo);
2224
2225 need_fence = (bo_gem->reloc_target_info[i].flags &
2226 DRM_INTEL_RELOC_FENCE);
2227
2228 /* Add the target to the validate list */
2229 drm_bacon_add_validate_buffer2(target_bo, need_fence);
2230 }
2231
2232 for (i = 0; i < bo_gem->softpin_target_count; i++) {
2233 drm_bacon_bo *target_bo = bo_gem->softpin_target[i];
2234
2235 if (target_bo == bo)
2236 continue;
2237
2238 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
2239 drm_bacon_gem_bo_process_reloc2(target_bo);
2240 drm_bacon_add_validate_buffer2(target_bo, false);
2241 }
2242 }
2243
2244
2245 static void
2246 drm_bacon_update_buffer_offsets(drm_bacon_bufmgr_gem *bufmgr_gem)
2247 {
2248 int i;
2249
2250 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2251 drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
2252 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2253
2254 /* Update the buffer offset */
2255 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
2256 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2257 bo_gem->gem_handle, bo_gem->name,
2258 upper_32_bits(bo->offset64),
2259 lower_32_bits(bo->offset64),
2260 upper_32_bits(bufmgr_gem->exec_objects[i].offset),
2261 lower_32_bits(bufmgr_gem->exec_objects[i].offset));
2262 bo->offset64 = bufmgr_gem->exec_objects[i].offset;
2263 bo->offset = bufmgr_gem->exec_objects[i].offset;
2264 }
2265 }
2266 }
2267
2268 static void
2269 drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr_gem *bufmgr_gem)
2270 {
2271 int i;
2272
2273 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2274 drm_bacon_bo *bo = bufmgr_gem->exec_bos[i];
2275 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
2276
2277 /* Update the buffer offset */
2278 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2279 /* If we're seeing a softpinned object here it means that the
2280 * kernel has relocated our object, indicating a programming error.
2281 */
2282 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
2283 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
2284 bo_gem->gem_handle, bo_gem->name,
2285 upper_32_bits(bo->offset64),
2286 lower_32_bits(bo->offset64),
2287 upper_32_bits(bufmgr_gem->exec2_objects[i].offset),
2288 lower_32_bits(bufmgr_gem->exec2_objects[i].offset));
2289 bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2290 bo->offset = bufmgr_gem->exec2_objects[i].offset;
2291 }
2292 }
2293 }
2294
2295 static int
2296 drm_bacon_gem_bo_exec(drm_bacon_bo *bo, int used,
2297 drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
2298 {
2299 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2300 struct drm_i915_gem_execbuffer execbuf;
2301 int ret, i;
2302
2303 if (to_bo_gem(bo)->has_error)
2304 return -ENOMEM;
2305
2306 pthread_mutex_lock(&bufmgr_gem->lock);
2307 /* Update indices and set up the validate list. */
2308 drm_bacon_gem_bo_process_reloc(bo);
2309
2310 /* Add the batch buffer to the validation list. There are no
2311 * relocations pointing to it.
2312 */
2313 drm_bacon_add_validate_buffer(bo);
2314
2315 memclear(execbuf);
2316 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
2317 execbuf.buffer_count = bufmgr_gem->exec_count;
2318 execbuf.batch_start_offset = 0;
2319 execbuf.batch_len = used;
2320 execbuf.cliprects_ptr = (uintptr_t) cliprects;
2321 execbuf.num_cliprects = num_cliprects;
2322 execbuf.DR1 = 0;
2323 execbuf.DR4 = DR4;
2324
2325 ret = drmIoctl(bufmgr_gem->fd,
2326 DRM_IOCTL_I915_GEM_EXECBUFFER,
2327 &execbuf);
2328 if (ret != 0) {
2329 ret = -errno;
2330 if (errno == ENOSPC) {
2331 DBG("Execbuffer fails to pin. "
2332 "Estimate: %u. Actual: %u. Available: %u\n",
2333 drm_bacon_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2334 bufmgr_gem->
2335 exec_count),
2336 drm_bacon_gem_compute_batch_space(bufmgr_gem->exec_bos,
2337 bufmgr_gem->
2338 exec_count),
2339 (unsigned int)bufmgr_gem->gtt_size);
2340 }
2341 }
2342 drm_bacon_update_buffer_offsets(bufmgr_gem);
2343
2344 if (bufmgr_gem->bufmgr.debug)
2345 drm_bacon_gem_dump_validation_list(bufmgr_gem);
2346
2347 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2348 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2349
2350 bo_gem->idle = false;
2351
2352 /* Disconnect the buffer from the validate list */
2353 bo_gem->validate_index = -1;
2354 bufmgr_gem->exec_bos[i] = NULL;
2355 }
2356 bufmgr_gem->exec_count = 0;
2357 pthread_mutex_unlock(&bufmgr_gem->lock);
2358
2359 return ret;
2360 }
2361
2362 static int
2363 do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
2364 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2365 int in_fence, int *out_fence,
2366 unsigned int flags)
2367 {
2368 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bo->bufmgr;
2369 struct drm_i915_gem_execbuffer2 execbuf;
2370 int ret = 0;
2371 int i;
2372
2373 if (to_bo_gem(bo)->has_error)
2374 return -ENOMEM;
2375
2376 switch (flags & 0x7) {
2377 default:
2378 return -EINVAL;
2379 case I915_EXEC_BLT:
2380 if (!bufmgr_gem->has_blt)
2381 return -EINVAL;
2382 break;
2383 case I915_EXEC_BSD:
2384 if (!bufmgr_gem->has_bsd)
2385 return -EINVAL;
2386 break;
2387 case I915_EXEC_VEBOX:
2388 if (!bufmgr_gem->has_vebox)
2389 return -EINVAL;
2390 break;
2391 case I915_EXEC_RENDER:
2392 case I915_EXEC_DEFAULT:
2393 break;
2394 }
2395
2396 pthread_mutex_lock(&bufmgr_gem->lock);
2397 /* Update indices and set up the validate list. */
2398 drm_bacon_gem_bo_process_reloc2(bo);
2399
2400 /* Add the batch buffer to the validation list. There are no relocations
2401 * pointing to it.
2402 */
2403 drm_bacon_add_validate_buffer2(bo, 0);
2404
2405 memclear(execbuf);
2406 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2407 execbuf.buffer_count = bufmgr_gem->exec_count;
2408 execbuf.batch_start_offset = 0;
2409 execbuf.batch_len = used;
2410 execbuf.cliprects_ptr = (uintptr_t)cliprects;
2411 execbuf.num_cliprects = num_cliprects;
2412 execbuf.DR1 = 0;
2413 execbuf.DR4 = DR4;
2414 execbuf.flags = flags;
2415 if (ctx == NULL)
2416 i915_execbuffer2_set_context_id(execbuf, 0);
2417 else
2418 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2419 execbuf.rsvd2 = 0;
2420 if (in_fence != -1) {
2421 execbuf.rsvd2 = in_fence;
2422 execbuf.flags |= I915_EXEC_FENCE_IN;
2423 }
2424 if (out_fence != NULL) {
2425 *out_fence = -1;
2426 execbuf.flags |= I915_EXEC_FENCE_OUT;
2427 }
2428
2429 if (bufmgr_gem->no_exec)
2430 goto skip_execution;
2431
2432 ret = drmIoctl(bufmgr_gem->fd,
2433 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2434 &execbuf);
2435 if (ret != 0) {
2436 ret = -errno;
2437 if (ret == -ENOSPC) {
2438 DBG("Execbuffer fails to pin. "
2439 "Estimate: %u. Actual: %u. Available: %u\n",
2440 drm_bacon_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2441 bufmgr_gem->exec_count),
2442 drm_bacon_gem_compute_batch_space(bufmgr_gem->exec_bos,
2443 bufmgr_gem->exec_count),
2444 (unsigned int) bufmgr_gem->gtt_size);
2445 }
2446 }
2447 drm_bacon_update_buffer_offsets2(bufmgr_gem);
2448
2449 if (ret == 0 && out_fence != NULL)
2450 *out_fence = execbuf.rsvd2 >> 32;
2451
2452 skip_execution:
2453 if (bufmgr_gem->bufmgr.debug)
2454 drm_bacon_gem_dump_validation_list(bufmgr_gem);
2455
2456 for (i = 0; i < bufmgr_gem->exec_count; i++) {
2457 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]);
2458
2459 bo_gem->idle = false;
2460
2461 /* Disconnect the buffer from the validate list */
2462 bo_gem->validate_index = -1;
2463 bufmgr_gem->exec_bos[i] = NULL;
2464 }
2465 bufmgr_gem->exec_count = 0;
2466 pthread_mutex_unlock(&bufmgr_gem->lock);
2467
2468 return ret;
2469 }
2470
2471 static int
2472 drm_bacon_gem_bo_exec2(drm_bacon_bo *bo, int used,
2473 drm_clip_rect_t *cliprects, int num_cliprects,
2474 int DR4)
2475 {
2476 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2477 -1, NULL, I915_EXEC_RENDER);
2478 }
2479
2480 static int
2481 drm_bacon_gem_bo_mrb_exec2(drm_bacon_bo *bo, int used,
2482 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2483 unsigned int flags)
2484 {
2485 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2486 -1, NULL, flags);
2487 }
2488
2489 int
2490 drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
2491 int used, unsigned int flags)
2492 {
2493 return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags);
2494 }
2495
2496 int
2497 drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
2498 drm_bacon_context *ctx,
2499 int used,
2500 int in_fence,
2501 int *out_fence,
2502 unsigned int flags)
2503 {
2504 return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags);
2505 }
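
/*
 * Illustrative sketch of a fenced submission ("batch", "ctx", "used" and the
 * fds are hypothetical): gate execution on in_fd and collect a fence fd for
 * the completion of this batch in out_fd.
 *
 *	int out_fd = -1;
 *	ret = drm_bacon_gem_bo_fence_exec(batch, ctx, used, in_fd, &out_fd,
 *					  I915_EXEC_RENDER);
 *	if (ret == 0 && out_fd != -1)
 *		... hand out_fd to the consumer, then close(out_fd) ...
 */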
2506
2507 static int
2508 drm_bacon_gem_bo_pin(drm_bacon_bo *bo, uint32_t alignment)
2509 {
2510 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2511 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2512 struct drm_i915_gem_pin pin;
2513 int ret;
2514
2515 memclear(pin);
2516 pin.handle = bo_gem->gem_handle;
2517 pin.alignment = alignment;
2518
2519 ret = drmIoctl(bufmgr_gem->fd,
2520 DRM_IOCTL_I915_GEM_PIN,
2521 &pin);
2522 if (ret != 0)
2523 return -errno;
2524
2525 bo->offset64 = pin.offset;
2526 bo->offset = pin.offset;
2527 return 0;
2528 }
2529
2530 static int
2531 drm_bacon_gem_bo_unpin(drm_bacon_bo *bo)
2532 {
2533 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2534 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2535 struct drm_i915_gem_unpin unpin;
2536 int ret;
2537
2538 memclear(unpin);
2539 unpin.handle = bo_gem->gem_handle;
2540
2541 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2542 if (ret != 0)
2543 return -errno;
2544
2545 return 0;
2546 }
2547
2548 static int
2549 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
2550 uint32_t tiling_mode,
2551 uint32_t stride)
2552 {
2553 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2554 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2555 struct drm_i915_gem_set_tiling set_tiling;
2556 int ret;
2557
2558 if (bo_gem->global_name == 0 &&
2559 tiling_mode == bo_gem->tiling_mode &&
2560 stride == bo_gem->stride)
2561 return 0;
2562
2563 memset(&set_tiling, 0, sizeof(set_tiling));
2564 do {
2565 /* set_tiling is slightly broken and overwrites the
2566 * input on the error path, so we have to open code
2567 * drmIoctl.
2568 */
2569 set_tiling.handle = bo_gem->gem_handle;
2570 set_tiling.tiling_mode = tiling_mode;
2571 set_tiling.stride = stride;
2572
2573 ret = ioctl(bufmgr_gem->fd,
2574 DRM_IOCTL_I915_GEM_SET_TILING,
2575 &set_tiling);
2576 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2577 if (ret == -1)
2578 return -errno;
2579
2580 bo_gem->tiling_mode = set_tiling.tiling_mode;
2581 bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2582 bo_gem->stride = set_tiling.stride;
2583 return 0;
2584 }
2585
2586 static int
2587 drm_bacon_gem_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2588 uint32_t stride)
2589 {
2590 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2591 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2592 int ret;
2593
2594 /* Tiling with userptr surfaces is not supported
2595 * on all hardware, so refuse it for the time being.
2596 */
2597 if (bo_gem->is_userptr)
2598 return -EINVAL;
2599
2600 /* Linear buffers have no stride. By ensuring that we only ever use
2601 * stride 0 with linear buffers, we simplify our code.
2602 */
2603 if (*tiling_mode == I915_TILING_NONE)
2604 stride = 0;
2605
2606 ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2607 if (ret == 0)
2608 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2609
2610 *tiling_mode = bo_gem->tiling_mode;
2611 return ret;
2612 }
2613
2614 static int
2615 drm_bacon_gem_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2616 uint32_t * swizzle_mode)
2617 {
2618 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2619
2620 *tiling_mode = bo_gem->tiling_mode;
2621 *swizzle_mode = bo_gem->swizzle_mode;
2622 return 0;
2623 }
2624
2625 static int
2626 drm_bacon_gem_bo_set_softpin_offset(drm_bacon_bo *bo, uint64_t offset)
2627 {
2628 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2629
2630 bo->offset64 = offset;
2631 bo->offset = offset;
2632 bo_gem->kflags |= EXEC_OBJECT_PINNED;
2633
2634 return 0;
2635 }
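
/*
 * Illustrative softpin sketch (the address and "batch_offset" are
 * hypothetical): once a buffer has a pinned GPU address, emit_reloc records
 * it as a softpin target instead of a relocation, and the kernel must not
 * move it.
 *
 *	drm_bacon_gem_bo_set_softpin_offset(target_bo, 4 * 1024 * 1024);
 *	drm_bacon_gem_bo_use_48b_address_range(target_bo, 1);
 *	bo->bufmgr->bo_emit_reloc(bo, batch_offset, target_bo, 0,
 *				  I915_GEM_DOMAIN_RENDER, 0);
 */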
2636
2637 drm_bacon_bo *
2638 drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
2639 {
2640 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
2641 int ret;
2642 uint32_t handle;
2643 drm_bacon_bo_gem *bo_gem;
2644 struct drm_i915_gem_get_tiling get_tiling;
2645
2646 pthread_mutex_lock(&bufmgr_gem->lock);
2647 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2648 if (ret) {
2649 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2650 pthread_mutex_unlock(&bufmgr_gem->lock);
2651 return NULL;
2652 }
2653
2654 /*
2655 * See if the kernel has already returned this buffer to us. Just as
2656 * for named buffers, we must not create two bo's pointing at the same
2657 * kernel object
2658 */
2659 HASH_FIND(handle_hh, bufmgr_gem->handle_table,
2660 &handle, sizeof(handle), bo_gem);
2661 if (bo_gem) {
2662 drm_bacon_gem_bo_reference(&bo_gem->bo);
2663 goto out;
2664 }
2665
2666 bo_gem = calloc(1, sizeof(*bo_gem));
2667 if (!bo_gem)
2668 goto out;
2669
2670 p_atomic_set(&bo_gem->refcount, 1);
2671 DRMINITLISTHEAD(&bo_gem->vma_list);
2672
2673 /* Determine size of bo. The fd-to-handle ioctl really should
2674 * return the size, but it doesn't. If we have kernel 3.12 or
2675 * later, we can lseek on the prime fd to get the size. Older
2676 * kernels will just fail, in which case we fall back to the
2677 * provided (estimated or guessed) size. */
2678 ret = lseek(prime_fd, 0, SEEK_END);
2679 if (ret != -1)
2680 bo_gem->bo.size = ret;
2681 else
2682 bo_gem->bo.size = size;
2683
2684 bo_gem->bo.handle = handle;
2685 bo_gem->bo.bufmgr = bufmgr;
2686
2687 bo_gem->gem_handle = handle;
2688 HASH_ADD(handle_hh, bufmgr_gem->handle_table,
2689 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2690
2691 bo_gem->name = "prime";
2692 bo_gem->validate_index = -1;
2693 bo_gem->reloc_tree_fences = 0;
2694 bo_gem->used_as_reloc_target = false;
2695 bo_gem->has_error = false;
2696 bo_gem->reusable = false;
2697
2698 memclear(get_tiling);
2699 get_tiling.handle = bo_gem->gem_handle;
2700 if (drmIoctl(bufmgr_gem->fd,
2701 DRM_IOCTL_I915_GEM_GET_TILING,
2702 &get_tiling))
2703 goto err;
2704
2705 bo_gem->tiling_mode = get_tiling.tiling_mode;
2706 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2707 /* XXX stride is unknown */
2708 drm_bacon_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0);
2709
2710 out:
2711 pthread_mutex_unlock(&bufmgr_gem->lock);
2712 return &bo_gem->bo;
2713
2714 err:
2715 drm_bacon_gem_bo_free(&bo_gem->bo);
2716 pthread_mutex_unlock(&bufmgr_gem->lock);
2717 return NULL;
2718 }
2719
2720 int
2721 drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
2722 {
2723 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2724 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2725
2726 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2727 DRM_CLOEXEC, prime_fd) != 0)
2728 return -errno;
2729
2730 bo_gem->reusable = false;
2731
2732 return 0;
2733 }
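
/*
 * Illustrative dma-buf round trip (the fds and "size" are hypothetical):
 * export a buffer as a prime fd for another process or device, or import a
 * foreign fd; importing an fd that resolves to an already known GEM handle
 * returns the existing BO rather than a duplicate.
 *
 *	int fd;
 *	if (drm_bacon_bo_gem_export_to_prime(bo, &fd) == 0)
 *		... send fd over a socket, then close(fd) locally ...
 *
 *	drm_bacon_bo *imported =
 *		drm_bacon_bo_gem_create_from_prime(bufmgr, received_fd, size);
 */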
2734
2735 static int
2736 drm_bacon_gem_bo_flink(drm_bacon_bo *bo, uint32_t * name)
2737 {
2738 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
2739 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2740
2741 if (!bo_gem->global_name) {
2742 struct drm_gem_flink flink;
2743
2744 memclear(flink);
2745 flink.handle = bo_gem->gem_handle;
2746 if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink))
2747 return -errno;
2748
2749 pthread_mutex_lock(&bufmgr_gem->lock);
2750 if (!bo_gem->global_name) {
2751 bo_gem->global_name = flink.name;
2752 bo_gem->reusable = false;
2753
2754 HASH_ADD(name_hh, bufmgr_gem->name_table,
2755 global_name, sizeof(bo_gem->global_name),
2756 bo_gem);
2757 }
2758 pthread_mutex_unlock(&bufmgr_gem->lock);
2759 }
2760
2761 *name = bo_gem->global_name;
2762 return 0;
2763 }
2764
2765 /**
2766 * Enables unlimited caching of buffer objects for reuse.
2767 *
2768 * This is potentially very memory expensive, as the cache at each bucket
2769 * size is only bounded by how many buffers of that size we've managed to have
2770 * in flight at once.
2771 */
2772 void
2773 drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
2774 {
2775 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
2776
2777 bufmgr_gem->bo_reuse = true;
2778 }
2779
2780 /**
2781 * Disables implicit synchronisation before executing the bo
2782 *
2783 * This will cause rendering corruption unless you correctly manage explicit
2784 * fences for all rendering involving this buffer - including use by others.
2785 * Disabling the implicit serialisation is only required if that serialisation
2786 * is too coarse (for example, you have split the buffer into many
2787 * non-overlapping regions and are sharing the whole buffer between concurrent
2788 * independent command streams).
2789 *
2790 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2791 * which can be checked using drm_bacon_bufmgr_gem_can_disable_implicit_sync,
2792 * or subsequent execbufs involving the bo will generate EINVAL.
2793 */
2794 void
2795 drm_bacon_gem_bo_disable_implicit_sync(drm_bacon_bo *bo)
2796 {
2797 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2798
2799 bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2800 }
2801
2802 /**
2803 * Enables implicit synchronisation before executing the bo
2804 *
2805 * This is the default behaviour of the kernel, to wait upon prior writes
2806 * completing on the object before rendering with it, or to wait for prior
2807 * reads to complete before writing into the object.
2808 * drm_bacon_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2809 * the kernel never to insert a stall before using the object. Then this
2810 * function can be used to restore the implicit sync before subsequent
2811 * rendering.
2812 */
2813 void
2814 drm_bacon_gem_bo_enable_implicit_sync(drm_bacon_bo *bo)
2815 {
2816 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2817
2818 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2819 }
2820
2821 /**
2822 * Query whether the kernel supports disabling of its implicit synchronisation
2823 * before execbuf. See drm_bacon_gem_bo_disable_implicit_sync()
2824 */
2825 int
2826 drm_bacon_bufmgr_gem_can_disable_implicit_sync(drm_bacon_bufmgr *bufmgr)
2827 {
2828 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bufmgr;
2829
2830 return bufmgr_gem->has_exec_async;
2831 }
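
/*
 * Illustrative use of explicit fencing (a sketch, not driver policy): only
 * opt a buffer out of implicit synchronisation when the kernel advertises
 * EXEC_ASYNC support, otherwise execbuf would reject it with EINVAL.
 *
 *	if (drm_bacon_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *		drm_bacon_gem_bo_disable_implicit_sync(bo);
 */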
2832
2833 /**
2834 * Enable use of fenced reloc type.
2835 *
2836 * New code should enable this to avoid unnecessary fence register
2837 * allocation. If this option is not enabled, all relocs will have a fence
2838 * register allocated.
2839 */
2840 void
2841 drm_bacon_bufmgr_gem_enable_fenced_relocs(drm_bacon_bufmgr *bufmgr)
2842 {
2843 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
2844
2845 if (bufmgr_gem->bufmgr.bo_exec == drm_bacon_gem_bo_exec2)
2846 bufmgr_gem->fenced_relocs = true;
2847 }
2848
2849 /**
2850 * Return the additional aperture space required by the tree of buffer objects
2851 * rooted at bo.
2852 */
2853 static int
2854 drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
2855 {
2856 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2857 int i;
2858 int total = 0;
2859
2860 if (bo == NULL || bo_gem->included_in_check_aperture)
2861 return 0;
2862
2863 total += bo->size;
2864 bo_gem->included_in_check_aperture = true;
2865
2866 for (i = 0; i < bo_gem->reloc_count; i++)
2867 total +=
2868 drm_bacon_gem_bo_get_aperture_space(bo_gem->
2869 reloc_target_info[i].bo);
2870
2871 return total;
2872 }
2873
2874 /**
2875 * Count the number of buffers in this list that need a fence reg
2876 *
2877 * If the count is greater than the number of available regs, we'll have
2878 * to ask the caller to resubmit a batch with fewer tiled buffers.
2879 *
2880 * This function over-counts if the same buffer is used multiple times.
2881 */
2882 static unsigned int
2883 drm_bacon_gem_total_fences(drm_bacon_bo ** bo_array, int count)
2884 {
2885 int i;
2886 unsigned int total = 0;
2887
2888 for (i = 0; i < count; i++) {
2889 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2890
2891 if (bo_gem == NULL)
2892 continue;
2893
2894 total += bo_gem->reloc_tree_fences;
2895 }
2896 return total;
2897 }
2898
2899 /**
2900 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
2901 * for the next drm_bacon_bufmgr_check_aperture_space() call.
2902 */
2903 static void
2904 drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
2905 {
2906 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2907 int i;
2908
2909 if (bo == NULL || !bo_gem->included_in_check_aperture)
2910 return;
2911
2912 bo_gem->included_in_check_aperture = false;
2913
2914 for (i = 0; i < bo_gem->reloc_count; i++)
2915 drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->
2916 reloc_target_info[i].bo);
2917 }
2918
2919 /**
2920 * Return a conservative estimate for the amount of aperture required
2921 * for a collection of buffers. This may double-count some buffers.
2922 */
2923 static unsigned int
2924 drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
2925 {
2926 int i;
2927 unsigned int total = 0;
2928
2929 for (i = 0; i < count; i++) {
2930 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2931 if (bo_gem != NULL)
2932 total += bo_gem->reloc_tree_size;
2933 }
2934 return total;
2935 }
2936
2937 /**
2938 * Return the amount of aperture needed for a collection of buffers.
2939 * This avoids double counting any buffers, at the cost of looking
2940 * at every buffer in the set.
2941 */
2942 static unsigned int
2943 drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
2944 {
2945 int i;
2946 unsigned int total = 0;
2947
2948 for (i = 0; i < count; i++) {
2949 total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
2950 /* For the first buffer object in the array, we get an
2951 * accurate count back for its reloc_tree size (since nothing
2952 * had been flagged as being counted yet). We can save that
2953 * value out as a more conservative reloc_tree_size that
2954 * avoids double-counting target buffers. Since the first
2955 * buffer happens to usually be the batch buffer in our
2956 * callers, this can pull us back from doing the tree
2957 * walk on every new batch emit.
2958 */
2959 if (i == 0) {
2960 drm_bacon_bo_gem *bo_gem =
2961 (drm_bacon_bo_gem *) bo_array[i];
2962 bo_gem->reloc_tree_size = total;
2963 }
2964 }
2965
2966 for (i = 0; i < count; i++)
2967 drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
2968 return total;
2969 }
2970
2971 /**
2972 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
2973 * emit rendering referencing the buffers pointed to by bo_array.
2974 *
2975 * This is required because if we try to emit a batchbuffer with relocations
2976 * to a tree of buffers that won't simultaneously fit in the aperture,
2977 * the rendering will return an error at a point where the software is not
2978 * prepared to recover from it.
2979 *
2980 * However, we also want to emit the batchbuffer significantly before we reach
2981 * the limit, as a series of batchbuffers each of which references buffers
2982 * covering almost all of the aperture means that at each emit we end up
2983 * waiting to evict a buffer from the last rendering, so performance becomes
2984 * effectively synchronous. By emitting smaller batchbuffers, we eat some CPU overhead to
2985 * get better parallelism.
2986 */
2987 static int
2988 drm_bacon_gem_check_aperture_space(drm_bacon_bo **bo_array, int count)
2989 {
2990 drm_bacon_bufmgr_gem *bufmgr_gem =
2991 (drm_bacon_bufmgr_gem *) bo_array[0]->bufmgr;
2992 unsigned int total = 0;
2993 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2994 int total_fences;
2995
2996 /* Check for fence reg constraints if necessary */
2997 if (bufmgr_gem->available_fences) {
2998 total_fences = drm_bacon_gem_total_fences(bo_array, count);
2999 if (total_fences > bufmgr_gem->available_fences)
3000 return -ENOSPC;
3001 }
3002
3003 total = drm_bacon_gem_estimate_batch_space(bo_array, count);
3004
3005 if (total > threshold)
3006 total = drm_bacon_gem_compute_batch_space(bo_array, count);
3007
3008 if (total > threshold) {
3009 DBG("check_space: overflowed available aperture, "
3010 "%dkb vs %dkb\n",
3011 total / 1024, (int)bufmgr_gem->gtt_size / 1024);
3012 return -ENOSPC;
3013 } else {
3014 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
3015 (int)bufmgr_gem->gtt_size / 1024);
3016 return 0;
3017 }
3018 }
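
/*
 * Illustrative caller-side pattern ("flush_batch" is hypothetical; real
 * callers go through the check_aperture_space hook): check the reloc tree
 * rooted at the batch before emitting more state, and flush early rather
 * than risk a failed execbuf.
 *
 *	if (drm_bacon_gem_check_aperture_space(&batch_bo, 1) != 0)
 *		flush_batch();
 */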
3019
3020 /*
3021 * Disable buffer reuse for objects which are shared with the kernel
3022 * as scanout buffers
3023 */
3024 static int
3025 drm_bacon_gem_bo_disable_reuse(drm_bacon_bo *bo)
3026 {
3027 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3028
3029 bo_gem->reusable = false;
3030 return 0;
3031 }
3032
3033 static int
3034 drm_bacon_gem_bo_is_reusable(drm_bacon_bo *bo)
3035 {
3036 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3037
3038 return bo_gem->reusable;
3039 }
3040
3041 static int
3042 _drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
3043 {
3044 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3045 int i;
3046
3047 for (i = 0; i < bo_gem->reloc_count; i++) {
3048 if (bo_gem->reloc_target_info[i].bo == target_bo)
3049 return 1;
3050 if (bo == bo_gem->reloc_target_info[i].bo)
3051 continue;
3052 if (_drm_bacon_gem_bo_references(bo_gem->reloc_target_info[i].bo,
3053 target_bo))
3054 return 1;
3055 }
3056
3057 for (i = 0; i< bo_gem->softpin_target_count; i++) {
3058 if (bo_gem->softpin_target[i] == target_bo)
3059 return 1;
3060 if (_drm_bacon_gem_bo_references(bo_gem->softpin_target[i], target_bo))
3061 return 1;
3062 }
3063
3064 return 0;
3065 }
3066
3067 /** Return true if target_bo is referenced by bo's relocation tree. */
3068 static int
3069 drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
3070 {
3071 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
3072
3073 if (bo == NULL || target_bo == NULL)
3074 return 0;
3075 if (target_bo_gem->used_as_reloc_target)
3076 return _drm_bacon_gem_bo_references(bo, target_bo);
3077 return 0;
3078 }
3079
3080 static void
3081 add_bucket(drm_bacon_bufmgr_gem *bufmgr_gem, int size)
3082 {
3083 unsigned int i = bufmgr_gem->num_buckets;
3084
3085 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3086
3087 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3088 bufmgr_gem->cache_bucket[i].size = size;
3089 bufmgr_gem->num_buckets++;
3090 }
3091
3092 static void
3093 init_cache_buckets(drm_bacon_bufmgr_gem *bufmgr_gem)
3094 {
3095 unsigned long size, cache_max_size = 64 * 1024 * 1024;
3096
3097 /* OK, so power of two buckets was too wasteful of memory.
3098 * Give 3 other sizes between each power of two, to hopefully
3099 * cover things accurately enough. (The alternative is
3100 * probably to just go for exact matching of sizes, and assume
3101 * that for things like composited window resize the tiled
3102 * width/height alignment and rounding of sizes to pages will
3103 * get us useful cache hit rates anyway)
3104 */
3105 add_bucket(bufmgr_gem, 4096);
3106 add_bucket(bufmgr_gem, 4096 * 2);
3107 add_bucket(bufmgr_gem, 4096 * 3);
3108
3109 /* Initialize the linked lists for BO reuse cache. */
3110 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3111 add_bucket(bufmgr_gem, size);
3112
3113 add_bucket(bufmgr_gem, size + size * 1 / 4);
3114 add_bucket(bufmgr_gem, size + size * 2 / 4);
3115 add_bucket(bufmgr_gem, size + size * 3 / 4);
3116 }
3117 }
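
/*
 * With the scheme above the bucket ladder works out to 4, 8, 12, 16, 20, 24,
 * 28, 32, 40, 48, 56, 64 KiB and so on: each power of two plus three evenly
 * spaced intermediate sizes in between.
 */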
3118
3119 void
3120 drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
3121 {
3122 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3123
3124 bufmgr_gem->vma_max = limit;
3125
3126 drm_bacon_gem_bo_purge_vma_cache(bufmgr_gem);
3127 }
3128
3129 static int
3130 parse_devid_override(const char *devid_override)
3131 {
3132 static const struct {
3133 const char *name;
3134 int pci_id;
3135 } name_map[] = {
3136 { "brw", PCI_CHIP_I965_GM },
3137 { "g4x", PCI_CHIP_GM45_GM },
3138 { "ilk", PCI_CHIP_ILD_G },
3139 { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
3140 { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
3141 { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
3142 { "byt", PCI_CHIP_VALLEYVIEW_3 },
3143 { "bdw", 0x1620 | BDW_ULX },
3144 { "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
3145 { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
3146 };
3147 unsigned int i;
3148
3149 for (i = 0; i < ARRAY_SIZE(name_map); i++) {
3150 if (!strcmp(name_map[i].name, devid_override))
3151 return name_map[i].pci_id;
3152 }
3153
3154 return strtol(devid_override, NULL, 0);
3155 }
3156
3157 /**
3158 * Get the PCI ID for the device. This can be overridden by setting the
3159 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3160 */
3161 static int
3162 get_pci_device_id(drm_bacon_bufmgr_gem *bufmgr_gem)
3163 {
3164 char *devid_override;
3165 int devid = 0;
3166 int ret;
3167 drm_i915_getparam_t gp;
3168
3169 if (geteuid() == getuid()) {
3170 devid_override = getenv("INTEL_DEVID_OVERRIDE");
3171 if (devid_override) {
3172 bufmgr_gem->no_exec = true;
3173 return parse_devid_override(devid_override);
3174 }
3175 }
3176
3177 memclear(gp);
3178 gp.param = I915_PARAM_CHIPSET_ID;
3179 gp.value = &devid;
3180 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3181 if (ret) {
3182 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3183 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3184 }
3185 return devid;
3186 }
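
/*
 * Illustrative override (the values are examples only): running with
 * INTEL_DEVID_OVERRIDE=skl, or with a raw PCI ID such as
 * INTEL_DEVID_OVERRIDE=0x1912, makes the buffer manager report that device
 * and sets no_exec, so execbuffer2 submissions are skipped.
 */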
3187
3188 int
3189 drm_bacon_bufmgr_gem_get_devid(drm_bacon_bufmgr *bufmgr)
3190 {
3191 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3192
3193 return bufmgr_gem->pci_device;
3194 }
3195
3196 drm_bacon_context *
3197 drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
3198 {
3199 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3200 struct drm_i915_gem_context_create create;
3201 drm_bacon_context *context = NULL;
3202 int ret;
3203
3204 context = calloc(1, sizeof(*context));
3205 if (!context)
3206 return NULL;
3207
3208 memclear(create);
3209 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3210 if (ret != 0) {
3211 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3212 strerror(errno));
3213 free(context);
3214 return NULL;
3215 }
3216
3217 context->ctx_id = create.ctx_id;
3218 context->bufmgr = bufmgr;
3219
3220 return context;
3221 }
3222
3223 int
3224 drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
3225 {
3226 if (ctx == NULL)
3227 return -EINVAL;
3228
3229 *ctx_id = ctx->ctx_id;
3230
3231 return 0;
3232 }
3233
3234 void
3235 drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
3236 {
3237 drm_bacon_bufmgr_gem *bufmgr_gem;
3238 struct drm_i915_gem_context_destroy destroy;
3239 int ret;
3240
3241 if (ctx == NULL)
3242 return;
3243
3244 memclear(destroy);
3245
3246 bufmgr_gem = (drm_bacon_bufmgr_gem *)ctx->bufmgr;
3247 destroy.ctx_id = ctx->ctx_id;
3248 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3249 &destroy);
3250 if (ret != 0)
3251 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3252 strerror(errno));
3253
3254 free(ctx);
3255 }
3256
3257 int
3258 drm_bacon_get_reset_stats(drm_bacon_context *ctx,
3259 uint32_t *reset_count,
3260 uint32_t *active,
3261 uint32_t *pending)
3262 {
3263 drm_bacon_bufmgr_gem *bufmgr_gem;
3264 struct drm_i915_reset_stats stats;
3265 int ret;
3266
3267 if (ctx == NULL)
3268 return -EINVAL;
3269
3270 memclear(stats);
3271
3272 bufmgr_gem = (drm_bacon_bufmgr_gem *)ctx->bufmgr;
3273 stats.ctx_id = ctx->ctx_id;
3274 ret = drmIoctl(bufmgr_gem->fd,
3275 DRM_IOCTL_I915_GET_RESET_STATS,
3276 &stats);
3277 if (ret == 0) {
3278 if (reset_count != NULL)
3279 *reset_count = stats.reset_count;
3280
3281 if (active != NULL)
3282 *active = stats.batch_active;
3283
3284 if (pending != NULL)
3285 *pending = stats.batch_pending;
3286 }
3287
3288 return ret;
3289 }
3290
3291 int
3292 drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
3293 uint32_t offset,
3294 uint64_t *result)
3295 {
3296 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3297 struct drm_i915_reg_read reg_read;
3298 int ret;
3299
3300 memclear(reg_read);
3301 reg_read.offset = offset;
3302
3303 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3304
3305 *result = reg_read.val;
3306 return ret;
3307 }
3308
3309 int
3310 drm_bacon_get_subslice_total(int fd, unsigned int *subslice_total)
3311 {
3312 drm_i915_getparam_t gp;
3313 int ret;
3314
3315 memclear(gp);
3316 gp.value = (int*)subslice_total;
3317 gp.param = I915_PARAM_SUBSLICE_TOTAL;
3318 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3319 if (ret)
3320 return -errno;
3321
3322 return 0;
3323 }
3324
3325 int
3326 drm_bacon_get_eu_total(int fd, unsigned int *eu_total)
3327 {
3328 drm_i915_getparam_t gp;
3329 int ret;
3330
3331 memclear(gp);
3332 gp.value = (int*)eu_total;
3333 gp.param = I915_PARAM_EU_TOTAL;
3334 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3335 if (ret)
3336 return -errno;
3337
3338 return 0;
3339 }
3340
3341 int
3342 drm_bacon_get_pooled_eu(int fd)
3343 {
3344 drm_i915_getparam_t gp;
3345 int ret = -1;
3346
3347 memclear(gp);
3348 gp.param = I915_PARAM_HAS_POOLED_EU;
3349 gp.value = &ret;
3350 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3351 return -errno;
3352
3353 return ret;
3354 }
3355
3356 int
3357 drm_bacon_get_min_eu_in_pool(int fd)
3358 {
3359 drm_i915_getparam_t gp;
3360 int ret = -1;
3361
3362 memclear(gp);
3363 gp.param = I915_PARAM_MIN_EU_IN_POOL;
3364 gp.value = &ret;
3365 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
3366 return -errno;
3367
3368 return ret;
3369 }
3370
3371 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3372 static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3373
3374 static drm_bacon_bufmgr_gem *
3375 drm_bacon_bufmgr_gem_find(int fd)
3376 {
3377 drm_bacon_bufmgr_gem *bufmgr_gem;
3378
3379 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3380 if (bufmgr_gem->fd == fd) {
3381 p_atomic_inc(&bufmgr_gem->refcount);
3382 return bufmgr_gem;
3383 }
3384 }
3385
3386 return NULL;
3387 }
3388
3389 static void
3390 drm_bacon_bufmgr_gem_unref(drm_bacon_bufmgr *bufmgr)
3391 {
3392 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *)bufmgr;
3393
3394 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3395 pthread_mutex_lock(&bufmgr_list_mutex);
3396
3397 if (p_atomic_dec_zero(&bufmgr_gem->refcount)) {
3398 DRMLISTDEL(&bufmgr_gem->managers);
3399 drm_bacon_bufmgr_gem_destroy(bufmgr);
3400 }
3401
3402 pthread_mutex_unlock(&bufmgr_list_mutex);
3403 }
3404 }
3405
3406 void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
3407 {
3408 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
3409 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3410
3411 if (bo_gem->gtt_virtual)
3412 return bo_gem->gtt_virtual;
3413
3414 if (bo_gem->is_userptr)
3415 return NULL;
3416
3417 pthread_mutex_lock(&bufmgr_gem->lock);
3418 if (bo_gem->gtt_virtual == NULL) {
3419 struct drm_i915_gem_mmap_gtt mmap_arg;
3420 void *ptr;
3421
3422 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
3423 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3424
3425 if (bo_gem->map_count++ == 0)
3426 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
3427
3428 memclear(mmap_arg);
3429 mmap_arg.handle = bo_gem->gem_handle;
3430
3431 /* Get the fake offset back... */
3432 ptr = MAP_FAILED;
3433 if (drmIoctl(bufmgr_gem->fd,
3434 DRM_IOCTL_I915_GEM_MMAP_GTT,
3435 &mmap_arg) == 0) {
3436 /* and mmap it */
3437 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
3438 MAP_SHARED, bufmgr_gem->fd,
3439 mmap_arg.offset);
3440 }
3441 if (ptr == MAP_FAILED) {
3442 if (--bo_gem->map_count == 0)
3443 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
3444 ptr = NULL;
3445 }
3446
3447 bo_gem->gtt_virtual = ptr;
3448 }
3449 pthread_mutex_unlock(&bufmgr_gem->lock);
3450
3451 return bo_gem->gtt_virtual;
3452 }
3453
3454 void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
3455 {
3456 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
3457 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3458
3459 if (bo_gem->mem_virtual)
3460 return bo_gem->mem_virtual;
3461
3462 if (bo_gem->is_userptr) {
3463 /* Return the same user ptr */
3464 return bo_gem->user_virtual;
3465 }
3466
3467 pthread_mutex_lock(&bufmgr_gem->lock);
3468 if (!bo_gem->mem_virtual) {
3469 struct drm_i915_gem_mmap mmap_arg;
3470
3471 if (bo_gem->map_count++ == 0)
3472 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
3473
3474 DBG("bo_map: %d (%s), map_count=%d\n",
3475 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3476
3477 memclear(mmap_arg);
3478 mmap_arg.handle = bo_gem->gem_handle;
3479 mmap_arg.size = bo->size;
3480 if (drmIoctl(bufmgr_gem->fd,
3481 DRM_IOCTL_I915_GEM_MMAP,
3482 &mmap_arg)) {
3483 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3484 __FILE__, __LINE__, bo_gem->gem_handle,
3485 bo_gem->name, strerror(errno));
3486 if (--bo_gem->map_count == 0)
3487 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
3488 } else {
3489 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3490 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3491 }
3492 }
3493 pthread_mutex_unlock(&bufmgr_gem->lock);
3494
3495 return bo_gem->mem_virtual;
3496 }
3497
3498 void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
3499 {
3500 drm_bacon_bufmgr_gem *bufmgr_gem = (drm_bacon_bufmgr_gem *) bo->bufmgr;
3501 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
3502
3503 if (bo_gem->wc_virtual)
3504 return bo_gem->wc_virtual;
3505
3506 if (bo_gem->is_userptr)
3507 return NULL;
3508
3509 pthread_mutex_lock(&bufmgr_gem->lock);
3510 if (!bo_gem->wc_virtual) {
3511 struct drm_i915_gem_mmap mmap_arg;
3512
3513 if (bo_gem->map_count++ == 0)
3514 drm_bacon_gem_bo_open_vma(bufmgr_gem, bo_gem);
3515
3516 DBG("bo_map: %d (%s), map_count=%d\n",
3517 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
3518
3519 memclear(mmap_arg);
3520 mmap_arg.handle = bo_gem->gem_handle;
3521 mmap_arg.size = bo->size;
3522 mmap_arg.flags = I915_MMAP_WC;
3523 if (drmIoctl(bufmgr_gem->fd,
3524 DRM_IOCTL_I915_GEM_MMAP,
3525 &mmap_arg)) {
3526 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
3527 __FILE__, __LINE__, bo_gem->gem_handle,
3528 bo_gem->name, strerror(errno));
3529 if (--bo_gem->map_count == 0)
3530 drm_bacon_gem_bo_close_vma(bufmgr_gem, bo_gem);
3531 } else {
3532 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
3533 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
3534 }
3535 }
3536 pthread_mutex_unlock(&bufmgr_gem->lock);
3537
3538 return bo_gem->wc_virtual;
3539 }
3540
3541 /**
3542 * Initializes the GEM buffer manager, which uses the kernel to allocate,
3543 * map, and manage buffer objects.
3544 *
3545 * \param fd File descriptor of the opened DRM device.
3546 */
3547 drm_bacon_bufmgr *
3548 drm_bacon_bufmgr_gem_init(int fd, int batch_size)
3549 {
3550 drm_bacon_bufmgr_gem *bufmgr_gem;
3551 struct drm_i915_gem_get_aperture aperture;
3552 drm_i915_getparam_t gp;
3553 int ret, tmp;
3554 bool exec2 = false;
3555
3556 pthread_mutex_lock(&bufmgr_list_mutex);
3557
3558 bufmgr_gem = drm_bacon_bufmgr_gem_find(fd);
3559 if (bufmgr_gem)
3560 goto exit;
3561
3562 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3563 if (bufmgr_gem == NULL)
3564 goto exit;
3565
3566 bufmgr_gem->fd = fd;
3567 p_atomic_set(&bufmgr_gem->refcount, 1);
3568
3569 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3570 free(bufmgr_gem);
3571 bufmgr_gem = NULL;
3572 goto exit;
3573 }
3574
3575 memclear(aperture);
3576 ret = drmIoctl(bufmgr_gem->fd,
3577 DRM_IOCTL_I915_GEM_GET_APERTURE,
3578 &aperture);
3579
3580 if (ret == 0)
3581 bufmgr_gem->gtt_size = aperture.aper_available_size;
3582 else {
3583 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n",
3584 strerror(errno));
3585 bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3586 fprintf(stderr, "Assuming %dkB available aperture size.\n"
3587 "May lead to reduced performance or incorrect "
3588 "rendering.\n",
3589 (int)bufmgr_gem->gtt_size / 1024);
3590 }
3591
3592 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3593
3594 if (IS_GEN2(bufmgr_gem->pci_device))
3595 bufmgr_gem->gen = 2;
3596 else if (IS_GEN3(bufmgr_gem->pci_device))
3597 bufmgr_gem->gen = 3;
3598 else if (IS_GEN4(bufmgr_gem->pci_device))
3599 bufmgr_gem->gen = 4;
3600 else if (IS_GEN5(bufmgr_gem->pci_device))
3601 bufmgr_gem->gen = 5;
3602 else if (IS_GEN6(bufmgr_gem->pci_device))
3603 bufmgr_gem->gen = 6;
3604 else if (IS_GEN7(bufmgr_gem->pci_device))
3605 bufmgr_gem->gen = 7;
3606 else if (IS_GEN8(bufmgr_gem->pci_device))
3607 bufmgr_gem->gen = 8;
3608 else if (IS_GEN9(bufmgr_gem->pci_device))
3609 bufmgr_gem->gen = 9;
3610 else {
3611 free(bufmgr_gem);
3612 bufmgr_gem = NULL;
3613 goto exit;
3614 }
3615
3616 if (IS_GEN3(bufmgr_gem->pci_device) &&
3617 bufmgr_gem->gtt_size > 256*1024*1024) {
3618 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3619 * be used for tiled blits. To simplify the accounting, just
3620 * subtract the unmappable part (fixed to 256MB on all known
3621 * gen3 devices) if the kernel advertises it. */
3622 bufmgr_gem->gtt_size -= 256*1024*1024;
3623 }
3624
3625 memclear(gp);
3626 gp.value = &tmp;
3627
3628 gp.param = I915_PARAM_HAS_EXECBUF2;
3629 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3630 if (!ret)
3631 exec2 = true;
3632
3633 gp.param = I915_PARAM_HAS_BSD;
3634 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3635 bufmgr_gem->has_bsd = ret == 0;
3636
3637 gp.param = I915_PARAM_HAS_BLT;
3638 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3639 bufmgr_gem->has_blt = ret == 0;
3640
3641 gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3642 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3643 bufmgr_gem->has_relaxed_fencing = ret == 0;
3644
3645 gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3646 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3647 bufmgr_gem->has_exec_async = ret == 0;
3648
3649 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3650
3651 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3652 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3653 bufmgr_gem->has_wait_timeout = ret == 0;
3654
3655 gp.param = I915_PARAM_HAS_LLC;
3656 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3657 if (ret != 0) {
3658 /* Kernel does not support the HAS_LLC query; fall back to GPU
3659 * generation detection and assume that we have LLC on GEN6/7.
3660 */
3661 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
3662 IS_GEN7(bufmgr_gem->pci_device));
3663 } else
3664 bufmgr_gem->has_llc = *gp.value;
3665
3666 gp.param = I915_PARAM_HAS_VEBOX;
3667 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3668 bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
3669
3670 gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3671 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3672 if (ret == 0 && *gp.value > 0)
3673 bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_bacon_gem_bo_set_softpin_offset;
3674
3675 if (bufmgr_gem->gen < 4) {
3676 gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3677 gp.value = &bufmgr_gem->available_fences;
3678 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3679 if (ret) {
3680 fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3681 errno);
3682 fprintf(stderr, "param: %d, val: %d\n", gp.param,
3683 *gp.value);
3684 bufmgr_gem->available_fences = 0;
3685 } else {
3686 /* XXX The kernel reports the total number of fences,
3687 * including any that may be pinned.
3688 *
3689 * We presume that there will be at least one pinned
3690 * fence for the scanout buffer, but there may be more
3691 * than one scanout and the user may be manually
3692 * pinning buffers. Let's move to execbuffer2 and
3693 * thereby forget the insanity of using fences...
3694 */
3695 bufmgr_gem->available_fences -= 2;
3696 if (bufmgr_gem->available_fences < 0)
3697 bufmgr_gem->available_fences = 0;
3698 }
3699 }
3700
3701 if (bufmgr_gem->gen >= 8) {
3702 gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3703 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3704 if (ret == 0 && *gp.value == 3)
3705 bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_bacon_gem_bo_use_48b_address_range;
3706 }
3707
3708 /* Let's go with one relocation for every 2 dwords (but round down a bit
3709 * since a power of two will mean an extra page allocation for the reloc
3710 * buffer).
3711 *
3712 * Every 4 was too few for the blender benchmark.
3713 */
3714 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3715
3716 bufmgr_gem->bufmgr.bo_alloc = drm_bacon_gem_bo_alloc;
3717 bufmgr_gem->bufmgr.bo_alloc_for_render =
3718 drm_bacon_gem_bo_alloc_for_render;
3719 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_bacon_gem_bo_alloc_tiled;
3720 bufmgr_gem->bufmgr.bo_reference = drm_bacon_gem_bo_reference;
3721 bufmgr_gem->bufmgr.bo_unreference = drm_bacon_gem_bo_unreference;
3722 bufmgr_gem->bufmgr.bo_map = drm_bacon_gem_bo_map;
3723 bufmgr_gem->bufmgr.bo_unmap = drm_bacon_gem_bo_unmap;
3724 bufmgr_gem->bufmgr.bo_subdata = drm_bacon_gem_bo_subdata;
3725 bufmgr_gem->bufmgr.bo_get_subdata = drm_bacon_gem_bo_get_subdata;
3726 bufmgr_gem->bufmgr.bo_wait_rendering = drm_bacon_gem_bo_wait_rendering;
3727 bufmgr_gem->bufmgr.bo_emit_reloc = drm_bacon_gem_bo_emit_reloc;
3728 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_bacon_gem_bo_emit_reloc_fence;
3729 bufmgr_gem->bufmgr.bo_pin = drm_bacon_gem_bo_pin;
3730 bufmgr_gem->bufmgr.bo_unpin = drm_bacon_gem_bo_unpin;
3731 bufmgr_gem->bufmgr.bo_get_tiling = drm_bacon_gem_bo_get_tiling;
3732 bufmgr_gem->bufmgr.bo_set_tiling = drm_bacon_gem_bo_set_tiling;
3733 bufmgr_gem->bufmgr.bo_flink = drm_bacon_gem_bo_flink;
3734 /* Use the new one if available */
3735 if (exec2) {
3736 bufmgr_gem->bufmgr.bo_exec = drm_bacon_gem_bo_exec2;
3737 bufmgr_gem->bufmgr.bo_mrb_exec = drm_bacon_gem_bo_mrb_exec2;
3738 } else
3739 bufmgr_gem->bufmgr.bo_exec = drm_bacon_gem_bo_exec;
3740 bufmgr_gem->bufmgr.bo_busy = drm_bacon_gem_bo_busy;
3741 bufmgr_gem->bufmgr.bo_madvise = drm_bacon_gem_bo_madvise;
3742 bufmgr_gem->bufmgr.destroy = drm_bacon_bufmgr_gem_unref;
3743 bufmgr_gem->bufmgr.debug = 0;
3744 bufmgr_gem->bufmgr.check_aperture_space =
3745 drm_bacon_gem_check_aperture_space;
3746 bufmgr_gem->bufmgr.bo_disable_reuse = drm_bacon_gem_bo_disable_reuse;
3747 bufmgr_gem->bufmgr.bo_is_reusable = drm_bacon_gem_bo_is_reusable;
3748 bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3749 drm_bacon_gem_get_pipe_from_crtc_id;
3750 bufmgr_gem->bufmgr.bo_references = drm_bacon_gem_bo_references;
3751
3752 init_cache_buckets(bufmgr_gem);
3753
3754 DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3755 bufmgr_gem->vma_max = -1; /* unlimited by default */
3756
3757 DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3758
3759 exit:
3760 pthread_mutex_unlock(&bufmgr_list_mutex);
3761
3762 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3763 }
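
/*
 * Illustrative bring-up sequence (the fd and batch size are hypothetical;
 * teardown goes through the bufmgr's destroy hook, which is
 * drm_bacon_bufmgr_gem_unref above):
 *
 *	drm_bacon_bufmgr *bufmgr = drm_bacon_bufmgr_gem_init(fd, 4096 * 4);
 *	if (bufmgr) {
 *		drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 *		drm_bacon_bufmgr_gem_enable_fenced_relocs(bufmgr);
 *		drm_bacon_context *ctx = drm_bacon_gem_context_create(bufmgr);
 *		...
 *		drm_bacon_gem_context_destroy(ctx);
 *	}
 */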