i965/drm: Drop deprecated drm_bacon_bo::offset.
[mesa.git] src/mesa/drivers/dri/i965/intel_bufmgr_gem.c
1 /**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007-2012 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30 /*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 * Eric Anholt <eric@anholt.net>
34 * Dave Airlie <airlied@linux.ie>
35 */
36
37 #ifdef HAVE_CONFIG_H
38 #include "config.h"
39 #endif
40
41 #include <xf86drm.h>
42 #include <util/u_atomic.h>
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <assert.h>
49 #include <pthread.h>
50 #include <sys/ioctl.h>
51 #include <sys/stat.h>
52 #include <sys/types.h>
53 #include <stdbool.h>
54
55 #include "errno.h"
56 #ifndef ETIME
57 #define ETIME ETIMEDOUT
58 #endif
59 #include "common/gen_debug.h"
60 #include "libdrm_macros.h"
61 #include "main/macros.h"
62 #include "util/macros.h"
63 #include "util/list.h"
64 #include "brw_bufmgr.h"
65 #include "intel_chipset.h"
66 #include "string.h"
67
68 #include "i915_drm.h"
69 #include "uthash.h"
70
71 #ifdef HAVE_VALGRIND
72 #include <valgrind.h>
73 #include <memcheck.h>
74 #define VG(x) x
75 #else
76 #define VG(x)
77 #endif
78
79 #define memclear(s) memset(&s, 0, sizeof(s))
80
81 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
82
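/**
 * Atomically add @add to @*v unless it currently equals @unless.
 *
 * Returns nonzero if @*v equalled @unless and was left unchanged.
 */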
83 static inline int
84 atomic_add_unless(int *v, int add, int unless)
85 {
86 int c, old;
87 c = p_atomic_read(v);
88 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
89 c = old;
90 return c == unless;
91 }
92
93 /**
94 * upper_32_bits - return bits 32-63 of a number
95 * @n: the number we're accessing
96 *
97 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
98 * the "right shift count >= width of type" warning when that quantity is
99 	 * 32 bits wide.
100 */
101 #define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))
102
103 /**
104 * lower_32_bits - return bits 0-31 of a number
105 * @n: the number we're accessing
106 */
107 #define lower_32_bits(n) ((__u32)(n))
108
109 struct _drm_bacon_context {
110 unsigned int ctx_id;
111 struct _drm_bacon_bufmgr *bufmgr;
112 };
113
114 typedef struct _drm_bacon_bo_gem drm_bacon_bo_gem;
115
116 struct drm_bacon_gem_bo_bucket {
117 struct list_head head;
118 unsigned long size;
119 };
120
121 typedef struct _drm_bacon_bufmgr {
122 int refcount;
123
124 int fd;
125
126 int max_relocs;
127
128 pthread_mutex_t lock;
129
130 struct drm_i915_gem_exec_object2 *exec2_objects;
131 drm_bacon_bo **exec_bos;
132 int exec_size;
133 int exec_count;
134
135 /** Array of lists of cached gem objects of power-of-two sizes */
136 struct drm_bacon_gem_bo_bucket cache_bucket[14 * 4];
137 int num_buckets;
138 time_t time;
139
140 struct list_head managers;
141
142 drm_bacon_bo_gem *name_table;
143 drm_bacon_bo_gem *handle_table;
144
145 struct list_head vma_cache;
146 int vma_count, vma_open, vma_max;
147
148 uint64_t gtt_size;
149 int pci_device;
150 int gen;
151 unsigned int has_bsd : 1;
152 unsigned int has_blt : 1;
153 unsigned int has_llc : 1;
154 unsigned int bo_reuse : 1;
155 unsigned int no_exec : 1;
156 unsigned int has_vebox : 1;
157 unsigned int has_exec_async : 1;
158
159 struct {
160 void *ptr;
161 uint32_t handle;
162 } userptr_active;
163
164 } drm_bacon_bufmgr;
165
166 typedef struct _drm_bacon_reloc_target_info {
167 drm_bacon_bo *bo;
168 } drm_bacon_reloc_target;
169
170 struct _drm_bacon_bo_gem {
171 drm_bacon_bo bo;
172
173 int refcount;
174 uint32_t gem_handle;
175 const char *name;
176
177 /**
178 	 * Kernel-assigned global name for this object
179 	 *
180 	 * The list contains both flink-named and prime-fd'd objects.
181 */
182 unsigned int global_name;
183
184 UT_hash_handle handle_hh;
185 UT_hash_handle name_hh;
186
187 /**
188 * Index of the buffer within the validation list while preparing a
189 * batchbuffer execution.
190 */
191 int validate_index;
192
193 /**
194 * Current tiling mode
195 */
196 uint32_t tiling_mode;
197 uint32_t swizzle_mode;
198 unsigned long stride;
199
200 unsigned long kflags;
201
202 time_t free_time;
203
204 /** Array passed to the DRM containing relocation information. */
205 struct drm_i915_gem_relocation_entry *relocs;
206 /**
207 * Array of info structs corresponding to relocs[i].target_handle etc
208 */
209 drm_bacon_reloc_target *reloc_target_info;
210 /** Number of entries in relocs */
211 int reloc_count;
212 /** Array of BOs that are referenced by this buffer and will be softpinned */
213 drm_bacon_bo **softpin_target;
214 /** Number softpinned BOs that are referenced by this buffer */
215 int softpin_target_count;
216 /** Maximum amount of softpinned BOs that are referenced by this buffer */
217 int softpin_target_size;
218
219 /** Mapped address for the buffer, saved across map/unmap cycles */
220 void *mem_virtual;
221 /** GTT virtual address for the buffer, saved across map/unmap cycles */
222 void *gtt_virtual;
223 /** WC CPU address for the buffer, saved across map/unmap cycles */
224 void *wc_virtual;
225 /**
226 * Virtual address of the buffer allocated by user, used for userptr
227 * objects only.
228 */
229 void *user_virtual;
230 int map_count;
231 struct list_head vma_list;
232
233 /** BO cache list */
234 struct list_head head;
235
236 /**
237 * Boolean of whether this BO and its children have been included in
238 * the current drm_bacon_bufmgr_check_aperture_space() total.
239 */
240 bool included_in_check_aperture;
241
242 /**
243 * Boolean of whether this buffer has been used as a relocation
244 * target and had its size accounted for, and thus can't have any
245 * further relocations added to it.
246 */
247 bool used_as_reloc_target;
248
249 /**
250 * Boolean of whether we have encountered an error whilst building the relocation tree.
251 */
252 bool has_error;
253
254 /**
255 * Boolean of whether this buffer can be re-used
256 */
257 bool reusable;
258
259 /**
260 * Boolean of whether the GPU is definitely not accessing the buffer.
261 *
262 * This is only valid when reusable, since non-reusable
263 * buffers are those that have been shared with other
264 * processes, so we don't know their state.
265 */
266 bool idle;
267
268 /**
269 * Boolean of whether this buffer was allocated with userptr
270 */
271 bool is_userptr;
272
273 /**
274 * Size in bytes of this buffer and its relocation descendents.
275 *
276 * Used to avoid costly tree walking in
277 * drm_bacon_bufmgr_check_aperture in the common case.
278 */
279 int reloc_tree_size;
280
281 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */
282 bool mapped_cpu_write;
283 };
284
285 static unsigned int
286 drm_bacon_gem_estimate_batch_space(drm_bacon_bo ** bo_array, int count);
287
288 static unsigned int
289 drm_bacon_gem_compute_batch_space(drm_bacon_bo ** bo_array, int count);
290
291 static int
292 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
293 uint32_t tiling_mode,
294 uint32_t stride);
295
296 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
297 time_t time);
298
299 static void drm_bacon_gem_bo_free(drm_bacon_bo *bo);
300
301 static inline drm_bacon_bo_gem *to_bo_gem(drm_bacon_bo *bo)
302 {
303 return (drm_bacon_bo_gem *)bo;
304 }
305
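/**
 * Round @size up to the allocation granularity required for the given
 * tiling mode. Untiled buffers are returned unchanged; on 965+ tiled
 * buffers only need to be padded to a page multiple.
 */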
306 static unsigned long
307 drm_bacon_gem_bo_tile_size(drm_bacon_bufmgr *bufmgr, unsigned long size,
308 uint32_t *tiling_mode)
309 {
310 if (*tiling_mode == I915_TILING_NONE)
311 return size;
312
313 /* 965+ just need multiples of page size for tiling */
314 return ALIGN(size, 4096);
315 }
316
317 /*
318 * Round a given pitch up to the minimum required for X tiling on a
319 * given chip. We use 512 as the minimum to allow for a later tiling
320 * change.
321 */
322 static unsigned long
323 drm_bacon_gem_bo_tile_pitch(drm_bacon_bufmgr *bufmgr,
324 unsigned long pitch, uint32_t *tiling_mode)
325 {
326 unsigned long tile_width;
327
328 /* If untiled, then just align it so that we can do rendering
329 * to it with the 3D engine.
330 */
331 if (*tiling_mode == I915_TILING_NONE)
332 return ALIGN(pitch, 64);
333
334 if (*tiling_mode == I915_TILING_X)
335 tile_width = 512;
336 else
337 tile_width = 128;
338
339 /* 965 is flexible */
340 return ALIGN(pitch, tile_width);
341 }
342
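/**
 * Return the smallest cache bucket that fits @size, or NULL if the size
 * is larger than any bucket.
 */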
343 static struct drm_bacon_gem_bo_bucket *
344 drm_bacon_gem_bo_bucket_for_size(drm_bacon_bufmgr *bufmgr,
345 unsigned long size)
346 {
347 int i;
348
349 for (i = 0; i < bufmgr->num_buckets; i++) {
350 struct drm_bacon_gem_bo_bucket *bucket =
351 &bufmgr->cache_bucket[i];
352 if (bucket->size >= size) {
353 return bucket;
354 }
355 }
356
357 return NULL;
358 }
359
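/**
 * Dump the current validation list, including relocations and softpin
 * targets, to the debug output.
 */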
360 static void
361 drm_bacon_gem_dump_validation_list(drm_bacon_bufmgr *bufmgr)
362 {
363 int i, j;
364
365 for (i = 0; i < bufmgr->exec_count; i++) {
366 drm_bacon_bo *bo = bufmgr->exec_bos[i];
367 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
368
369 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
370 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
371 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
372 bo_gem->name);
373 continue;
374 }
375
376 for (j = 0; j < bo_gem->reloc_count; j++) {
377 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[j].bo;
378 drm_bacon_bo_gem *target_gem =
379 (drm_bacon_bo_gem *) target_bo;
380
381 DBG("%2d: %d %s(%s)@0x%08x %08x -> "
382 "%d (%s)@0x%08x %08x + 0x%08x\n",
383 i,
384 bo_gem->gem_handle,
385 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
386 bo_gem->name,
387 upper_32_bits(bo_gem->relocs[j].offset),
388 lower_32_bits(bo_gem->relocs[j].offset),
389 target_gem->gem_handle,
390 target_gem->name,
391 upper_32_bits(target_bo->offset64),
392 lower_32_bits(target_bo->offset64),
393 bo_gem->relocs[j].delta);
394 }
395
396 for (j = 0; j < bo_gem->softpin_target_count; j++) {
397 drm_bacon_bo *target_bo = bo_gem->softpin_target[j];
398 drm_bacon_bo_gem *target_gem =
399 (drm_bacon_bo_gem *) target_bo;
400 DBG("%2d: %d %s(%s) -> "
401 "%d *(%s)@0x%08x %08x\n",
402 i,
403 bo_gem->gem_handle,
404 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "",
405 bo_gem->name,
406 target_gem->gem_handle,
407 target_gem->name,
408 upper_32_bits(target_bo->offset64),
409 lower_32_bits(target_bo->offset64));
410 }
411 }
412 }
413
414 inline void
415 drm_bacon_bo_reference(drm_bacon_bo *bo)
416 {
417 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
418
419 p_atomic_inc(&bo_gem->refcount);
420 }
421
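/**
 * Add @bo to the execbuffer2 validation list, growing the exec arrays as
 * needed. Buffers that already have a validate_index are left alone.
 */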
422 static void
423 drm_bacon_add_validate_buffer2(drm_bacon_bo *bo)
424 {
425 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
426 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
427 int index;
428
429 if (bo_gem->validate_index != -1)
430 return;
431
432 /* Extend the array of validation entries as necessary. */
433 if (bufmgr->exec_count == bufmgr->exec_size) {
434 int new_size = bufmgr->exec_size * 2;
435
436 if (new_size == 0)
437 new_size = 5;
438
439 bufmgr->exec2_objects =
440 realloc(bufmgr->exec2_objects,
441 sizeof(*bufmgr->exec2_objects) * new_size);
442 bufmgr->exec_bos =
443 realloc(bufmgr->exec_bos,
444 sizeof(*bufmgr->exec_bos) * new_size);
445 bufmgr->exec_size = new_size;
446 }
447
448 index = bufmgr->exec_count;
449 bo_gem->validate_index = index;
450 /* Fill in array entry */
451 bufmgr->exec2_objects[index].handle = bo_gem->gem_handle;
452 bufmgr->exec2_objects[index].relocation_count = bo_gem->reloc_count;
453 bufmgr->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
454 bufmgr->exec2_objects[index].alignment = bo->align;
455 bufmgr->exec2_objects[index].offset = bo->offset64;
456 bufmgr->exec2_objects[index].flags = bo_gem->kflags;
457 bufmgr->exec2_objects[index].rsvd1 = 0;
458 bufmgr->exec2_objects[index].rsvd2 = 0;
459 bufmgr->exec_bos[index] = bo;
460 bufmgr->exec_count++;
461 }
462
463 static void
464 drm_bacon_bo_gem_set_in_aperture_size(drm_bacon_bufmgr *bufmgr,
465 drm_bacon_bo_gem *bo_gem,
466 unsigned int alignment)
467 {
468 unsigned int size;
469
470 assert(!bo_gem->used_as_reloc_target);
471
472 	/* The older chipsets are far less flexible in terms of tiling,
473 	 * and require tiled buffers to be size-aligned in the aperture.
474 * This means that in the worst possible case we will need a hole
475 * twice as large as the object in order for it to fit into the
476 * aperture. Optimal packing is for wimps.
477 */
478 size = bo_gem->bo.size;
479
480 bo_gem->reloc_tree_size = size + alignment;
481 }
482
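/**
 * Allocate the relocation and target-info arrays for @bo, sized to the
 * buffer manager's relocation limit (capped at bo->size / 4 entries).
 *
 * Returns nonzero and marks the buffer as erroring if allocation fails.
 */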
483 static int
484 drm_bacon_setup_reloc_list(drm_bacon_bo *bo)
485 {
486 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
487 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
488 unsigned int max_relocs = bufmgr->max_relocs;
489
490 if (bo->size / 4 < max_relocs)
491 max_relocs = bo->size / 4;
492
493 bo_gem->relocs = malloc(max_relocs *
494 sizeof(struct drm_i915_gem_relocation_entry));
495 bo_gem->reloc_target_info = malloc(max_relocs *
496 sizeof(drm_bacon_reloc_target));
497 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
498 bo_gem->has_error = true;
499
500 free (bo_gem->relocs);
501 bo_gem->relocs = NULL;
502
503 free (bo_gem->reloc_target_info);
504 bo_gem->reloc_target_info = NULL;
505
506 return 1;
507 }
508
509 return 0;
510 }
511
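/**
 * Return whether the GPU is still using @bo, as reported by the GEM_BUSY
 * ioctl. The idle state is cached for reusable buffers so repeated
 * queries can be answered without another ioctl.
 */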
512 int
513 drm_bacon_bo_busy(drm_bacon_bo *bo)
514 {
515 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
516 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
517 struct drm_i915_gem_busy busy;
518 int ret;
519
520 if (bo_gem->reusable && bo_gem->idle)
521 return false;
522
523 memclear(busy);
524 busy.handle = bo_gem->gem_handle;
525
526 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
527 if (ret == 0) {
528 bo_gem->idle = !busy.busy;
529 return busy.busy;
530 } else {
531 return false;
532 }
534 }
535
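/**
 * Tell the kernel whether the backing storage of @bo_gem may be discarded
 * (I915_MADV_DONTNEED) or is needed again (I915_MADV_WILLNEED).
 *
 * Returns whether the backing pages are still resident.
 */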
536 static int
537 drm_bacon_gem_bo_madvise_internal(drm_bacon_bufmgr *bufmgr,
538 drm_bacon_bo_gem *bo_gem, int state)
539 {
540 struct drm_i915_gem_madvise madv;
541
542 memclear(madv);
543 madv.handle = bo_gem->gem_handle;
544 madv.madv = state;
545 madv.retained = 1;
546 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
547
548 return madv.retained;
549 }
550
551 int
552 drm_bacon_bo_madvise(drm_bacon_bo *bo, int madv)
553 {
554 return drm_bacon_gem_bo_madvise_internal(bo->bufmgr,
555 (drm_bacon_bo_gem *) bo,
556 madv);
557 }
558
559 /* drop the oldest entries that have been purged by the kernel */
560 static void
561 drm_bacon_gem_bo_cache_purge_bucket(drm_bacon_bufmgr *bufmgr,
562 struct drm_bacon_gem_bo_bucket *bucket)
563 {
564 while (!list_empty(&bucket->head)) {
565 drm_bacon_bo_gem *bo_gem;
566
567 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
568 bucket->head.next, head);
569 if (drm_bacon_gem_bo_madvise_internal
570 (bufmgr, bo_gem, I915_MADV_DONTNEED))
571 break;
572
573 list_del(&bo_gem->head);
574 drm_bacon_gem_bo_free(&bo_gem->bo);
575 }
576 }
577
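/**
 * Common allocation path: try to reuse a buffer from the matching cache
 * bucket first, and fall back to a fresh GEM_CREATE when the cache is
 * empty or a cached buffer cannot be revived or retiled to match.
 */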
578 static drm_bacon_bo *
579 drm_bacon_gem_bo_alloc_internal(drm_bacon_bufmgr *bufmgr,
580 const char *name,
581 unsigned long size,
582 unsigned long flags,
583 uint32_t tiling_mode,
584 unsigned long stride,
585 unsigned int alignment)
586 {
587 drm_bacon_bo_gem *bo_gem;
588 unsigned int page_size = getpagesize();
589 int ret;
590 struct drm_bacon_gem_bo_bucket *bucket;
591 bool alloc_from_cache;
592 unsigned long bo_size;
593 bool for_render = false;
594
595 if (flags & BO_ALLOC_FOR_RENDER)
596 for_render = true;
597
598 /* Round the allocated size up to a power of two number of pages. */
599 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, size);
600
601 /* If we don't have caching at this size, don't actually round the
602 * allocation up.
603 */
604 if (bucket == NULL) {
605 bo_size = size;
606 if (bo_size < page_size)
607 bo_size = page_size;
608 } else {
609 bo_size = bucket->size;
610 }
611
612 pthread_mutex_lock(&bufmgr->lock);
613 /* Get a buffer out of the cache if available */
614 retry:
615 alloc_from_cache = false;
616 if (bucket != NULL && !list_empty(&bucket->head)) {
617 if (for_render) {
618 /* Allocate new render-target BOs from the tail (MRU)
619 * of the list, as it will likely be hot in the GPU
620 * cache and in the aperture for us.
621 */
622 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
623 bucket->head.prev, head);
624 list_del(&bo_gem->head);
625 alloc_from_cache = true;
626 bo_gem->bo.align = alignment;
627 } else {
628 assert(alignment == 0);
629 /* For non-render-target BOs (where we're probably
630 * going to map it first thing in order to fill it
631 * with data), check if the last BO in the cache is
632 * unbusy, and only reuse in that case. Otherwise,
633 * allocating a new buffer is probably faster than
634 * waiting for the GPU to finish.
635 */
636 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
637 bucket->head.next, head);
638 if (!drm_bacon_bo_busy(&bo_gem->bo)) {
639 alloc_from_cache = true;
640 list_del(&bo_gem->head);
641 }
642 }
643
644 if (alloc_from_cache) {
645 if (!drm_bacon_gem_bo_madvise_internal
646 (bufmgr, bo_gem, I915_MADV_WILLNEED)) {
647 drm_bacon_gem_bo_free(&bo_gem->bo);
648 drm_bacon_gem_bo_cache_purge_bucket(bufmgr,
649 bucket);
650 goto retry;
651 }
652
653 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
654 tiling_mode,
655 stride)) {
656 drm_bacon_gem_bo_free(&bo_gem->bo);
657 goto retry;
658 }
659 }
660 }
661
662 if (!alloc_from_cache) {
663 struct drm_i915_gem_create create;
664
665 bo_gem = calloc(1, sizeof(*bo_gem));
666 if (!bo_gem)
667 goto err;
668
669 /* drm_bacon_gem_bo_free calls list_del() for an uninitialized
670 		   list (vma_list), so make sure the list head is initialized here */
671 list_inithead(&bo_gem->vma_list);
672
673 bo_gem->bo.size = bo_size;
674
675 memclear(create);
676 create.size = bo_size;
677
678 ret = drmIoctl(bufmgr->fd,
679 DRM_IOCTL_I915_GEM_CREATE,
680 &create);
681 if (ret != 0) {
682 free(bo_gem);
683 goto err;
684 }
685
686 bo_gem->gem_handle = create.handle;
687 HASH_ADD(handle_hh, bufmgr->handle_table,
688 gem_handle, sizeof(bo_gem->gem_handle),
689 bo_gem);
690
691 bo_gem->bo.handle = bo_gem->gem_handle;
692 bo_gem->bo.bufmgr = bufmgr;
693 bo_gem->bo.align = alignment;
694
695 bo_gem->tiling_mode = I915_TILING_NONE;
696 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
697 bo_gem->stride = 0;
698
699 if (drm_bacon_gem_bo_set_tiling_internal(&bo_gem->bo,
700 tiling_mode,
701 stride))
702 goto err_free;
703 }
704
705 bo_gem->name = name;
706 p_atomic_set(&bo_gem->refcount, 1);
707 bo_gem->validate_index = -1;
708 bo_gem->used_as_reloc_target = false;
709 bo_gem->has_error = false;
710 bo_gem->reusable = true;
711
712 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, alignment);
713 pthread_mutex_unlock(&bufmgr->lock);
714
715 DBG("bo_create: buf %d (%s) %ldb\n",
716 bo_gem->gem_handle, bo_gem->name, size);
717
718 return &bo_gem->bo;
719
720 err_free:
721 drm_bacon_gem_bo_free(&bo_gem->bo);
722 err:
723 pthread_mutex_unlock(&bufmgr->lock);
724 return NULL;
725 }
726
727 drm_bacon_bo *
728 drm_bacon_bo_alloc_for_render(drm_bacon_bufmgr *bufmgr,
729 const char *name,
730 unsigned long size,
731 unsigned int alignment)
732 {
733 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size,
734 BO_ALLOC_FOR_RENDER,
735 I915_TILING_NONE, 0,
736 alignment);
737 }
738
739 drm_bacon_bo *
740 drm_bacon_bo_alloc(drm_bacon_bufmgr *bufmgr,
741 const char *name,
742 unsigned long size,
743 unsigned int alignment)
744 {
745 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, 0,
746 I915_TILING_NONE, 0, 0);
747 }
748
749 drm_bacon_bo *
750 drm_bacon_bo_alloc_tiled(drm_bacon_bufmgr *bufmgr, const char *name,
751 int x, int y, int cpp, uint32_t *tiling_mode,
752 unsigned long *pitch, unsigned long flags)
753 {
754 unsigned long size, stride;
755 uint32_t tiling;
756
757 do {
758 unsigned long aligned_y, height_alignment;
759
760 tiling = *tiling_mode;
761
762 /* If we're tiled, our allocations are in 8 or 32-row blocks,
763 * so failure to align our height means that we won't allocate
764 * enough pages.
765 *
766 * If we're untiled, we still have to align to 2 rows high
767 * because the data port accesses 2x2 blocks even if the
768 * bottom row isn't to be rendered, so failure to align means
769 * we could walk off the end of the GTT and fault. This is
770 * documented on 965, and may be the case on older chipsets
771 * too so we try to be careful.
772 */
773 aligned_y = y;
774 height_alignment = 2;
775
776 if (tiling == I915_TILING_X)
777 height_alignment = 8;
778 else if (tiling == I915_TILING_Y)
779 height_alignment = 32;
780 aligned_y = ALIGN(y, height_alignment);
781
782 stride = x * cpp;
783 stride = drm_bacon_gem_bo_tile_pitch(bufmgr, stride, tiling_mode);
784 size = stride * aligned_y;
785 size = drm_bacon_gem_bo_tile_size(bufmgr, size, tiling_mode);
786 } while (*tiling_mode != tiling);
787 *pitch = stride;
788
789 if (tiling == I915_TILING_NONE)
790 stride = 0;
791
792 return drm_bacon_gem_bo_alloc_internal(bufmgr, name, size, flags,
793 tiling, stride, 0);
794 }
795
796 drm_bacon_bo *
797 drm_bacon_bo_alloc_userptr(drm_bacon_bufmgr *bufmgr,
798 const char *name,
799 void *addr,
800 uint32_t tiling_mode,
801 uint32_t stride,
802 unsigned long size,
803 unsigned long flags)
804 {
805 drm_bacon_bo_gem *bo_gem;
806 int ret;
807 struct drm_i915_gem_userptr userptr;
808
809 /* Tiling with userptr surfaces is not supported
810 	 * on all hardware, so refuse it for the time being.
811 */
812 if (tiling_mode != I915_TILING_NONE)
813 return NULL;
814
815 bo_gem = calloc(1, sizeof(*bo_gem));
816 if (!bo_gem)
817 return NULL;
818
819 p_atomic_set(&bo_gem->refcount, 1);
820 list_inithead(&bo_gem->vma_list);
821
822 bo_gem->bo.size = size;
823
824 memclear(userptr);
825 userptr.user_ptr = (__u64)((unsigned long)addr);
826 userptr.user_size = size;
827 userptr.flags = flags;
828
829 ret = drmIoctl(bufmgr->fd,
830 DRM_IOCTL_I915_GEM_USERPTR,
831 &userptr);
832 if (ret != 0) {
833 DBG("bo_create_userptr: "
834 "ioctl failed with user ptr %p size 0x%lx, "
835 "user flags 0x%lx\n", addr, size, flags);
836 free(bo_gem);
837 return NULL;
838 }
839
840 pthread_mutex_lock(&bufmgr->lock);
841
842 bo_gem->gem_handle = userptr.handle;
843 bo_gem->bo.handle = bo_gem->gem_handle;
844 bo_gem->bo.bufmgr = bufmgr;
845 bo_gem->is_userptr = true;
846 bo_gem->bo.virtual = addr;
847 	/* Save the address provided by the user */
848 bo_gem->user_virtual = addr;
849 bo_gem->tiling_mode = I915_TILING_NONE;
850 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
851 bo_gem->stride = 0;
852
853 HASH_ADD(handle_hh, bufmgr->handle_table,
854 gem_handle, sizeof(bo_gem->gem_handle),
855 bo_gem);
856
857 bo_gem->name = name;
858 bo_gem->validate_index = -1;
859 bo_gem->used_as_reloc_target = false;
860 bo_gem->has_error = false;
861 bo_gem->reusable = false;
862
863 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
864 pthread_mutex_unlock(&bufmgr->lock);
865
866 DBG("bo_create_userptr: "
867 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
868 addr, bo_gem->gem_handle, bo_gem->name,
869 size, stride, tiling_mode);
870
871 return &bo_gem->bo;
872 }
873
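/**
 * Probe whether the kernel supports I915_GEM_USERPTR by creating a
 * one-page test object. The test object is kept for the lifetime of the
 * buffer manager so the kernel's mmu notifier stays installed.
 */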
874 bool
875 drm_bacon_has_userptr(drm_bacon_bufmgr *bufmgr)
876 {
877 int ret;
878 void *ptr;
879 long pgsz;
880 struct drm_i915_gem_userptr userptr;
881
882 pgsz = sysconf(_SC_PAGESIZE);
883 assert(pgsz > 0);
884
885 ret = posix_memalign(&ptr, pgsz, pgsz);
886 if (ret) {
887 DBG("Failed to get a page (%ld) for userptr detection!\n",
888 pgsz);
889 return false;
890 }
891
892 memclear(userptr);
893 userptr.user_ptr = (__u64)(unsigned long)ptr;
894 userptr.user_size = pgsz;
895
896 retry:
897 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
898 if (ret) {
899 if (errno == ENODEV && userptr.flags == 0) {
900 userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
901 goto retry;
902 }
903 free(ptr);
904 return false;
905 }
906
907 /* We don't release the userptr bo here as we want to keep the
908 * kernel mm tracking alive for our lifetime. The first time we
909 	 * create a userptr object the kernel has to install an mmu_notifier
910 * which is a heavyweight operation (e.g. it requires taking all
911 * mm_locks and stop_machine()).
912 */
913
914 bufmgr->userptr_active.ptr = ptr;
915 bufmgr->userptr_active.handle = userptr.handle;
916
917 return true;
918 }
919
920 /**
921 * Returns a drm_bacon_bo wrapping the given buffer object handle.
922 *
923 * This can be used when one application needs to pass a buffer object
924 * to another.
925 */
926 drm_bacon_bo *
927 drm_bacon_bo_gem_create_from_name(drm_bacon_bufmgr *bufmgr,
928 const char *name,
929 unsigned int handle)
930 {
931 drm_bacon_bo_gem *bo_gem;
932 int ret;
933 struct drm_gem_open open_arg;
934 struct drm_i915_gem_get_tiling get_tiling;
935
936 	/* At the moment most applications only have a few named bos.
937 	 * For instance, in a DRI client only the render buffers passed
938 	 * between X and the client are named. And since X returns
939 	 * alternating names for the front/back buffers, the lookup in the
940 	 * name table is sufficiently fast.
941 	 */
942 pthread_mutex_lock(&bufmgr->lock);
943 HASH_FIND(name_hh, bufmgr->name_table,
944 &handle, sizeof(handle), bo_gem);
945 if (bo_gem) {
946 drm_bacon_bo_reference(&bo_gem->bo);
947 goto out;
948 }
949
950 memclear(open_arg);
951 open_arg.name = handle;
952 ret = drmIoctl(bufmgr->fd,
953 DRM_IOCTL_GEM_OPEN,
954 &open_arg);
955 if (ret != 0) {
956 DBG("Couldn't reference %s handle 0x%08x: %s\n",
957 name, handle, strerror(errno));
958 bo_gem = NULL;
959 goto out;
960 }
961 /* Now see if someone has used a prime handle to get this
962 * object from the kernel before by looking through the list
963 * again for a matching gem_handle
964 */
965 HASH_FIND(handle_hh, bufmgr->handle_table,
966 &open_arg.handle, sizeof(open_arg.handle), bo_gem);
967 if (bo_gem) {
968 drm_bacon_bo_reference(&bo_gem->bo);
969 goto out;
970 }
971
972 bo_gem = calloc(1, sizeof(*bo_gem));
973 if (!bo_gem)
974 goto out;
975
976 p_atomic_set(&bo_gem->refcount, 1);
977 list_inithead(&bo_gem->vma_list);
978
979 bo_gem->bo.size = open_arg.size;
980 bo_gem->bo.offset64 = 0;
981 bo_gem->bo.virtual = NULL;
982 bo_gem->bo.bufmgr = bufmgr;
983 bo_gem->name = name;
984 bo_gem->validate_index = -1;
985 bo_gem->gem_handle = open_arg.handle;
986 bo_gem->bo.handle = open_arg.handle;
987 bo_gem->global_name = handle;
988 bo_gem->reusable = false;
989
990 HASH_ADD(handle_hh, bufmgr->handle_table,
991 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
992 HASH_ADD(name_hh, bufmgr->name_table,
993 global_name, sizeof(bo_gem->global_name), bo_gem);
994
995 memclear(get_tiling);
996 get_tiling.handle = bo_gem->gem_handle;
997 ret = drmIoctl(bufmgr->fd,
998 DRM_IOCTL_I915_GEM_GET_TILING,
999 &get_tiling);
1000 if (ret != 0)
1001 goto err_unref;
1002
1003 bo_gem->tiling_mode = get_tiling.tiling_mode;
1004 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1005 /* XXX stride is unknown */
1006 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
1007 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1008
1009 out:
1010 pthread_mutex_unlock(&bufmgr->lock);
1011 return &bo_gem->bo;
1012
1013 err_unref:
1014 drm_bacon_gem_bo_free(&bo_gem->bo);
1015 pthread_mutex_unlock(&bufmgr->lock);
1016 return NULL;
1017 }
1018
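/**
 * Release any CPU, WC and GTT mappings of @bo, remove it from the handle
 * and name tables, and close the underlying GEM handle.
 */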
1019 static void
1020 drm_bacon_gem_bo_free(drm_bacon_bo *bo)
1021 {
1022 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1023 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1024 struct drm_gem_close close;
1025 int ret;
1026
1027 list_del(&bo_gem->vma_list);
1028 if (bo_gem->mem_virtual) {
1029 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1030 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1031 bufmgr->vma_count--;
1032 }
1033 if (bo_gem->wc_virtual) {
1034 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0));
1035 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1036 bufmgr->vma_count--;
1037 }
1038 if (bo_gem->gtt_virtual) {
1039 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1040 bufmgr->vma_count--;
1041 }
1042
1043 if (bo_gem->global_name)
1044 HASH_DELETE(name_hh, bufmgr->name_table, bo_gem);
1045 HASH_DELETE(handle_hh, bufmgr->handle_table, bo_gem);
1046
1047 /* Close this object */
1048 memclear(close);
1049 close.handle = bo_gem->gem_handle;
1050 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
1051 if (ret != 0) {
1052 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1053 bo_gem->gem_handle, bo_gem->name, strerror(errno));
1054 }
1055 free(bo);
1056 }
1057
1058 static void
1059 drm_bacon_gem_bo_mark_mmaps_incoherent(drm_bacon_bo *bo)
1060 {
1061 #if HAVE_VALGRIND
1062 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1063
1064 if (bo_gem->mem_virtual)
1065 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1066
1067 if (bo_gem->wc_virtual)
1068 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size);
1069
1070 if (bo_gem->gtt_virtual)
1071 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1072 #endif
1073 }
1074
1075 /** Frees all cached buffers significantly older than @time. */
1076 static void
1077 drm_bacon_gem_cleanup_bo_cache(drm_bacon_bufmgr *bufmgr, time_t time)
1078 {
1079 int i;
1080
1081 if (bufmgr->time == time)
1082 return;
1083
1084 for (i = 0; i < bufmgr->num_buckets; i++) {
1085 struct drm_bacon_gem_bo_bucket *bucket =
1086 &bufmgr->cache_bucket[i];
1087
1088 while (!list_empty(&bucket->head)) {
1089 drm_bacon_bo_gem *bo_gem;
1090
1091 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1092 bucket->head.next, head);
1093 if (time - bo_gem->free_time <= 1)
1094 break;
1095
1096 list_del(&bo_gem->head);
1097
1098 drm_bacon_gem_bo_free(&bo_gem->bo);
1099 }
1100 }
1101
1102 bufmgr->time = time;
1103 }
1104
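/**
 * Unmap cached virtual mappings until the cached count drops below the
 * configured limit, leaving headroom for the currently open mappings.
 */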
1105 static void drm_bacon_gem_bo_purge_vma_cache(drm_bacon_bufmgr *bufmgr)
1106 {
1107 int limit;
1108
1109 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1110 bufmgr->vma_count, bufmgr->vma_open, bufmgr->vma_max);
1111
1112 if (bufmgr->vma_max < 0)
1113 return;
1114
1115 /* We may need to evict a few entries in order to create new mmaps */
1116 limit = bufmgr->vma_max - 2*bufmgr->vma_open;
1117 if (limit < 0)
1118 limit = 0;
1119
1120 while (bufmgr->vma_count > limit) {
1121 drm_bacon_bo_gem *bo_gem;
1122
1123 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1124 bufmgr->vma_cache.next,
1125 vma_list);
1126 assert(bo_gem->map_count == 0);
1127 list_delinit(&bo_gem->vma_list);
1128
1129 if (bo_gem->mem_virtual) {
1130 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1131 bo_gem->mem_virtual = NULL;
1132 bufmgr->vma_count--;
1133 }
1134 if (bo_gem->wc_virtual) {
1135 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size);
1136 bo_gem->wc_virtual = NULL;
1137 bufmgr->vma_count--;
1138 }
1139 if (bo_gem->gtt_virtual) {
1140 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1141 bo_gem->gtt_virtual = NULL;
1142 bufmgr->vma_count--;
1143 }
1144 }
1145 }
1146
1147 static void drm_bacon_gem_bo_close_vma(drm_bacon_bufmgr *bufmgr,
1148 drm_bacon_bo_gem *bo_gem)
1149 {
1150 bufmgr->vma_open--;
1151 list_addtail(&bo_gem->vma_list, &bufmgr->vma_cache);
1152 if (bo_gem->mem_virtual)
1153 bufmgr->vma_count++;
1154 if (bo_gem->wc_virtual)
1155 bufmgr->vma_count++;
1156 if (bo_gem->gtt_virtual)
1157 bufmgr->vma_count++;
1158 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
1159 }
1160
1161 static void drm_bacon_gem_bo_open_vma(drm_bacon_bufmgr *bufmgr,
1162 drm_bacon_bo_gem *bo_gem)
1163 {
1164 bufmgr->vma_open++;
1165 list_del(&bo_gem->vma_list);
1166 if (bo_gem->mem_virtual)
1167 bufmgr->vma_count--;
1168 if (bo_gem->wc_virtual)
1169 bufmgr->vma_count--;
1170 if (bo_gem->gtt_virtual)
1171 bufmgr->vma_count--;
1172 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
1173 }
1174
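/**
 * Tear down @bo once its last reference is gone: drop references on all
 * relocation and softpin targets, free the relocation bookkeeping, and
 * either return the buffer to its cache bucket or free it outright.
 */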
1175 static void
1176 drm_bacon_gem_bo_unreference_final(drm_bacon_bo *bo, time_t time)
1177 {
1178 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1179 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1180 struct drm_bacon_gem_bo_bucket *bucket;
1181 int i;
1182
1183 /* Unreference all the target buffers */
1184 for (i = 0; i < bo_gem->reloc_count; i++) {
1185 if (bo_gem->reloc_target_info[i].bo != bo) {
1186 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->
1187 reloc_target_info[i].bo,
1188 time);
1189 }
1190 }
1191 for (i = 0; i < bo_gem->softpin_target_count; i++)
1192 drm_bacon_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
1193 time);
1194 bo_gem->kflags = 0;
1195 bo_gem->reloc_count = 0;
1196 bo_gem->used_as_reloc_target = false;
1197 bo_gem->softpin_target_count = 0;
1198
1199 DBG("bo_unreference final: %d (%s)\n",
1200 bo_gem->gem_handle, bo_gem->name);
1201
1202 /* release memory associated with this object */
1203 if (bo_gem->reloc_target_info) {
1204 free(bo_gem->reloc_target_info);
1205 bo_gem->reloc_target_info = NULL;
1206 }
1207 if (bo_gem->relocs) {
1208 free(bo_gem->relocs);
1209 bo_gem->relocs = NULL;
1210 }
1211 if (bo_gem->softpin_target) {
1212 free(bo_gem->softpin_target);
1213 bo_gem->softpin_target = NULL;
1214 bo_gem->softpin_target_size = 0;
1215 }
1216
1217 /* Clear any left-over mappings */
1218 if (bo_gem->map_count) {
1219 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1220 bo_gem->map_count = 0;
1221 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1222 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1223 }
1224
1225 bucket = drm_bacon_gem_bo_bucket_for_size(bufmgr, bo->size);
1226 /* Put the buffer into our internal cache for reuse if we can. */
1227 if (bufmgr->bo_reuse && bo_gem->reusable && bucket != NULL &&
1228 drm_bacon_gem_bo_madvise_internal(bufmgr, bo_gem,
1229 I915_MADV_DONTNEED)) {
1230 bo_gem->free_time = time;
1231
1232 bo_gem->name = NULL;
1233 bo_gem->validate_index = -1;
1234
1235 list_addtail(&bo_gem->head, &bucket->head);
1236 } else {
1237 drm_bacon_gem_bo_free(bo);
1238 }
1239 }
1240
1241 static void drm_bacon_gem_bo_unreference_locked_timed(drm_bacon_bo *bo,
1242 time_t time)
1243 {
1244 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1245
1246 assert(p_atomic_read(&bo_gem->refcount) > 0);
1247 if (p_atomic_dec_zero(&bo_gem->refcount))
1248 drm_bacon_gem_bo_unreference_final(bo, time);
1249 }
1250
1251 void
1252 drm_bacon_bo_unreference(drm_bacon_bo *bo)
1253 {
1254 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1255
1256 if (bo == NULL)
1257 return;
1258
1259 assert(p_atomic_read(&bo_gem->refcount) > 0);
1260
1261 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1262 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1263 struct timespec time;
1264
1265 clock_gettime(CLOCK_MONOTONIC, &time);
1266
1267 pthread_mutex_lock(&bufmgr->lock);
1268
1269 if (p_atomic_dec_zero(&bo_gem->refcount)) {
1270 drm_bacon_gem_bo_unreference_final(bo, time.tv_sec);
1271 drm_bacon_gem_cleanup_bo_cache(bufmgr, time.tv_sec);
1272 }
1273
1274 pthread_mutex_unlock(&bufmgr->lock);
1275 }
1276 }
1277
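/**
 * CPU-map @bo and move it to the CPU domain. @write_enable flags the
 * intent to write so a SW_FINISH can be issued on unmap; userptr objects
 * simply return the user-provided address.
 */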
1278 int
1279 drm_bacon_bo_map(drm_bacon_bo *bo, int write_enable)
1280 {
1281 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1282 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1283 struct drm_i915_gem_set_domain set_domain;
1284 int ret;
1285
1286 if (bo_gem->is_userptr) {
1287 /* Return the same user ptr */
1288 bo->virtual = bo_gem->user_virtual;
1289 return 0;
1290 }
1291
1292 pthread_mutex_lock(&bufmgr->lock);
1293
1294 if (bo_gem->map_count++ == 0)
1295 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1296
1297 if (!bo_gem->mem_virtual) {
1298 struct drm_i915_gem_mmap mmap_arg;
1299
1300 DBG("bo_map: %d (%s), map_count=%d\n",
1301 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1302
1303 memclear(mmap_arg);
1304 mmap_arg.handle = bo_gem->gem_handle;
1305 mmap_arg.size = bo->size;
1306 ret = drmIoctl(bufmgr->fd,
1307 DRM_IOCTL_I915_GEM_MMAP,
1308 &mmap_arg);
1309 if (ret != 0) {
1310 ret = -errno;
1311 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1312 __FILE__, __LINE__, bo_gem->gem_handle,
1313 bo_gem->name, strerror(errno));
1314 if (--bo_gem->map_count == 0)
1315 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1316 pthread_mutex_unlock(&bufmgr->lock);
1317 return ret;
1318 }
1319 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1320 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1321 }
1322 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1323 bo_gem->mem_virtual);
1324 bo->virtual = bo_gem->mem_virtual;
1325
1326 memclear(set_domain);
1327 set_domain.handle = bo_gem->gem_handle;
1328 set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1329 if (write_enable)
1330 set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1331 else
1332 set_domain.write_domain = 0;
1333 ret = drmIoctl(bufmgr->fd,
1334 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1335 &set_domain);
1336 if (ret != 0) {
1337 DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1338 __FILE__, __LINE__, bo_gem->gem_handle,
1339 strerror(errno));
1340 }
1341
1342 if (write_enable)
1343 bo_gem->mapped_cpu_write = true;
1344
1345 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1346 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1347 pthread_mutex_unlock(&bufmgr->lock);
1348
1349 return 0;
1350 }
1351
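/**
 * Create (or reuse) a GTT mapping of @bo using the fake-offset mmap path.
 * The caller holds the bufmgr lock and is responsible for any domain
 * management.
 */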
1352 static int
1353 map_gtt(drm_bacon_bo *bo)
1354 {
1355 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1356 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1357 int ret;
1358
1359 if (bo_gem->is_userptr)
1360 return -EINVAL;
1361
1362 if (bo_gem->map_count++ == 0)
1363 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
1364
1365 /* Get a mapping of the buffer if we haven't before. */
1366 if (bo_gem->gtt_virtual == NULL) {
1367 struct drm_i915_gem_mmap_gtt mmap_arg;
1368
1369 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1370 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1371
1372 memclear(mmap_arg);
1373 mmap_arg.handle = bo_gem->gem_handle;
1374
1375 /* Get the fake offset back... */
1376 ret = drmIoctl(bufmgr->fd,
1377 DRM_IOCTL_I915_GEM_MMAP_GTT,
1378 &mmap_arg);
1379 if (ret != 0) {
1380 ret = -errno;
1381 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1382 __FILE__, __LINE__,
1383 bo_gem->gem_handle, bo_gem->name,
1384 strerror(errno));
1385 if (--bo_gem->map_count == 0)
1386 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1387 return ret;
1388 }
1389
1390 /* and mmap it */
1391 bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1392 MAP_SHARED, bufmgr->fd,
1393 mmap_arg.offset);
1394 if (bo_gem->gtt_virtual == MAP_FAILED) {
1395 bo_gem->gtt_virtual = NULL;
1396 ret = -errno;
1397 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1398 __FILE__, __LINE__,
1399 bo_gem->gem_handle, bo_gem->name,
1400 strerror(errno));
1401 if (--bo_gem->map_count == 0)
1402 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1403 return ret;
1404 }
1405 }
1406
1407 bo->virtual = bo_gem->gtt_virtual;
1408
1409 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1410 bo_gem->gtt_virtual);
1411
1412 return 0;
1413 }
1414
1415 int
1416 drm_bacon_gem_bo_map_gtt(drm_bacon_bo *bo)
1417 {
1418 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1419 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1420 struct drm_i915_gem_set_domain set_domain;
1421 int ret;
1422
1423 pthread_mutex_lock(&bufmgr->lock);
1424
1425 ret = map_gtt(bo);
1426 if (ret) {
1427 pthread_mutex_unlock(&bufmgr->lock);
1428 return ret;
1429 }
1430
1431 /* Now move it to the GTT domain so that the GPU and CPU
1432 * caches are flushed and the GPU isn't actively using the
1433 * buffer.
1434 *
1435 * The pagefault handler does this domain change for us when
1436 * it has unbound the BO from the GTT, but it's up to us to
1437 * tell it when we're about to use things if we had done
1438 * rendering and it still happens to be bound to the GTT.
1439 */
1440 memclear(set_domain);
1441 set_domain.handle = bo_gem->gem_handle;
1442 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1443 set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1444 ret = drmIoctl(bufmgr->fd,
1445 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1446 &set_domain);
1447 if (ret != 0) {
1448 DBG("%s:%d: Error setting domain %d: %s\n",
1449 __FILE__, __LINE__, bo_gem->gem_handle,
1450 strerror(errno));
1451 }
1452
1453 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1454 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1455 pthread_mutex_unlock(&bufmgr->lock);
1456
1457 return 0;
1458 }
1459
1460 /**
1461 * Performs a mapping of the buffer object like the normal GTT
1462 * mapping, but avoids waiting for the GPU to be done reading from or
1463 * rendering to the buffer.
1464 *
1465 * This is used in the implementation of GL_ARB_map_buffer_range: The
1466 * user asks to create a buffer, then does a mapping, fills some
1467 * space, runs a drawing command, then asks to map it again without
1468 * synchronizing because it guarantees that it won't write over the
1469 * data that the GPU is busy using (or, more specifically, that if it
1470 * does write over the data, it acknowledges that rendering is
1471 * undefined).
1472 */
1473
1474 int
1475 drm_bacon_gem_bo_map_unsynchronized(drm_bacon_bo *bo)
1476 {
1477 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1478 #ifdef HAVE_VALGRIND
1479 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1480 #endif
1481 int ret;
1482
1483 /* If the CPU cache isn't coherent with the GTT, then use a
1484 * regular synchronized mapping. The problem is that we don't
1485 * track where the buffer was last used on the CPU side in
1486 * terms of drm_bacon_bo_map vs drm_bacon_gem_bo_map_gtt, so
1487 * we would potentially corrupt the buffer even when the user
1488 * does reasonable things.
1489 */
1490 if (!bufmgr->has_llc)
1491 return drm_bacon_gem_bo_map_gtt(bo);
1492
1493 pthread_mutex_lock(&bufmgr->lock);
1494
1495 ret = map_gtt(bo);
1496 if (ret == 0) {
1497 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1498 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1499 }
1500
1501 pthread_mutex_unlock(&bufmgr->lock);
1502
1503 return ret;
1504 }
1505
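/**
 * Drop one mapping reference on @bo, issuing SW_FINISH if the buffer was
 * mapped for CPU writes and returning its vma to the cache once the map
 * count drops to zero.
 */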
1506 int
1507 drm_bacon_bo_unmap(drm_bacon_bo *bo)
1508 {
1509 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1510 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1511 int ret = 0;
1512
1513 if (bo == NULL)
1514 return 0;
1515
1516 if (bo_gem->is_userptr)
1517 return 0;
1518
1519 pthread_mutex_lock(&bufmgr->lock);
1520
1521 if (bo_gem->map_count <= 0) {
1522 DBG("attempted to unmap an unmapped bo\n");
1523 pthread_mutex_unlock(&bufmgr->lock);
1524 /* Preserve the old behaviour of just treating this as a
1525 * no-op rather than reporting the error.
1526 */
1527 return 0;
1528 }
1529
1530 if (bo_gem->mapped_cpu_write) {
1531 struct drm_i915_gem_sw_finish sw_finish;
1532
1533 /* Cause a flush to happen if the buffer's pinned for
1534 * scanout, so the results show up in a timely manner.
1535 * Unlike GTT set domains, this only does work if the
1536 * buffer should be scanout-related.
1537 */
1538 memclear(sw_finish);
1539 sw_finish.handle = bo_gem->gem_handle;
1540 ret = drmIoctl(bufmgr->fd,
1541 DRM_IOCTL_I915_GEM_SW_FINISH,
1542 &sw_finish);
1543 ret = ret == -1 ? -errno : 0;
1544
1545 bo_gem->mapped_cpu_write = false;
1546 }
1547
1548 	/* We need to unmap after every invocation, as we cannot keep
1549 	 * an open vma for every bo: that would exhaust the system
1550 	 * limits and cause later failures.
1551 */
1552 if (--bo_gem->map_count == 0) {
1553 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
1554 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1555 bo->virtual = NULL;
1556 }
1557 pthread_mutex_unlock(&bufmgr->lock);
1558
1559 return ret;
1560 }
1561
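/** Write @size bytes from @data into @bo at @offset using the pwrite ioctl. */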
1562 int
1563 drm_bacon_bo_subdata(drm_bacon_bo *bo, unsigned long offset,
1564 unsigned long size, const void *data)
1565 {
1566 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1567 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1568 struct drm_i915_gem_pwrite pwrite;
1569 int ret;
1570
1571 if (bo_gem->is_userptr)
1572 return -EINVAL;
1573
1574 memclear(pwrite);
1575 pwrite.handle = bo_gem->gem_handle;
1576 pwrite.offset = offset;
1577 pwrite.size = size;
1578 pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1579 ret = drmIoctl(bufmgr->fd,
1580 DRM_IOCTL_I915_GEM_PWRITE,
1581 &pwrite);
1582 if (ret != 0) {
1583 ret = -errno;
1584 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1585 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1586 (int)size, strerror(errno));
1587 }
1588
1589 return ret;
1590 }
1591
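/** Read @size bytes from @bo at @offset into @data using the pread ioctl. */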
1592 int
1593 drm_bacon_bo_get_subdata(drm_bacon_bo *bo, unsigned long offset,
1594 unsigned long size, void *data)
1595 {
1596 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1597 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1598 struct drm_i915_gem_pread pread;
1599 int ret;
1600
1601 if (bo_gem->is_userptr)
1602 return -EINVAL;
1603
1604 memclear(pread);
1605 pread.handle = bo_gem->gem_handle;
1606 pread.offset = offset;
1607 pread.size = size;
1608 pread.data_ptr = (uint64_t) (uintptr_t) data;
1609 ret = drmIoctl(bufmgr->fd,
1610 DRM_IOCTL_I915_GEM_PREAD,
1611 &pread);
1612 if (ret != 0) {
1613 ret = -errno;
1614 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1615 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1616 (int)size, strerror(errno));
1617 }
1618
1619 return ret;
1620 }
1621
1622 /** Waits for all GPU rendering with the object to have completed. */
1623 void
1624 drm_bacon_bo_wait_rendering(drm_bacon_bo *bo)
1625 {
1626 drm_bacon_gem_bo_start_gtt_access(bo, 1);
1627 }
1628
1629 /**
1630 * Waits on a BO for the given amount of time.
1631 *
1632 * @bo: buffer object to wait for
1633 * @timeout_ns: amount of time to wait in nanoseconds.
1634 * If value is less than 0, an infinite wait will occur.
1635 *
1636 	 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1637 	 * object has completed within the allotted time. Otherwise a negative return
1638 	 * value describes the error. Of particular interest is -ETIME when the wait
1639 	 * has failed to yield the desired result.
1640 	 *
1641 	 * Similar to drm_bacon_bo_wait_rendering except a timeout parameter allows
1642 	 * the operation to give up after a certain amount of time. Another subtle
1643 	 * difference is that the internal locking semantics differ (this variant does
1644 	 * not hold the lock for the duration of the wait), which makes the wait
1645 	 * subject to a larger userspace race window.
1646 	 *
1647 	 * The implementation shall wait until the object is no longer actively
1648 	 * referenced within a batch buffer at the time of the call. The wait does
1649 	 * not guarantee that the buffer won't be re-issued via another thread or a
1650 	 * flinked handle. Userspace must make sure this race does not occur if such
1651 	 * precision is important.
1652 	 *
1653 	 * Note that some kernels have broken the promise of an infinite wait for
1654 	 * negative values; upgrade to the latest stable kernel if this is the case.
1655 */
1656 int
1657 drm_bacon_gem_bo_wait(drm_bacon_bo *bo, int64_t timeout_ns)
1658 {
1659 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1660 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1661 struct drm_i915_gem_wait wait;
1662 int ret;
1663
1664 memclear(wait);
1665 wait.bo_handle = bo_gem->gem_handle;
1666 wait.timeout_ns = timeout_ns;
1667 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1668 if (ret == -1)
1669 return -errno;
1670
1671 return ret;
1672 }
1673
1674 /**
1675 * Sets the object to the GTT read and possibly write domain, used by the X
1676 * 2D driver in the absence of kernel support to do drm_bacon_gem_bo_map_gtt().
1677 *
1678 * In combination with drm_bacon_gem_bo_pin() and manual fence management, we
1679 * can do tiled pixmaps this way.
1680 */
1681 void
1682 drm_bacon_gem_bo_start_gtt_access(drm_bacon_bo *bo, int write_enable)
1683 {
1684 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1685 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1686 struct drm_i915_gem_set_domain set_domain;
1687 int ret;
1688
1689 memclear(set_domain);
1690 set_domain.handle = bo_gem->gem_handle;
1691 set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1692 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1693 ret = drmIoctl(bufmgr->fd,
1694 DRM_IOCTL_I915_GEM_SET_DOMAIN,
1695 &set_domain);
1696 if (ret != 0) {
1697 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1698 __FILE__, __LINE__, bo_gem->gem_handle,
1699 set_domain.read_domains, set_domain.write_domain,
1700 strerror(errno));
1701 }
1702 }
1703
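/**
 * Tear down the buffer manager: free the exec arrays, empty every cache
 * bucket, and release the userptr test object if one was created.
 */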
1704 static void
1705 drm_bacon_bufmgr_gem_destroy(drm_bacon_bufmgr *bufmgr)
1706 {
1707 struct drm_gem_close close_bo;
1708 int i, ret;
1709
1710 free(bufmgr->exec2_objects);
1711 free(bufmgr->exec_bos);
1712
1713 pthread_mutex_destroy(&bufmgr->lock);
1714
1715 /* Free any cached buffer objects we were going to reuse */
1716 for (i = 0; i < bufmgr->num_buckets; i++) {
1717 struct drm_bacon_gem_bo_bucket *bucket =
1718 &bufmgr->cache_bucket[i];
1719 drm_bacon_bo_gem *bo_gem;
1720
1721 while (!list_empty(&bucket->head)) {
1722 bo_gem = LIST_ENTRY(drm_bacon_bo_gem,
1723 bucket->head.next, head);
1724 list_del(&bo_gem->head);
1725
1726 drm_bacon_gem_bo_free(&bo_gem->bo);
1727 }
1728 }
1729
1730 /* Release userptr bo kept hanging around for optimisation. */
1731 if (bufmgr->userptr_active.ptr) {
1732 memclear(close_bo);
1733 close_bo.handle = bufmgr->userptr_active.handle;
1734 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
1735 free(bufmgr->userptr_active.ptr);
1736 if (ret)
1737 fprintf(stderr,
1738 "Failed to release test userptr object! (%d) "
1739 "i915 kernel driver may not be sane!\n", errno);
1740 }
1741
1742 free(bufmgr);
1743 }
1744
1745 /**
1746 * Adds the target buffer to the validation list and adds the relocation
1747 * to the reloc_buffer's relocation list.
1748 *
1749 * The relocation entry at the given offset must already contain the
1750 * precomputed relocation value, because the kernel will optimize out
1751 * the relocation entry write when the buffer hasn't moved from the
1752 * last known offset in target_bo.
1753 */
1754 static int
1755 do_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1756 drm_bacon_bo *target_bo, uint32_t target_offset,
1757 uint32_t read_domains, uint32_t write_domain)
1758 {
1759 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1760 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1761 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1762
1763 if (bo_gem->has_error)
1764 return -ENOMEM;
1765
1766 if (target_bo_gem->has_error) {
1767 bo_gem->has_error = true;
1768 return -ENOMEM;
1769 }
1770
1771 /* Create a new relocation list if needed */
1772 if (bo_gem->relocs == NULL && drm_bacon_setup_reloc_list(bo))
1773 return -ENOMEM;
1774
1775 /* Check overflow */
1776 assert(bo_gem->reloc_count < bufmgr->max_relocs);
1777
1778 /* Check args */
1779 assert(offset <= bo->size - 4);
1780 assert((write_domain & (write_domain - 1)) == 0);
1781
1782 /* Make sure that we're not adding a reloc to something whose size has
1783 * already been accounted for.
1784 */
1785 assert(!bo_gem->used_as_reloc_target);
1786 if (target_bo_gem != bo_gem) {
1787 target_bo_gem->used_as_reloc_target = true;
1788 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1789 }
1790
1791 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1792 if (target_bo != bo)
1793 drm_bacon_bo_reference(target_bo);
1794
1795 bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1796 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1797 bo_gem->relocs[bo_gem->reloc_count].target_handle =
1798 target_bo_gem->gem_handle;
1799 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1800 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1801 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
1802 bo_gem->reloc_count++;
1803
1804 return 0;
1805 }
1806
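/**
 * Record @target_bo as a softpin dependency of @bo so that it is added to
 * the validation list at exec time. The target must already be softpinned
 * (EXEC_OBJECT_PINNED) and distinct from @bo.
 */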
1807 static int
1808 drm_bacon_gem_bo_add_softpin_target(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
1809 {
1810 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1811 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1812 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
1813
1814 if (bo_gem->has_error)
1815 return -ENOMEM;
1816
1817 if (target_bo_gem->has_error) {
1818 bo_gem->has_error = true;
1819 return -ENOMEM;
1820 }
1821
1822 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED))
1823 return -EINVAL;
1824 if (target_bo_gem == bo_gem)
1825 return -EINVAL;
1826
1827 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
1828 int new_size = bo_gem->softpin_target_size * 2;
1829 if (new_size == 0)
1830 new_size = bufmgr->max_relocs;
1831
1832 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
1833 sizeof(drm_bacon_bo *));
1834 if (!bo_gem->softpin_target)
1835 return -ENOMEM;
1836
1837 bo_gem->softpin_target_size = new_size;
1838 }
1839 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
1840 drm_bacon_bo_reference(target_bo);
1841 bo_gem->softpin_target_count++;
1842
1843 return 0;
1844 }
1845
1846 int
1847 drm_bacon_bo_emit_reloc(drm_bacon_bo *bo, uint32_t offset,
1848 drm_bacon_bo *target_bo, uint32_t target_offset,
1849 uint32_t read_domains, uint32_t write_domain)
1850 {
1851 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *)target_bo;
1852
1853 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED)
1854 return drm_bacon_gem_bo_add_softpin_target(bo, target_bo);
1855 else
1856 return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1857 read_domains, write_domain);
1858 }
1859
1860 int
1861 drm_bacon_gem_bo_get_reloc_count(drm_bacon_bo *bo)
1862 {
1863 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1864
1865 return bo_gem->reloc_count;
1866 }
1867
1868 /**
1869 * Removes existing relocation entries in the BO after "start".
1870 *
1871 * This allows a user to avoid a two-step process for state setup with
1872 * counting up all the buffer objects and doing a
1873 * drm_bacon_bufmgr_check_aperture_space() before emitting any of the
1874 * relocations for the state setup. Instead, save the state of the
1875 	 * batchbuffer including drm_bacon_gem_bo_get_reloc_count(), emit all the
1876 * state, and then check if it still fits in the aperture.
1877 *
1878 * Any further drm_bacon_bufmgr_check_aperture_space() queries
1879 * involving this buffer in the tree are undefined after this call.
1880 *
1881 * This also removes all softpinned targets being referenced by the BO.
1882 */
1883 void
1884 drm_bacon_gem_bo_clear_relocs(drm_bacon_bo *bo, int start)
1885 {
1886 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1887 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
1888 int i;
1889 struct timespec time;
1890
1891 clock_gettime(CLOCK_MONOTONIC, &time);
1892
1893 assert(bo_gem->reloc_count >= start);
1894
1895 /* Unreference the cleared target buffers */
1896 pthread_mutex_lock(&bufmgr->lock);
1897
1898 for (i = start; i < bo_gem->reloc_count; i++) {
1899 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->reloc_target_info[i].bo;
1900 if (&target_bo_gem->bo != bo) {
1901 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1902 time.tv_sec);
1903 }
1904 }
1905 bo_gem->reloc_count = start;
1906
1907 for (i = 0; i < bo_gem->softpin_target_count; i++) {
1908 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) bo_gem->softpin_target[i];
1909 drm_bacon_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
1910 }
1911 bo_gem->softpin_target_count = 0;
1912
1913 pthread_mutex_unlock(&bufmgr->lock);
1914
1915 }
1916
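/**
 * Walk the relocation and softpin trees of @bo depth-first, adding every
 * target buffer to the validation list.
 */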
1917 static void
1918 drm_bacon_gem_bo_process_reloc2(drm_bacon_bo *bo)
1919 {
1920 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1921 int i;
1922
1923 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
1924 return;
1925
1926 for (i = 0; i < bo_gem->reloc_count; i++) {
1927 drm_bacon_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1928
1929 if (target_bo == bo)
1930 continue;
1931
1932 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1933
1934 /* Continue walking the tree depth-first. */
1935 drm_bacon_gem_bo_process_reloc2(target_bo);
1936
1937 /* Add the target to the validate list */
1938 drm_bacon_add_validate_buffer2(target_bo);
1939 }
1940
1941 for (i = 0; i < bo_gem->softpin_target_count; i++) {
1942 drm_bacon_bo *target_bo = bo_gem->softpin_target[i];
1943
1944 if (target_bo == bo)
1945 continue;
1946
1947 drm_bacon_gem_bo_mark_mmaps_incoherent(bo);
1948 drm_bacon_gem_bo_process_reloc2(target_bo);
1949 drm_bacon_add_validate_buffer2(target_bo);
1950 }
1951 }
1952
1953 static void
1954 drm_bacon_update_buffer_offsets2 (drm_bacon_bufmgr *bufmgr)
1955 {
1956 int i;
1957
1958 for (i = 0; i < bufmgr->exec_count; i++) {
1959 drm_bacon_bo *bo = bufmgr->exec_bos[i];
1960 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *)bo;
1961
1962 /* Update the buffer offset */
1963 if (bufmgr->exec2_objects[i].offset != bo->offset64) {
1964			/* If we're seeing a softpinned object here, it means that the kernel
1965			 * has relocated our object, which indicates a programming error.
1966 */
1967 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED));
1968 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n",
1969 bo_gem->gem_handle, bo_gem->name,
1970 upper_32_bits(bo->offset64),
1971 lower_32_bits(bo->offset64),
1972 upper_32_bits(bufmgr->exec2_objects[i].offset),
1973 lower_32_bits(bufmgr->exec2_objects[i].offset));
1974 bo->offset64 = bufmgr->exec2_objects[i].offset;
1975 }
1976 }
1977 }
1978
1979 static int
1980 do_exec2(drm_bacon_bo *bo, int used, drm_bacon_context *ctx,
1981 int in_fence, int *out_fence,
1982 unsigned int flags)
1983 {
1984 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
1985 struct drm_i915_gem_execbuffer2 execbuf;
1986 int ret = 0;
1987 int i;
1988
1989 if (to_bo_gem(bo)->has_error)
1990 return -ENOMEM;
1991
1992 switch (flags & 0x7) {
1993 default:
1994 return -EINVAL;
1995 case I915_EXEC_BLT:
1996 if (!bufmgr->has_blt)
1997 return -EINVAL;
1998 break;
1999 case I915_EXEC_BSD:
2000 if (!bufmgr->has_bsd)
2001 return -EINVAL;
2002 break;
2003 case I915_EXEC_VEBOX:
2004 if (!bufmgr->has_vebox)
2005 return -EINVAL;
2006 break;
2007 case I915_EXEC_RENDER:
2008 case I915_EXEC_DEFAULT:
2009 break;
2010 }
2011
2012 pthread_mutex_lock(&bufmgr->lock);
2013 /* Update indices and set up the validate list. */
2014 drm_bacon_gem_bo_process_reloc2(bo);
2015
2016 /* Add the batch buffer to the validation list. There are no relocations
2017 * pointing to it.
2018 */
2019 drm_bacon_add_validate_buffer2(bo);
2020
2021 memclear(execbuf);
2022 execbuf.buffers_ptr = (uintptr_t)bufmgr->exec2_objects;
2023 execbuf.buffer_count = bufmgr->exec_count;
2024 execbuf.batch_start_offset = 0;
2025 execbuf.batch_len = used;
2026 execbuf.cliprects_ptr = 0;
2027 execbuf.num_cliprects = 0;
2028 execbuf.DR1 = 0;
2029 execbuf.DR4 = 0;
2030 execbuf.flags = flags;
2031 if (ctx == NULL)
2032 i915_execbuffer2_set_context_id(execbuf, 0);
2033 else
2034 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2035 execbuf.rsvd2 = 0;
2036 if (in_fence != -1) {
2037 execbuf.rsvd2 = in_fence;
2038 execbuf.flags |= I915_EXEC_FENCE_IN;
2039 }
2040 if (out_fence != NULL) {
2041 *out_fence = -1;
2042 execbuf.flags |= I915_EXEC_FENCE_OUT;
2043 }
2044
2045 if (bufmgr->no_exec)
2046 goto skip_execution;
2047
2048 ret = drmIoctl(bufmgr->fd,
2049 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR,
2050 &execbuf);
2051 if (ret != 0) {
2052 ret = -errno;
2053 if (ret == -ENOSPC) {
2054 DBG("Execbuffer fails to pin. "
2055 "Estimate: %u. Actual: %u. Available: %u\n",
2056 drm_bacon_gem_estimate_batch_space(bufmgr->exec_bos,
2057 bufmgr->exec_count),
2058 drm_bacon_gem_compute_batch_space(bufmgr->exec_bos,
2059 bufmgr->exec_count),
2060 (unsigned int) bufmgr->gtt_size);
2061 }
2062 }
2063 drm_bacon_update_buffer_offsets2(bufmgr);
2064
2065 if (ret == 0 && out_fence != NULL)
2066 *out_fence = execbuf.rsvd2 >> 32;
2067
2068 skip_execution:
2069 if (INTEL_DEBUG & DEBUG_BUFMGR)
2070 drm_bacon_gem_dump_validation_list(bufmgr);
2071
2072 for (i = 0; i < bufmgr->exec_count; i++) {
2073 drm_bacon_bo_gem *bo_gem = to_bo_gem(bufmgr->exec_bos[i]);
2074
2075 bo_gem->idle = false;
2076
2077 /* Disconnect the buffer from the validate list */
2078 bo_gem->validate_index = -1;
2079 bufmgr->exec_bos[i] = NULL;
2080 }
2081 bufmgr->exec_count = 0;
2082 pthread_mutex_unlock(&bufmgr->lock);
2083
2084 return ret;
2085 }
2086
2087 int
2088 drm_bacon_bo_exec(drm_bacon_bo *bo, int used)
2089 {
2090 return do_exec2(bo, used, NULL, -1, NULL, I915_EXEC_RENDER);
2091 }
2092
2093 int
2094 drm_bacon_bo_mrb_exec(drm_bacon_bo *bo, int used, unsigned int flags)
2095 {
2096 return do_exec2(bo, used, NULL, -1, NULL, flags);
2097 }
2098
2099 int
2100 drm_bacon_gem_bo_context_exec(drm_bacon_bo *bo, drm_bacon_context *ctx,
2101 int used, unsigned int flags)
2102 {
2103 return do_exec2(bo, used, ctx, -1, NULL, flags);
2104 }
2105
2106 int
2107 drm_bacon_gem_bo_fence_exec(drm_bacon_bo *bo,
2108 drm_bacon_context *ctx,
2109 int used,
2110 int in_fence,
2111 int *out_fence,
2112 unsigned int flags)
2113 {
2114 return do_exec2(bo, used, ctx, in_fence, out_fence, flags);
2115 }
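
/*
 * Fencing sketch (illustrative): submit with an incoming sync-file fd to
 * wait on and request a new one for the queued work. in_fd comes from
 * elsewhere (or is -1 for "no wait"); batch_bo, ctx and used are assumed to
 * be set up by the caller.
 *
 *	int out_fd = -1;
 *	int ret = drm_bacon_gem_bo_fence_exec(batch_bo, ctx, used,
 *					      in_fd, &out_fd, I915_EXEC_RENDER);
 *	// On success, out_fd holds a sync-file fd; close() it when done.
 */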
2116
2117 static int
2118 drm_bacon_gem_bo_set_tiling_internal(drm_bacon_bo *bo,
2119 uint32_t tiling_mode,
2120 uint32_t stride)
2121 {
2122 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2123 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2124 struct drm_i915_gem_set_tiling set_tiling;
2125 int ret;
2126
2127 if (bo_gem->global_name == 0 &&
2128 tiling_mode == bo_gem->tiling_mode &&
2129 stride == bo_gem->stride)
2130 return 0;
2131
2132 memset(&set_tiling, 0, sizeof(set_tiling));
2133 do {
2134 /* set_tiling is slightly broken and overwrites the
2135 * input on the error path, so we have to open code
2136		 * drmIoctl.
2137 */
2138 set_tiling.handle = bo_gem->gem_handle;
2139 set_tiling.tiling_mode = tiling_mode;
2140 set_tiling.stride = stride;
2141
2142 ret = ioctl(bufmgr->fd,
2143 DRM_IOCTL_I915_GEM_SET_TILING,
2144 &set_tiling);
2145 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2146 if (ret == -1)
2147 return -errno;
2148
2149 bo_gem->tiling_mode = set_tiling.tiling_mode;
2150 bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2151 bo_gem->stride = set_tiling.stride;
2152 return 0;
2153 }
2154
2155 int
2156 drm_bacon_bo_set_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2157 uint32_t stride)
2158 {
2159 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2160 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2161 int ret;
2162
2163 /* Tiling with userptr surfaces is not supported
2164	 * on all hardware, so refuse it for the time being.
2165 */
2166 if (bo_gem->is_userptr)
2167 return -EINVAL;
2168
2169 /* Linear buffers have no stride. By ensuring that we only ever use
2170 * stride 0 with linear buffers, we simplify our code.
2171 */
2172 if (*tiling_mode == I915_TILING_NONE)
2173 stride = 0;
2174
2175 ret = drm_bacon_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2176 if (ret == 0)
2177 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
2178
2179 *tiling_mode = bo_gem->tiling_mode;
2180 return ret;
2181 }
2182
2183 int
2184 drm_bacon_bo_get_tiling(drm_bacon_bo *bo, uint32_t * tiling_mode,
2185 uint32_t *swizzle_mode)
2186 {
2187 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2188
2189 *tiling_mode = bo_gem->tiling_mode;
2190 *swizzle_mode = bo_gem->swizzle_mode;
2191 return 0;
2192 }
2193
2194 int
2195 drm_bacon_bo_set_softpin_offset(drm_bacon_bo *bo, uint64_t offset)
2196 {
2197 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2198
2199 bo->offset64 = offset;
2200 bo_gem->kflags |= EXEC_OBJECT_PINNED;
2201
2202 return 0;
2203 }
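
/*
 * Softpin sketch (illustrative): pin a buffer at a caller-chosen GTT
 * address (ADDR is hypothetical) so later "relocations" against it only
 * record the dependency; no relocation entry is emitted once
 * EXEC_OBJECT_PINNED is set (see drm_bacon_bo_emit_reloc()).
 *
 *	drm_bacon_bo_set_softpin_offset(target_bo, ADDR);
 *
 *	drm_bacon_bo_emit_reloc(batch_bo, state_offset, target_bo, 0,
 *				I915_GEM_DOMAIN_RENDER, 0);
 */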
2204
2205 drm_bacon_bo *
2206 drm_bacon_bo_gem_create_from_prime(drm_bacon_bufmgr *bufmgr, int prime_fd, int size)
2207 {
2208 int ret;
2209 uint32_t handle;
2210 drm_bacon_bo_gem *bo_gem;
2211 struct drm_i915_gem_get_tiling get_tiling;
2212
2213 pthread_mutex_lock(&bufmgr->lock);
2214 ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
2215 if (ret) {
2216 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno));
2217 pthread_mutex_unlock(&bufmgr->lock);
2218 return NULL;
2219 }
2220
2221 /*
2222 * See if the kernel has already returned this buffer to us. Just as
2223	 * for named buffers, we must not create two BOs pointing at the same
2224	 * kernel object.
2225 */
2226 HASH_FIND(handle_hh, bufmgr->handle_table,
2227 &handle, sizeof(handle), bo_gem);
2228 if (bo_gem) {
2229 drm_bacon_bo_reference(&bo_gem->bo);
2230 goto out;
2231 }
2232
2233 bo_gem = calloc(1, sizeof(*bo_gem));
2234 if (!bo_gem)
2235 goto out;
2236
2237 p_atomic_set(&bo_gem->refcount, 1);
2238 list_inithead(&bo_gem->vma_list);
2239
2240 /* Determine size of bo. The fd-to-handle ioctl really should
2241 * return the size, but it doesn't. If we have kernel 3.12 or
2242 * later, we can lseek on the prime fd to get the size. Older
2243 * kernels will just fail, in which case we fall back to the
2244	 * provided (estimated or guessed) size. */
2245 ret = lseek(prime_fd, 0, SEEK_END);
2246 if (ret != -1)
2247 bo_gem->bo.size = ret;
2248 else
2249 bo_gem->bo.size = size;
2250
2251 bo_gem->bo.handle = handle;
2252 bo_gem->bo.bufmgr = bufmgr;
2253
2254 bo_gem->gem_handle = handle;
2255 HASH_ADD(handle_hh, bufmgr->handle_table,
2256 gem_handle, sizeof(bo_gem->gem_handle), bo_gem);
2257
2258 bo_gem->name = "prime";
2259 bo_gem->validate_index = -1;
2260 bo_gem->used_as_reloc_target = false;
2261 bo_gem->has_error = false;
2262 bo_gem->reusable = false;
2263
2264 memclear(get_tiling);
2265 get_tiling.handle = bo_gem->gem_handle;
2266 if (drmIoctl(bufmgr->fd,
2267 DRM_IOCTL_I915_GEM_GET_TILING,
2268 &get_tiling))
2269 goto err;
2270
2271 bo_gem->tiling_mode = get_tiling.tiling_mode;
2272 bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2273 /* XXX stride is unknown */
2274 drm_bacon_bo_gem_set_in_aperture_size(bufmgr, bo_gem, 0);
2275
2276 out:
2277 pthread_mutex_unlock(&bufmgr->lock);
2278 return &bo_gem->bo;
2279
2280 err:
2281 drm_bacon_gem_bo_free(&bo_gem->bo);
2282 pthread_mutex_unlock(&bufmgr->lock);
2283 return NULL;
2284 }
2285
2286 int
2287 drm_bacon_bo_gem_export_to_prime(drm_bacon_bo *bo, int *prime_fd)
2288 {
2289 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2290 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2291
2292 if (drmPrimeHandleToFD(bufmgr->fd, bo_gem->gem_handle,
2293 DRM_CLOEXEC, prime_fd) != 0)
2294 return -errno;
2295
2296 bo_gem->reusable = false;
2297
2298 return 0;
2299 }
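
/*
 * PRIME sharing sketch (illustrative): export a dma-buf fd and re-import it
 * on another bufmgr (other_bufmgr is hypothetical). The size passed to the
 * import is only a fallback for kernels whose dma-buf fds cannot be
 * lseek()ed.
 *
 *	int fd;
 *	if (drm_bacon_bo_gem_export_to_prime(bo, &fd) == 0) {
 *		drm_bacon_bo *imported =
 *			drm_bacon_bo_gem_create_from_prime(other_bufmgr, fd,
 *							   (int) bo->size);
 *		close(fd);	// the imported handle keeps its own reference
 *	}
 */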
2300
2301 int
2302 drm_bacon_bo_flink(drm_bacon_bo *bo, uint32_t *name)
2303 {
2304 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2305 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2306
2307 if (!bo_gem->global_name) {
2308 struct drm_gem_flink flink;
2309
2310 memclear(flink);
2311 flink.handle = bo_gem->gem_handle;
2312 if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2313 return -errno;
2314
2315 pthread_mutex_lock(&bufmgr->lock);
2316 if (!bo_gem->global_name) {
2317 bo_gem->global_name = flink.name;
2318 bo_gem->reusable = false;
2319
2320 HASH_ADD(name_hh, bufmgr->name_table,
2321 global_name, sizeof(bo_gem->global_name),
2322 bo_gem);
2323 }
2324 pthread_mutex_unlock(&bufmgr->lock);
2325 }
2326
2327 *name = bo_gem->global_name;
2328 return 0;
2329 }
2330
2331 /**
2332 * Enables unlimited caching of buffer objects for reuse.
2333 *
2334 * This is potentially very memory expensive, as the cache at each bucket
2335 * size is only bounded by how many buffers of that size we've managed to have
2336 * in flight at once.
2337 */
2338 void
2339 drm_bacon_bufmgr_gem_enable_reuse(drm_bacon_bufmgr *bufmgr)
2340 {
2341 bufmgr->bo_reuse = true;
2342 }
2343
2344 /**
2345 * Disables implicit synchronisation before executing the bo
2346 *
2347 * This will cause rendering corruption unless you correctly manage explicit
2348 * fences for all rendering involving this buffer - including use by others.
2349 * Disabling the implicit serialisation is only required if that serialisation
2350 * is too coarse (for example, you have split the buffer into many
2351 * non-overlapping regions and are sharing the whole buffer between concurrent
2352 * independent command streams).
2353 *
2354 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC,
2355  * which can be checked using drm_bacon_bufmgr_gem_can_disable_implicit_sync,
2356 * or subsequent execbufs involving the bo will generate EINVAL.
2357 */
2358 void
2359 drm_bacon_gem_bo_disable_implicit_sync(drm_bacon_bo *bo)
2360 {
2361 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2362
2363 bo_gem->kflags |= EXEC_OBJECT_ASYNC;
2364 }
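
/*
 * Explicit-fencing sketch (illustrative): only disable implicit sync when
 * the kernel advertises support, otherwise later execbufs with this bo
 * would fail with EINVAL.
 *
 *	if (drm_bacon_bufmgr_gem_can_disable_implicit_sync(bufmgr))
 *		drm_bacon_gem_bo_disable_implicit_sync(bo);
 */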
2365
2366 /**
2367 * Enables implicit synchronisation before executing the bo
2368 *
2369 * This is the default behaviour of the kernel, to wait upon prior writes
2370 * completing on the object before rendering with it, or to wait for prior
2371 * reads to complete before writing into the object.
2372 * drm_bacon_gem_bo_disable_implicit_sync() can stop this behaviour, telling
2373 * the kernel never to insert a stall before using the object. Then this
2374 * function can be used to restore the implicit sync before subsequent
2375 * rendering.
2376 */
2377 void
2378 drm_bacon_gem_bo_enable_implicit_sync(drm_bacon_bo *bo)
2379 {
2380 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2381
2382 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC;
2383 }
2384
2385 /**
2386 * Query whether the kernel supports disabling of its implicit synchronisation
2387 * before execbuf. See drm_bacon_gem_bo_disable_implicit_sync()
2388 */
2389 int
2390 drm_bacon_bufmgr_gem_can_disable_implicit_sync(drm_bacon_bufmgr *bufmgr)
2391 {
2392 return bufmgr->has_exec_async;
2393 }
2394
2395 /**
2396 * Return the additional aperture space required by the tree of buffer objects
2397 * rooted at bo.
2398 */
2399 static int
2400 drm_bacon_gem_bo_get_aperture_space(drm_bacon_bo *bo)
2401 {
2402 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2403 int i;
2404 int total = 0;
2405
2406 if (bo == NULL || bo_gem->included_in_check_aperture)
2407 return 0;
2408
2409 total += bo->size;
2410 bo_gem->included_in_check_aperture = true;
2411
2412 for (i = 0; i < bo_gem->reloc_count; i++)
2413 total +=
2414 drm_bacon_gem_bo_get_aperture_space(bo_gem->
2415 reloc_target_info[i].bo);
2416
2417 return total;
2418 }
2419
2420 /**
2421 * Clear the flag set by drm_bacon_gem_bo_get_aperture_space() so we're ready
2422 * for the next drm_bacon_bufmgr_check_aperture_space() call.
2423 */
2424 static void
2425 drm_bacon_gem_bo_clear_aperture_space_flag(drm_bacon_bo *bo)
2426 {
2427 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2428 int i;
2429
2430 if (bo == NULL || !bo_gem->included_in_check_aperture)
2431 return;
2432
2433 bo_gem->included_in_check_aperture = false;
2434
2435 for (i = 0; i < bo_gem->reloc_count; i++)
2436 drm_bacon_gem_bo_clear_aperture_space_flag(bo_gem->
2437 reloc_target_info[i].bo);
2438 }
2439
2440 /**
2441 * Return a conservative estimate for the amount of aperture required
2442 * for a collection of buffers. This may double-count some buffers.
2443 */
2444 static unsigned int
2445 drm_bacon_gem_estimate_batch_space(drm_bacon_bo **bo_array, int count)
2446 {
2447 int i;
2448 unsigned int total = 0;
2449
2450 for (i = 0; i < count; i++) {
2451 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo_array[i];
2452 if (bo_gem != NULL)
2453 total += bo_gem->reloc_tree_size;
2454 }
2455 return total;
2456 }
2457
2458 /**
2459 * Return the amount of aperture needed for a collection of buffers.
2460 * This avoids double counting any buffers, at the cost of looking
2461 * at every buffer in the set.
2462 */
2463 static unsigned int
2464 drm_bacon_gem_compute_batch_space(drm_bacon_bo **bo_array, int count)
2465 {
2466 int i;
2467 unsigned int total = 0;
2468
2469 for (i = 0; i < count; i++) {
2470 total += drm_bacon_gem_bo_get_aperture_space(bo_array[i]);
2471 /* For the first buffer object in the array, we get an
2472 * accurate count back for its reloc_tree size (since nothing
2473 * had been flagged as being counted yet). We can save that
2474 * value out as a more conservative reloc_tree_size that
2475 * avoids double-counting target buffers. Since the first
2476 * buffer happens to usually be the batch buffer in our
2477 * callers, this can pull us back from doing the tree
2478 * walk on every new batch emit.
2479 */
2480 if (i == 0) {
2481 drm_bacon_bo_gem *bo_gem =
2482 (drm_bacon_bo_gem *) bo_array[i];
2483 bo_gem->reloc_tree_size = total;
2484 }
2485 }
2486
2487 for (i = 0; i < count; i++)
2488 drm_bacon_gem_bo_clear_aperture_space_flag(bo_array[i]);
2489 return total;
2490 }
2491
2492 /**
2493 * Return -1 if the batchbuffer should be flushed before attempting to
2494 * emit rendering referencing the buffers pointed to by bo_array.
2495 *
2496 * This is required because if we try to emit a batchbuffer with relocations
2497 * to a tree of buffers that won't simultaneously fit in the aperture,
2498 * the rendering will return an error at a point where the software is not
2499 * prepared to recover from it.
2500 *
2501 * However, we also want to emit the batchbuffer significantly before we reach
2502 * the limit, as a series of batchbuffers each of which references buffers
2503 * covering almost all of the aperture means that at each emit we end up
2504 * waiting to evict a buffer from the last rendering, and we get synchronous
2505 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to
2506 * get better parallelism.
2507 */
2508 int
2509 drm_bacon_bufmgr_check_aperture_space(drm_bacon_bo **bo_array, int count)
2510 {
2511 drm_bacon_bufmgr *bufmgr = bo_array[0]->bufmgr;
2512 unsigned int total = 0;
2513 unsigned int threshold = bufmgr->gtt_size * 3 / 4;
2514
2515 total = drm_bacon_gem_estimate_batch_space(bo_array, count);
2516
2517 if (total > threshold)
2518 total = drm_bacon_gem_compute_batch_space(bo_array, count);
2519
2520 if (total > threshold) {
2521 DBG("check_space: overflowed available aperture, "
2522 "%dkb vs %dkb\n",
2523 total / 1024, (int)bufmgr->gtt_size / 1024);
2524 return -ENOSPC;
2525 } else {
2526 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2527 (int)bufmgr->gtt_size / 1024);
2528 return 0;
2529 }
2530 }
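
/*
 * Aperture-check sketch (illustrative): decide whether to flush before a
 * draw that references a new set of buffers; the BOs named here and
 * flush_batch() are hypothetical.
 *
 *	drm_bacon_bo *bos[] = { batch_bo, texture_bo, render_target_bo };
 *
 *	if (drm_bacon_bufmgr_check_aperture_space(bos, ARRAY_SIZE(bos)) != 0)
 *		flush_batch();
 */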
2531
2532 /*
2533 * Disable buffer reuse for objects which are shared with the kernel
2534 * as scanout buffers
2535 */
2536 int
2537 drm_bacon_bo_disable_reuse(drm_bacon_bo *bo)
2538 {
2539 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2540
2541 bo_gem->reusable = false;
2542 return 0;
2543 }
2544
2545 int
2546 drm_bacon_bo_is_reusable(drm_bacon_bo *bo)
2547 {
2548 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2549
2550 return bo_gem->reusable;
2551 }
2552
2553 static int
2554 _drm_bacon_gem_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2555 {
2556 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2557 int i;
2558
2559 for (i = 0; i < bo_gem->reloc_count; i++) {
2560 if (bo_gem->reloc_target_info[i].bo == target_bo)
2561 return 1;
2562 if (bo == bo_gem->reloc_target_info[i].bo)
2563 continue;
2564 if (_drm_bacon_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2565 target_bo))
2566 return 1;
2567 }
2568
2569 for (i = 0; i< bo_gem->softpin_target_count; i++) {
2570 if (bo_gem->softpin_target[i] == target_bo)
2571 return 1;
2572 if (_drm_bacon_gem_bo_references(bo_gem->softpin_target[i], target_bo))
2573 return 1;
2574 }
2575
2576 return 0;
2577 }
2578
2579 /** Return true if target_bo is referenced by bo's relocation tree. */
2580 int
2581 drm_bacon_bo_references(drm_bacon_bo *bo, drm_bacon_bo *target_bo)
2582 {
2583 drm_bacon_bo_gem *target_bo_gem = (drm_bacon_bo_gem *) target_bo;
2584
2585 if (bo == NULL || target_bo == NULL)
2586 return 0;
2587 if (target_bo_gem->used_as_reloc_target)
2588 return _drm_bacon_gem_bo_references(bo, target_bo);
2589 return 0;
2590 }
2591
2592 static void
2593 add_bucket(drm_bacon_bufmgr *bufmgr, int size)
2594 {
2595 unsigned int i = bufmgr->num_buckets;
2596
2597 assert(i < ARRAY_SIZE(bufmgr->cache_bucket));
2598
2599 list_inithead(&bufmgr->cache_bucket[i].head);
2600 bufmgr->cache_bucket[i].size = size;
2601 bufmgr->num_buckets++;
2602 }
2603
2604 static void
2605 init_cache_buckets(drm_bacon_bufmgr *bufmgr)
2606 {
2607 unsigned long size, cache_max_size = 64 * 1024 * 1024;
2608
2609 /* OK, so power of two buckets was too wasteful of memory.
2610 * Give 3 other sizes between each power of two, to hopefully
2611 * cover things accurately enough. (The alternative is
2612 * probably to just go for exact matching of sizes, and assume
2613 * that for things like composited window resize the tiled
2614 * width/height alignment and rounding of sizes to pages will
2615 * get us useful cache hit rates anyway)
2616 */
2617 add_bucket(bufmgr, 4096);
2618 add_bucket(bufmgr, 4096 * 2);
2619 add_bucket(bufmgr, 4096 * 3);
2620
2621 /* Initialize the linked lists for BO reuse cache. */
2622 for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2623 add_bucket(bufmgr, size);
2624
2625 add_bucket(bufmgr, size + size * 1 / 4);
2626 add_bucket(bufmgr, size + size * 2 / 4);
2627 add_bucket(bufmgr, size + size * 3 / 4);
2628 }
2629 }
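
/*
 * The buckets created above therefore cover, in bytes: 4096, 8192, 12288,
 * 16384, 20480, 24576, 28672, 32768, 40960, 49152, 57344, 65536, ... and so
 * on, doubling with three intermediate steps up to cache_max_size (64 MiB).
 */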
2630
2631 void
2632 drm_bacon_bufmgr_gem_set_vma_cache_size(drm_bacon_bufmgr *bufmgr, int limit)
2633 {
2634 bufmgr->vma_max = limit;
2635
2636 drm_bacon_gem_bo_purge_vma_cache(bufmgr);
2637 }
2638
2639 static int
2640 parse_devid_override(const char *devid_override)
2641 {
2642 static const struct {
2643 const char *name;
2644 int pci_id;
2645 } name_map[] = {
2646 { "brw", PCI_CHIP_I965_GM },
2647 { "g4x", PCI_CHIP_GM45_GM },
2648 { "ilk", PCI_CHIP_ILD_G },
2649 { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS },
2650 { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 },
2651 { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 },
2652 { "byt", PCI_CHIP_VALLEYVIEW_3 },
2653 { "bdw", 0x1620 | BDW_ULX },
2654 { "skl", PCI_CHIP_SKYLAKE_DT_GT2 },
2655 { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 },
2656 };
2657 unsigned int i;
2658
2659 for (i = 0; i < ARRAY_SIZE(name_map); i++) {
2660 if (!strcmp(name_map[i].name, devid_override))
2661 return name_map[i].pci_id;
2662 }
2663
2664 return strtod(devid_override, NULL);
2665 }
2666
2667 /**
2668 * Get the PCI ID for the device. This can be overridden by setting the
2669 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
2670 */
2671 static int
2672 get_pci_device_id(drm_bacon_bufmgr *bufmgr)
2673 {
2674 char *devid_override;
2675 int devid = 0;
2676 int ret;
2677 drm_i915_getparam_t gp;
2678
2679 if (geteuid() == getuid()) {
2680 devid_override = getenv("INTEL_DEVID_OVERRIDE");
2681 if (devid_override) {
2682 bufmgr->no_exec = true;
2683 return parse_devid_override(devid_override);
2684 }
2685 }
2686
2687 memclear(gp);
2688 gp.param = I915_PARAM_CHIPSET_ID;
2689 gp.value = &devid;
2690 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2691 if (ret) {
2692 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
2693 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
2694 }
2695 return devid;
2696 }
2697
2698 int
2699 drm_bacon_bufmgr_gem_get_devid(drm_bacon_bufmgr *bufmgr)
2700 {
2701 return bufmgr->pci_device;
2702 }
2703
2704 drm_bacon_context *
2705 drm_bacon_gem_context_create(drm_bacon_bufmgr *bufmgr)
2706 {
2707 struct drm_i915_gem_context_create create;
2708 drm_bacon_context *context = NULL;
2709 int ret;
2710
2711 context = calloc(1, sizeof(*context));
2712 if (!context)
2713 return NULL;
2714
2715 memclear(create);
2716 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2717 if (ret != 0) {
2718 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
2719 strerror(errno));
2720 free(context);
2721 return NULL;
2722 }
2723
2724 context->ctx_id = create.ctx_id;
2725 context->bufmgr = bufmgr;
2726
2727 return context;
2728 }
2729
2730 int
2731 drm_bacon_gem_context_get_id(drm_bacon_context *ctx, uint32_t *ctx_id)
2732 {
2733 if (ctx == NULL)
2734 return -EINVAL;
2735
2736 *ctx_id = ctx->ctx_id;
2737
2738 return 0;
2739 }
2740
2741 void
2742 drm_bacon_gem_context_destroy(drm_bacon_context *ctx)
2743 {
2744 struct drm_i915_gem_context_destroy destroy;
2745 int ret;
2746
2747 if (ctx == NULL)
2748 return;
2749
2750 memclear(destroy);
2751
2752 destroy.ctx_id = ctx->ctx_id;
2753 ret = drmIoctl(ctx->bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
2754 &destroy);
2755 if (ret != 0)
2756 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2757 strerror(errno));
2758
2759 free(ctx);
2760 }
2761
2762 int
2763 drm_bacon_get_reset_stats(drm_bacon_context *ctx,
2764 uint32_t *reset_count,
2765 uint32_t *active,
2766 uint32_t *pending)
2767 {
2768 struct drm_i915_reset_stats stats;
2769 int ret;
2770
2771 if (ctx == NULL)
2772 return -EINVAL;
2773
2774 memclear(stats);
2775
2776 stats.ctx_id = ctx->ctx_id;
2777 ret = drmIoctl(ctx->bufmgr->fd,
2778 DRM_IOCTL_I915_GET_RESET_STATS,
2779 &stats);
2780 if (ret == 0) {
2781 if (reset_count != NULL)
2782 *reset_count = stats.reset_count;
2783
2784 if (active != NULL)
2785 *active = stats.batch_active;
2786
2787 if (pending != NULL)
2788 *pending = stats.batch_pending;
2789 }
2790
2791 return ret;
2792 }
2793
2794 int
2795 drm_bacon_reg_read(drm_bacon_bufmgr *bufmgr,
2796 uint32_t offset,
2797 uint64_t *result)
2798 {
2799 struct drm_i915_reg_read reg_read;
2800 int ret;
2801
2802 memclear(reg_read);
2803 reg_read.offset = offset;
2804
2805 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
2806
2807 *result = reg_read.val;
2808 return ret;
2809 }
2810
2811 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
2812 static struct list_head bufmgr_list = { &bufmgr_list, &bufmgr_list };
2813
2814 static drm_bacon_bufmgr *
2815 drm_bacon_bufmgr_gem_find(int fd)
2816 {
2817 list_for_each_entry(drm_bacon_bufmgr,
2818 bufmgr, &bufmgr_list, managers) {
2819 if (bufmgr->fd == fd) {
2820 p_atomic_inc(&bufmgr->refcount);
2821 return bufmgr;
2822 }
2823 }
2824
2825 return NULL;
2826 }
2827
2828 void
2829 drm_bacon_bufmgr_destroy(drm_bacon_bufmgr *bufmgr)
2830 {
2831 if (atomic_add_unless(&bufmgr->refcount, -1, 1)) {
2832 pthread_mutex_lock(&bufmgr_list_mutex);
2833
2834 if (p_atomic_dec_zero(&bufmgr->refcount)) {
2835 list_del(&bufmgr->managers);
2836 drm_bacon_bufmgr_gem_destroy(bufmgr);
2837 }
2838
2839 pthread_mutex_unlock(&bufmgr_list_mutex);
2840 }
2841 }
2842
2843 void *drm_bacon_gem_bo_map__gtt(drm_bacon_bo *bo)
2844 {
2845 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2846 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2847
2848 if (bo_gem->gtt_virtual)
2849 return bo_gem->gtt_virtual;
2850
2851 if (bo_gem->is_userptr)
2852 return NULL;
2853
2854 pthread_mutex_lock(&bufmgr->lock);
2855 if (bo_gem->gtt_virtual == NULL) {
2856 struct drm_i915_gem_mmap_gtt mmap_arg;
2857 void *ptr;
2858
2859 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
2860 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2861
2862 if (bo_gem->map_count++ == 0)
2863 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2864
2865 memclear(mmap_arg);
2866 mmap_arg.handle = bo_gem->gem_handle;
2867
2868 /* Get the fake offset back... */
2869 ptr = MAP_FAILED;
2870 if (drmIoctl(bufmgr->fd,
2871 DRM_IOCTL_I915_GEM_MMAP_GTT,
2872 &mmap_arg) == 0) {
2873 /* and mmap it */
2874 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
2875 MAP_SHARED, bufmgr->fd,
2876 mmap_arg.offset);
2877 }
2878 if (ptr == MAP_FAILED) {
2879 if (--bo_gem->map_count == 0)
2880 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2881 ptr = NULL;
2882 }
2883
2884 bo_gem->gtt_virtual = ptr;
2885 }
2886 pthread_mutex_unlock(&bufmgr->lock);
2887
2888 return bo_gem->gtt_virtual;
2889 }
2890
2891 void *drm_bacon_gem_bo_map__cpu(drm_bacon_bo *bo)
2892 {
2893 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2894 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2895
2896 if (bo_gem->mem_virtual)
2897 return bo_gem->mem_virtual;
2898
2899 if (bo_gem->is_userptr) {
2900 /* Return the same user ptr */
2901 return bo_gem->user_virtual;
2902 }
2903
2904 pthread_mutex_lock(&bufmgr->lock);
2905 if (!bo_gem->mem_virtual) {
2906 struct drm_i915_gem_mmap mmap_arg;
2907
2908 if (bo_gem->map_count++ == 0)
2909 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2910
2911 DBG("bo_map: %d (%s), map_count=%d\n",
2912 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2913
2914 memclear(mmap_arg);
2915 mmap_arg.handle = bo_gem->gem_handle;
2916 mmap_arg.size = bo->size;
2917 if (drmIoctl(bufmgr->fd,
2918 DRM_IOCTL_I915_GEM_MMAP,
2919 &mmap_arg)) {
2920 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2921 __FILE__, __LINE__, bo_gem->gem_handle,
2922 bo_gem->name, strerror(errno));
2923 if (--bo_gem->map_count == 0)
2924 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2925 } else {
2926 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2927 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2928 }
2929 }
2930 pthread_mutex_unlock(&bufmgr->lock);
2931
2932 return bo_gem->mem_virtual;
2933 }
2934
2935 void *drm_bacon_gem_bo_map__wc(drm_bacon_bo *bo)
2936 {
2937 drm_bacon_bufmgr *bufmgr = bo->bufmgr;
2938 drm_bacon_bo_gem *bo_gem = (drm_bacon_bo_gem *) bo;
2939
2940 if (bo_gem->wc_virtual)
2941 return bo_gem->wc_virtual;
2942
2943 if (bo_gem->is_userptr)
2944 return NULL;
2945
2946 pthread_mutex_lock(&bufmgr->lock);
2947 if (!bo_gem->wc_virtual) {
2948 struct drm_i915_gem_mmap mmap_arg;
2949
2950 if (bo_gem->map_count++ == 0)
2951 drm_bacon_gem_bo_open_vma(bufmgr, bo_gem);
2952
2953 DBG("bo_map: %d (%s), map_count=%d\n",
2954 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
2955
2956 memclear(mmap_arg);
2957 mmap_arg.handle = bo_gem->gem_handle;
2958 mmap_arg.size = bo->size;
2959 mmap_arg.flags = I915_MMAP_WC;
2960 if (drmIoctl(bufmgr->fd,
2961 DRM_IOCTL_I915_GEM_MMAP,
2962 &mmap_arg)) {
2963 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
2964 __FILE__, __LINE__, bo_gem->gem_handle,
2965 bo_gem->name, strerror(errno));
2966 if (--bo_gem->map_count == 0)
2967 drm_bacon_gem_bo_close_vma(bufmgr, bo_gem);
2968 } else {
2969 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
2970 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
2971 }
2972 }
2973 pthread_mutex_unlock(&bufmgr->lock);
2974
2975 return bo_gem->wc_virtual;
2976 }
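
/*
 * Mapping sketch (illustrative): upload data through a write-combining CPU
 * map; data and data_size are hypothetical. The returned pointer is cached
 * on the bo, so these map__ helpers have no matching unmap call.
 *
 *	void *ptr = drm_bacon_gem_bo_map__wc(bo);
 *	if (ptr)
 *		memcpy(ptr, data, data_size);
 */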
2977
2978 /**
2979 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2980  * and manage buffer objects.
2981 *
2982 * \param fd File descriptor of the opened DRM device.
2983 */
2984 drm_bacon_bufmgr *
2985 drm_bacon_bufmgr_gem_init(int fd, int batch_size)
2986 {
2987 drm_bacon_bufmgr *bufmgr;
2988 struct drm_i915_gem_get_aperture aperture;
2989 drm_i915_getparam_t gp;
2990 int ret, tmp;
2991
2992 pthread_mutex_lock(&bufmgr_list_mutex);
2993
2994 bufmgr = drm_bacon_bufmgr_gem_find(fd);
2995 if (bufmgr)
2996 goto exit;
2997
2998 bufmgr = calloc(1, sizeof(*bufmgr));
2999 if (bufmgr == NULL)
3000 goto exit;
3001
3002 bufmgr->fd = fd;
3003 p_atomic_set(&bufmgr->refcount, 1);
3004
3005 if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
3006 free(bufmgr);
3007 bufmgr = NULL;
3008 goto exit;
3009 }
3010
3011 memclear(aperture);
3012 drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
3013 bufmgr->gtt_size = aperture.aper_available_size;
3014
3015 bufmgr->pci_device = get_pci_device_id(bufmgr);
3016
3017 if (IS_GEN4(bufmgr->pci_device))
3018 bufmgr->gen = 4;
3019 else if (IS_GEN5(bufmgr->pci_device))
3020 bufmgr->gen = 5;
3021 else if (IS_GEN6(bufmgr->pci_device))
3022 bufmgr->gen = 6;
3023 else if (IS_GEN7(bufmgr->pci_device))
3024 bufmgr->gen = 7;
3025 else if (IS_GEN8(bufmgr->pci_device))
3026 bufmgr->gen = 8;
3027 else if (IS_GEN9(bufmgr->pci_device))
3028 bufmgr->gen = 9;
3029 else {
3030 free(bufmgr);
3031 bufmgr = NULL;
3032 goto exit;
3033 }
3034
3035 memclear(gp);
3036 gp.value = &tmp;
3037
3038 gp.param = I915_PARAM_HAS_BSD;
3039 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3040 bufmgr->has_bsd = ret == 0;
3041
3042 gp.param = I915_PARAM_HAS_BLT;
3043 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3044 bufmgr->has_blt = ret == 0;
3045
3046 gp.param = I915_PARAM_HAS_EXEC_ASYNC;
3047 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3048 bufmgr->has_exec_async = ret == 0;
3049
3050 gp.param = I915_PARAM_HAS_LLC;
3051 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3052 if (ret != 0) {
3053		/* Kernel does not support the HAS_LLC query; fall back to GPU
3054		 * generation detection and assume that we have LLC on GEN6/7.
3055 */
3056 bufmgr->has_llc = (IS_GEN6(bufmgr->pci_device) |
3057 IS_GEN7(bufmgr->pci_device));
3058 } else
3059 bufmgr->has_llc = *gp.value;
3060
3061 gp.param = I915_PARAM_HAS_VEBOX;
3062 ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3063 bufmgr->has_vebox = (ret == 0) & (*gp.value > 0);
3064
3065 /* Let's go with one relocation per every 2 dwords (but round down a bit
3066 * since a power of two will mean an extra page allocation for the reloc
3067 * buffer).
3068 *
3069 * Every 4 was too few for the blender benchmark.
3070 */
3071 bufmgr->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
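	/* For example, a hypothetical 16 KiB batch gives
	 * 16384 / 4 / 2 - 2 = 2046 relocation slots.
	 */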
3072
3073 init_cache_buckets(bufmgr);
3074
3075 list_inithead(&bufmgr->vma_cache);
3076 bufmgr->vma_max = -1; /* unlimited by default */
3077
3078 list_add(&bufmgr->managers, &bufmgr_list);
3079
3080 exit:
3081 pthread_mutex_unlock(&bufmgr_list_mutex);
3082
3083 return bufmgr;
3084 }
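
/*
 * Initialization sketch (illustrative): bring up a buffer manager on an
 * already-opened DRM fd and enable BO reuse; BATCH_SZ is whatever batch
 * size the caller uses.
 *
 *	drm_bacon_bufmgr *bufmgr = drm_bacon_bufmgr_gem_init(fd, BATCH_SZ);
 *	if (!bufmgr)
 *		return;		// device not supported or out of memory
 *
 *	drm_bacon_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_bacon_context *ctx = drm_bacon_gem_context_create(bufmgr);
 */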